Skip to content

Commit

Permalink
IMPALA-11208: Fix uninitialized counter of CollectionItemsRead in orc-scanner
Browse files Browse the repository at this point in the history

CollectionItemsRead in the runtime profile counts the total number of
nested collection items read by the scan node. It is only created for
scans that support nested types, e.g. Parquet or ORC.

Each scanner thread maintains its own local counter and merges it into
the HdfsScanNode counter for each row batch. However, the local counter
in orc-scanner is uninitialized, leading to arbitrary values. This patch
simply initializes it to 0 and adds test coverage.

Tests:
Add profile verification for this counter to some existing query tests.
Note that there are some implementation differences between the Parquet
and ORC scanners (e.g. in predicate pushdown), so the counter results
differ for some queries. Only queries whose counters are consistent
across both scanners were picked.

Change-Id: Id7783d1460ac9b98e94d3a31028b43f5a9884f99
Reviewed-on: http://gerrit.cloudera.org:8080/18528
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
  • Loading branch information
stiga-huang authored and Impala Public Jenkins committed May 18, 2022
1 parent 1f89627 commit 6ea1540
Show file tree
Hide file tree
Showing 25 changed files with 73 additions and 19 deletions.
2 changes: 1 addition & 1 deletion be/src/exec/hdfs-orc-scanner.h
Expand Up @@ -299,7 +299,7 @@ class HdfsOrcScanner : public HdfsColumnarScanner {
/// used to reduce the frequency of updating HdfsScanNode counter. It is updated by the
/// callees of AssembleRows() and is merged into the HdfsScanNode counter at the end of
/// AssembleRows() and then is reset to 0.
int64_t coll_items_read_counter_;
int64_t coll_items_read_counter_ = 0;

const char *filename() const { return metadata_range_->file(); }

Expand Down
Expand Up @@ -15,6 +15,10 @@ select id, a.item from complextypestbl t, t.int_array a
8,-1
---- TYPES
bigint,int
---- RUNTIME_PROFILE
row_regex: .*CollectionItemsRead: 5 \(5\).*
row_regex: .*CollectionItemsRead: 1 \(1\).*
row_regex: .*CollectionItemsRead: 9 \(9\).*
====
---- QUERY
-- Materialize array (for now, may be optimized away someday)
Expand All @@ -32,6 +36,10 @@ NULL
-1
---- TYPES
int
---- RUNTIME_PROFILE
row_regex: .*CollectionItemsRead: 5 \(5\).*
row_regex: .*CollectionItemsRead: 1 \(1\).*
row_regex: .*CollectionItemsRead: 9 \(9\).*
====
---- QUERY
-- Materialize scalar and array
Expand Down
Expand Up @@ -15,6 +15,8 @@ from customer c, c.c_orders o, o.o_lineitems i
6001215
---- TYPES
bigint
---- RUNTIME_PROFILE
row_regex: .*CollectionItemsRead: 2.50M \(2500405\).*
====
---- QUERY
select count(l_linenumber)
Expand Down
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q1.test
Expand Up @@ -29,4 +29,6 @@ order by
'R','F',37719753.00,56568041380.90,53741292684.6040,55889619119.831932,25.505794,38250.854626,0.050009,1478870
---- TYPES
string, string, decimal, decimal, decimal, decimal, decimal, decimal, decimal, bigint
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
2 changes: 2 additions & 0 deletions testdata/workloads/tpch/queries/tpch-q10.test
Expand Up @@ -59,4 +59,6 @@ limit 20
23431,'Customer#000023431',554269.5360,3381.86,'ROMANIA','HgiV0phqhaIa9aydNoIlb','29-915-458-2654','nusual, even instructions: furiously stealthy n'
---- TYPES
bigint, string, decimal, decimal, string, string, string, string
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q11.test
Expand Up @@ -1085,4 +1085,6 @@ order by
5182,7874521.73
---- TYPES
BIGINT, decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q12.test
Expand Up @@ -34,4 +34,6 @@ order by
'SHIP',6200,9262
---- TYPES
string, bigint, bigint
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q13.test
Expand Up @@ -66,4 +66,6 @@ order by
39,1
---- TYPES
BIGINT, BIGINT
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q14.test
Expand Up @@ -18,4 +18,6 @@ where
16.380779
---- TYPES
decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
2 changes: 2 additions & 0 deletions testdata/workloads/tpch/queries/tpch-q15.test
Expand Up @@ -35,4 +35,6 @@ order by
8449,'Supplier#000008449','Wp34zim9qYFbVctdW','20-469-856-8873',1772627.2087
---- TYPES
BIGINT, STRING, STRING, STRING, DECIMAL
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q16.test
Expand Up @@ -18348,4 +18348,6 @@ order by
'Brand#55','STANDARD PLATED TIN',49,3
---- TYPES
STRING, STRING, INT, BIGINT
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q17.test
Expand Up @@ -22,4 +22,6 @@ where
348406.054286
---- TYPES
decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
2 changes: 2 additions & 0 deletions testdata/workloads/tpch/queries/tpch-q18.test
Expand Up @@ -97,4 +97,6 @@ limit 100
'Customer#000088703',88703,2995076,'1994-01-30',363812.12,302.00
---- TYPES
STRING, BIGINT, BIGINT, STRING, DECIMAL, DECIMAL
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q19.test
Expand Up @@ -40,4 +40,6 @@ where
3083843.0578
---- TYPES
decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q2.test
Expand Up @@ -148,4 +148,6 @@ limit 100
7843.52,'Supplier#000006683','FRANCE',11680,'Manufacturer#4','2Z0JGkiv01Y00oCFwUGfviIbhzCdy','16-464-517-8943',' express, final pinto beans x-ray slyly asymptotes. unusual, unusual'
---- TYPES
DECIMAL, STRING, STRING, BIGINT, STRING, STRING, STRING, STRING
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q20.test
Expand Up @@ -226,4 +226,6 @@ order by
'Supplier#000009974','7wJ,J5DKcxSU4Kp1cQLpbcAvB5AsvKT'
---- TYPES
string, string
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q21.test
Expand Up @@ -144,4 +144,6 @@ limit 100
'Supplier#000002483',12
---- TYPES
string, bigint
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q22.test
Expand Up @@ -45,4 +45,6 @@ order by
'31',922,6806670.18
---- TYPES
string, bigint, decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
2 changes: 2 additions & 0 deletions testdata/workloads/tpch/queries/tpch-q3.test
Expand Up @@ -39,4 +39,6 @@ limit 10
2300070,367371.1452,'1995-03-13',0
---- TYPES
BIGINT, DECIMAL, STRING, INT
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q4.test
Expand Up @@ -30,4 +30,6 @@ order by
'5-LOW',10487
---- TYPES
string, bigint
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q5.test
Expand Up @@ -33,4 +33,6 @@ order by
'JAPAN',45410175.6954
---- TYPES
string, decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q6.test
Expand Up @@ -14,4 +14,6 @@ where
123141078.2283
---- TYPES
decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q7.test
Expand Up @@ -46,4 +46,6 @@ order by
'GERMANY','FRANCE',1996,52520549.0224
---- TYPES
STRING, STRING, INT, DECIMAL
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q8.test
Expand Up @@ -43,4 +43,6 @@ order by
1996,0.041486
---- TYPES
int, decimal
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====
4 changes: 3 additions & 1 deletion testdata/workloads/tpch/queries/tpch-q9.test
Expand Up @@ -210,4 +210,6 @@ order by
'VIETNAM',1992,47846355.6485
---- TYPES
STRING, INT, DECIMAL
====
---- RUNTIME_PROFILE
!row_regex: .*CollectionItemsRead: [^0].*
====

0 comments on commit 6ea1540

Please sign in to comment.