apache · vineetgarg02 · Jul 17, 2019 · Aug 6, 2019 · jcamachor · Mar 1, 2020
diff --git a/...g/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java b/...g/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java
@@ -119,7 +119,7 @@ private static List<RexNode> rewriteProjects(List<RexNode> projects,
         case IS_NULL:
           conditions.put(conjCall.operands.get(0).toString(),
                   relBuilder.getRexBuilder().makeNullLiteral(
-                          conjCall.operands.get(0).getType().getSqlTypeName()));
+                          conjCall.operands.get(0).getType()));
       }
     }
 

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -243,6 +243,7 @@ public static ASTNode literal(RexLiteral literal) {
     case INTERVAL_SECOND:
     case INTERVAL_YEAR:
     case INTERVAL_YEAR_MONTH:
+    case ROW:
       if (literal.getValue() == null) {
         return ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node();
       }
@@ -364,8 +365,9 @@ public static ASTNode literal(RexLiteral literal) {
       type = HiveParser.TOK_NULL;
       break;
 
-    //binary type should not be seen.
+    // BINARY and ROW types should not be seen.
     case BINARY:
+    case ROW:
     default:
       throw new RuntimeException("Unsupported Type: " + sqlType);
     }

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
@@ -160,7 +160,8 @@ public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory)
       convertedType = convert((UnionTypeInfo) type, dtFactory);
       break;
     }
-    return convertedType;
+    // Hive has no concept of non-nullable types, so always return a nullable type.
+    return dtFactory.createTypeWithNullability(convertedType, true);
   }
 
   public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {

diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q
@@ -21,3 +21,19 @@ IN (
 struct('1234-1111-0074578664','3'),
 struct('1234-1111-0074578695',1)
 );
+
+CREATE TABLE test_struct
+(
+  f1 string,
+  demo_struct struct<f1:string, f2:string, f3:string>,
+  datestr string
+);
+
+insert into test_struct values('s1', named_struct('f1','1', 'f2','2', 'f3','3'), '02-02-2020');
+insert into test_struct values('s2', named_struct('f1',cast(null as string),'f2', cast(null as string),'f3', cast(null as string)), '02-02-2020');
+insert into test_struct values('s4', named_struct('f1','100', 'f2','200', 'f3','300'), '02-02-2020');
+
+explain select * from test_struct where datestr='02-02-2020' and demo_struct is not null;
+select * from test_struct where datestr='02-02-2020' and demo_struct is not null;
+
+DROP TABLE test_struct;
diff --git a/ql/src/test/results/clientpositive/intersect_all_rj.q.out b/ql/src/test/results/clientpositive/intersect_all_rj.q.out
@@ -178,16 +178,15 @@ HiveProject($f0=[$1])
             HiveUnion(all=[true])
               HiveProject($f0=[$0], $f1=[$1])
                 HiveAggregate(group=[{0}], agg#0=[count()])
-                  HiveProject($f0=[$0])
-                    HiveAggregate(group=[{0}])
-                      HiveProject($f0=[CASE(IS NOT NULL($3), $3, if($1, $4, $2))])
-                        HiveJoin(condition=[>=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
-                          HiveProject(bigint_col_3=[$1])
-                            HiveFilter(condition=[IS NOT NULL($1)])
-                              HiveTableScan(table=[[default, table_7]], table:alias=[a3])
-                          HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], CAST=[CAST($4):BIGINT])
-                            HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)])
-                              HiveTableScan(table=[[default, table_10]], table:alias=[a4])
+                  HiveProject(CASE=[$0])
+                    HiveAggregate(group=[{1}])
+                      HiveJoin(condition=[>=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                        HiveProject(bigint_col_3=[$1])
+                          HiveFilter(condition=[IS NOT NULL($1)])
+                            HiveTableScan(table=[[default, table_7]], table:alias=[a3])
+                        HiveProject(CASE=[CASE(IS NOT NULL($2), $2, if($0, $3, $1))], CAST=[CAST($4):BIGINT])
+                          HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)])
+                            HiveTableScan(table=[[default, table_10]], table:alias=[a4])
               HiveProject($f0=[$0], $f1=[$1])
                 HiveAggregate(group=[{0}], agg#0=[count()])
                   HiveProject($f0=[$0])

diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -4028,13 +4028,13 @@ STAGE PLANS:
                     Select Operator
                       expressions: p_name (type: string), p_type (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: string)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -4087,10 +4087,10 @@ STAGE PLANS:
                 keys:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col4
+                outputColumnNames: _col0, _col3
                 Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col0 (type: string), _col4 (type: int)
+                  expressions: _col0 (type: string), _col3 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator

diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out
@@ -172,13 +172,13 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0, 1]
-                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Left Outer Join 0 to 1
                         keys:
                           0 _col0 (type: bigint)
-                          1 _col3 (type: bigint)
+                          1 UDFToLong(_col2) (type: bigint)
                         Map Join Vectorization:
                             bigTableKeyColumns: 0:bigint
                             bigTableRetainColumnNums: [1]
@@ -189,7 +189,7 @@ STAGE PLANS:
                             projectedOutput: 1:int, 4:int, 5:smallint, 6:int
                             smallTableValueMapping: 4:int, 5:smallint, 6:int
                             hashTableImplementationType: OPTIMIZED
-                        outputColumnNames: _col1, _col2, _col3, _col4
+                        outputColumnNames: _col1, _col3, _col4, _col5
                         input vertices:
                           1 Map 3
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
@@ -198,15 +198,16 @@ STAGE PLANS:
                               className: VectorFilterOperator
                               native: true
                               predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 6:int, val 10), FilterLongColEqualLongScalar(col 6:bigint, val 571)(children: col 6:int))
-                          predicate: ((_col1 = 5) or (_col4 = 10) or (UDFToLong(_col4) = 571L)) (type: boolean)
+                          predicate: ((_col1 = 5) or (_col5 = 10) or (UDFToLong(_col5) = 571L)) (type: boolean)
                           Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                           Select Operator
-                            expressions: _col2 (type: int), _col3 (type: smallint), _col4 (type: int)
+                            expressions: _col3 (type: int), _col5 (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (UDFToInteger(_col4)) END (type: int)
                             outputColumnNames: _col0, _col1, _col2
                             Select Vectorization:
                                 className: VectorSelectOperator
                                 native: true
-                                projectedOutputColumnNums: [4, 5, 6]
+                                projectedOutputColumnNums: [4, 6, 8]
+                                selectExpressions: IfExprColumnCondExpr(col 7:boolean, col 4:intcol 5:smallint)(children: IsNotNull(col 4:int) -> 7:boolean, col 4:int, col 5:smallint) -> 8:int
                             Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                             Map Join Operator
                               condition map:
@@ -215,13 +216,13 @@ STAGE PLANS:
                                 0 
                                 1 
                               Map Join Vectorization:
-                                  bigTableRetainColumnNums: [4, 5, 6]
-                                  bigTableValueColumns: 4:int, 5:smallint, 6:int
+                                  bigTableRetainColumnNums: [4, 6, 8]
+                                  bigTableValueColumns: 4:int, 6:int, 8:int
                                   className: VectorMapJoinInnerBigOnlyMultiKeyOperator
                                   native: true
                                   nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                                   nonOuterSmallTableKeyMapping: []
-                                  projectedOutput: 4:int, 5:smallint, 6:int
+                                  projectedOutput: 4:int, 6:int, 8:int
                                   hashTableImplementationType: OPTIMIZED
                               outputColumnNames: _col0, _col1, _col2
                               input vertices:
@@ -238,13 +239,12 @@ STAGE PLANS:
                                     keyExpressions: col 4:int
                                     native: true
                                 Select Operator
-                                  expressions: _col0 (type: int), CASE WHEN (_col0 is not null) THEN (_col0) ELSE (UDFToInteger(_col1)) END (type: int), _col2 (type: int)
+                                  expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int)
                                   outputColumnNames: _col0, _col1, _col2
                                   Select Vectorization:
                                       className: VectorSelectOperator
                                       native: true
                                       projectedOutputColumnNums: [4, 8, 6]
-                                      selectExpressions: IfExprColumnCondExpr(col 7:boolean, col 4:intcol 5:smallint)(children: IsNotNull(col 4:int) -> 7:boolean, col 4:int, col 5:smallint) -> 8:int
                                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                                   Reduce Output Operator
                                     key expressions: _col0 (type: int)
@@ -293,26 +293,26 @@ STAGE PLANS:
                     predicate: UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: int_col_5 (type: int), smallint_col_22 (type: smallint), CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END (type: int), UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) (type: bigint)
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      expressions: int_col_5 (type: int), smallint_col_22 (type: smallint), CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END (type: int)
+                      outputColumnNames: _col0, _col1, _col2
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [1, 0, 6, 8]
-                          selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 5:boolean, col 1:int, col 0:smallint) -> 6:int, IfExprColumnCondExpr(col 7:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 7:boolean, col 1:int, col 0:smallint) -> 8:int
-                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                          projectedOutputColumnNums: [1, 0, 6]
+                          selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 5:boolean, col 1:int, col 0:smallint) -> 6:int
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col3 (type: bigint)
+                        key expressions: UDFToLong(_col2) (type: bigint)
                         null sort order: z
                         sort order: +
-                        Map-reduce partition columns: _col3 (type: bigint)
+                        Map-reduce partition columns: UDFToLong(_col2) (type: bigint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumns: 8:bigint
+                            keyColumns: 6:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: 1:int, 0:smallint, 6:int
-                        Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -330,7 +330,7 @@ STAGE PLANS:
                     includeColumns: [0, 1]
                     dataColumns: smallint_col_22:smallint, int_col_5:int
                     partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint]
+                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint]
         Map 4 
             Map Operator Tree:
                 TableScan