From c41a81be21b0e7d719ee69893bba4446118076cf Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sat, 18 Apr 2026 18:08:16 +0530 Subject: [PATCH 01/16] HIVE-29551: Avoid quadratic runtime in ColumnStatsSemanticAnalyzer#getColumnTypes --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index ee80fc475299..9f59f3ed466a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -220,24 +221,29 @@ private static String getColTypeOf(Table tbl, String partKey) { protected static List getColumnTypes(Table tbl, List colNames) { List colTypes = new ArrayList<>(); List cols = tbl.getCols(); - List copyColNames = new ArrayList<>(colNames); - - for (String colName : copyColNames) { - for (FieldSchema col : cols) { - if (colName.equalsIgnoreCase(col.getName())) { - String type = col.getType(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); - boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); - if (!isSupported) { - logTypeWarning(colName, type); - colNames.remove(colName); - } else { - colTypes.add(type); - } + Map colTypeMap = new HashMap<>(); + + for (FieldSchema col : cols) { + colTypeMap.put(col.getName().toLowerCase(), col.getType()); + } + + List nonPrimColNames = new ArrayList<>(); + for (String colName : colNames) { + String type = colTypeMap.get(colName.toLowerCase()); + if (type != null) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); + if (!isSupported) { + logTypeWarning(colName, type); + } else { + nonPrimColNames.add(colName); + colTypes.add(type); } } } + colNames.clear(); + colNames.addAll(nonPrimColNames); return colTypes; } From 6ddb7ea74c1e73e865b5c43f9a7c1036333fe0be Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 19 Apr 2026 13:03:02 +0530 Subject: [PATCH 02/16] Update the wrong column name used --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 9f59f3ed466a..deb980c56cbf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -227,7 +227,7 @@ protected static List getColumnTypes(Table tbl, List colNames) { colTypeMap.put(col.getName().toLowerCase(), col.getType()); } - List nonPrimColNames = new ArrayList<>(); + List primColNames = new ArrayList<>(); for (String colName : colNames) { String type = colTypeMap.get(colName.toLowerCase()); if (type != null) { @@ -236,14 +236,14 @@ protected static List getColumnTypes(Table tbl, List colNames) { if (!isSupported) { logTypeWarning(colName, type); } else { - nonPrimColNames.add(colName); + primColNames.add(colName); colTypes.add(type); } } } colNames.clear(); - colNames.addAll(nonPrimColNames); + colNames.addAll(primColNames); return colTypes; } From be572d3a73930375491db0063890e1f0dcc7726e Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 26 Apr 2026 13:17:51 +0530 Subject: [PATCH 03/16] Refactor code to incorporate logic for different ast children values --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 80 ++++++++++++------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index deb980c56cbf..3f9c4489adc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,38 +103,53 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } + private static final class StatsEligibleColumns { + private final List columnNames; + private final List columnTypes; + + private StatsEligibleColumns(List columnNames, List columnTypes) { + this.columnNames = columnNames; + this.columnTypes = columnTypes; + } + + List getColumnNames() { + return columnNames; + } + + List getColumnTypes() { + return columnTypes; + } + } + /** - * Get the names of the columns that support column statistics. + * Get the names and types of the columns that support column statistics. */ - private static List getColumnNamesSupportingStats(Table tbl) { + private static StatsEligibleColumns getStatsEligibleColumns(Table tbl) { List colNames = new ArrayList<>(); + List colTypes = new ArrayList<>(); for (FieldSchema col : tbl.getCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (isSupported) { colNames.add(col.getName()); + colTypes.add(col.getType()); } } - return colNames; + return new StatsEligibleColumns(colNames, colTypes); } private List getColumnName(ASTNode tree) throws SemanticException { - - switch (tree.getChildCount()) { - case 2: - return getColumnNamesSupportingStats(tbl); - case 3: - int numCols = tree.getChild(2).getChildCount(); - List colName = new ArrayList<>(numCols); - for (int i = 0; i < numCols; i++) { - colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))); - } - return colName; - default: + if (tree.getChildCount() != 3) { throw new SemanticException("Internal error. Expected number of children of ASTNode to be" + " either 2 or 3. Found : " + tree.getChildCount()); } + int numCols = tree.getChild(2).getChildCount(); + List colName = new ArrayList<>(numCols); + for (int i = 0; i < numCols; i++) { + colName.add(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))); + } + return colName; } private void handlePartialPartitionSpec(Map partSpec, ColumnStatsAutoGatherContext context) throws @@ -218,7 +233,7 @@ private static String getColTypeOf(Table tbl, String partKey) { throw new RuntimeException("Unknown partition key : " + partKey); } - protected static List getColumnTypes(Table tbl, List colNames) { + protected static List getColumnTypesByName(Table tbl, List colNames) { List colTypes = new ArrayList<>(); List cols = tbl.getCols(); Map colTypeMap = new HashMap<>(); @@ -263,10 +278,10 @@ private String genRewrittenQuery(List colNames, List colTypes, H protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - List colNames = getColumnNamesSupportingStats(tbl); - List colTypes = ColumnStatsSemanticAnalyzer.getColumnTypes(tbl, colNames); + StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, colNames, colTypes, conf, partTransformSpec, -1, partSpec, isPartitionStats, true); + tbl, statsCols.getColumnNames(), statsCols.getColumnTypes(), conf, partTransformSpec, -1, partSpec, + isPartitionStats, true); } private static String genRewrittenQuery(Table tbl, List colNames, List colTypes, @@ -640,7 +655,13 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { */ if (shouldRewrite(ast)) { tbl = AnalyzeCommandUtils.getTable(ast, this); - colNames = getColumnName(ast); + StatsEligibleColumns statsCols = null; + if (ast.getChildCount() == 2) { + statsCols = getStatsEligibleColumns(tbl); + colNames = statsCols.getColumnNames(); + } else { + colNames = getColumnName(ast); + } // Save away the original AST originalTree = ast; boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) @@ -659,7 +680,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = getColumnTypes(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -715,7 +736,13 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) tbl = AnalyzeCommandUtils.getTable(ast, this); - colNames = getColumnName(ast); + StatsEligibleColumns statsCols = null; + if (ast.getChildCount() == 2) { + statsCols = getStatsEligibleColumns(tbl); + colNames = statsCols.getColumnNames(); + } else { + colNames = getColumnName(ast); + } boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); @@ -732,7 +759,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = getColumnTypes(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -755,10 +782,9 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - List colNames = getColumnNamesSupportingStats(tbl); - List colTypes = getColumnTypes(tbl, colNames); - analyzeRewrite.setColName(colNames); - analyzeRewrite.setColType(colTypes); + StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); + analyzeRewrite.setColName(statsCols.getColumnNames()); + analyzeRewrite.setColType(statsCols.getColumnTypes()); return analyzeRewrite; } From 699be1e4ae57d6893c4120edf744fc381287af2d Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 27 Apr 2026 23:18:04 +0530 Subject: [PATCH 04/16] Fix sonarqube issue - 1 --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 31 +++++-------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 3f9c4489adc0..428e16090539 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,22 +103,7 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } - private static final class StatsEligibleColumns { - private final List columnNames; - private final List columnTypes; - - private StatsEligibleColumns(List columnNames, List columnTypes) { - this.columnNames = columnNames; - this.columnTypes = columnTypes; - } - - List getColumnNames() { - return columnNames; - } - - List getColumnTypes() { - return columnTypes; - } + private record StatsEligibleColumns(List columnNames, List columnTypes) { } /** @@ -280,7 +265,7 @@ protected static String genRewrittenQuery(Table tbl, boolean isPartitionStats) { StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, statsCols.getColumnNames(), statsCols.getColumnTypes(), conf, partTransformSpec, -1, partSpec, + tbl, statsCols.columnNames(), statsCols.columnTypes(), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } @@ -658,7 +643,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { StatsEligibleColumns statsCols = null; if (ast.getChildCount() == 2) { statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.getColumnNames(); + colNames = statsCols.columnNames(); } else { colNames = getColumnName(ast); } @@ -680,7 +665,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -739,7 +724,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) StatsEligibleColumns statsCols = null; if (ast.getChildCount() == 2) { statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.getColumnNames(); + colNames = statsCols.columnNames(); } else { colNames = getColumnName(ast); } @@ -759,7 +744,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.getColumnTypes() : getColumnTypesByName(tbl, colNames); + colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -783,8 +768,8 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); - analyzeRewrite.setColName(statsCols.getColumnNames()); - analyzeRewrite.setColType(statsCols.getColumnTypes()); + analyzeRewrite.setColName(statsCols.columnNames()); + analyzeRewrite.setColType(statsCols.columnTypes()); return analyzeRewrite; } From 7941487e880b47b4407fea8bc25cbde6a1204f57 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 27 Apr 2026 23:34:46 +0530 Subject: [PATCH 05/16] Fix sonarqube issue - 2 --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 428e16090539..14c88d5763f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -665,7 +665,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + colType = genRewrittenColumnTypes(ast, statsCols); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -744,7 +744,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = ast.getChildCount() == 2 ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + colType = genRewrittenColumnTypes(ast, statsCols); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -773,6 +773,10 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) return analyzeRewrite; } + private List genRewrittenColumnTypes(ASTNode ast, StatsEligibleColumns statsCols) { + return (ast.getChildCount() == 2) ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); + } + @Override public void setQueryType(ASTNode tree) { queryProperties.setQueryType(QueryProperties.QueryType.STATS); From c903dbbb0f26e1430c16f90c5277f9d303668d1c Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 00:41:31 +0530 Subject: [PATCH 06/16] Refactor code to address review comments --- .../apache/hadoop/hive/ql/exec/Utilities.java | 8 ++ .../ql/parse/ColumnStatsSemanticAnalyzer.java | 110 ++++++++---------- 2 files changed, 58 insertions(+), 60 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 37e91652fb88..be5ebaf70e36 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2286,6 +2286,14 @@ public static List getColumnNamesFromFieldSchema(List partC return names; } + public static List getColumnTypesFromFieldSchema(List fieldSchemas) { + List types = new ArrayList(); + for (FieldSchema fs : fieldSchemas) { + types.add(fs.getType()); + } + return types; + } + public static List getInternalColumnNamesFromSignature(List colInfos) { List names = new ArrayList(); for (ColumnInfo ci : colInfos) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 14c88d5763f7..c9dfadb6ae75 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -103,31 +103,26 @@ private boolean shouldRewrite(ASTNode tree) { return rwt; } - private record StatsEligibleColumns(List columnNames, List columnTypes) { - } - /** - * Get the names and types of the columns that support column statistics. + * Get the Field Schemas of the columns that support column statistics. */ - private static StatsEligibleColumns getStatsEligibleColumns(Table tbl) { - List colNames = new ArrayList<>(); - List colTypes = new ArrayList<>(); + private static List getStatsEligibleFieldSchemas(Table tbl) { + List result = new ArrayList<>(); for (FieldSchema col : tbl.getCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (isSupported) { - colNames.add(col.getName()); - colTypes.add(col.getType()); + result.add(col); } } - return new StatsEligibleColumns(colNames, colTypes); + return result; } - private List getColumnName(ASTNode tree) throws SemanticException { + private List getExplicitColumnNamesFromAst(ASTNode tree) throws SemanticException { if (tree.getChildCount() != 3) { - throw new SemanticException("Internal error. Expected number of children of ASTNode to be" - + " either 2 or 3. Found : " + tree.getChildCount()); + throw new SemanticException("Internal error. Expected number of children of ASTNode should be 3. Found : " + + tree.getChildCount()); } int numCols = tree.getChild(2).getChildCount(); List colName = new ArrayList<>(numCols); @@ -218,33 +213,27 @@ private static String getColTypeOf(Table tbl, String partKey) { throw new RuntimeException("Unknown partition key : " + partKey); } - protected static List getColumnTypesByName(Table tbl, List colNames) { - List colTypes = new ArrayList<>(); + protected static List getFieldSchemasByColName(Table tbl, List colNames) { List cols = tbl.getCols(); - Map colTypeMap = new HashMap<>(); - + Map colFsMap = new HashMap<>(); for (FieldSchema col : cols) { - colTypeMap.put(col.getName().toLowerCase(), col.getType()); + colFsMap.put(col.getName().toLowerCase(), col); } - - List primColNames = new ArrayList<>(); + List result = new ArrayList<>(); for (String colName : colNames) { - String type = colTypeMap.get(colName.toLowerCase()); - if (type != null) { + FieldSchema fs = colFsMap.get(colName.toLowerCase()); + if (fs != null) { + String type = fs.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); if (!isSupported) { logTypeWarning(colName, type); } else { - primColNames.add(colName); - colTypes.add(type); + result.add(fs); } } } - - colNames.clear(); - colNames.addAll(primColNames); - return colTypes; + return result; } private String genRewrittenQuery(List colNames, List colTypes, HiveConf conf, @@ -263,9 +252,10 @@ private String genRewrittenQuery(List colNames, List colTypes, H protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); + List columnSchemas = getStatsEligibleFieldSchemas(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, statsCols.columnNames(), statsCols.columnTypes(), conf, partTransformSpec, -1, partSpec, + tbl, Utilities.getColumnNamesFromFieldSchema(columnSchemas), + Utilities.getColumnTypesFromFieldSchema(columnSchemas), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } @@ -640,14 +630,6 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { */ if (shouldRewrite(ast)) { tbl = AnalyzeCommandUtils.getTable(ast, this); - StatsEligibleColumns statsCols = null; - if (ast.getChildCount() == 2) { - statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.columnNames(); - } else { - colNames = getColumnName(ast); - } - // Save away the original AST originalTree = ast; boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); @@ -655,9 +637,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { Map> partTransformSpecs = Collections.singletonMap(-1, null); Map partSpec = (isPartitionStats) ? AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf) : null; - checkForPartitionColumns( - colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + + List columnSchemas = getColumns(ast); if (isPartitionStats) { handlePartialPartitionSpec(partSpec, null); @@ -665,7 +646,8 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colType = genRewrittenColumnTypes(ast, statsCols); + colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); + colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -721,21 +703,13 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) tbl = AnalyzeCommandUtils.getTable(ast, this); - StatsEligibleColumns statsCols = null; - if (ast.getChildCount() == 2) { - statsCols = getStatsEligibleColumns(tbl); - colNames = statsCols.columnNames(); - } else { - colNames = getColumnName(ast); - } boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast) || StatsUtils.isPartitionStats(tbl, conf); List partTransformSpec = null; Map partSpec = null; - checkForPartitionColumns(colNames, - Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + + List columnSchemas = getColumns(ast); if (isPartitionStats) { partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); @@ -744,7 +718,8 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colType = genRewrittenColumnTypes(ast, statsCols); + colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); + colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, @@ -754,6 +729,25 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) return rewrittenTree; } + protected List getColumns(ASTNode ast) throws SemanticException { + List statsEligibleFS = null; + List colNames; + if (ast.getChildCount() == 2) { + statsEligibleFS = getStatsEligibleFieldSchemas(tbl); + colNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); + } else{ + colNames = getExplicitColumnNamesFromAst(ast); + } + + checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(colNames); + + if (statsEligibleFS != null) { + return statsEligibleFS; + } + return getFieldSchemasByColName(tbl, colNames); + } + AnalyzeRewriteContext getAnalyzeRewriteContext() { AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); @@ -767,16 +761,12 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - StatsEligibleColumns statsCols = getStatsEligibleColumns(tbl); - analyzeRewrite.setColName(statsCols.columnNames()); - analyzeRewrite.setColType(statsCols.columnTypes()); + List columnSchemas = getStatsEligibleFieldSchemas(tbl); + analyzeRewrite.setColName(Utilities.getColumnNamesFromFieldSchema(columnSchemas)); + analyzeRewrite.setColType(Utilities.getColumnTypesFromFieldSchema(columnSchemas)); return analyzeRewrite; } - private List genRewrittenColumnTypes(ASTNode ast, StatsEligibleColumns statsCols) { - return (ast.getChildCount() == 2) ? statsCols.columnTypes() : getColumnTypesByName(tbl, colNames); - } - @Override public void setQueryType(ASTNode tree) { queryProperties.setQueryType(QueryProperties.QueryType.STATS); From cc7cee36efe95a74b464b7737672830ac277a8f7 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 03:33:31 +0530 Subject: [PATCH 07/16] Fix SonarQube issue - 3 --- .../apache/hadoop/hive/ql/exec/Utilities.java | 2 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index be5ebaf70e36..7fa8f3557722 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2287,7 +2287,7 @@ public static List getColumnNamesFromFieldSchema(List partC } public static List getColumnTypesFromFieldSchema(List fieldSchemas) { - List types = new ArrayList(); + List types = new ArrayList<>(); for (FieldSchema fs : fieldSchemas) { types.add(fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index c9dfadb6ae75..d999712fe4a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -638,7 +638,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { Map partSpec = (isPartitionStats) ? AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf) : null; - List columnSchemas = getColumns(ast); + List columnSchemas = getColumnsFromAst(ast); if (isPartitionStats) { handlePartialPartitionSpec(partSpec, null); @@ -709,7 +709,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) List partTransformSpec = null; Map partSpec = null; - List columnSchemas = getColumns(ast); + List columnSchemas = getColumnsFromAst(ast); if (isPartitionStats) { partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf); @@ -729,23 +729,23 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) return rewrittenTree; } - protected List getColumns(ASTNode ast) throws SemanticException { + protected List getColumnsFromAst(ASTNode ast) throws SemanticException { List statsEligibleFS = null; - List colNames; + List columnNames; if (ast.getChildCount() == 2) { statsEligibleFS = getStatsEligibleFieldSchemas(tbl); - colNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); + columnNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); } else{ - colNames = getExplicitColumnNamesFromAst(ast); + columnNames = getExplicitColumnNamesFromAst(ast); } - checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); + checkForPartitionColumns(columnNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(columnNames); if (statsEligibleFS != null) { return statsEligibleFS; } - return getFieldSchemasByColName(tbl, colNames); + return getFieldSchemasByColName(tbl, columnNames); } AnalyzeRewriteContext getAnalyzeRewriteContext() { From 06479a40d2e868ec08be57d26049baa9c2a12657 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 29 Apr 2026 12:13:00 +0530 Subject: [PATCH 08/16] Fix for column names being lowercased --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index d999712fe4a4..6fde2b5980b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -229,7 +229,7 @@ protected static List getFieldSchemasByColName(Table tbl, List Date: Mon, 11 May 2026 17:02:59 +0530 Subject: [PATCH 09/16] Fix formatting issues --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java | 6 +----- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 7 ++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 7fa8f3557722..a29e532b113b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -2287,11 +2287,7 @@ public static List getColumnNamesFromFieldSchema(List partC } public static List getColumnTypesFromFieldSchema(List fieldSchemas) { - List types = new ArrayList<>(); - for (FieldSchema fs : fieldSchemas) { - types.add(fs.getType()); - } - return types; + return fieldSchemas.stream().map(FieldSchema::getType).toList(); } public static List getInternalColumnNamesFromSignature(List colInfos) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 6fde2b5980b9..a7b3d1594dc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -735,17 +735,14 @@ protected List getColumnsFromAst(ASTNode ast) throws SemanticExcept if (ast.getChildCount() == 2) { statsEligibleFS = getStatsEligibleFieldSchemas(tbl); columnNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); - } else{ + } else { columnNames = getExplicitColumnNamesFromAst(ast); } checkForPartitionColumns(columnNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); validateSpecifiedColumnNames(columnNames); - if (statsEligibleFS != null) { - return statsEligibleFS; - } - return getFieldSchemasByColName(tbl, columnNames); + return statsEligibleFS != null ? statsEligibleFS : getFieldSchemasByColName(tbl, columnNames); } AnalyzeRewriteContext getAnalyzeRewriteContext() { From a269464e8e49570a18125abddfc12c3102f933ca Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 11 May 2026 17:08:43 +0530 Subject: [PATCH 10/16] Update genRewrittenQuery to use ColumnSchemas --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index a7b3d1594dc6..a195be982bba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -236,10 +236,10 @@ protected static List getFieldSchemasByColName(Table tbl, List colNames, List colTypes, HiveConf conf, + private String genRewrittenQuery(List columnSchemas, HiveConf conf, List partTransformSpec, int specId, Map partSpec, boolean isPartitionStats) { - String rewritten = genRewrittenQuery(tbl, colNames, colTypes, conf, partTransformSpec, specId, partSpec, + String rewritten = genRewrittenQuery(tbl, columnSchemas, conf, partTransformSpec, specId, partSpec, isPartitionStats, false); isRewritten = true; return rewritten; @@ -252,29 +252,27 @@ private String genRewrittenQuery(List colNames, List colTypes, H protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - List columnSchemas = getStatsEligibleFieldSchemas(tbl); return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, Utilities.getColumnNamesFromFieldSchema(columnSchemas), - Utilities.getColumnTypesFromFieldSchema(columnSchemas), conf, partTransformSpec, -1, partSpec, - isPartitionStats, true); + tbl, getStatsEligibleFieldSchemas(tbl), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } - private static String genRewrittenQuery(Table tbl, List colNames, List colTypes, + private static String genRewrittenQuery(Table tbl, List columnSchemas, HiveConf conf, List partTransformSpec, int specId, Map partSpec, boolean isPartitionStats, boolean useTableValues) { StringBuilder rewrittenQueryBuilder = new StringBuilder("select "); StringBuilder columnNamesBuilder = new StringBuilder(); StringBuilder columnDummyValuesBuilder = new StringBuilder(); - for (int i = 0; i < colNames.size(); i++) { + for (int i = 0; i < columnSchemas.size(); i++) { if (i > 0) { rewrittenQueryBuilder.append(", "); columnNamesBuilder.append(", "); columnDummyValuesBuilder.append(", "); } - final String columnName = unparseIdentifier(colNames.get(i), conf); - final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colTypes.get(i)); + FieldSchema columnSchema = columnSchemas.get(i); + final String columnName = unparseIdentifier(columnSchema.getName(), conf); + final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(columnSchema.getType()); try { genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo); @@ -652,7 +650,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { rewrittenQuery = String.join(" union all ", Maps.transformEntries(partTransformSpecs, (specId, partTransformSpec) -> - genRewrittenQuery(colNames, colType, conf, partTransformSpec, specId, partSpec, isPartitionStats)) + genRewrittenQuery(columnSchemas, conf, partTransformSpec, specId, partSpec, isPartitionStats)) .values()); rewrittenTree = genRewrittenTree(rewrittenQuery); @@ -722,7 +720,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); isTableLevel = !isPartitionStats; - rewrittenQuery = genRewrittenQuery(colNames, colType, conf, partTransformSpec, -1, + rewrittenQuery = genRewrittenQuery(columnSchemas, conf, partTransformSpec, -1, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); From afc818817675c4362556ee80b1e7e08ac2892b22 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 11 May 2026 17:19:49 +0530 Subject: [PATCH 11/16] Refactor analyzeRewrite to use columnSchemas --- .../hive/ql/parse/BaseSemanticAnalyzer.java | 21 +++++++++---------- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 19 ++++++----------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 986dcb7fcbbb..054563fa9aa6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1416,8 +1416,7 @@ public String toString() { public static class AnalyzeRewriteContext { private String tableName; - private List colName; - private List colType; + private List columnSchemas; private boolean tblLvl; public String getTableName() { @@ -1428,12 +1427,16 @@ public void setTableName(String tableName) { this.tableName = tableName; } - public List getColName() { - return colName; + public List getColumnSchemas() { + return columnSchemas; + } + + public void setColumnSchemas(List columnSchemas) { + this.columnSchemas = columnSchemas; } - public void setColName(List colName) { - this.colName = colName; + public List getColName() { + return columnSchemas == null ? null : Utilities.getColumnNamesFromFieldSchema(columnSchemas); } public boolean isTblLvl() { @@ -1445,11 +1448,7 @@ public void setTblLvl(boolean isTblLvl) { } public List getColType() { - return colType; - } - - public void setColType(List colType) { - this.colType = colType; + return columnSchemas == null ? null : Utilities.getColumnTypesFromFieldSchema(columnSchemas); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index a195be982bba..3c7b69beb812 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -77,8 +77,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { private boolean isRewritten; private boolean isTableLevel; - private List colNames; - private List colType; + private List rewrittenColumnSchemas; private Table tbl; public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticException { @@ -644,8 +643,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); - colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); + rewrittenColumnSchemas = columnSchemas; isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -669,8 +667,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(isTableLevel); - analyzeRewrite.setColName(colNames); - analyzeRewrite.setColType(colType); + analyzeRewrite.setColumnSchemas(rewrittenColumnSchemas); qbp.setAnalyzeRewrite(analyzeRewrite); origCtx.addSubContext(ctx); initCtx(ctx); @@ -716,8 +713,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - colNames = Utilities.getColumnNamesFromFieldSchema(columnSchemas); - colType = Utilities.getColumnTypesFromFieldSchema(columnSchemas); + rewrittenColumnSchemas = columnSchemas; isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(columnSchemas, conf, partTransformSpec, -1, @@ -747,8 +743,7 @@ AnalyzeRewriteContext getAnalyzeRewriteContext() { AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(isTableLevel); - analyzeRewrite.setColName(colNames); - analyzeRewrite.setColType(colType); + analyzeRewrite.setColumnSchemas(rewrittenColumnSchemas); return analyzeRewrite; } @@ -756,9 +751,7 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - List columnSchemas = getStatsEligibleFieldSchemas(tbl); - analyzeRewrite.setColName(Utilities.getColumnNamesFromFieldSchema(columnSchemas)); - analyzeRewrite.setColType(Utilities.getColumnTypesFromFieldSchema(columnSchemas)); + analyzeRewrite.setColumnSchemas(getStatsEligibleFieldSchemas(tbl)); return analyzeRewrite; } From 2c7c593fa9f83c135312ef24f0e2b1018a9da0da Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 11 May 2026 18:44:14 +0530 Subject: [PATCH 12/16] Add comment in getExplicitColumnNamesFromAst to explain the need for 3 children of ASTNode --- .../hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 3c7b69beb812..46eac238a659 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -119,6 +119,10 @@ private static List getStatsEligibleFieldSchemas(Table tbl) { } private List getExplicitColumnNamesFromAst(ASTNode tree) throws SemanticException { + // The parser stores this statement as three pieces in order: which table (or partition) to + // analyze, a flag that this is column-level stats (not scanning the whole table for table + // stats alone), then the listed column names from "FOR COLUMNS (a, b, ...)". That layout is the reason + // we expect exactly three children and read the identifiers from the last one. if (tree.getChildCount() != 3) { throw new SemanticException("Internal error. Expected number of children of ASTNode should be 3. Found : " + tree.getChildCount()); From 2b1f556c0e3373d385a7eff5b7ab27fde5b686bc Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Mon, 11 May 2026 22:37:21 +0530 Subject: [PATCH 13/16] Refactor approach to use a container FieldSchemas --- .../hive/ql/parse/BaseSemanticAnalyzer.java | 55 ++++++++++++++----- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 23 ++++---- .../hadoop/hive/ql/parse/TaskCompiler.java | 6 +- .../hadoop/hive/ql/plan/ColumnStatsDesc.java | 24 +++----- 4 files changed, 64 insertions(+), 44 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 054563fa9aa6..d56f3249cce3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import java.io.IOException; +import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collection; @@ -1413,10 +1414,46 @@ public String toString() { } } + /** + * Holds table column {@link FieldSchema} entries and lazily derived parallel name/type string + * lists for analyze / column-stats compilation. + */ + public static final class FieldSchemas implements Serializable { + + private static final long serialVersionUID = 1L; + + private final List schemas; + + private transient List colNames; + private transient List colTypes; + + public FieldSchemas(List schemas) { + this.schemas = schemas != null ? schemas : Collections.emptyList(); + } + + public List getSchemas() { + return schemas; + } + + public List getColName() { + if (colNames == null) { + colNames = Utilities.getColumnNamesFromFieldSchema(schemas); + } + return colNames; + } + + public List getColType() { + if (colTypes == null) { + colTypes = Utilities.getColumnTypesFromFieldSchema(schemas); + } + return colTypes; + } + } + public static class AnalyzeRewriteContext { private String tableName; - private List columnSchemas; + private FieldSchemas fieldSchemas; private boolean tblLvl; public String getTableName() { @@ -1427,16 +1464,12 @@ public void setTableName(String tableName) { this.tableName = tableName; } - public List getColumnSchemas() { - return columnSchemas; + public FieldSchemas getFieldSchemas() { + return fieldSchemas; } - public void setColumnSchemas(List columnSchemas) { - this.columnSchemas = columnSchemas; - } - - public List getColName() { - return columnSchemas == null ? null : Utilities.getColumnNamesFromFieldSchema(columnSchemas); + public void setFieldSchemas(FieldSchemas fieldSchemas) { + this.fieldSchemas = fieldSchemas; } public boolean isTblLvl() { @@ -1447,10 +1480,6 @@ public void setTblLvl(boolean isTblLvl) { this.tblLvl = isTblLvl; } - public List getColType() { - return columnSchemas == null ? null : Utilities.getColumnTypesFromFieldSchema(columnSchemas); - } - } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 46eac238a659..18a47606cbac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -77,7 +77,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { private boolean isRewritten; private boolean isTableLevel; - private List rewrittenColumnSchemas; + private FieldSchemas rewrittenColumnSchemas; private Table tbl; public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticException { @@ -105,7 +105,7 @@ private boolean shouldRewrite(ASTNode tree) { /** * Get the Field Schemas of the columns that support column statistics. */ - private static List getStatsEligibleFieldSchemas(Table tbl) { + private static FieldSchemas getStatsEligibleFieldSchemas(Table tbl) { List result = new ArrayList<>(); for (FieldSchema col : tbl.getCols()) { String type = col.getType(); @@ -115,7 +115,7 @@ private static List getStatsEligibleFieldSchemas(Table tbl) { result.add(col); } } - return result; + return new FieldSchemas(result); } private List getExplicitColumnNamesFromAst(ASTNode tree) throws SemanticException { @@ -256,7 +256,7 @@ protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, getStatsEligibleFieldSchemas(tbl), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); + tbl, getStatsEligibleFieldSchemas(tbl).getSchemas(), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); } private static String genRewrittenQuery(Table tbl, List columnSchemas, @@ -647,7 +647,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { partTransformSpecs = tbl.getStorageHandler().getPartitionTransformSpecs(tbl); } } - rewrittenColumnSchemas = columnSchemas; + rewrittenColumnSchemas = new FieldSchemas(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = String.join(" union all ", @@ -671,7 +671,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(isTableLevel); - analyzeRewrite.setColumnSchemas(rewrittenColumnSchemas); + analyzeRewrite.setFieldSchemas(rewrittenColumnSchemas); qbp.setAnalyzeRewrite(analyzeRewrite); origCtx.addSubContext(ctx); initCtx(ctx); @@ -717,7 +717,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) partTransformSpec = tbl.getStorageHandler().getPartitionTransformSpec(tbl); } } - rewrittenColumnSchemas = columnSchemas; + rewrittenColumnSchemas = new FieldSchemas(columnSchemas); isTableLevel = !isPartitionStats; rewrittenQuery = genRewrittenQuery(columnSchemas, conf, partTransformSpec, -1, @@ -731,8 +731,9 @@ protected List getColumnsFromAst(ASTNode ast) throws SemanticExcept List statsEligibleFS = null; List columnNames; if (ast.getChildCount() == 2) { - statsEligibleFS = getStatsEligibleFieldSchemas(tbl); - columnNames = Utilities.getColumnNamesFromFieldSchema(statsEligibleFS); + FieldSchemas eligibleFS = getStatsEligibleFieldSchemas(tbl); + statsEligibleFS = eligibleFS.getSchemas(); + columnNames = eligibleFS.getColName(); } else { columnNames = getExplicitColumnNamesFromAst(ast); } @@ -747,7 +748,7 @@ AnalyzeRewriteContext getAnalyzeRewriteContext() { AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(isTableLevel); - analyzeRewrite.setColumnSchemas(rewrittenColumnSchemas); + analyzeRewrite.setFieldSchemas(rewrittenColumnSchemas); return analyzeRewrite; } @@ -755,7 +756,7 @@ static AnalyzeRewriteContext genAnalyzeRewriteContext(HiveConf conf, Table tbl) AnalyzeRewriteContext analyzeRewrite = new AnalyzeRewriteContext(); analyzeRewrite.setTableName(tbl.getFullyQualifiedName()); analyzeRewrite.setTblLvl(!(conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned())); - analyzeRewrite.setColumnSchemas(getStatsEligibleFieldSchemas(tbl)); + analyzeRewrite.setFieldSchemas(getStatsEligibleFieldSchemas(tbl)); return analyzeRewrite; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 11dda4ef638e..d661102a104c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -68,6 +68,7 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.FieldSchemas; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; @@ -672,8 +673,7 @@ protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, int outerQueryLimit, int numBitVector) throws SemanticException { FetchWork fetch; String tableName = analyzeRewrite.getTableName(); - List colName = analyzeRewrite.getColName(); - List colType = analyzeRewrite.getColType(); + FieldSchemas fieldSchemas = analyzeRewrite.getFieldSchemas(); boolean isTblLevel = analyzeRewrite.isTblLvl(); String cols = loadFileWork.get(0).getColumns(); @@ -691,7 +691,7 @@ protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit); ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, - colName, colType, isTblLevel, numBitVector, fetch); + fieldSchemas, isTblLevel, numBitVector, fetch); StatsTask columnStatsTask = map.get(tableName); if (columnStatsTask == null) { throw new SemanticException("Can not find " + tableName + " in genColumnStatsTask"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java index 9a90aa2633a6..b5993f395014 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.List; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.FieldSchemas; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** @@ -34,15 +35,12 @@ public class ColumnStatsDesc implements Serializable, Cloneable { private int numBitVector; private boolean needMerge; private String tableName; - private List colName; - private List colType; + private FieldSchemas columnSchemas; - - public ColumnStatsDesc(String tableName, List colName, - List colType, boolean isTblLevel, int numBitVector, FetchWork fWork1) { + public ColumnStatsDesc(String tableName, FieldSchemas columnSchemas, boolean isTblLevel, + int numBitVector, FetchWork fWork1) { this.tableName = tableName; - this.colName = colName; - this.colType = colType; + this.columnSchemas = columnSchemas; this.isTblLevel = isTblLevel; this.numBitVector = numBitVector; this.needMerge = this.numBitVector != 0; @@ -69,20 +67,12 @@ public void setTblLevel(boolean isTblLevel) { @Explain(displayName = "Columns") public List getColName() { - return colName; - } - - public void setColName(List colName) { - this.colName = colName; + return columnSchemas == null ? null : columnSchemas.getColName(); } @Explain(displayName = "Column Types") public List getColType() { - return colType; - } - - public void setColType(List colType) { - this.colType = colType; + return columnSchemas == null ? null : columnSchemas.getColType(); } public int getNumBitVector() { From c030988b39fdef59d0bc2bbe92a4a0215c33e9a2 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 12 May 2026 10:36:09 +0530 Subject: [PATCH 14/16] Refactor genRewrittenQuery to use FieldSchemas --- .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java | 8 ++++++++ .../hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 12 ++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index d56f3249cce3..f616049d8591 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1435,6 +1435,14 @@ public List getSchemas() { return schemas; } + public int size() { + return schemas.size(); + } + + public FieldSchema get(int index) { + return schemas.get(index); + } + public List getColName() { if (colNames == null) { colNames = Utilities.getColumnNamesFromFieldSchema(schemas); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 18a47606cbac..ff7ecd26bf0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -239,7 +239,7 @@ protected static List getFieldSchemasByColName(Table tbl, List columnSchemas, HiveConf conf, + private String genRewrittenQuery(FieldSchemas columnSchemas, HiveConf conf, List partTransformSpec, int specId, Map partSpec, boolean isPartitionStats) { String rewritten = genRewrittenQuery(tbl, columnSchemas, conf, partTransformSpec, specId, partSpec, @@ -255,11 +255,11 @@ private String genRewrittenQuery(List columnSchemas, HiveConf conf, protected static String genRewrittenQuery(Table tbl, HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { - return ColumnStatsSemanticAnalyzer.genRewrittenQuery( - tbl, getStatsEligibleFieldSchemas(tbl).getSchemas(), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); + return ColumnStatsSemanticAnalyzer.genRewrittenQuery(tbl, getStatsEligibleFieldSchemas(tbl), conf, + partTransformSpec, -1, partSpec, isPartitionStats, true); } - private static String genRewrittenQuery(Table tbl, List columnSchemas, + private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas, HiveConf conf, List partTransformSpec, int specId, Map partSpec, boolean isPartitionStats, boolean useTableValues) { StringBuilder rewrittenQueryBuilder = new StringBuilder("select "); @@ -652,7 +652,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { rewrittenQuery = String.join(" union all ", Maps.transformEntries(partTransformSpecs, (specId, partTransformSpec) -> - genRewrittenQuery(columnSchemas, conf, partTransformSpec, specId, partSpec, isPartitionStats)) + genRewrittenQuery(rewrittenColumnSchemas, conf, partTransformSpec, specId, partSpec, isPartitionStats)) .values()); rewrittenTree = genRewrittenTree(rewrittenQuery); @@ -720,7 +720,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context) rewrittenColumnSchemas = new FieldSchemas(columnSchemas); isTableLevel = !isPartitionStats; - rewrittenQuery = genRewrittenQuery(columnSchemas, conf, partTransformSpec, -1, + rewrittenQuery = genRewrittenQuery(rewrittenColumnSchemas, conf, partTransformSpec, -1, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); From 68446bd50fcca103d8533386e936814f72f30725 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 12 May 2026 19:32:10 +0530 Subject: [PATCH 15/16] Refactor validateSpecifiedColumnNames & checkForPartitionColumns --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index ff7ecd26bf0a..130f787df28b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -25,9 +25,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.Maps; @@ -583,27 +585,31 @@ private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException } } - // fail early if the columns specified for column statistics are not valid - private void validateSpecifiedColumnNames(List specifiedCols) - throws SemanticException { - List tableCols = Utilities.getColumnNamesFromFieldSchema(tbl.getCols()); + private void validateSpecifiedColumnNames(List specifiedCols) throws SemanticException { + FieldSchemas tableCols = new FieldSchemas(tbl.getCols()); + Set tableColNamesLc = new HashSet<>(); + for (FieldSchema fs : tableCols.getSchemas()) { + tableColNamesLc.add(fs.getName().toLowerCase()); + } + List tableColNames = tableCols.getColName(); for (String sc : specifiedCols) { - if (!tableCols.contains(sc.toLowerCase())) { - String msg = "'" + sc + "' (possible columns are " + tableCols + ")"; + if (!tableColNamesLc.contains(sc.toLowerCase())) { + String msg = "'" + sc + "' (possible columns are " + tableColNames + ")"; throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg)); } } } - private void checkForPartitionColumns(List specifiedCols, List partCols) - throws SemanticException { - // Raise error if user has specified partition column for stats - for (String pc : partCols) { - for (String sc : specifiedCols) { - if (pc.equalsIgnoreCase(sc)) { - throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_COLUMN.getMsg() - + " [Try removing column '" + sc + "' from column list]"); - } + private void checkForPartitionColumns(List specifiedCols) throws SemanticException { + Map specifiedColsMap = new HashMap<>(); + for (String sc : specifiedCols) { + specifiedColsMap.put(sc.toLowerCase(), sc); + } + for (FieldSchema pk : tbl.getPartitionKeys()) { + String specifiedCol = specifiedColsMap.get(pk.getName().toLowerCase()); + if (specifiedCol != null) { + throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_COLUMN.getMsg() + + " [Try removing column '" + specifiedCol + "' from column list]"); } } } @@ -738,7 +744,7 @@ protected List getColumnsFromAst(ASTNode ast) throws SemanticExcept columnNames = getExplicitColumnNamesFromAst(ast); } - checkForPartitionColumns(columnNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + checkForPartitionColumns(columnNames); validateSpecifiedColumnNames(columnNames); return statsEligibleFS != null ? statsEligibleFS : getFieldSchemasByColName(tbl, columnNames); From 37d1c2267a120a76d37e214c5e968ee2d2ef859e Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Fri, 15 May 2026 01:37:17 +0530 Subject: [PATCH 16/16] Merge checkForPartitionColumns & validateSpecifiedColumnNames in getFieldSchemasByColName --- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 83 ++++++++----------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 130f787df28b..e23e54aa5230 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -25,11 +25,9 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.Maps; @@ -218,24 +216,43 @@ private static String getColTypeOf(Table tbl, String partKey) { throw new RuntimeException("Unknown partition key : " + partKey); } - protected static List getFieldSchemasByColName(Table tbl, List colNames) { - List cols = tbl.getCols(); - Map colFsMap = new HashMap<>(); - for (FieldSchema col : cols) { - colFsMap.put(col.getName().toLowerCase(), col); + protected static List getFieldSchemasByColName(Table tbl, List colNames) + throws SemanticException { + Map specifiedColsMap = new HashMap<>(); + for (String colName : colNames) { + specifiedColsMap.put(colName.toLowerCase(), new FieldSchema(colName, null, null)); + } + + for (FieldSchema pk : tbl.getPartitionKeys()) { + FieldSchema fs = specifiedColsMap.get(pk.getName().toLowerCase()); + if (fs != null) { + throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_COLUMN.getMsg() + + " [Try removing column '" + fs.getName() + "' from column list]"); + } + } + + for (FieldSchema col : tbl.getCols()) { + specifiedColsMap.computeIfPresent(col.getName().toLowerCase(), (key, value) -> col); } + List result = new ArrayList<>(); + List tableColNames = new FieldSchemas(tbl.getCols()).getColName(); for (String colName : colNames) { - FieldSchema fs = colFsMap.get(colName.toLowerCase()); - if (fs != null) { - String type = fs.getType(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); - boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); - if (!isSupported) { - logTypeWarning(colName, type); - } else { - result.add(new FieldSchema(colName, type, fs.getComment())); - } + FieldSchema fs = specifiedColsMap.get(colName.toLowerCase()); + + // If the type is null, the column does not exist as its FieldSchema was not populated from tbl.getCols() + if (fs.getType() == null) { + String msg = "'" + colName + "' (possible columns are " + tableColNames + ")"; + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg)); + } + + String type = fs.getType(); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); + if (!isSupported) { + logTypeWarning(colName, type); + } else { + result.add(new FieldSchema(colName, type, fs.getComment())); } } return result; @@ -585,35 +602,6 @@ private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException } } - private void validateSpecifiedColumnNames(List specifiedCols) throws SemanticException { - FieldSchemas tableCols = new FieldSchemas(tbl.getCols()); - Set tableColNamesLc = new HashSet<>(); - for (FieldSchema fs : tableCols.getSchemas()) { - tableColNamesLc.add(fs.getName().toLowerCase()); - } - List tableColNames = tableCols.getColName(); - for (String sc : specifiedCols) { - if (!tableColNamesLc.contains(sc.toLowerCase())) { - String msg = "'" + sc + "' (possible columns are " + tableColNames + ")"; - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg)); - } - } - } - - private void checkForPartitionColumns(List specifiedCols) throws SemanticException { - Map specifiedColsMap = new HashMap<>(); - for (String sc : specifiedCols) { - specifiedColsMap.put(sc.toLowerCase(), sc); - } - for (FieldSchema pk : tbl.getPartitionKeys()) { - String specifiedCol = specifiedColsMap.get(pk.getName().toLowerCase()); - if (specifiedCol != null) { - throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_COLUMN.getMsg() - + " [Try removing column '" + specifiedCol + "' from column list]"); - } - } - } - private static void logTypeWarning(String colName, String colType) { String warning = "Only primitive type arguments are accepted but " + colType + " is passed for " + colName + "."; @@ -744,9 +732,6 @@ protected List getColumnsFromAst(ASTNode ast) throws SemanticExcept columnNames = getExplicitColumnNamesFromAst(ast); } - checkForPartitionColumns(columnNames); - validateSpecifiedColumnNames(columnNames); - return statsEligibleFS != null ? statsEligibleFS : getFieldSchemasByColName(tbl, columnNames); }