diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_insert_overwrite_partition_transforms.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_insert_overwrite_partition_transforms.q index 3c968a163ac6..e838c7bd5693 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_insert_overwrite_partition_transforms.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_insert_overwrite_partition_transforms.q @@ -19,30 +19,6 @@ --! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/ set hive.explain.user=false; -create external table ice_parquet_date_transform_year( - bigintcol bigint, - intcol integer, - pcol date -) partitioned by spec (year(pcol)) -stored by iceberg; - -explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-05') values (1234567890123345, 2), (23456789012345678, 4); -insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-05') values (1234567890123345, 2), (23456789012345678, 4); -explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5); -insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5); -explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-12') values (3456789012345678, 4), (34567890123456789, 6); -insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-12') values (3456789012345678, 4), (34567890123456789, 6); - -select * from ice_parquet_date_transform_year; - -explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-13') select bigintcol, intcol from ice_parquet_date_transform_year; -insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-13') select bigintcol, intcol from ice_parquet_date_transform_year; -explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-02') select 234675894076895090, intcol from ice_parquet_date_transform_year; -insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-02') select 234675894076895090, intcol from ice_parquet_date_transform_year; - -describe formatted ice_parquet_date_transform_year; -select * from ice_parquet_date_transform_year; - create external table ice_parquet_date_transform_month( bigintcol bigint, pcol date, @@ -50,7 +26,6 @@ create external table ice_parquet_date_transform_month( ) partitioned by spec (month(pcol)) stored by iceberg; -explain insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-31') values (1234567890123345, 2), (23456789012345678, 4); insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-31') values (1234567890123345, 2), (23456789012345678, 4); explain insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5); insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5); diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out index fb1cdbcaf12a..b19a88b6eb90 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out @@ -109,13 +109,6 @@ POSTHOOK: query: DESCRIBE FORMATTED ice_t_transform POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform # col_name data_type comment -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -182,13 +175,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform_prop # col_name data_type comment id int -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -255,7 +241,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_identity_part # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index e74885e57a3d..67e59155b67c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -31,7 +31,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -232,7 +231,7 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept if (paths.hasNext()) { PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next()); if (partDesc != null) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && !partSpec.isEmpty()) { partitionColumnCount = partSpec.size(); } diff --git a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java index 120949fc9949..f4a008257bb8 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java @@ -158,13 +158,13 @@ public static final class Request { // Holds a hierarchical structure of DBs, tables and partitions such as: // { testdb : { testtab0 : [], testtab1 : [ {pk0 : p0v0, pk1 : p0v1}, {pk0 : p1v0, pk1 : p1v1} ] }, testdb2 : {} } - private final Map>>> entities; + private final Map>>> entities; - private Request(Map>>> entities) { + private Request(Map>>> entities) { this.entities = entities; } - public Map>>> getEntities() { + public Map>>> getEntities() { return entities; } @@ -191,21 +191,21 @@ public List toProtoRequests() List protoRequests = new LinkedList<>(); - for (Map.Entry>>> dbEntry : entities.entrySet()) { + for (Map.Entry>>> dbEntry : entities.entrySet()) { String dbName = dbEntry.getKey(); - Map>> tables = dbEntry.getValue(); + Map>> tables = dbEntry.getValue(); LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = null; requestBuilder.setDbName(dbName.toLowerCase()); - for (Map.Entry>> tableEntry : tables.entrySet()) { + for (Map.Entry>> tableEntry : tables.entrySet()) { String tableName = tableEntry.getKey(); tableBuilder = LlapDaemonProtocolProtos.TableProto.newBuilder(); tableBuilder.setTableName(tableName.toLowerCase()); - Set> partitions = tableEntry.getValue(); + Set> partitions = tableEntry.getValue(); Set partitionKeys = null; for (Map partitionSpec : partitions) { @@ -245,7 +245,7 @@ public boolean isTagMatch(CacheTag cacheTag) { return false; } - Map>> tables = entities.get(db); + Map>> tables = entities.get(db); // If true, must be a drop DB event and this cacheTag matches. if (tables.isEmpty()) { @@ -261,7 +261,7 @@ public boolean isTagMatch(CacheTag cacheTag) { for (String tableAndDbName : tables.keySet()) { if (tableAndDbName.equals(tagTableName.getNotEmptyDbTable())) { - Set> partDescs = tables.get(tableAndDbName); + Set> partDescs = tables.get(tableAndDbName); // If true, must be a drop table event, and this cacheTag matches. if (partDescs == null) { @@ -292,7 +292,7 @@ public String toString() { */ public static final class Builder { - private final Map>>> entities; + private final Map>>> entities; private Builder() { this.entities = new HashMap<>(); @@ -302,7 +302,7 @@ public static Builder create() { return new Builder(); } - public Builder addPartitionOfATable(String db, String tableName, LinkedHashMap partSpec) { + public Builder addPartitionOfATable(String db, String tableName, Map partSpec) { ensureDb(db); ensureTable(db, tableName); entities.get(db).get(tableName).add(partSpec); @@ -325,7 +325,7 @@ public Request build() { } private void ensureDb(String dbName) { - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); if (tables == null) { tables = new HashMap<>(); entities.put(dbName, tables); @@ -334,9 +334,9 @@ private void ensureDb(String dbName) { private void ensureTable(String dbName, String tableName) { ensureDb(dbName); - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); - Set> partitions = tables.get(tableName); + Set> partitions = tables.get(tableName); if (partitions == null) { partitions = new HashSet<>(); tables.put(tableName, partitions); @@ -352,7 +352,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entities.clear(); String dbName = protoRequest.getDbName().toLowerCase(); - Map>> entitiesInDb = new HashMap<>(); + Map>> entitiesInDb = new HashMap<>(); List tables = protoRequest.getTableList(); if (tables != null && !tables.isEmpty()) { @@ -364,7 +364,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entitiesInDb.put(dbAndTableName, null); continue; } - Set> partitions = new HashSet<>(); + Set> partitions = new HashSet<>(); LinkedHashMap partDesc = new LinkedHashMap<>(); for (int valIx = 0; valIx < table.getPartValCount(); ++valIx) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 289479b7ee79..87d115df72ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -66,10 +66,7 @@ private List getColumnsByPattern() throws HiveException { private List getCols() throws HiveException { Table table = context.getDb().getTable(desc.getTableName()); - List allColumns = new ArrayList<>(); - allColumns.addAll(table.getCols()); - allColumns.addAll(table.getPartCols()); - return allColumns; + return new ArrayList<>(table.getAllCols()); } private Matcher getMatcher() { @@ -94,13 +91,7 @@ private List filterColumns(List columns, Matcher match } if (desc.isSorted()) { - result.sort( - new Comparator() { - @Override - public int compare(FieldSchema f1, FieldSchema f2) { - return f1.getName().compareTo(f2.getName()); - } - }); + result.sort(Comparator.comparing(FieldSchema::getName)); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index 75f39291cd07..dc6c3680ca1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -59,7 +59,6 @@ import java.io.DataOutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -70,7 +69,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.Map.Entry; -import java.util.stream.Collectors; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; import static org.apache.hadoop.hive.ql.ddl.ShowUtils.ALIGNMENT; @@ -171,13 +169,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column boolean isFormatted, boolean isOutputPadded) throws IOException { String partitionData = ""; if (columnPath == null) { - List partitionColumns = null; - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - if (table.isPartitioned()) { - partitionColumns = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : - table.getPartCols(); - } + List partitionColumns = table.isPartitioned() ? table.getPartCols() : null; if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { TextMetaDataTable metaDataTable = new TextMetaDataTable(); @@ -204,13 +196,9 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column } private void addFormattedTableData(DataOutputStream out, Table table, Partition partition, boolean isOutputPadded) - throws IOException, UnsupportedEncodingException { - String formattedTableInfo = null; - if (partition != null) { - formattedTableInfo = getPartitionInformation(table, partition); - } else { - formattedTableInfo = getTableInformation(table, isOutputPadded); - } + throws IOException { + String formattedTableInfo = (partition != null) ? getPartitionInformation(table, partition) : + getTableInformation(table, isOutputPadded); if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { formattedTableInfo += getConstraintsInformation(table); @@ -337,7 +325,7 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List skewedCoumnNames = storageDesc.getSkewedInfo().getSkewedColNames().stream() .sorted() - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Columns:", skewedCoumnNames.toString(), tableInfo); } @@ -345,16 +333,16 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List> skewedColumnValues = storageDesc.getSkewedInfo().getSkewedColValues().stream() .sorted(new VectorComparator()) - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Values:", skewedColumnValues.toString(), tableInfo); } - Map, String> skewedColMap = new TreeMap<>(new VectorComparator()); + Map, String> skewedColMap = new TreeMap<>(new VectorComparator<>()); skewedColMap.putAll(storageDesc.getSkewedInfo().getSkewedColValueLocationMaps()); if (MapUtils.isNotEmpty(skewedColMap)) { formatOutput("Skewed Value to Path:", skewedColMap.toString(), tableInfo); Map, String> truncatedSkewedColMap = - new TreeMap, String>(new VectorComparator()); + new TreeMap<>(new VectorComparator<>()); // walk through existing map to truncate path so that test won't mask it then we can verify location is right Set, String>> entries = skewedColMap.entrySet(); for (Entry, String> entry : entries) { @@ -403,7 +391,7 @@ private void getPartitionMetaDataInformation(StringBuilder tableInfo, Partition } } - private class VectorComparator> implements Comparator>{ + private static final class VectorComparator> implements Comparator> { @Override public int compare(List listA, List listB) { for (int i = 0; i < listA.size() && i < listB.size(); i++) { @@ -438,7 +426,7 @@ private void displayAllParameters(Map params, StringBuilder tabl private void displayAllParameters(Map params, StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) { - List keys = new ArrayList(params.keySet()); + List keys = new ArrayList<>(params.keySet()); Collections.sort(keys); for (String key : keys) { String value = params.get(key); @@ -626,7 +614,7 @@ private void addExtendedTableData(DataOutputStream out, Table table, Partition p } private void addExtendedConstraintData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { out.write(("Constraints").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.tabCode); @@ -658,7 +646,7 @@ private void addExtendedConstraintData(DataOutputStream out, Table table) } private void addExtendedStorageData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getStorageHandlerInfo() != null) { out.write(("StorageHandlerInfo").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.newLineCode); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index db7a5dfcd3d0..5882e4616506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Map.Entry; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index ebe8f2f52775..e333ed85f439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -254,7 +253,7 @@ public static String getPartialName(Partition p, int level) throws HiveException * @throws HiveException */ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, - LinkedHashMap partSpec) + Map partSpec) throws HiveException { List partKeys = tbl.getPartitionKeys(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index a5bc66733f46..9196a3441200 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index a0906cfb0339..38c4dfb036b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.Map; import java.util.stream.IntStream; @@ -287,7 +286,7 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, Object[] partitionValues) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); for (int i = 0; i < vrbCtx.partitionColumnCount; i++) { Object objectValue; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index c188eb09fdcf..9f871d05feb3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -90,10 +90,7 @@ public List getValues() { Table table = this.getTable(); values = new ArrayList<>(); - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - for (FieldSchema fs : table.hasNonNativePartitionSupport() - ? table.getStorageHandler().getPartitionKeys(table) - : table.getPartCols()) { + for (FieldSchema fs : table.getPartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 0cf02a95e392..ee55fde100f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -430,10 +429,10 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable // 1.2 Add column info corresponding to partition columns ArrayList partitionColumns = new ArrayList(); - for (FieldSchema part_col : viewTable.getPartCols()) { - colName = part_col.getName(); + for (FieldSchema partCol : viewTable.getPartCols()) { + colName = partCol.getName(); colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true); + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), null, true); rr.put(null, colName, colInfo); cInfoLst.add(colInfo); partitionColumns.add(colInfo); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 736e6e8c9f1a..4715775d3b4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -124,7 +123,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) { @@ -173,7 +172,8 @@ protected void initialize(Table table, // set default if location is not set and this is a physical // table partition (not a view partition) if (table.getDataLocation() != null) { - Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); + Path partPath = new Path(table.getDataLocation(), + Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } } @@ -416,7 +416,7 @@ public Path[] getPath(Sample s) throws HiveException { } int scount = s.getSampleFraction(); - ArrayList ret = new ArrayList(); + List ret = new ArrayList<>(); if (bcount == scount) { ret.add(getBucketPath(s.getSampleNum() - 1)); @@ -428,7 +428,7 @@ public Path[] getPath(Sample s) throws HiveException { } // undersampling a bucket ret.add(getBucketPath((s.getSampleNum() - 1) % bcount)); - } else if (bcount > scount) { + } else { if ((bcount / scount) * scount != bcount) { throw new HiveException("Sample Count" + scount + " is not a divisor of bucket count " + bcount + " for table " @@ -439,11 +439,11 @@ public Path[] getPath(Sample s) throws HiveException { ret.add(getBucketPath(i * scount + (s.getSampleNum() - 1))); } } - return (ret.toArray(new Path[ret.size()])); + return (ret.toArray(new Path[0])); } } - public LinkedHashMap getSpec() { + public Map getSpec() { return table.createSpec(tPartition); } @@ -542,7 +542,7 @@ public void setLocation(String location) { */ public void setValues(Map partSpec) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : table.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null) { @@ -582,12 +582,11 @@ public List getSkewedColNames() { return tPartition.getSd().getSkewedInfo().getSkewedColNames(); } - public void setSkewedValueLocationMap(List valList, String dirName) - throws HiveException { + public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tPartition.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tPartition.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -612,8 +611,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { - if (obj instanceof Partition) { - Partition o = (Partition) obj; + if (obj instanceof Partition o) { return Objects.equals(tPartition, o.tPartition); } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index f857e7d505f1..d4f93cdb037f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -19,22 +19,15 @@ package org.apache.hadoop.hive.ql.metadata; import java.io.IOException; +import java.io.Serial; import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; + import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -47,6 +40,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.api.SourceTable; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -100,6 +94,7 @@ */ public class Table implements Serializable { + @Serial private static final long serialVersionUID = 1L; static final private Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Table"); @@ -109,6 +104,10 @@ public class Table implements Serializable { /** * These fields are all cached fields. The information comes from tTable. */ + private List tablePartCols; + private List tableNonPartCols; + private List tableAllCols; + private Map> inputColumnIndexByName; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -193,6 +192,9 @@ public Table makeCopy() { newTab.setMetaTable(this.getMetaTable()); newTab.setSnapshotRef(this.getSnapshotRef()); + if (this.tablePartCols != null) { + newTab.tablePartCols = new ArrayList<>(this.tablePartCols); + } return newTab; } @@ -225,11 +227,11 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { { sd.setSerdeInfo(new SerDeInfo()); sd.setNumBuckets(-1); - sd.setBucketCols(new ArrayList()); - sd.setCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.setSortCols(new ArrayList()); - sd.getSerdeInfo().setParameters(new HashMap()); + sd.setBucketCols(new ArrayList<>()); + sd.setCols(new ArrayList<>()); + sd.setParameters(new HashMap<>()); + sd.setSortCols(new ArrayList<>()); + sd.getSerdeInfo().setParameters(new HashMap<>()); // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does // not support a table with no columns. sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); @@ -239,17 +241,17 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { sd.setInputFormat(SequenceFileInputFormat.class.getName()); sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName()); SkewedInfo skewInfo = new SkewedInfo(); - skewInfo.setSkewedColNames(new ArrayList()); - skewInfo.setSkewedColValues(new ArrayList>()); - skewInfo.setSkewedColValueLocationMaps(new HashMap, String>()); + skewInfo.setSkewedColNames(new ArrayList<>()); + skewInfo.setSkewedColValues(new ArrayList<>()); + skewInfo.setSkewedColValueLocationMaps(new HashMap<>()); sd.setSkewedInfo(skewInfo); } org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(new ArrayList()); - t.setParameters(new HashMap()); + t.setPartitionKeys(new ArrayList<>()); + t.setParameters(new HashMap<>()); t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); t.setTableName(tableName); @@ -402,7 +404,7 @@ public void setStorageHandlerInfo(StorageHandlerInfo storageHandlerInfo) { this.storageHandlerInfo = storageHandlerInfo; } - final public Class getInputFormatClass() { + public final Class getInputFormatClass() { if (inputFormatClass == null) { try { String className = tTable.getSd().getInputFormat(); @@ -422,7 +424,7 @@ final public Class getInputFormatClass() { return inputFormatClass; } - final public Class getOutputFormatClass() { + public final Class getOutputFormatClass() { if (outputFormatClass == null) { try { String className = tTable.getSd().getOutputFormat(); @@ -456,7 +458,7 @@ public void setMaterializedTable(boolean materializedTable) { * Marker SemanticException, so that processing that allows for table validation failures * and appropriately handles them can recover from these types of SemanticExceptions */ - public class ValidationFailureSemanticException extends SemanticException{ + public static class ValidationFailureSemanticException extends SemanticException{ public ValidationFailureSemanticException(String s) { super(s); } @@ -526,9 +528,9 @@ public TableType getTableType() { return Enum.valueOf(TableType.class, tTable.getTableType()); } - public ArrayList getFields() { + public List getFields() { - ArrayList fields = new ArrayList(); + List fields = new ArrayList<>(); try { Deserializer decoder = getDeserializer(); @@ -594,7 +596,7 @@ public boolean equals(Object obj) { && Objects.equals(snapshotRef, other.snapshotRef); } - public List getPartCols() { + private List getNativePartCols() { List partKeys = tTable.getPartitionKeys(); if (partKeys == null) { partKeys = new ArrayList<>(); @@ -603,17 +605,38 @@ public List getPartCols() { return partKeys; } + /** + * Returns partition columns, consulting the storage handler for non-native tables (e.g. Iceberg) + * where partition columns are not stored in the metastore. + */ + public List getPartCols() { + if (tablePartCols != null) { + return tablePartCols; + } + if (isTableTypeSet() && hasNonNativePartitionSupport()) { + tablePartCols = getStorageHandler().getPartitionKeys(this); + } else { + tablePartCols = getNativePartCols(); + } + return tablePartCols; + } + + private boolean isTableTypeSet() { + if (tTable.getParameters() == null) { + return false; + } + String tableType = tTable.getParameters().get(HiveMetaHook.TABLE_TYPE); + return tableType != null; + } + public FieldSchema getPartColByName(String colName) { return getPartCols().stream() - .filter(key -> key.getName().toLowerCase().equals(colName)) - .findFirst().orElse(null); + .filter(key -> key.getName().toLowerCase().equals(colName)) + .findFirst().orElse(null); } public List getPartColNames() { - List partCols = hasNonNativePartitionSupport() ? - getStorageHandler().getPartitionKeys(this) : getPartCols(); - return partCols.stream().map(FieldSchema::getName) - .collect(Collectors.toList()); + return getPartCols().stream().map(FieldSchema::getName).toList(); } public boolean hasNonNativePartitionSupport() { @@ -621,7 +644,11 @@ public boolean hasNonNativePartitionSupport() { } public boolean isPartitionKey(String colName) { - return getPartColByName(colName) != null; + List partKeys = getPartitionKeys().stream().map(FieldSchema::getName).toList(); + if (partKeys.isEmpty()) { + return false; + } + return partKeys.contains(colName); } // TODO merge this with getBucketCols function @@ -671,7 +698,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tTable.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tTable.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -681,7 +708,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { public Map, String> getSkewedColValueLocationMaps() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValueLocationMaps() : new HashMap, String>(); + .getSkewedColValueLocationMaps() : new HashMap<>(); } public void setSkewedColValues(List> skewedValues) { @@ -690,7 +717,7 @@ public void setSkewedColValues(List> skewedValues) { public List> getSkewedColValues(){ return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValues() : new ArrayList>(); + .getSkewedColValues() : new ArrayList<>(); } public void setSkewedColNames(List skewedColNames) { @@ -699,7 +726,7 @@ public void setSkewedColNames(List skewedColNames) { public List getSkewedColNames() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColNames() : new ArrayList(); + .getSkewedColNames() : new ArrayList<>(); } public SkewedInfo getSkewedInfo() { @@ -727,8 +754,49 @@ private boolean isField(String col) { return false; } + private void fillColumnIndexByName() { + inputColumnIndexByName = new HashMap<>(); + List fsList = new ArrayList<>(getColsInternal(false)); + if (!isNonNative()) { + fsList.addAll(getNativePartCols()); + } + for (int i = 0; i < fsList.size(); i++) { + inputColumnIndexByName.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); + } + } + + public int getColumnIndexByName(String colName) { + if (inputColumnIndexByName == null) { + fillColumnIndexByName(); + } + return inputColumnIndexByName.get(colName.toLowerCase()).getLeft(); + } + + public FieldSchema getFieldSchemaByName(String colName) { + if (inputColumnIndexByName == null) { + fillColumnIndexByName(); + } + return inputColumnIndexByName.get(colName).getRight(); + } + public List getCols() { - return getColsInternal(false); + if (tableNonPartCols != null) { + return tableNonPartCols; + } + tableNonPartCols = new ArrayList<>(); + if (!isNonNative()) { + tableNonPartCols.addAll(getColsInternal(false)); + } else { + List nonPartFields = new ArrayList<>(); + Set partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); + for (FieldSchema field : getColsInternal(false)) { + if (!partFieldsName.contains(field.getName())) { + nonPartFields.add(field); + } + } + tableNonPartCols = nonPartFields; + } + return tableNonPartCols; } public List getColsForMetastore() { @@ -761,10 +829,18 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList f_list = new ArrayList(); - f_list.addAll(getCols()); - f_list.addAll(getPartCols()); - return f_list; + if (tableAllCols != null) { + return tableAllCols; + } + if (inputColumnIndexByName == null) { + fillColumnIndexByName(); + } + TreeMap orderedMap = new TreeMap<>(); + for (Map.Entry> e : inputColumnIndexByName.entrySet()) { + orderedMap.put(e.getValue().getLeft(), e.getValue().getRight()); + } + tableAllCols = orderedMap.values().stream().toList(); + return tableAllCols; } public void setPartCols(List partCols) { @@ -812,7 +888,7 @@ public void setOutputFormatClass(String name) throws HiveException { } public boolean isPartitioned() { - return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : + return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : CollectionUtils.isNotEmpty(getPartCols()); } @@ -1008,12 +1084,12 @@ public boolean isMaterializedView() { * Use the information from this partition. * @return Partition name to value mapping. */ - public LinkedHashMap createSpec( + public Map createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { List fsl = getPartCols(); List tpl = tp.getValues(); - LinkedHashMap spec = new LinkedHashMap(fsl.size()); + Map spec = LinkedHashMap.newLinkedHashMap(fsl.size()); for (int i = 0; i < fsl.size(); i++) { FieldSchema fs = fsl.get(i); String value = tpl.get(i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 3d3e4ce7663f..0e914843e2e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -807,8 +807,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, for (FieldNode col : cols) { int index = originalOutputColumnNames.indexOf(col.getFieldName()); Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op); - List fullFieldList = new ArrayList(tab.getCols()); - fullFieldList.addAll(tab.getPartCols()); + List fullFieldList = new ArrayList<>(tab.getAllCols()); cppCtx.getParseContext().getColumnAccessInfo() .add(tab.getCompleteName(), fullFieldList.get(index).getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index dba737e382c0..e92efedc4615 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -194,7 +194,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } else if (table.isNonNative() && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { generateEventOperatorPlan(ctx, parseContext, ts, column, - table.getCols().stream().filter(e -> e.getName().equals(column)). + table.getAllCols().stream().filter(e -> e.getName().equals(column)). map(e -> e.getType()).findFirst().get(), ctx.parent); } else { // semijoin LOG.debug("Column " + column + " is not a partition column"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f4b4c2ff3bad..82f81861a4dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1767,7 +1767,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma * allColumnNameList and allTypeInfoList variables -- into the data and partition columns. */ - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); dataColumnCount = dataAndPartColumnCount - partitionColumnCount; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index 91340b1b76ef..d08fe92208ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -51,7 +50,7 @@ public class PartExprEvalUtils { * @throws HiveException */ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws HiveException { - LinkedHashMap partSpec = p.getSpec(); + Map partSpec = p.getSpec(); Properties partProps = p.getSchema(); String[] partKeyTypes; @@ -59,8 +58,8 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv if (!partSpec.keySet().containsAll(expr.getCols())) { return null; } - partKeyTypes = p.getTable().getStorageHandler().getPartitionKeys(p.getTable()).stream() - .map(FieldSchema::getType).toArray(String[]::new); + partKeyTypes = p.getTable().getPartCols().stream().map(FieldSchema::getType) + .toArray(String[]::new); } else { String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); partKeyTypes = pcolTypes.trim().split(":"); @@ -104,7 +103,7 @@ public static Pair prepareExpr( ExprNodeDesc expr, List partColumnNames, List partColumnTypeInfos) throws HiveException { // Create the row object - List partObjectInspectors = new ArrayList(); + List partObjectInspectors = new ArrayList<>(); for (int i = 0; i < partColumnNames.size(); i++) { partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( partColumnTypeInfos.get(i))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java index 06912a1b3226..05f3b85f271f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java @@ -175,7 +175,8 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN //now generate insert statement //insert into newTableName select * from ts StringBuilder rewrittenQueryStr = generateExportQuery( - newTable.getPartCols(), tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); + newTable.getPartCols(), + tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr); Context rewrittenCtx = rr.rewrittenCtx; rewrittenCtx.setIsUpdateDeleteMerge(false); //it's set in parseRewrittenQuery() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 00cc988eb0a8..09eed2dc526b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -26,6 +26,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import java.util.Map.Entry; import java.util.Optional; @@ -3013,14 +3014,16 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tabMetaData.getPartCols()) { - colName = part_col.getName(); - colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), - isNullable(colName, nnc, pkc), tableAlias, true); - rr.put(tableAlias, colName, colInfo); - cInfoLst.add(colInfo); - partitionColumns.add(colInfo); + if (!tabMetaData.hasNonNativePartitionSupport()) { + for (FieldSchema part_col : tabMetaData.getPartCols()) { + colName = part_col.getName(); + colInfo = new ColumnInfo(colName, + TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), + isNullable(colName, nnc, pkc), tableAlias, true); + rr.put(tableAlias, colName, colInfo); + cInfoLst.add(colInfo); + partitionColumns.add(colInfo); + } } final TableType tableType = obtainTableType(tabMetaData); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 9109f9cb6086..165c580db612 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -236,7 +236,7 @@ private Operator genSelOp(String command, boolean rewritten, Context origCtx) /** * @param operator : the select operator in the analyze statement * @param input : the operator right before FS in the insert overwrite statement - * @throws HiveException + * @throws HiveException */ private void replaceSelectOperatorProcess(SelectOperator operator, Operator input) throws HiveException { @@ -244,16 +244,11 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator signature = new ArrayList<>(); OpParseContext inputCtx = sa.opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); - List columns = inputRR.getColumnInfos(); + List inputColumns = inputRR.getColumnInfos(); List colList = new ArrayList(); List columnNames = new ArrayList(); Map columnExprMap = new HashMap(); - // the column positions in the operator should be like this - // <----non-partition columns---->|<--static partition columns-->|<--dynamic partition columns--> - // ExprNodeColumnDesc | ExprNodeConstantDesc | ExprNodeColumnDesc - // from input | generate itself | from input - // | // 1. deal with non-partition columns Map columnNameToIndex = new HashMap<>(); @@ -262,7 +257,12 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator= inputColumns.size()) { + continue; + } + ColumnInfo col = inputColumns.get(inputIdx); Integer selRSIdx = getSelRSColumnIndex(i, col, columnNameToIndex); if (selRSIdx == null) { continue; @@ -294,7 +294,12 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator= inputColumns.size()) { + throw new SemanticException("Unable to resolve dynamic partition column '" + partColName + + "' from input columns " + inputRR.getColumnInfos()); + } + ColumnInfo col = inputColumns.get(inputIdx); exprNodeDesc = new ExprNodeColumnDesc(col); srcType = col.getType(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index e23e54aa5230..1fd943cf1573 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -159,7 +159,7 @@ private void handlePartialPartitionSpec(Map partSpec, ColumnStat } // User might have only specified partial list of partition keys, in which case add other partition keys in partSpec - List partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()); + List partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartCols()); for (String partKey : partKeys) { if (!partSpec.containsKey(partKey)) { partSpec.put(partKey, null); @@ -174,14 +174,14 @@ private void handlePartialPartitionSpec(Map partSpec, ColumnStat } } - private static CharSequence genPartitionClause(Table tbl, List partTransformSpec, int specId, + private static CharSequence genPartitionClause(Table tbl, List partTransformSpec, int specId, Map partSpec, HiveConf conf) { boolean predPresent = partSpec.values().stream().anyMatch(Objects::nonNull); - + StringBuilder whereClause = new StringBuilder(" where ").append( partSpec.entrySet().stream() .filter(part -> part.getValue() != null) - .map(part -> unparseIdentifier(part.getKey(), conf) + " = " + .map(part -> unparseIdentifier(part.getKey(), conf) + " = " + genPartValueString(getColTypeOf(tbl, part.getKey()), part.getValue())) .collect(Collectors.joining(" and ")) ); @@ -207,8 +207,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.hasNonNativePartitionSupport() ? - tbl.getStorageHandler().getPartitionKeys(tbl) : tbl.getPartitionKeys()) { + for (FieldSchema fs : tbl.getPartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } @@ -272,7 +271,7 @@ private String genRewrittenQuery(FieldSchemas columnSchemas, HiveConf conf, * included in the input table. */ protected static String genRewrittenQuery(Table tbl, - HiveConf conf, List partTransformSpec, Map partSpec, + HiveConf conf, List partTransformSpec, Map partSpec, boolean isPartitionStats) { return ColumnStatsSemanticAnalyzer.genRewrittenQuery(tbl, getStatsEligibleFieldSchemas(tbl), conf, partTransformSpec, -1, partSpec, isPartitionStats, true); @@ -297,7 +296,7 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas, final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(columnSchema.getType()); try { - genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo); + genComputeStats(rewrittenQueryBuilder, conf, tbl.getColumnIndexByName(columnSchema.getName()), columnName, typeInfo); } catch (SemanticException e) { throw new RuntimeException(e); } @@ -309,17 +308,26 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas, } if (isPartitionStats) { - if (partTransformSpec == null) { - for (FieldSchema fs : tbl.getPartCols()) { - String identifier = unparseIdentifier(fs.getName(), conf); - rewrittenQueryBuilder.append(", ").append(identifier); - columnNamesBuilder.append(", ").append(identifier); - - columnDummyValuesBuilder.append(", cast(null as ") - .append(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()).toString()) - .append(")"); + for (FieldSchema fs : tbl.getPartCols()) { + String identifier = unparseIdentifier(fs.getName(), conf); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType().toString()); + rewrittenQueryBuilder.append(", "); + if (tbl.hasNonNativePartitionSupport()) { + try { + genComputeStats(rewrittenQueryBuilder, conf, tbl.getColumnIndexByName(fs.getName()), identifier, typeInfo); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + } else { + rewrittenQueryBuilder.append(identifier); } - } else { + columnNamesBuilder.append(", ").append(identifier); + + columnDummyValuesBuilder.append(", cast(null as ") + .append(typeInfo) + .append(")"); + } + if (partTransformSpec != null) { rewrittenQueryBuilder.append(", ") .append(TransformSpec.toNamedStruct(partTransformSpec, conf)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 4d4956fbec13..dcf197a2c201 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.QueryState; @@ -89,7 +88,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index eb4a73f1e5e9..9093570706f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -561,7 +561,7 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc } rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0)))); - addPartitionColsToInsert(table.getPartCols(), inpPartSpec, rewrittenQueryStr); + addPartitionColsToInsert(parts, inpPartSpec, rewrittenQueryStr); rewrittenQueryStr.append(" select * from "); rewrittenQueryStr.append(tempTblName); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 882840ffef5a..ddde37433b5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.rewrite.MergeStatement; import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import java.util.ArrayList; import java.util.HashMap; @@ -230,7 +229,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause String deleteExtraPredicate) throws SemanticException { assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED; assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE; - Map newValuesMap = new HashMap<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + Map newValuesMap = HashMap.newHashMap(targetTable.getAllCols().size()); ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0); //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions //before re-parsing, i.e. they are known to SemanticAnalyzer logic @@ -303,7 +302,7 @@ private List findWhenClauses(ASTNode tree, int start) throws SemanticEx "Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause); whenClauses.add(whenClause); } - if (whenClauses.size() <= 0) { + if (whenClauses.isEmpty()) { //Futureproofing: the parser will actually not allow this throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement"); } @@ -499,11 +498,7 @@ private void handleUnresolvedColumns() { private void addColumn2Table(String tableName, String columnName) { tableName = tableName.toLowerCase(); //normalize name for mapping tableNamesFound.add(tableName); - List cols = table2column.get(tableName); - if (cols == null) { - cols = new ArrayList<>(); - table2column.put(tableName, cols); - } + List cols = table2column.computeIfAbsent(tableName, k -> new ArrayList<>()); //we want to preserve 'columnName' as it was in original input query so that rewrite //looks as much as possible like original query cols.add(columnName); @@ -526,7 +521,7 @@ private String getPredicate() { } StringBuilder sb = new StringBuilder(); for (String col : targetCols) { - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(" AND "); } //but preserve table name in SQL @@ -605,17 +600,15 @@ protected String getMatchedText(ASTNode n) { } protected boolean isAliased(ASTNode n) { - switch (n.getType()) { - case HiveParser.TOK_TABREF: - return findTabRefIdxs(n)[0] != 0; - case HiveParser.TOK_TABNAME: - return false; - case HiveParser.TOK_SUBQUERY: + return switch (n.getType()) { + case HiveParser.TOK_TABREF -> findTabRefIdxs(n)[0] != 0; + case HiveParser.TOK_TABNAME -> false; + case HiveParser.TOK_SUBQUERY -> { assert n.getChildCount() > 1 : "Expected Derived Table to be aliased"; - return true; - default: - throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); - } + yield true; + } + default -> throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); + }; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9964b9369065..9edbe5b05e13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -123,15 +123,11 @@ public static ASTNode parse( * @return boolean */ public static boolean isJoinToken(ASTNode node) { - switch (node.getToken().getType()) { - case HiveParser.TOK_JOIN: - case HiveParser.TOK_LEFTOUTERJOIN: - case HiveParser.TOK_RIGHTOUTERJOIN: - case HiveParser.TOK_FULLOUTERJOIN: - return true; - default: - return false; - } + return switch (node.getToken().getType()) { + case HiveParser.TOK_JOIN, HiveParser.TOK_LEFTOUTERJOIN, HiveParser.TOK_RIGHTOUTERJOIN, + HiveParser.TOK_FULLOUTERJOIN -> true; + default -> false; + }; } /** @@ -163,12 +159,10 @@ public static List validateColumnNameUniqueness( // but it should not be a major bottleneck as the number of columns are // anyway not so big Iterator iterCols = fieldSchemas.iterator(); - List colNames = new ArrayList(); + List colNames = new ArrayList<>(); while (iterCols.hasNext()) { String colName = iterCols.next().getName(); - Iterator iter = colNames.iterator(); - while (iter.hasNext()) { - String oldColName = iter.next(); + for (String oldColName : colNames) { if (colName.equalsIgnoreCase(oldColName)) { throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES .getMsg(oldColName)); @@ -286,7 +280,7 @@ public static Pair containsTokenOfType(ASTNode root, Integer .. final Set tokensToMatch = new HashSet<>(Arrays.asList(tokens)); final String[] matched = {null}; - boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate() { + boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate<>() { @Override public boolean apply(ASTNode node) { if (tokensToMatch.contains(node.getType())) { @@ -302,7 +296,7 @@ public boolean apply(ASTNode node) { } public static boolean containsTokenOfType(ASTNode root, PTFUtils.Predicate predicate) { - Queue queue = new ArrayDeque(); + Queue queue = new ArrayDeque<>(); // BFS queue.add(root); @@ -535,7 +529,7 @@ public static String getKeywords(Set excludes) { if (excludes != null && excludes.contains(name)) { continue; } - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(","); } sb.append(name); @@ -581,8 +575,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); + List partitionKeys = table.getPartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f8ec90287202..4823ebad9578 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12040,10 +12040,12 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns - for (FieldSchema part_col : tab.getPartCols()) { - LOG.trace("Adding partition col: " + part_col); - rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); + if(!tab.hasNonNativePartitionSupport()){ + for (FieldSchema partCol : tab.getPartCols()) { + LOG.trace("Adding partition col: " + partCol); + rwsch.put(alias, partCol.getName(), new ColumnInfo(partCol.getName(), + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), alias, true)); + } } // put virtual columns into RowResolver. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java index b72f2496d938..b7335473da85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java @@ -202,7 +202,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(hintStr); hintStr = null; } - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); values.addAll(sqlGenerator.getDeleteValues(Context.Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addValuesForRowLineageForCopyOnMerge(isRowLineageSupported, values, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 3ec2e580f046..cb759a11d080 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -224,7 +224,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(" -- update clause").append("\n"); List valuesAndAcidSortKeys = new ArrayList<>( - targetTable.getCols().size() + targetTable.getPartCols().size() + 1); + targetTable.getAllCols().size() + 1); valuesAndAcidSortKeys.addAll(sqlGenerator.getSortKeys(Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), valuesAndAcidSortKeys); sqlGenerator.appendInsertBranch(hintStr, valuesAndAcidSortKeys); @@ -249,7 +249,7 @@ protected void addValues(Table targetTable, String targetAlias, Map values.add( formatter.apply(fieldSchema.getName()))); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java index 84fcf186f6b7..06edaca90f0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java @@ -58,7 +58,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau String onClauseAsString = mergeStatement.getOnClauseAsText(); sqlGenerator.append(" -- update clause (insert part)\n"); - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addRowLineageColumnsForWhenMatchedUpdateClause(isRowLineageSupported, values, targetAlias, conf); sqlGenerator.appendInsertBranch(hintStr, values); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 0dcfe72d7f5b..b5b4662a1491 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -57,7 +57,7 @@ public class PartitionDesc implements Serializable, Cloneable { private static final Interner> CLASS_INTERNER = Interners.newWeakInterner(); private TableDesc tableDesc; - private LinkedHashMap partSpec; + private Map partSpec; private Class inputFileFormatClass; private Class outputFileFormatClass; private Properties properties; @@ -73,7 +73,7 @@ public void setBaseFileName(String baseFileName) { public PartitionDesc() { } - public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { + public PartitionDesc(final TableDesc table, final Map partSpec) { this.tableDesc = table; setPartSpec(partSpec); } @@ -138,11 +138,11 @@ public void setTableDesc(TableDesc tableDesc) { } @Explain(displayName = "partition values", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public LinkedHashMap getPartSpec() { + public Map getPartSpec() { return partSpec; } - public void setPartSpec(final LinkedHashMap partSpec) { + public void setPartSpec(final Map partSpec) { StringInternUtils.internValuesInMap(partSpec); this.partSpec = partSpec; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index be62d94019ed..4ff462d45ceb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -145,6 +145,24 @@ private boolean constructColumnStatsFromPackedRows(Table tbl, List partVals = new ArrayList<>(); if (tbl.hasNonNativePartitionSupport()) { + for (FieldSchema partCol : tbl.getPartCols()) { + PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()); + List columnStatsFields = ColumnStatsType.getColumnStats(typeInfo); + columnStatsFields = ColumnStatsType.removeDisabledStatistics(conf, columnStatsFields); + try { + ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveColumnStatistics( + partCol.getName(), partCol.getType(), columnStatsFields, pos, fields, values); + statsObjs.add(statObj); + numStats++; + } catch (Exception e) { + if (isStatsReliable) { + throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e); + } else { + LOG.debug("Because {} is infinite or NaN, we skip stats.", partCol.getName(), e); + } + } + pos += columnStatsFields.size(); + } ObjectInspector inspector = fields.get(pos).getFieldObjectInspector(); if (inspector.getCategory() == ObjectInspector.Category.STRUCT) { Object obj = values.get(pos); diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java index 0f5d7b915168..f81f8e9ec816 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java @@ -82,7 +82,7 @@ public static final CacheTag build(String tableName) { return new TableCacheTag(tableName); } - public static final CacheTag build(String tableName, LinkedHashMap partDescMap) { + public static final CacheTag build(String tableName, Map partDescMap) { if (StringUtils.isEmpty(tableName) || partDescMap == null || partDescMap.isEmpty()) { throw new IllegalArgumentException(); }