From 5ff17aee94575b08c59dc149891b334560f718da Mon Sep 17 00:00:00 2001 From: Pavithra Ramachandran Date: Tue, 17 Dec 2019 10:44:33 +0530 Subject: [PATCH] Column statistics received from Hive are compared by column name in a case-sensitive manner. When case-sensitive analysis is disabled, look the statistics up through a CaseInsensitiveMap keyed by column name, so that the match succeeds whatever the case of the input value. --- .../spark/sql/execution/command/tables.scala | 5 +- .../inputs/describe-table-column.sql | 13 +++++ .../results/describe-table-column.sql.out | 51 ++++++++++++++++++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 71500c304bd48..8dc07b7b3ce6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeTableSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -720,7 +720,8 @@ case class DescribeColumnCommand( } val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) - val colStats = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) + val colStatsMap = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) + val colStats = if (conf.caseSensitiveAnalysis) colStatsMap else
CaseInsensitiveMap(colStatsMap) val cs = colStats.get(field.name) val comment = if (field.metadata.contains("comment")) { diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql index 2d180d118da7a..821cb473751eb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql @@ -49,3 +49,16 @@ DROP VIEW desc_col_temp_view; DROP TABLE desc_col_table; DROP TABLE desc_complex_col_table; + +--Test case insensitive + +CREATE TABLE customer(CName STRING); + +INSERT INTO customer VALUES('Maria'); + +ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname; + +DESC EXTENDED customer cname; + +DROP TABLE customer; + diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index 6ef8af6574e98..a5471c0f1c8ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 28 -- !query 0 @@ -264,3 +264,52 @@ DROP TABLE desc_complex_col_table struct<> -- !query 22 output + + +-- !query 23 +CREATE TABLE customer(CName STRING) +-- !query 23 schema +struct<> +-- !query 23 output + + + +-- !query 24 +INSERT INTO customer VALUES('Maria') +-- !query 24 schema +struct<> +-- !query 24 output + + + +-- !query 25 +ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname +-- !query 25 schema +struct<> +-- !query 25 output + + + +-- !query 26 +DESC EXTENDED customer cname +-- !query 26 schema +struct<info_name:string,info_value:string> +-- !query 26 output +col_name cname +data_type string +comment NULL +min NULL +max NULL +num_nulls 0 +distinct_count 1 +avg_col_len 5 +max_col_len 5 +histogram NULL +
+ +-- !query 27 +DROP TABLE customer +-- !query 27 schema +struct<> +-- !query 27 output +