From 5c73b72b917ad0cb16b76411f961731527022e36 Mon Sep 17 00:00:00 2001 From: chirag Date: Mon, 6 Oct 2014 16:40:30 +0530 Subject: [PATCH 1/4] SPARK-3807: SparkSql does not work for tables created using custom serde --- .../apache/spark/sql/hive/execution/HiveTableScan.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index 577ca928b43b6..ee6b8fd770346 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -80,10 +80,15 @@ case class HiveTableScan( ColumnProjectionUtils.appendReadColumnIDs(hiveConf, neededColumnIDs) ColumnProjectionUtils.appendReadColumnNames(hiveConf, attributes.map(_.name)) + val td = relation.tableDesc + val deClass = td.getDeserializerClass; + val de = deClass.newInstance(); + de.initialize(hiveConf, td.getProperties); + // Specifies types and object inspectors of columns to be scanned. val structOI = ObjectInspectorUtils .getStandardObjectInspector( - relation.tableDesc.getDeserializer.getObjectInspector, + de.getObjectInspector, ObjectInspectorCopyOption.JAVA) .asInstanceOf[StructObjectInspector] From ba4bc0c5307e3a6655ffc878207d4748bb493693 Mon Sep 17 00:00:00 2001 From: chirag Date: Wed, 8 Oct 2014 10:38:17 +0530 Subject: [PATCH 2/4] SPARK-3807: SparkSql does not work for tables created using custom serde --- .../scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 6571c35499ef4..7f5a2640817f9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -313,7 +313,7 @@ private[hive] case class MetastoreRelation val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute) /** Non-partitionKey attributes */ - val attributes = table.getSd.getCols.map(_.toAttribute) + val attributes = hiveQlTable.getCols.map(_.toAttribute) val output = attributes ++ partitionKeys } From 1f26805e714049e83ed9399c8ca8361da459777c Mon Sep 17 00:00:00 2001 From: chirag Date: Fri, 10 Oct 2014 10:28:35 +0530 Subject: [PATCH 3/4] SPARK-3807: SparkSql does not work for tables created using custom serde (Incorporated Review Comments) --- .../apache/spark/sql/hive/execution/HiveTableScan.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index ee6b8fd770346..1d13d35a35ad1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -80,15 +80,14 @@ case class HiveTableScan( ColumnProjectionUtils.appendReadColumnIDs(hiveConf, neededColumnIDs) ColumnProjectionUtils.appendReadColumnNames(hiveConf, attributes.map(_.name)) - val td = relation.tableDesc - val deClass = td.getDeserializerClass; - val de = deClass.newInstance(); - de.initialize(hiveConf, td.getProperties); + val tableDesc = relation.tableDesc + val deserializer = tableDesc.getDeserializerClass.newInstance + deserializer.initialize(hiveConf, tableDesc.getProperties) // Specifies types and object inspectors of columns to be scanned. val structOI = ObjectInspectorUtils .getStandardObjectInspector( - de.getObjectInspector, + deserializer.getObjectInspector, ObjectInspectorCopyOption.JAVA) .asInstanceOf[StructObjectInspector] From 370c31b58d15c0549176939509ddc44e9f5f677e Mon Sep 17 00:00:00 2001 From: chirag Date: Fri, 10 Oct 2014 17:35:03 +0530 Subject: [PATCH 4/4] SPARK-3807: Add a test case to validate the fix. --- .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index c4abb3eb4861f..d2587431951b8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -581,6 +581,9 @@ class HiveQuerySuite extends HiveComparisonTest { clear() } + createQueryTest("select from thrift based table", + "SELECT * from src_thrift") + // Put tests that depend on specific Hive settings before these last two test, // since they modify /clear stuff. }