From fa902d6d3fb635236ac01ee5b43470359f16cfdd Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Fri, 5 Jan 2018 21:20:53 +0800 Subject: [PATCH 1/6] [SPARK-22972] Couldn't find corresponding Hive SerDe for data source provider org.apache.spark.sql.hive.orc. --- .../apache/spark/sql/internal/HiveSerDe.scala | 2 +- .../spark/sql/hive/orc/HiveOrcSourceSuite.scala | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index b9515ec7bca2a..22f46877e527f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -72,7 +72,7 @@ object HiveSerDe { def sourceToSerDe(source: String): Option[HiveSerDe] = { val key = source.toLowerCase(Locale.ROOT) match { case s if s.startsWith("org.apache.spark.sql.parquet") => "parquet" - case s if s.startsWith("org.apache.spark.sql.orc") => "orc" + case s if s.startsWith("org.apache.spark.sql.hive.orc") => "orc" case s if s.equals("orcfile") => "orc" case s if s.equals("parquetfile") => "parquet" case s if s.equals("avrofile") => "avro" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 17b7d8cfe127e..5d0f6bf5b97d7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -22,6 +22,7 @@ import java.io.File import org.apache.spark.sql.Row import org.apache.spark.sql.execution.datasources.orc.OrcSuite import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.util.Utils class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { @@ -62,6 +63,22 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { """.stripMargin) } + test("SPARK-22972: hive orc source") { + spark.sql( + s"""CREATE TABLE normal_orc_as_source_hive + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) + """. + stripMargin) + spark.sql( + "desc formatted normal_orc_as_source_hive").show() + checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source_hive"), Row(10)) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) + } + test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { val location = Utils.createTempDir() val uri = location.toURI From cf7cbce6061894eacbfd334f75476268068446c9 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Fri, 5 Jan 2018 21:49:48 +0800 Subject: [PATCH 2/6] keep the original one and update code style --- .../scala/org/apache/spark/sql/internal/HiveSerDe.scala | 1 + .../apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index 22f46877e527f..dac463641cfab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -72,6 +72,7 @@ object HiveSerDe { def sourceToSerDe(source: String): Option[HiveSerDe] = { val key = source.toLowerCase(Locale.ROOT) match { case s if s.startsWith("org.apache.spark.sql.parquet") => "parquet" + case s if s.startsWith("org.apache.spark.sql.orc") => "orc" case s if s.startsWith("org.apache.spark.sql.hive.orc") => "orc" case s if s.equals("orcfile") => "orc" case s if s.equals("parquetfile") => "parquet" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 5d0f6bf5b97d7..ae870722d8e16 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -70,13 +70,14 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { |OPTIONS ( | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' |) - """. - stripMargin) - spark.sql( - "desc formatted normal_orc_as_source_hive").show() + """.stripMargin) + + spark.sql("desc formatted normal_orc_as_source_hive").show() checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source_hive"), Row(10)) assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") .equals(HiveSerDe.sourceToSerDe("orc"))) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) } test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { From b64ce532d36442cde636db54d8ecbc08d6030825 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Fri, 5 Jan 2018 22:05:47 +0800 Subject: [PATCH 3/6] fix scala style: Whitespace at end of line --- .../scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index ae870722d8e16..c3d447148b708 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -71,7 +71,6 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' |) """.stripMargin) - spark.sql("desc formatted normal_orc_as_source_hive").show() checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source_hive"), Row(10)) assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") From 1cfa72f12c417ad949abcf3344be2a461c38b246 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 8 Jan 2018 09:40:48 +0800 Subject: [PATCH 4/6] add withTable and tableSchema --- .../sql/hive/orc/HiveOrcSourceSuite.scala | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index c3d447148b708..4b26964825ed4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.orc import java.io.File import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.datasources.orc.OrcSuite import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.HiveSerDe @@ -64,19 +65,28 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } test("SPARK-22972: hive orc source") { - spark.sql( - s"""CREATE TABLE normal_orc_as_source_hive - |USING org.apache.spark.sql.hive.orc - |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' - |) + val tableName = "normal_orc_as_source_hive" + withTable(tableName) { + sql( + s"""CREATE TABLE $tableName + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) """.stripMargin) - spark.sql("desc formatted normal_orc_as_source_hive").show() - checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source_hive"), Row(10)) - assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") - .equals(HiveSerDe.sourceToSerDe("orc"))) - assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.orc") - .equals(HiveSerDe.sourceToSerDe("orc"))) + val tableMetadata = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName)) + assert(tableMetadata.storage.inputFormat == + Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) + assert(tableMetadata.storage.outputFormat == + Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + assert(tableMetadata.storage.serde == + Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) + } } test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { From cc4dd13c8d65aec3758126c7e163fde56bf6033f Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 8 Jan 2018 09:47:37 +0800 Subject: [PATCH 5/6] add a line --- .../org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 4b26964825ed4..105037fc12df6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -67,6 +67,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-22972: hive orc source") { val tableName = "normal_orc_as_source_hive" withTable(tableName) { + sql( s"""CREATE TABLE $tableName |USING org.apache.spark.sql.hive.orc @@ -74,6 +75,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' |) """.stripMargin) + val tableMetadata = spark.sessionState.catalog.getTableMetadata( TableIdentifier(tableName)) assert(tableMetadata.storage.inputFormat == From 3eafdfba0cbd50bc1e987f69b87331b162aef0ec Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Mon, 8 Jan 2018 20:48:27 +0800 Subject: [PATCH 6/6] update style --- .../spark/sql/hive/orc/HiveOrcSourceSuite.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 105037fc12df6..d556a030e2186 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -67,14 +67,14 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-22972: hive orc source") { val tableName = "normal_orc_as_source_hive" withTable(tableName) { - sql( - s"""CREATE TABLE $tableName - |USING org.apache.spark.sql.hive.orc - |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' - |) - """.stripMargin) + s""" + |CREATE TABLE $tableName + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) + """.stripMargin) val tableMetadata = spark.sessionState.catalog.getTableMetadata( TableIdentifier(tableName))