From 6b235d060cf97cedc0fee374c88ab02e39a56528 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 2 Jul 2015 13:15:47 +0100
Subject: [PATCH 1/3] SPARK-8789 improve SQLQuerySuite resilience by dropping
 tables in setup

---
 .../sql/hive/execution/SQLQuerySuite.scala | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 6d645393a6da1..619e36fb81270 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -62,6 +62,11 @@ class MyDialect extends DefaultParserDialect
  * valid, but Hive currently cannot execute it.
  */
 class SQLQuerySuite extends QueryTest {
+
+  def dropTable(table: String): Unit = {
+    sql(s"DROP TABLE IF EXISTS $table")
+  }
+
   test("SPARK-6835: udtf in lateral view") {
     val df = Seq((1, 1)).toDF("c1", "c2")
     df.registerTempTable("table1")
@@ -196,6 +201,7 @@ class SQLQuerySuite extends QueryTest {
     val originalConf = convertCTAS
 
     setConf(HiveContext.CONVERT_CTAS, true)
+    dropTable("ctas1")
 
     sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
     sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
@@ -270,6 +276,11 @@ class SQLQuerySuite extends QueryTest {
   }
 
   test("CTAS with serde") {
+    dropTable("ctas1")
+    dropTable("ctas2")
+    dropTable("ctas3")
+    dropTable("ctas4")
+    dropTable("ctas5")
     sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value").collect()
     sql(
       """CREATE TABLE ctas2
@@ -363,6 +374,8 @@ class SQLQuerySuite extends QueryTest {
   }
 
   test("specifying the column list for CTAS") {
+    dropTable("gen__tmp")
+    dropTable("mytable1")
     Seq((1, "111111"), (2, "222222")).toDF("key", "value").registerTempTable("mytable1")
 
     sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
@@ -434,6 +447,7 @@ class SQLQuerySuite extends QueryTest {
   }
 
   test("test CTAS") {
+    dropTable("test_ctas_123")
     checkAnswer(sql("CREATE TABLE test_ctas_123 AS SELECT key, value FROM src"), Seq.empty[Row])
     checkAnswer(
       sql("SELECT key, value FROM test_ctas_123 ORDER BY key"),
@@ -441,6 +455,8 @@ class SQLQuerySuite extends QueryTest {
   }
 
   test("SPARK-4825 save join to table") {
+    dropTable("test1")
+    dropTable("test2")
     val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF()
     sql("CREATE TABLE test1 (key INT, value STRING)")
     testData.write.mode(SaveMode.Append).insertInto("test1")
@@ -528,6 +544,7 @@ class SQLQuerySuite extends QueryTest {
     val rowRdd = sparkContext.parallelize(row :: Nil)
 
     TestHive.createDataFrame(rowRdd, schema).registerTempTable("testTable")
+    dropTable("nullValuesInInnerComplexTypes")
 
     sql(
       """CREATE TABLE nullValuesInInnerComplexTypes
@@ -607,6 +624,7 @@ class SQLQuerySuite extends QueryTest {
     read.json(rdd).registerTempTable("data")
     val originalConf = convertCTAS
     setConf(HiveContext.CONVERT_CTAS, false)
+    sql("DROP TABLE IF EXISTS explodeTest")
 
     sql("CREATE TABLE explodeTest (key bigInt)")
     table("explodeTest").queryExecution.analyzed match {
@@ -629,11 +647,14 @@ class SQLQuerySuite extends QueryTest {
   test("sanity test for SPARK-6618") {
     (1 to 100).par.map { i =>
       val tableName = s"SPARK_6618_table_$i"
-      sql(s"CREATE TABLE $tableName (col1 string)")
-      catalog.lookupRelation(Seq(tableName))
-      table(tableName)
-      tables()
-      sql(s"DROP TABLE $tableName")
+      try {
+        sql(s"CREATE TABLE $tableName (col1 string)")
+        catalog.lookupRelation(Seq(tableName))
+        table(tableName)
+        tables()
+      } finally {
+        sql(s"DROP TABLE IF EXISTS $tableName")
+      }
     }
   }
 
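Note on the pattern in patch 1: the resilience fix is plain try/finally cleanup,
so a test that fails midway can no longer leave its table behind for the next
run. A minimal, self-contained sketch of the idea — the sql() helper is stubbed
out here and the withCleanup name is hypothetical; the suite's real helper
executes against TestHive:

    object CleanupSketch {
      // Stand-in for the suite's sql(...) helper; for illustration only.
      def sql(statement: String): Unit = println(s"executing: $statement")

      // The DROP runs in the finally block, whether or not the body throws.
      def withCleanup(tableName: String)(body: => Unit): Unit =
        try body finally sql(s"DROP TABLE IF EXISTS $tableName")

      def main(args: Array[String]): Unit =
        withCleanup("SPARK_6618_table_1") {
          sql("CREATE TABLE SPARK_6618_table_1 (col1 string)")
        }
    }
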
From 723c59e296e91480cd4b5143266ec593f6eed88e Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 2 Jul 2015 19:06:25 +0100
Subject: [PATCH 2/3] SPARK-8789 SQLQuerySuite to use withTable.

This is a copy of the SQLTestUtils helper; that version expected an
explicit sqlContext, whereas in this suite everything is implicit.
---
 .../sql/hive/execution/SQLQuerySuite.scala | 420 +++++++++---------
 1 file changed, 215 insertions(+), 205 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 619e36fb81270..b2932fa3a905f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -63,8 +63,17 @@ class MyDialect extends DefaultParserDialect
  */
 class SQLQuerySuite extends QueryTest {
 
-  def dropTable(table: String): Unit = {
-    sql(s"DROP TABLE IF EXISTS $table")
+  /**
+   * Drops the tables in `tableNames` after calling `f`.
+   * This is the `SQLTestUtils` function without
+   * an explicit sql context.
+   */
+  protected def withTable(tableNames: String*)(f: => Unit): Unit = {
+    try f finally {
+      tableNames.foreach { name =>
+        sql(s"DROP TABLE IF EXISTS $name")
+      }
+    }
   }
 
   test("SPARK-6835: udtf in lateral view") {
@@ -198,52 +207,53 @@ class SQLQuerySuite extends QueryTest {
     }
   }
 
-    val originalConf = convertCTAS
+    withTable("ctas1") {
+      val originalConf = convertCTAS
 
-    setConf(HiveContext.CONVERT_CTAS, true)
-    dropTable("ctas1")
+      setConf(HiveContext.CONVERT_CTAS, true)
 
-    sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-    sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-    var message = intercept[AnalysisException] {
       sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-    }.getMessage
-    assert(message.contains("ctas1 already exists"))
-    checkRelation("ctas1", true)
-    sql("DROP TABLE ctas1")
-
-    // Specifying database name for query can be converted to data source write path
-    // is not allowed right now.
-    message = intercept[AnalysisException] {
-      sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
-    }.getMessage
-    assert(
-      message.contains("Cannot specify database name in a CTAS statement"),
-      "When spark.sql.hive.convertCTAS is true, we should not allow " +
-        "database name specified.")
-
-    sql("CREATE TABLE ctas1 stored as textfile AS SELECT key k, value FROM src ORDER BY k, value")
-    checkRelation("ctas1", true)
-    sql("DROP TABLE ctas1")
+      sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+      var message = intercept[AnalysisException] {
+        sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+      }.getMessage
+      assert(message.contains("ctas1 already exists"))
+      checkRelation("ctas1", true)
+      sql("DROP TABLE ctas1")
+
+      // Specifying database name for query can be converted to data source write path
+      // is not allowed right now.
+      message = intercept[AnalysisException] {
+        sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
+      }.getMessage
+      assert(
+        message.contains("Cannot specify database name in a CTAS statement"),
+        "When spark.sql.hive.convertCTAS is true, we should not allow " +
+          "database name specified.")
+
+      sql("CREATE TABLE ctas1 stored as textfile AS SELECT key k, value FROM src ORDER BY k, value")
+      checkRelation("ctas1", true)
+      sql("DROP TABLE ctas1")
 
-    sql(
-      "CREATE TABLE ctas1 stored as sequencefile AS SELECT key k, value FROM src ORDER BY k, value")
-    checkRelation("ctas1", true)
-    sql("DROP TABLE ctas1")
+      sql(
+        "CREATE TABLE ctas1 stored as sequencefile AS SELECT key k, value FROM src ORDER BY k, value")
+      checkRelation("ctas1", true)
+      sql("DROP TABLE ctas1")
 
-    sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value")
-    checkRelation("ctas1", false)
-    sql("DROP TABLE ctas1")
+      sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value")
+      checkRelation("ctas1", false)
+      sql("DROP TABLE ctas1")
 
-    sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value")
-    checkRelation("ctas1", false)
-    sql("DROP TABLE ctas1")
+      sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value")
+      checkRelation("ctas1", false)
+      sql("DROP TABLE ctas1")
 
-    sql("CREATE TABLE ctas1 stored as parquet AS SELECT key k, value FROM src ORDER BY k, value")
-    checkRelation("ctas1", false)
-    sql("DROP TABLE ctas1")
+      sql("CREATE TABLE ctas1 stored as parquet AS SELECT key k, value FROM src ORDER BY k, value")
+      checkRelation("ctas1", false)
+      sql("DROP TABLE ctas1")
 
-    setConf(HiveContext.CONVERT_CTAS, originalConf)
+      setConf(HiveContext.CONVERT_CTAS, originalConf)
+    }
   }
 
   test("SQL Dialect Switching") {
@@ -276,121 +286,118 @@ class SQLQuerySuite extends QueryTest {
   }
 
   test("CTAS with serde") {
-    dropTable("ctas1")
-    dropTable("ctas2")
-    dropTable("ctas3")
-    dropTable("ctas4")
-    dropTable("ctas5")
-    sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value").collect()
-    sql(
-      """CREATE TABLE ctas2
-        | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
-        | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2")
-        | STORED AS RCFile
-        | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22")
-        | AS
-        | SELECT key, value
-        | FROM src
-        | ORDER BY key, value""".stripMargin).collect()
-    sql(
-      """CREATE TABLE ctas3
-        | ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\012'
-        | STORED AS textfile AS
-        | SELECT key, value
-        | FROM src
-        | ORDER BY key, value""".stripMargin).collect()
-
-    // the table schema may like (key: integer, value: string)
-    sql(
-      """CREATE TABLE IF NOT EXISTS ctas4 AS
-        | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin).collect()
-    // do nothing cause the table ctas4 already existed.
-    sql(
-      """CREATE TABLE IF NOT EXISTS ctas4 AS
-        | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
-
-    checkAnswer(
-      sql("SELECT k, value FROM ctas1 ORDER BY k, value"),
-      sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
-    checkAnswer(
-      sql("SELECT key, value FROM ctas2 ORDER BY key, value"),
-      sql(
-        """
-          SELECT key, value
-          FROM src
-          ORDER BY key, value""").collect().toSeq)
-    checkAnswer(
-      sql("SELECT key, value FROM ctas3 ORDER BY key, value"),
-      sql(
-        """
-          SELECT key, value
-          FROM src
-          ORDER BY key, value""").collect().toSeq)
-    intercept[AnalysisException] {
-      sql(
-        """CREATE TABLE ctas4 AS
-          | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
-    }
-    checkAnswer(
-      sql("SELECT key, value FROM ctas4 ORDER BY key, value"),
-      sql("SELECT key, value FROM ctas4 LIMIT 1").collect().toSeq)
-
-    checkExistence(sql("DESC EXTENDED ctas2"), true,
-      "name:key", "type:string", "name:value", "ctas2",
-      "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
-      "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
-      "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
-      "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22", "MANAGED_TABLE"
-    )
-
-    val origUseParquetDataSource = conf.parquetUseDataSourceApi
-    try {
-      setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, false)
+    withTable("ctas1", "ctas2", "ctas3", "ctas4", "ctas5") {
+      sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value").collect()
+      sql(
+        """CREATE TABLE ctas2
+        | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
+        | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2")
+        | STORED AS RCFile
+        | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22")
+        | AS
+        | SELECT key, value
+        | FROM src
+        | ORDER BY key, value""".stripMargin).collect()
       sql(
-        """CREATE TABLE ctas5
-        | STORED AS parquet AS
+        """CREATE TABLE ctas3
+        | ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\012'
+        | STORED AS textfile AS
         | SELECT key, value
         | FROM src
         | ORDER BY key, value""".stripMargin).collect()
+
+      // the table schema may like (key: integer, value: string)
+      sql(
+        """CREATE TABLE IF NOT EXISTS ctas4 AS
+        | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin).collect()
+      // do nothing cause the table ctas4 already existed.
+      sql(
+        """CREATE TABLE IF NOT EXISTS ctas4 AS
+        | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
+
+      checkAnswer(
+        sql("SELECT k, value FROM ctas1 ORDER BY k, value"),
+        sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
+      checkAnswer(
+        sql("SELECT key, value FROM ctas2 ORDER BY key, value"),
+        sql(
+          """
+          SELECT key, value
+          FROM src
+          ORDER BY key, value""").collect().toSeq)
+      checkAnswer(
+        sql("SELECT key, value FROM ctas3 ORDER BY key, value"),
+        sql(
+          """
+          SELECT key, value
+          FROM src
+          ORDER BY key, value""").collect().toSeq)
+      intercept[AnalysisException] {
+        sql(
+          """CREATE TABLE ctas4 AS
+          | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
+      }
+      checkAnswer(
+        sql("SELECT key, value FROM ctas4 ORDER BY key, value"),
+        sql("SELECT key, value FROM ctas4 LIMIT 1").collect().toSeq)
+
+      checkExistence(sql("DESC EXTENDED ctas2"), true,
+        "name:key", "type:string", "name:value", "ctas2",
+        "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
+        "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
+        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
+        "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22", "MANAGED_TABLE"
+      )
+
+      val origUseParquetDataSource = conf.parquetUseDataSourceApi
+      try {
+        setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, false)
+        sql(
+          """CREATE TABLE ctas5
+          | STORED AS parquet AS
+          | SELECT key, value
+          | FROM src
+          | ORDER BY key, value""".stripMargin).collect()
+
+        checkExistence(sql("DESC EXTENDED ctas5"), true,
+          "name:key", "type:string", "name:value", "ctas5",
+          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
+          "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
+          "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
+          "MANAGED_TABLE"
+        )
+
+        val default = convertMetastoreParquet
+        // use the Hive SerDe for parquet tables
+        sql("set spark.sql.hive.convertMetastoreParquet = false")
+        checkAnswer(
+          sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
+          sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
+        sql(s"set spark.sql.hive.convertMetastoreParquet = $default")
+      } finally {
+        setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, origUseParquetDataSource)
+      }
+    }
   }
 
   test("specifying the column list for CTAS") {
-    dropTable("gen__tmp")
-    dropTable("mytable1")
-    Seq((1, "111111"), (2, "222222")).toDF("key", "value").registerTempTable("mytable1")
+    withTable("gen__tmp", "mytable1") {
+      Seq((1, "111111"), (2, "222222")).toDF("key", "value").registerTempTable("mytable1")
 
-    sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
-    checkAnswer(
-      sql("SELECT a, b from gen__tmp"),
-      sql("select key, value from mytable1").collect())
-    sql("DROP TABLE gen__tmp")
+      sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
+      checkAnswer(
+        sql("SELECT a, b from gen__tmp"),
+        sql("select key, value from mytable1").collect())
+      sql("DROP TABLE gen__tmp")
 
-    sql("create table gen__tmp(a double, b double) as select key, value from mytable1")
-    checkAnswer(
-      sql("SELECT a, b from gen__tmp"),
-      sql("select cast(key as double), cast(value as double) from mytable1").collect())
from mytable1").collect()) - sql("DROP TABLE gen__tmp") + sql("create table gen__tmp(a double, b double) as select key, value from mytable1") + checkAnswer( + sql("SELECT a, b from gen__tmp"), + sql("select cast(key as double), cast(value as double) from mytable1").collect()) + sql("DROP TABLE gen__tmp") - sql("drop table mytable1") + sql("drop table mytable1") + } } test("command substitution") { @@ -447,26 +454,27 @@ class SQLQuerySuite extends QueryTest { } test("test CTAS") { - dropTable("test_ctas_123") - checkAnswer(sql("CREATE TABLE test_ctas_123 AS SELECT key, value FROM src"), Seq.empty[Row]) - checkAnswer( - sql("SELECT key, value FROM test_ctas_123 ORDER BY key"), - sql("SELECT key, value FROM src ORDER BY key").collect().toSeq) + withTable("test_ctas_123") { + checkAnswer(sql("CREATE TABLE test_ctas_123 AS SELECT key, value FROM src"), Seq.empty[Row]) + checkAnswer( + sql("SELECT key, value FROM test_ctas_123 ORDER BY key"), + sql("SELECT key, value FROM src ORDER BY key").collect().toSeq) + } } test("SPARK-4825 save join to table") { - dropTable("test1") - dropTable("test2") - val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF() - sql("CREATE TABLE test1 (key INT, value STRING)") - testData.write.mode(SaveMode.Append).insertInto("test1") - sql("CREATE TABLE test2 (key INT, value STRING)") - testData.write.mode(SaveMode.Append).insertInto("test2") - testData.write.mode(SaveMode.Append).insertInto("test2") - sql("CREATE TABLE test AS SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key") - checkAnswer( - table("test"), - sql("SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key").collect().toSeq) + withTable("test1", "test2") { + val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF() + sql("CREATE TABLE test1 (key INT, value STRING)") + testData.write.mode(SaveMode.Append).insertInto("test1") + sql("CREATE TABLE test2 (key INT, value STRING)") + testData.write.mode(SaveMode.Append).insertInto("test2") + testData.write.mode(SaveMode.Append).insertInto("test2") + sql("CREATE TABLE test AS SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key") + checkAnswer( + table("test"), + sql("SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key").collect().toSeq) + } } test("SPARK-3708 Backticks aren't handled correctly is aliases") { @@ -533,39 +541,41 @@ class SQLQuerySuite extends QueryTest { } test("SPARK-5284 Insert into Hive throws NPE when a inner complex type field has a null value") { - val schema = StructType( - StructField("s", - StructType( - StructField("innerStruct", StructType(StructField("s1", StringType, true) :: Nil)) :: - StructField("innerArray", ArrayType(IntegerType), true) :: - StructField("innerMap", MapType(StringType, IntegerType)) :: Nil), true) :: Nil) - val row = Row(Row(null, null, null)) + withTable("nullValuesInInnerComplexTypes") { + val schema = StructType( + StructField("s", + StructType( + StructField("innerStruct", StructType(StructField("s1", StringType, true) :: Nil)) :: + StructField("innerArray", ArrayType(IntegerType), true) :: + StructField("innerMap", MapType(StringType, IntegerType)) :: Nil), true) :: Nil) + val row = Row(Row(null, null, null)) - val rowRdd = sparkContext.parallelize(row :: Nil) + val rowRdd = sparkContext.parallelize(row :: Nil) - TestHive.createDataFrame(rowRdd, schema).registerTempTable("testTable") - dropTable("nullValuesInInnerComplexTypes") + TestHive.createDataFrame(rowRdd, 
schema).registerTempTable("testTable") - sql( - """CREATE TABLE nullValuesInInnerComplexTypes - | (s struct, - | innerArray:array, - | innerMap: map>) - """.stripMargin).collect() + sql( + """CREATE TABLE nullValuesInInnerComplexTypes + | (s struct, + | innerArray:array, + | innerMap: map>) + """.stripMargin).collect() - sql( - """ - |INSERT OVERWRITE TABLE nullValuesInInnerComplexTypes - |SELECT * FROM testTable - """.stripMargin) + sql( + """ + |INSERT OVERWRITE TABLE nullValuesInInnerComplexTypes + |SELECT * FROM testTable + """.stripMargin) - checkAnswer( - sql("SELECT * FROM nullValuesInInnerComplexTypes"), - Row(Row(null, null, null)) - ) + checkAnswer( + sql("SELECT * FROM nullValuesInInnerComplexTypes"), + Row(Row(null, null, null)) + ) + + sql("DROP TABLE nullValuesInInnerComplexTypes") + dropTempTable("testTable") + } - sql("DROP TABLE nullValuesInInnerComplexTypes") - dropTempTable("testTable") } test("SPARK-4296 Grouping field with Hive UDF as sub expression") { @@ -620,28 +630,28 @@ class SQLQuerySuite extends QueryTest { // is not in a valid state (cannot be executed). Because of this bug, the analysis rule of // PreInsertionCasts will actually start to work before ImplicitGenerate and then // generates an invalid query plan. - val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) - read.json(rdd).registerTempTable("data") - val originalConf = convertCTAS - setConf(HiveContext.CONVERT_CTAS, false) - sql("DROP TABLE IF EXISTS explodeTest") - - sql("CREATE TABLE explodeTest (key bigInt)") - table("explodeTest").queryExecution.analyzed match { - case metastoreRelation: MetastoreRelation => // OK - case _ => - fail("To correctly test the fix of SPARK-5875, explodeTest should be a MetastoreRelation") - } + withTable("explodeTest") { + val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) + read.json(rdd).registerTempTable("data") + val originalConf = convertCTAS + setConf(HiveContext.CONVERT_CTAS, false) + + sql("CREATE TABLE explodeTest (key bigInt)") + table("explodeTest").queryExecution.analyzed match { + case metastoreRelation: MetastoreRelation => // OK + case _ => + fail("To correctly test the fix of SPARK-5875, explodeTest should be a MetastoreRelation") + } - sql(s"INSERT OVERWRITE TABLE explodeTest SELECT explode(a) AS val FROM data") - checkAnswer( - sql("SELECT key from explodeTest"), - (1 to 5).flatMap(i => Row(i) :: Row(i + 1) :: Nil) - ) + sql(s"INSERT OVERWRITE TABLE explodeTest SELECT explode(a) AS val FROM data") + checkAnswer( + sql("SELECT key from explodeTest"), + (1 to 5).flatMap(i => Row(i) :: Row(i + 1) :: Nil) + ) - sql("DROP TABLE explodeTest") - dropTempTable("data") - setConf(HiveContext.CONVERT_CTAS, originalConf) + dropTempTable("data") + setConf(HiveContext.CONVERT_CTAS, originalConf) + } } test("sanity test for SPARK-6618") { From d0d8dc7ab2071dc665606f0bad9ad01cdbddc1b1 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 2 Jul 2015 22:06:41 +0100 Subject: [PATCH 3/3] SPARK-8789 split SQL command across two lines to keep line width down --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index b2932fa3a905f..be7f442dae7bd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ 
From d0d8dc7ab2071dc665606f0bad9ad01cdbddc1b1 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 2 Jul 2015 22:06:41 +0100
Subject: [PATCH 3/3] SPARK-8789 split SQL command across two lines to keep
 line width down

---
 .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index b2932fa3a905f..be7f442dae7bd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -235,8 +235,8 @@ class SQLQuerySuite extends QueryTest {
       checkRelation("ctas1", true)
       sql("DROP TABLE ctas1")
 
-      sql(
-        "CREATE TABLE ctas1 stored as sequencefile AS SELECT key k, value FROM src ORDER BY k, value")
+      sql("CREATE TABLE ctas1 stored as sequencefile AS " +
+        "SELECT key k, value FROM src ORDER BY k, value")
       checkRelation("ctas1", true)
       sql("DROP TABLE ctas1")
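Note on patch 3: Scala's + concatenates the two string literals before the
statement ever reaches sql(), so the split changes source-line width only,
never the SQL text that is executed. A runnable check of that claim:

    object LineWidthSketch {
      def main(args: Array[String]): Unit = {
        val oneLine =
          "CREATE TABLE ctas1 stored as sequencefile AS SELECT key k, value FROM src ORDER BY k, value"
        val split = "CREATE TABLE ctas1 stored as sequencefile AS " +
          "SELECT key k, value FROM src ORDER BY k, value"
        // The two forms are the same statement; only the source layout differs.
        assert(oneLine == split)
        println("identical: " + (oneLine == split))
      }
    }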