From 2fa0aa69e92b69ab9433e5ced4e6fd1b4047cb70 Mon Sep 17 00:00:00 2001 From: "zhichao.li" Date: Fri, 18 Sep 2015 15:13:31 +0800 Subject: [PATCH 1/3] fix selection fails when a column has special characters --- .../main/scala/org/apache/spark/sql/DataFrame.scala | 4 ++-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 3e61123c145cd..6023af0e1557c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -648,7 +648,7 @@ class DataFrame private[sql]( */ def col(colName: String): Column = colName match { case "*" => - Column(ResolvedStar(schema.fieldNames.map(resolve))) + Column(ResolvedStar(schema.fieldNames.map(name => resolve(s"`$name`")))) case _ => val expr = resolve(colName) Column(expr) @@ -1181,7 +1181,7 @@ class DataFrame private[sql]( if (shouldRename) { val colNames = schema.map { field => val name = field.name - if (resolver(name, existingName)) Column(name).as(newName) else Column(name) + if (resolver(name, existingName)) Column(s"`$name`").as(newName) else Column(s"`$name`") } select(colNames : _*) } else { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 8126d02335217..663e51872dcb2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1116,6 +1116,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SELECT a.`c.b`, `b.$q`[0].`a@!.q`, `q.w`.`w.i&`[0] FROM t"), Row(1, 1, 1)) } + test("SPARK-10656: select(df(*)) fails when a column has special characters") { + // user should backtick the name if the column with special character '.' + // otherwise it's hard to differentiate the intention of "a.b" and "g.f" + val df = sqlContext.read.json(sqlContext.sparkContext.makeRDD( + """{"a.b": 10, "d": 11, "f": {"g": 12} }""" :: Nil)) + checkAnswer(df.select("f.g"), Row(12)) + checkAnswer(df.select("`a.b`"), Row(10)) + checkAnswer(df.select("*"), Row(10, 11, Row(12))) + checkAnswer(df.withColumnRenamed("f", "h").select("h"), Row(Row(12))) + checkAnswer(df.withColumnRenamed("f", "h").select("`a.b`"), Row(10)) + } + test("Convert hive interval term into Literal of CalendarIntervalType") { checkAnswer(sql("select interval '10-9' year to month"), Row(CalendarInterval.fromString("interval 10 years 9 months"))) From d360b07b673a72daebb82b10deed91f5c4fdb420 Mon Sep 17 00:00:00 2001 From: "zhichao.li" Date: Tue, 22 Sep 2015 09:45:44 +0800 Subject: [PATCH 2/3] add more unittest --- .../src/main/scala/org/apache/spark/sql/DataFrame.scala | 6 ++++-- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 6023af0e1557c..39d91b1c89bfc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1177,11 +1177,13 @@ class DataFrame private[sql]( */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver - val shouldRename = schema.exists(f => resolver(f.name, existingName)) + // use parseAttributeName here to drop backtick if it exists in the user input existingName. + val resolvedExistingName = UnresolvedAttribute.parseAttributeName(existingName).iterator.next() + val shouldRename = schema.exists(f => resolver(f.name, resolvedExistingName)) if (shouldRename) { val colNames = schema.map { field => val name = field.name - if (resolver(name, existingName)) Column(s"`$name`").as(newName) else Column(s"`$name`") + if (resolver(name, resolvedExistingName)) Column(s"`$name`").as(newName) else Column(s"`$name`") } select(colNames : _*) } else { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 663e51872dcb2..e38ff3d98d47c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1123,8 +1123,10 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { """{"a.b": 10, "d": 11, "f": {"g": 12} }""" :: Nil)) checkAnswer(df.select("f.g"), Row(12)) checkAnswer(df.select("`a.b`"), Row(10)) - checkAnswer(df.select("*"), Row(10, 11, Row(12))) + checkAnswer(df.select(df("*")), Row(10, 11, Row(12))) checkAnswer(df.withColumnRenamed("f", "h").select("h"), Row(Row(12))) + checkAnswer(df.withColumnRenamed("f", "f").select("f"), Row(Row(12))) + checkAnswer(df.withColumnRenamed("`a.b`", "s").select("s"), Row(10)) checkAnswer(df.withColumnRenamed("f", "h").select("`a.b`"), Row(10)) } From 626a82b08fd1545329add7c761771a6c2b3c8b17 Mon Sep 17 00:00:00 2001 From: "zhichao.li" Date: Tue, 22 Sep 2015 12:20:20 +0800 Subject: [PATCH 3/3] style --- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 39d91b1c89bfc..0a15d98ec75fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1183,7 +1183,8 @@ class DataFrame private[sql]( if (shouldRename) { val colNames = schema.map { field => val name = field.name - if (resolver(name, resolvedExistingName)) Column(s"`$name`").as(newName) else Column(s"`$name`") + if (resolver(name, resolvedExistingName)) Column(s"`$name`").as(newName) + else Column(s"`$name`") } select(colNames : _*) } else {