From 91763fd5b94f0d11ae0ccdfca91d6ccade4ebca2 Mon Sep 17 00:00:00 2001
From: animesh
Date: Fri, 26 Jun 2015 11:16:44 +0530
Subject: [PATCH 1/2] 8621: Enclose row names and column names in " "

---
 .../org/apache/spark/sql/execution/stat/StatFunctions.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 93383e5a62f11..e8c48cfe1623f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -123,10 +123,10 @@ private[sql] object StatFunctions extends Logging {
         countsRow.setLong(distinctCol2.get(row.get(1)).get + 1, row.getLong(2))
       }
       // the value of col1 is the first value, the rest are the counts
-      countsRow.setString(0, col1Item.toString)
+      countsRow.setString(0, "\"" + col1Item.toString + "\"")
       countsRow
     }.toSeq
-    val headerNames = distinctCol2.map(r => StructField(r._1.toString, LongType)).toSeq
+    val headerNames = distinctCol2.map(r => StructField("\"" + r._1.toString + "\"", LongType)).toSeq
     val schema = StructType(StructField(tableName, StringType) +: headerNames)

     new DataFrame(df.sqlContext, LocalRelation(schema.toAttributes, table)).na.fill(0.0)

From 8cd66fc202503a5383d9341a7219e15fc4df7257 Mon Sep 17 00:00:00 2001
From: animesh
Date: Tue, 30 Jun 2015 13:14:16 +0530
Subject: [PATCH 2/2] SPARK-8621: Support empty string Analysis

---
 .../catalyst/plans/logical/LogicalPlan.scala  | 50 +++++++++++--------
 .../sql/execution/stat/StatFunctions.scala    |  4 +-
 2 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index b009a200b920f..b43bee1f2ae0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -146,32 +146,40 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
     val nameParts = scala.collection.mutable.ArrayBuffer.empty[String]
     val tmp = scala.collection.mutable.ArrayBuffer.empty[Char]
     var inBacktick = false
-    var i = 0
-    while (i < name.length) {
-      val char = name(i)
-      if (inBacktick) {
-        if (char == '`') {
-          inBacktick = false
-          if (i + 1 < name.length && name(i + 1) != '.') throw e
-        } else {
-          tmp += char
-        }
-      } else {
-        if (char == '`') {
-          if (tmp.nonEmpty) throw e
-          inBacktick = true
-        } else if (char == '.') {
-          if (tmp.isEmpty) throw e
-          nameParts += tmp.mkString
-          tmp.clear()
+    if (name.length == 0) {
+      tmp += '\0'
+    }
+    else {
+      var i = 0
+      while (i < name.length) {
+        val char = name(i)
+        if (inBacktick) {
+          if (char == '`') {
+            inBacktick = false
+            if (name(i - 1) == '`') tmp += '\0'
+            if (i + 1 < name.length && name(i + 1) != '.') throw e
+          } else {
+            tmp += char
+          }
         } else {
-          tmp += char
+          if (char == '`') {
+            if (tmp.nonEmpty) throw e
+            inBacktick = true
+          } else if (char == '.') {
+            if (tmp.isEmpty) throw e
+            if (tmp == scala.collection.mutable.ArrayBuffer('\0')) nameParts += ""
+            else nameParts += tmp.mkString
+            tmp.clear()
+          } else {
+            tmp += char
+          }
         }
+        i += 1
       }
-      i += 1
     }
     if (tmp.isEmpty || inBacktick) throw e
-    nameParts += tmp.mkString
+    if (tmp == scala.collection.mutable.ArrayBuffer('\0')) nameParts += ""
+= "" + else nameParts += tmp.mkString nameParts.toSeq } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index e8c48cfe1623f..93383e5a62f11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -123,10 +123,10 @@ private[sql] object StatFunctions extends Logging { countsRow.setLong(distinctCol2.get(row.get(1)).get + 1, row.getLong(2)) } // the value of col1 is the first value, the rest are the counts - countsRow.setString(0, "\"" + col1Item.toString + "\"") + countsRow.setString(0, col1Item.toString) countsRow }.toSeq - val headerNames = distinctCol2.map(r => StructField("\"" + r._1.toString + "\"", LongType)).toSeq + val headerNames = distinctCol2.map(r => StructField(r._1.toString, LongType)).toSeq val schema = StructType(StructField(tableName, StringType) +: headerNames) new DataFrame(df.sqlContext, LocalRelation(schema.toAttributes, table)).na.fill(0.0)