From 2e7fdb9eb01a0b618114b00518ae3469f52e0e8e Mon Sep 17 00:00:00 2001 From: Sameer Agarwal Date: Tue, 29 Mar 2016 12:14:10 -0700 Subject: [PATCH 1/3] [SPARK-14225][SQL] Cap the length of toCommentSafeString at 128 chars --- .../sql/catalyst/expressions/codegen/CodeFormatter.scala | 5 ++++- .../org/apache/spark/sql/catalyst/util/package.scala | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index 9d99bbffbe13e..cb68da46d9665 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -49,8 +49,11 @@ private class CodeFormatter { private var currentLine = 1 private def addLine(line: String): Unit = { - val indentChange = + val indentChange = if (line.startsWith("/*") || line.startsWith("//")) { + 0 // comments shouldn't affect indentation of subsequent lines + } else { line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) + } val newIndentLevel = math.max(0, indentLevel + indentChange) // Lines starting with '}' should be de-indented even if they contain '{' after; // in addition, lines ending with ':' are typically labels diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index b11365b297184..f879b34358a9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -155,10 +155,13 @@ package object util { /** * Returns the string representation of this expression that is safe to be put in - * code comments of generated code. + * code comments of generated code. The length is capped at 128 characters. */ - def toCommentSafeString(str: String): String = - str.replace("*/", "\\*\\/").replace("\\u", "\\\\u") + def toCommentSafeString(str: String): String = { + val len = math.min(str.length, 128) + val suffix = if (str.length > len) "..." else "" + str.substring(0, len).replace("*/", "\\*\\/").replace("\\u", "\\\\u") + suffix + } /* FIX ME implicit class debugLogging(a: Any) { From 846414959f889bc6293d4d4786da01b762de556d Mon Sep 17 00:00:00 2001 From: Sameer Agarwal Date: Tue, 29 Mar 2016 14:18:21 -0700 Subject: [PATCH 2/3] support formatting multi-line comments --- .../expressions/codegen/CodeFormatter.scala | 33 ++++++++++++--- .../codegen/CodeFormatterSuite.scala | 41 ++++++++++++++++++- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index cb68da46d9665..c7e3058711872 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -43,18 +43,41 @@ object CodeFormatter { private class CodeFormatter { private val code = new StringBuilder - private var indentLevel = 0 private val indentSize = 2 + + // Tracks the level of indentation in the current line. + private var indentLevel = 0 private var indentString = "" private var currentLine = 1 + // Tracks the level of indentation in multi-line comment blocks. + private var inCommentBlock = false + private var indentLevelOutsideCommentBlock = indentLevel + private def addLine(line: String): Unit = { - val indentChange = if (line.startsWith("/*") || line.startsWith("//")) { - 0 // comments shouldn't affect indentation of subsequent lines + + val indentChange = line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) + var newIndentLevel = math.max(0, indentLevel + indentChange) + + // Please note that while we try to format the comment blocks in exactly the same way as the + // rest of the code, once the block ends, we reset the next line's indentation level to what it + // was immediately before entering the comment block. + if (!inCommentBlock) { + if (line.startsWith("/*")) { + // Handle multi-line comments + inCommentBlock = true + indentLevelOutsideCommentBlock = indentLevel + } else if (line.startsWith("//")) { + // Handle single line comments + newIndentLevel = indentLevel + } } else { - line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) + if (line.endsWith("*/")) { + inCommentBlock = false + newIndentLevel = indentLevelOutsideCommentBlock + } } - val newIndentLevel = math.max(0, indentLevel + indentChange) + // Lines starting with '}' should be de-indented even if they contain '{' after; // in addition, lines ending with ':' are typically labels val thisLineIndent = if (line.startsWith("}") || line.startsWith(")") || line.endsWith(":")) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 9da1068e9ca1d..8973cbfb9c4dd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -18,13 +18,20 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util._ class CodeFormatterSuite extends SparkFunSuite { def testCase(name: String)(input: String)(expected: String): Unit = { test(name) { - assert(CodeFormatter.format(input).trim === expected.trim) + if (CodeFormatter.format(input).trim !== expected.trim) { + fail( + s""" + |== FAIL: Formatted code doesn't match === + |${sideBySide(CodeFormatter.format(input).trim, expected.trim).mkString("\n")} + """.stripMargin) + } } } @@ -93,4 +100,36 @@ class CodeFormatterSuite extends SparkFunSuite { |/* 004 */ c) """.stripMargin } + + testCase("single line comments") { + """// This is a comment about class A { { { ( ( + |class A { + |class body; + |}""".stripMargin + }{ + """ + |/* 001 */ // This is a comment about class A { { { ( ( + |/* 002 */ class A { + |/* 003 */ class body; + |/* 004 */ } + """.stripMargin + } + + testCase("multi-line comments") { + """/* This is a comment about + |class A { + |class body; ...*/ + |class A { + |class body; + |}""".stripMargin + }{ + """ + |/* 001 */ /* This is a comment about + |/* 002 */ class A { + |/* 003 */ class body; ...*/ + |/* 004 */ class A { + |/* 005 */ class body; + |/* 006 */ } + """.stripMargin + } } From 978e932a351376a5d315b005653096971444de0f Mon Sep 17 00:00:00 2001 From: Sameer Agarwal Date: Tue, 29 Mar 2016 14:59:58 -0700 Subject: [PATCH 3/3] PR --- .../spark/sql/catalyst/expressions/codegen/CodeFormatter.scala | 3 +++ .../sql/catalyst/expressions/codegen/CodeFormatterSuite.scala | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala index c7e3058711872..8e40754dc3270 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -56,6 +56,9 @@ private class CodeFormatter { private def addLine(line: String): Unit = { + // We currently infer the level of indentation of a given line based on a simple heuristic that + // examines the number of parenthesis and braces in that line. This isn't the most robust + // implementation but works for all code that we generate. val indentChange = line.count(c => "({".indexOf(c) >= 0) - line.count(c => ")}".indexOf(c) >= 0) var newIndentLevel = math.max(0, indentLevel + indentChange) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala index 8973cbfb9c4dd..d7836aa3b2bd6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -116,7 +116,7 @@ class CodeFormatterSuite extends SparkFunSuite { } testCase("multi-line comments") { - """/* This is a comment about + """ /* This is a comment about |class A { |class body; ...*/ |class A {