[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser #10723

Closed · wants to merge 20 commits · showing changes from 17 commits
5 changes: 5 additions & 0 deletions project/MimaExcludes.scala
@@ -153,6 +153,11 @@ object MimaExcludes {
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$_log_="),
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log_"),
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log__=")
) ++ Seq(
// SPARK-12689 Migrate DDL parsing to the newly absorbed parser
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLParser"),
Contributor: Please add a comment saying which JIRA ticket causes this; see the examples above.

ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLException"),
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.ddlParser")
) ++ Seq(
// SPARK-7799 Add "streaming-akka" project
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream"),
@@ -493,6 +493,16 @@ descFuncNames
| functionIdentifier
;

// We are allowed to use FROM and TO in a CreateTableUsing command's options (it seems any string can actually be used as an option key). But we can't simply add them to nonReserved, because doing so would mess up other existing rules. So we create looseIdentifier and looseNonReserved here.
looseIdentifier
:
Identifier
| looseNonReserved -> Identifier[$looseNonReserved.text]
// If it decides to support SQL11 reserved keywords, i.e., useSQL11ReservedKeywordsForIdentifier()=false,
// the sql11keywords in existing q tests will NOT be added back.
| {useSQL11ReservedKeywordsForIdentifier()}? sql11ReservedKeywordsUsedAsIdentifier -> Identifier[$sql11ReservedKeywordsUsedAsIdentifier.text]
;

identifier
:
Identifier
@@ -518,6 +528,10 @@ principalIdentifier
| QuotedIdentifier
;

looseNonReserved
: nonReserved | KW_FROM | KW_TO
;

Member Author: We are allowed to use FROM and TO in a CreateTableUsing command's options (it seems any string can actually be used as an option key), but we can't simply add them to nonReserved because doing so would mess up other existing rules. So we create looseIdentifier and looseNonReserved here.

Contributor: Why not add this to the option rule directly?

Member Author: Because I don't know whether we will add other reserved words later; if so, the option rule might get too long. I haven't checked whether any other keywords are missing from nonReserved.

Member Author: Both approaches (the current one, or adding it to the option rule) are okay with me.

Contributor: Could you add your initial line comment as a comment in the code?

Member Author: Thanks for the reminder. I've added it.
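
For illustration, a statement exercising this rule might look like the following (the table name, provider, and option values here are made up):

sql("CREATE TEMPORARY TABLE t USING org.apache.spark.sql.json OPTIONS (from '1', to '10', path '/hypothetical/path')")

Here from and to are accepted as option keys via looseIdentifier even though FROM and TO are keywords.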

//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old version of nonReserved
//Non reserved keywords are basically the keywords that can be used as identifiers.
//All the KW_* are automatically not only keywords, but also reserved keywords.
@@ -324,6 +324,8 @@ KW_ISOLATION: 'ISOLATION';
KW_LEVEL: 'LEVEL';
KW_SNAPSHOT: 'SNAPSHOT';
KW_AUTOCOMMIT: 'AUTOCOMMIT';
KW_REFRESH: 'REFRESH';
KW_OPTIONS: 'OPTIONS';
KW_WEEK: 'WEEK'|'WEEKS';
KW_MILLISECOND: 'MILLISECOND'|'MILLISECONDS';
KW_MICROSECOND: 'MICROSECOND'|'MICROSECONDS';
@@ -465,7 +467,7 @@ Identifier
fragment
QuotedIdentifier
:
'`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
'`' ( '``' | ~('`') )* '`' { setText(getText().replaceAll("``", "`")); }
Member Author: The old rule simply strips the backquotes. I think we should keep them because they have special meaning; at least the column name rule will need them.

Contributor: So we are not stripping quotes in the middle of strings anymore?

Member Author: We just don't strip the first and last backquotes anymore, since I removed the call to substring.

Contributor: So we want backticks at the beginning and the end of the identifier? I thought the first and last backtick were a means of identifying a quoted identifier, not part of the name. Do these backticks remain part of the name throughout the code?

Contributor: You are using the CatalystQl cleanIdentifier function to get rid of the remaining backticks. What happens if we do this:

sql("select * from (select 1 as ```weird`name`) as a")

Member Author: The above query gets a ParseException (mismatched character '' expecting '`') in both Hive and this PR.

Contributor: Sorry, I mispasted the query (GitHub also uses backticks for escaping):

sql("select * from (select 1 as ```weird``name`) as a")

We currently also support backticks in the name. The regex used in CatalystQl.cleanIdentifier is quite limited and will not strip the initial and final backticks in this case. So we were expecting this:

`weird`name

And getting this:

``weird`name`

Member Author: Then I think we can make cleanIdentifier strip the initial and final backticks for this case too. Actually, it should strip the initial and final backticks in all cases.

Contributor: That would solve it.

Member Author: Yeah, I will update this later. Thanks.
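
A minimal sketch of that follow-up, assuming cleanIdentifier simply drops one pair of outer backticks (the exact implementation is a guess, not what was merged):

protected def cleanIdentifier(ident: String): String = {
  // The lexer now keeps the outer backticks and only collapses doubled
  // (escaped) backticks, so strip one leading and one trailing backtick here.
  if (ident.length > 1 && ident.head == '`' && ident.last == '`') {
    ident.substring(1, ident.length - 1)
  } else {
    ident
  }
}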

;

WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
@@ -142,6 +142,7 @@ TOK_UNIONTYPE;
TOK_COLTYPELIST;
TOK_CREATEDATABASE;
TOK_CREATETABLE;
TOK_CREATETABLEUSING;
TOK_TRUNCATETABLE;
TOK_CREATEINDEX;
TOK_CREATEINDEX_INDEXTBLNAME;
@@ -371,6 +372,10 @@ TOK_TXN_READ_WRITE;
TOK_COMMIT;
TOK_ROLLBACK;
TOK_SET_AUTOCOMMIT;
TOK_REFRESHTABLE;
TOK_TABLEPROVIDER;
TOK_TABLEOPTIONS;
TOK_TABLEOPTION;
}


@@ -648,6 +653,12 @@ import java.util.HashMap;
}
private char [] excludedCharForColumnName = {'.', ':'};
private boolean containExcludedCharForCreateTableColumnName(String input) {
  if (input.length() > 0) {
    if (input.charAt(0) == '`' && input.charAt(input.length() - 1) == '`') {
      // When column name is backquoted, we don't care about excluded chars.
      return false;
    }
  }
Member Author: As the comment says, when we specify column names in backquotes, we can use these excluded chars.
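
For example, a dotted column name is now accepted when backquoted (hypothetical statement):

sql("CREATE TEMPORARY TABLE t (`a.b` INT) USING org.apache.spark.sql.json OPTIONS (path '/hypothetical/path')")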

  for (char c : excludedCharForColumnName) {
    if (input.indexOf(c) > -1) {
      return true;
@@ -764,6 +775,7 @@ ddlStatement
| truncateTableStatement
| alterStatement
| descStatement
| refreshStatement
| showStatement
| metastoreCheck
| createViewStatement
@@ -890,12 +902,31 @@ createTableStatement
@init { pushMsg("create table statement", state); }
@after { popMsg(state); }
: KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName
( like=KW_LIKE likeName=tableName
(
like=KW_LIKE likeName=tableName
tableRowFormat?
tableFileFormat?
tableLocation?
tablePropertiesPrefixed?
-> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
^(TOK_LIKETABLE $likeName?)
tableRowFormat?
tableFileFormat?
tableLocation?
tablePropertiesPrefixed?
)
|
tableProvider
tableOpts?
(KW_AS selectStatementWithCTE)?
-> ^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
tableProvider
tableOpts?
selectStatementWithCTE?
)
| (LPAREN columnNameTypeList RPAREN)?
(p=tableProvider?)
tableOpts?
tableComment?
tablePartition?
tableBuckets?
@@ -905,8 +936,15 @@
tableLocation?
tablePropertiesPrefixed?
(KW_AS selectStatementWithCTE)?
)
-> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
-> {p != null}?
^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
columnNameTypeList?
$p
tableOpts?
selectStatementWithCTE?
)
->
^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
^(TOK_LIKETABLE $likeName?)
columnNameTypeList?
tableComment?
Expand All @@ -918,7 +956,8 @@ createTableStatement
tableLocation?
tablePropertiesPrefixed?
selectStatementWithCTE?
)
)
)
;
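
To make the new alternatives concrete, here are hedged examples of statements the TOK_CREATETABLEUSING branches should now accept (providers, names, and paths are made up):

sql("CREATE TEMPORARY TABLE t USING org.apache.spark.sql.json OPTIONS (path '/hypothetical/path')")
sql("CREATE TABLE t2 (id INT, name STRING) USING parquet")
sql("CREATE TABLE t3 USING parquet OPTIONS (path '/hypothetical/path') AS SELECT 1 AS id")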

truncateTableStatement
@@ -1362,6 +1401,13 @@ tabPartColTypeExpr
: tableName partitionSpec? extColumnName? -> ^(TOK_TABTYPE tableName partitionSpec? extColumnName?)
;

refreshStatement
@init { pushMsg("refresh statement", state); }
@after { popMsg(state); }
:
KW_REFRESH KW_TABLE tableName -> ^(TOK_REFRESHTABLE tableName)
;
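
For reference, the new rule accepts statements like this (hypothetical names):

sql("REFRESH TABLE mydb.mytable")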

descStatement
@init { pushMsg("describe statement", state); }
@after { popMsg(state); }
@@ -1757,6 +1803,30 @@ showStmtIdentifier
| StringLiteral
;

tableProvider
@init { pushMsg("table's provider", state); }
@after { popMsg(state); }
:
KW_USING Identifier (DOT Identifier)*
-> ^(TOK_TABLEPROVIDER Identifier+)
;

optionKeyValue
@init { pushMsg("table's option specification", state); }
@after { popMsg(state); }
:
(looseIdentifier (DOT looseIdentifier)*) StringLiteral
-> ^(TOK_TABLEOPTION looseIdentifier+ StringLiteral)
;

tableOpts
@init { pushMsg("table's options", state); }
@after { popMsg(state); }
:
KW_OPTIONS LPAREN optionKeyValue (COMMA optionKeyValue)* RPAREN
-> ^(TOK_TABLEOPTIONS optionKeyValue+)
;

tableComment
@init { pushMsg("table's comment", state); }
@after { popMsg(state); }
@@ -2115,7 +2185,7 @@ structType
mapType
@init { pushMsg("map type", state); }
@after { popMsg(state); }
: KW_MAP LESSTHAN left=primitiveType COMMA right=type GREATERTHAN
: KW_MAP LESSTHAN left=type COMMA right=type GREATERTHAN
Member Author: The key in a Map can be any type.
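
With the restriction lifted, a DDL like this (hypothetical) should parse, since the map key is no longer limited to primitive types:

sql("CREATE TEMPORARY TABLE t (m MAP<ARRAY<INT>, STRING>) USING org.apache.spark.sql.json OPTIONS (path '/hypothetical/path')")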

-> ^(TOK_MAP $left $right)
;

@@ -140,6 +140,7 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
case Token("TOK_BOOLEAN", Nil) => BooleanType
case Token("TOK_STRING", Nil) => StringType
case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
Member Author: We need to support the Char type.
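
For example (hypothetical table), both of these columns should now come back as StringType:

sql("CREATE TEMPORARY TABLE t (c CHAR(10), v VARCHAR(10)) USING org.apache.spark.sql.json OPTIONS (path '/hypothetical/path')")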

case Token("TOK_FLOAT", Nil) => FloatType
case Token("TOK_DOUBLE", Nil) => DoubleType
case Token("TOK_DATE", Nil) => DateType
@@ -156,9 +157,10 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends

protected def nodeToStructField(node: ASTNode): StructField = node match {
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
StructField(fieldName, nodeToDataType(dataType), nullable = true)
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: _ /* comment */:: Nil) =>
StructField(fieldName, nodeToDataType(dataType), nullable = true)
StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
Member Author: Add the comment to StructField's metadata.
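
A hedged sketch of how the stored comment could be read back later (the table and column names are made up):

val field = sqlContext.table("t").schema("c")
val comment = field.metadata.getString("comment") // the metadata key written above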

case _ =>
noParseRule("StructField", node)
}
@@ -633,15 +635,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
nodeToExpr(qualifier) match {
case UnresolvedAttribute(nameParts) =>
UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
case other => UnresolvedExtractValue(other, Literal(attr))
case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
}

/* Stars (*) */
case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
// The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
// has a single child which is tableName.
case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
UnresolvedStar(Some(target.map(_.text)))
UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))

/* Aggregate Functions */
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
@@ -949,7 +951,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node

val alias = getClause("TOK_TABALIAS", clauses).children.head.text
val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)

val generator = clauses.head match {
case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>
@@ -205,10 +205,7 @@ class SQLContext private[sql](
@transient
protected[sql] val sqlParser: ParserInterface = new SparkSQLParser(new SparkQl(conf))

@transient
protected[sql] val ddlParser: DDLParser = new DDLParser(sqlParser)

protected[sql] def parseSql(sql: String): LogicalPlan = ddlParser.parse(sql, false)
protected[sql] def parseSql(sql: String): LogicalPlan = sqlParser.parsePlan(sql)

protected[sql] def executeSql(sql: String):
org.apache.spark.sql.execution.QueryExecution = executePlan(parseSql(sql))