[SPARK-14591] [SQL] Remove DataTypeParser and add more keywords to the nonReserved list. #12796

Closed · wants to merge 3 commits
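In short, every call site that previously went through the combinator-based DataTypeParser now calls CatalystSqlParser.parseDataType on the ANTLR-based parser. A minimal before/after sketch (the type string is illustrative):

// Before this PR (the class this PR deletes):
//   import org.apache.spark.sql.catalyst.parser.DataTypeParser
//   val dt = DataTypeParser.parse("struct<a: int, b: string>")

// After this PR:
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.DataType

val dt: DataType = CatalystSqlParser.parseDataType("struct<a: int, b: string>")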
@@ -22,7 +22,7 @@ import javax.annotation.Nullable
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}


@@ -189,7 +189,7 @@ case class SimpleCatalogRelation(
(cols ++ catalogTable.partitionColumns).map { f =>
AttributeReference(
f.name,
-DataTypeParser.parse(f.dataType),
+CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifier = Some(alias.getOrElse(metadata.identifier.table)))
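For reference, a standalone sketch of the call-site pattern above, turning a catalog column's type string into a nullable AttributeReference; the column name and type string here are made up:

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Catalog data can be dumped in with no validation, so the attribute is
// marked nullable regardless of what the type string says.
val attr = AttributeReference(
  "price",
  CatalystSqlParser.parseDataType("decimal(10, 2)"),
  nullable = true)()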

This file was deleted.

@@ -25,7 +25,7 @@ import org.json4s.JsonDSL._
import org.apache.spark.SparkException
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
-import org.apache.spark.sql.catalyst.parser.{DataTypeParser, LegacyTypeStringParser}
+import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser}
import org.apache.spark.sql.catalyst.util.quoteIdentifier

/**
@@ -169,7 +169,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
* }}}
*/
def add(name: String, dataType: String): StructType = {
-add(name, DataTypeParser.parse(dataType), nullable = true, Metadata.empty)
+add(name, CatalystSqlParser.parseDataType(dataType), nullable = true, Metadata.empty)
}

/**
@@ -184,7 +184,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
* }}}
*/
def add(name: String, dataType: String, nullable: Boolean): StructType = {
-add(name, DataTypeParser.parse(dataType), nullable, Metadata.empty)
+add(name, CatalystSqlParser.parseDataType(dataType), nullable, Metadata.empty)
}

/**
@@ -202,7 +202,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
dataType: String,
nullable: Boolean,
metadata: Metadata): StructType = {
-add(name, DataTypeParser.parse(dataType), nullable, metadata)
+add(name, CatalystSqlParser.parseDataType(dataType), nullable, metadata)
}

/**
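A small usage sketch of the string-typed add overloads above, which now delegate to CatalystSqlParser.parseDataType; the field names are illustrative:

import org.apache.spark.sql.types.StructType

// "bigint" parses to LongType, "array<string>" to ArrayType(StringType).
val schema = new StructType()
  .add("id", "bigint")
  .add("tags", "array<string>", nullable = false)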
@@ -20,17 +20,18 @@ package org.apache.spark.sql.catalyst.parser
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._

-abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
+class CatalystQlDataTypeParserSuite extends SparkFunSuite {

-def parse(sql: String): DataType
+def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)

def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
test(s"parse ${dataTypeString.replace("\n", "")}") {
assert(parse(dataTypeString) === expectedDataType)
}
}

-def intercept(sql: String)
+def intercept(sql: String): Unit =
+intercept[ParseException](CatalystSqlParser.parseDataType(sql))

def unsupported(dataTypeString: String): Unit = {
test(s"$dataTypeString is not supported") {
@@ -115,38 +116,16 @@ abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
unsupported("it is not a data type")
unsupported("struct<x+y: int, 1.1:timestamp>")
unsupported("struct<x: int")
-}

-class DataTypeParserSuite extends AbstractDataTypeParserSuite {
-override def intercept(sql: String): Unit =
-intercept[DataTypeException](DataTypeParser.parse(sql))
-
-override def parse(sql: String): DataType =
-DataTypeParser.parse(sql)
-
// A column name can be a reserved word in our DDL parser and SqlParser.
+// DataType parser accepts certain reserved keywords.
checkDataType(
-"Struct<TABLE: string, CASE:boolean>",
+"Struct<TABLE: string, DATE:boolean>",
StructType(
StructField("TABLE", StringType, true) ::
-StructField("CASE", BooleanType, true) :: Nil)
+StructField("DATE", BooleanType, true) :: Nil)
)

-unsupported("struct<x int, y string>")
-
-unsupported("struct<`x``y` int>")
-}
-
-class CatalystQlDataTypeParserSuite extends AbstractDataTypeParserSuite {
-override def intercept(sql: String): Unit =
-intercept[ParseException](CatalystSqlParser.parseDataType(sql))
-
-override def parse(sql: String): DataType =
-CatalystSqlParser.parseDataType(sql)
-
-// A column name can be a reserved word in our DDL parser and SqlParser.
-unsupported("Struct<TABLE: string, CASE:boolean>")
-
// Define struct columns without ':'
checkDataType(
"struct<x int, y string>",
(new StructType).add("x", IntegerType).add("y", StringType))
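Per the updated tests, the unified parser accepts the newly non-reserved keywords as field names and the ':'-less field syntax, while malformed type strings still fail with a ParseException. A sketch of that behavior:

import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

// TABLE and DATE are treated as non-reserved, so they are legal field names.
CatalystSqlParser.parseDataType("struct<TABLE: string, DATE: boolean>")

// Field declarations without ':' are also accepted.
CatalystSqlParser.parseDataType("struct<x int, y string>")

// Malformed input is still rejected.
try CatalystSqlParser.parseDataType("struct<x: int") catch {
  case e: ParseException => println(s"rejected: ${e.getMessage}")
}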
sql/core/src/main/scala/org/apache/spark/sql/Column.scala (2 additions, 2 deletions)
@@ -24,7 +24,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.util.usePrettyExpression
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
import org.apache.spark.sql.functions.lit
@@ -979,7 +979,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* @group expr_ops
* @since 1.3.0
*/
-def cast(to: String): Column = cast(DataTypeParser.parse(to))
+def cast(to: String): Column = cast(CatalystSqlParser.parseDataType(to))

/**
* Returns an ordering used in sorting.
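A usage sketch of the string-typed cast above, which now routes the type string through CatalystSqlParser.parseDataType; the DataFrame and column names are hypothetical:

// Assuming `df` has a string column "amount" and an integer column "ts".
val priced = df.select(df("amount").cast("decimal(18, 2)"))
val stamped = df.select(df("ts").cast("timestamp"))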
@@ -30,7 +30,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference, Expression}
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.execution.FileRelation
import org.apache.spark.sql.hive.client.HiveClient
@@ -188,7 +188,7 @@ private[hive] case class MetastoreRelation(
implicit class SchemaAttribute(f: CatalogColumn) {
def toAttribute: AttributeReference = AttributeReference(
f.name,
-DataTypeParser.parse(f.dataType),
+CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything is nullable.
nullable = true
)(qualifier = Some(alias.getOrElse(tableName)))
@@ -24,7 +24,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.StructType

private[orc] object OrcFileOperator extends Logging {
@@ -78,7 +78,7 @@ private[orc] object OrcFileOperator extends Logging {
val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector]
val schema = readerInspector.getTypeName
logDebug(s"Reading schema from file $paths, got Hive schema string: $schema")
-DataTypeParser.parse(schema).asInstanceOf[StructType]
+CatalystSqlParser.parseDataType(schema).asInstanceOf[StructType]
}
}

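The ORC reader above feeds the Hive type string reported by the file's ObjectInspector into the same parser; a standalone sketch with a made-up schema string:

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.StructType

// A Hive-style type string, similar to what readerInspector.getTypeName returns.
val hiveTypeString = "struct<name:string,age:int,scores:array<double>>"
val orcSchema = CatalystSqlParser.parseDataType(hiveTypeString).asInstanceOf[StructType]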
@@ -22,7 +22,7 @@ import java.io.File
import org.apache.spark.sql.{QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
-import org.apache.spark.sql.catalyst.parser.DataTypeParser
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.{ExamplePointUDT, SQLTestUtils}
@@ -33,7 +33,7 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton {

test("struct field should accept underscore in sub-column name") {
val hiveTypeStr = "struct<a: int, b_1: string, c: string>"
-val dateType = DataTypeParser.parse(hiveTypeStr)
+val dateType = CatalystSqlParser.parseDataType(hiveTypeStr)
assert(dateType.isInstanceOf[StructType])
}
