[SPARK-5123] Expose only one version of the data type APIs (i.e. remove the Java-specific API).
rxin committed Jan 13, 2015
1 parent 14e3f11 commit 66505cc
Showing 160 changed files with 756 additions and 2,096 deletions.
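In practice the change is a package move: everything that used to be imported from the catalyst-internal org.apache.spark.sql.catalyst.types (and its Java mirror under org.apache.spark.sql.api.java) now comes from the single public package org.apache.spark.sql.types, shared by the Scala and Java APIs. A minimal before/after sketch of user code (illustrative, not a file from this commit):

    // Before: data types imported from a catalyst-internal package.
    // import org.apache.spark.sql.catalyst.types.{StructType, StructField, StringType}

    // After: one public types package serves both Scala and Java callers.
    import org.apache.spark.sql.types.{StructType, StructField, StringType}

    val schema = StructType(Seq(StructField("name", StringType, nullable = true)))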
5 changes: 3 additions & 2 deletions mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -21,8 +21,9 @@ import scala.collection.mutable.ListBuffer
 
 import org.apache.spark.Logging
 import org.apache.spark.annotation.AlphaComponent
-import org.apache.spark.ml.param.{Params, Param, ParamMap}
-import org.apache.spark.sql.{SchemaRDD, StructType}
+import org.apache.spark.ml.param.{Param, ParamMap}
+import org.apache.spark.sql.SchemaRDD
+import org.apache.spark.sql.types.StructType
 
 /**
  * :: AlphaComponent ::
2 changes: 1 addition & 1 deletion mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.SchemaRDD
 import org.apache.spark.sql.api.java.JavaSchemaRDD
 import org.apache.spark.sql.catalyst.analysis.Star
 import org.apache.spark.sql.catalyst.expressions.ScalaUdf
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * :: AlphaComponent ::
@@ -26,6 +26,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.Star
 import org.apache.spark.sql.catalyst.dsl._
+import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
 import org.apache.spark.storage.StorageLevel
 
 /**
@@ -21,7 +21,8 @@ import org.apache.spark.annotation.AlphaComponent
 import org.apache.spark.ml._
 import org.apache.spark.ml.param._
 import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
-import org.apache.spark.sql.{DoubleType, Row, SchemaRDD}
+import org.apache.spark.sql.{Row, SchemaRDD}
+import org.apache.spark.sql.types.DoubleType
 
 /**
  * :: AlphaComponent ::
@@ -22,7 +22,7 @@ import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.{IntParam, ParamMap}
 import org.apache.spark.mllib.feature
 import org.apache.spark.mllib.linalg.{VectorUDT, Vector}
-import org.apache.spark.sql.catalyst.types.DataType
+import org.apache.spark.sql.types.DataType
 
 /**
  * :: AlphaComponent ::
@@ -25,6 +25,7 @@ import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.Star
 import org.apache.spark.sql.catalyst.dsl._
+import org.apache.spark.sql.types.{StructField, StructType}
 
 /**
  * Params for [[StandardScaler]] and [[StandardScalerModel]].
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import org.apache.spark.annotation.AlphaComponent
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.ParamMap
-import org.apache.spark.sql.{DataType, StringType, ArrayType}
+import org.apache.spark.sql.types.{DataType, StringType, ArrayType}
 
 /**
  * :: AlphaComponent ::
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.AlphaComponent
 import org.apache.spark.ml._
 import org.apache.spark.ml.param.{IntParam, Param, ParamMap, Params}
 import org.apache.spark.mllib.util.MLUtils
-import org.apache.spark.sql.{SchemaRDD, StructType}
+import org.apache.spark.sql.SchemaRDD
+import org.apache.spark.sql.types.StructType
 
 /**
  * Params for [[CrossValidator]] and [[CrossValidatorModel]].
@@ -27,9 +27,8 @@ import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
 
 import org.apache.spark.SparkException
 import org.apache.spark.mllib.util.NumericParser
-import org.apache.spark.sql.catalyst.annotation.SQLUserDefinedType
 import org.apache.spark.sql.catalyst.expressions.{GenericMutableRow, Row}
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * Represents a numeric vector, whose index type is Int and value type is Double.
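Worth noting here: the SQLUserDefinedType annotation dropped out of org.apache.spark.sql.catalyst.annotation and is now picked up through the org.apache.spark.sql.types wildcard. A hypothetical UDT in the same style as VectorUDT (all names below are illustrative, not part of the commit):

    import org.apache.spark.sql.types._

    // Hypothetical user-facing class; the annotation now resolves via sql.types.
    @SQLUserDefinedType(udt = classOf[Point2DUDT])
    class Point2D(val x: Double, val y: Double)

    // Sketch of the matching UDT: represents a Point2D as an array of doubles.
    class Point2DUDT extends UserDefinedType[Point2D] {
      override def sqlType: DataType = ArrayType(DoubleType, containsNull = false)
      override def serialize(obj: Any): Any = obj match {
        case p: Point2D => Seq(p.x, p.y)
      }
      override def deserialize(datum: Any): Point2D = datum match {
        case Seq(x: Double, y: Double) => new Point2D(x, y)
      }
      override def userClass: Class[Point2D] = classOf[Point2D]
    }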
12 changes: 12 additions & 0 deletions project/MimaExcludes.scala
@@ -60,6 +60,18 @@ object MimaExcludes {
             ProblemFilters.exclude[IncompatibleResultTypeProblem](
               "org.apache.spark.streaming.flume.sink.SparkAvroCallbackHandler." +
                 "removeAndGetProcessor")
+          ) ++ Seq(
+            // SPARK-5123 (SparkSQL data type change) - alpha component only
+            ProblemFilters.exclude[IncompatibleResultTypeProblem](
+              "org.apache.spark.ml.feature.HashingTF.outputDataType"),
+            ProblemFilters.exclude[IncompatibleResultTypeProblem](
+              "org.apache.spark.ml.feature.Tokenizer.outputDataType"),
+            ProblemFilters.exclude[IncompatibleMethTypeProblem](
+              "org.apache.spark.ml.feature.Tokenizer.validateInputType"),
+            ProblemFilters.exclude[IncompatibleMethTypeProblem](
+              "org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema"),
+            ProblemFilters.exclude[IncompatibleMethTypeProblem](
+              "org.apache.spark.ml.classification.LogisticRegression.validateAndTransformSchema")
+          )
 
         case v if v.startsWith("1.2") =>
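For background, MiMa (the Migration Manager) diffs the compiled classes of a release against the previous one and fails the build on binary-incompatible signature changes; since DataType and friends changed packages, each affected method in the alpha spark.ml component is whitelisted above. The general shape of an entry, with an illustrative method name (import assumed from sbt-mima-plugin):

    import com.typesafe.tools.mima.core._

    // Whitelist one known break: same method name and arguments, but the
    // return type moved from catalyst.types.DataType to sql.types.DataType.
    ProblemFilters.exclude[IncompatibleResultTypeProblem](
      "org.apache.spark.ml.feature.SomeTransformer.outputDataType")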
4 changes: 2 additions & 2 deletions project/SparkBuild.scala
@@ -254,10 +254,10 @@ object SQL {
         |import org.apache.spark.sql.catalyst.expressions._
         |import org.apache.spark.sql.catalyst.plans.logical._
         |import org.apache.spark.sql.catalyst.rules._
-        |import org.apache.spark.sql.catalyst.types._
         |import org.apache.spark.sql.catalyst.util._
         |import org.apache.spark.sql.execution
         |import org.apache.spark.sql.test.TestSQLContext._
+        |import org.apache.spark.sql.types._
         |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
     cleanupCommands in console := "sparkContext.stop()"
   )
@@ -284,11 +284,11 @@ object Hive {
         |import org.apache.spark.sql.catalyst.expressions._
         |import org.apache.spark.sql.catalyst.plans.logical._
         |import org.apache.spark.sql.catalyst.rules._
-        |import org.apache.spark.sql.catalyst.types._
         |import org.apache.spark.sql.catalyst.util._
         |import org.apache.spark.sql.execution
         |import org.apache.spark.sql.hive._
         |import org.apache.spark.sql.hive.test.TestHive._
+        |import org.apache.spark.sql.types._
         |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
     cleanupCommands in console := "sparkContext.stop()",
     // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
2 changes: 1 addition & 1 deletion sql/README.md
@@ -34,11 +34,11 @@ import org.apache.spark.sql.catalyst.errors._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.TestHive._
+import org.apache.spark.sql.types._
 Welcome to Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_45).
 Type in expressions to have them evaluated.
 Type :help for more information.
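With those imports in place, the relocated types are usable straight from the console; a small sketch of a follow-up (assumes the session above):

    // Build a schema against the new org.apache.spark.sql.types package.
    val schema = StructType(
      StructField("word", StringType, nullable = false) ::
      StructField("count", IntegerType, nullable = true) :: Nil)

    schema.fields.map(_.name)  // Seq(word, count)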
@@ -20,11 +20,10 @@ package org.apache.spark.sql.catalyst
 import java.sql.{Date, Timestamp}
 
 import org.apache.spark.util.Utils
-import org.apache.spark.sql.catalyst.annotation.SQLUserDefinedType
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute, AttributeReference, Row}
 import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.decimal.Decimal
 
 
 /**
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * A very simple SQL parser. Based loosely on:
@@ -22,8 +22,8 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.catalyst.types.StructType
-import org.apache.spark.sql.catalyst.types.IntegerType
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.IntegerType
 
 /**
  * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union}
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 object HiveTypeCoercion {
   // See https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types.
@@ -26,8 +26,8 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.decimal.Decimal
 
 /**
  * A collection of implicit conversions that create a DSL for constructing catalyst data structures.
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.errors.attachTree
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 import org.apache.spark.sql.catalyst.trees
 
 /**
@@ -22,8 +22,8 @@ import java.text.{DateFormat, SimpleDateFormat}
 
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.decimal.Decimal
 
 /** Cast the child expression to the target data type. */
 case class Cast(child: Expression, dataType: DataType) extends UnaryExpression with Logging {
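Because Cast accepts any DataType, the package move is visible at every call site that names a type; a minimal sketch of constructing a cast directly (illustrative usage, not part of the diff):

    import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
    import org.apache.spark.sql.types._

    // Cast a string literal to an integer; IntegerType now comes from sql.types.
    val expr = Cast(Literal("123"), IntegerType)
    // Evaluating with no input row yields 123, since the child is a literal.
    // expr.eval(null)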
@@ -20,8 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.trees.TreeNode
-import org.apache.spark.sql.catalyst.types.{DataType, FractionalType, IntegralType, NumericType, NativeType}
-import org.apache.spark.sql.catalyst.util.Metadata
+import org.apache.spark.sql.types._
 
 abstract class Expression extends TreeNode[Expression] {
   self: Product =>
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.util.Random
-import org.apache.spark.sql.catalyst.types.DoubleType
+import org.apache.spark.sql.types.DoubleType
 
 
 case object Rand extends LeafExpression {
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.catalyst.types.NativeType
+import org.apache.spark.sql.types.NativeType
 
 object Row {
   /**
@@ -18,8 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.types.DataType
-import org.apache.spark.util.ClosureCleaner
+import org.apache.spark.sql.types.DataType
 
 /**
  * User-defined function.
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * A parent class for mutable container objects that are reused when the values are changed,
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import scala.language.dynamics
 
-import org.apache.spark.sql.catalyst.types.DataType
+import org.apache.spark.sql.types.DataType
 
 /**
  * The data type representing [[DynamicRow]] values.
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import com.clearspring.analytics.stream.cardinality.HyperLogLog
 
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.util.collection.OpenHashSet
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.catalyst.analysis.UnresolvedException
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 case class UnaryMinus(child: Expression) extends UnaryExpression {
   type EvaluatedType = Any
@@ -18,14 +18,14 @@
 package org.apache.spark.sql.catalyst.expressions.codegen
 
 import com.google.common.cache.{CacheLoader, CacheBuilder}
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types.decimal.Decimal
 
 import scala.language.existentials
 
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 // These classes are here to avoid issues with serialization and integration with quasiquotes.
 class IntegerHashSet extends org.apache.spark.util.collection.OpenHashSet[Int]
@@ -541,11 +541,11 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Logging
         childEval.code ++
         q"""
           var $nullTerm = ${childEval.nullTerm}
-          var $primitiveTerm: org.apache.spark.sql.catalyst.types.decimal.Decimal =
+          var $primitiveTerm: org.apache.spark.sql.types.decimal.Decimal =
             ${defaultPrimitive(DecimalType())}
 
           if (!$nullTerm) {
-            $primitiveTerm = new org.apache.spark.sql.catalyst.types.decimal.Decimal()
+            $primitiveTerm = new org.apache.spark.sql.types.decimal.Decimal()
             $primitiveTerm = $primitiveTerm.setOrNull(${childEval.primitiveTerm}, $precision, $scale)
             $nullTerm = $primitiveTerm == null
           }
@@ -627,7 +627,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Logging
       case LongType => ru.Literal(Constant(1L))
      case ByteType => ru.Literal(Constant(-1.toByte))
       case DoubleType => ru.Literal(Constant(-1.toDouble))
-      case DecimalType() => q"org.apache.spark.sql.catalyst.types.decimal.Decimal(-1)"
+      case DecimalType() => q"org.apache.spark.sql.types.decimal.Decimal(-1)"
       case IntegerType => ru.Literal(Constant(-1))
       case _ => ru.Literal(Constant(null))
     }
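The quasiquoted snippet above leans on Decimal's mutable set-or-null protocol, which the package move leaves intact; roughly, in plain Scala (unscaled value plus precision/scale, mirroring the generated code):

    import org.apache.spark.sql.types.decimal.Decimal

    // setOrNull returns the Decimal itself, or null when the unscaled value
    // needs more digits than the requested precision; the generated code then
    // folds that null into the expression's null flag.
    val ok = new Decimal().setOrNull(12345L, 5, 2)          // 123.45 fits precision 5
    val overflow = new Decimal().setOrNull(1234567L, 5, 2)  // null: needs 7 digits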
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen
 
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.types.{StringType, NumericType}
+import org.apache.spark.sql.types.{StringType, NumericType}
 
 /**
  * Generates bytecode for an [[Ordering]] of [[Row Rows]] for a given set of
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions.codegen
 
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 
 /**
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import scala.collection.Map
 
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * Returns the item at `ordinal` in the Array `child` or the Key `ordinal` in Map `child`.
@@ -17,8 +17,8 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
-import org.apache.spark.sql.catalyst.types.{DecimalType, LongType, DoubleType, DataType}
+import org.apache.spark.sql.types.decimal.Decimal
+import org.apache.spark.sql.types.{DecimalType, LongType, DoubleType, DataType}
 
 /** Return the unscaled Long value of a Decimal, assuming it fits in a Long */
 case class UnscaledValue(child: Expression) extends UnaryExpression {
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
 import scala.collection.Map
 
 import org.apache.spark.sql.catalyst.trees
-import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.types._
 
 /**
  * An expression that produces zero or more rows given a single input row.
@@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
 
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.decimal.Decimal
 
 object Literal {
   def apply(v: Any): Literal = v match {
@@ -20,8 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.util.Metadata
+import org.apache.spark.sql.types._
 
 object NamedExpression {
   private val curId = new java.util.concurrent.atomic.AtomicLong()