diff --git a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
index 99ec78633ab75..cb1e6cf0497cc 100644
--- a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
@@ -19,8 +19,6 @@ package org.apache.spark.metrics.source
import com.codahale.metrics.MetricRegistry
-import org.apache.spark.annotation.Experimental
-
private[spark] object StaticSources {
/**
* The set of all static sources. These sources may be reported to from any class, including
@@ -30,10 +28,8 @@ private[spark] object StaticSources {
}
/**
- * :: Experimental ::
* Metrics for code generation.
*/
-@Experimental
object CodegenMetrics extends Source {
override val sourceName: String = "CodeGenerator"
override val metricRegistry: MetricRegistry = new MetricRegistry()
@@ -62,10 +58,8 @@ object CodegenMetrics extends Source {
}
/**
- * :: Experimental ::
* Metrics for access to the hive external catalog.
*/
-@Experimental
object HiveCatalogMetrics extends Source {
override val sourceName: String = "HiveExternalCatalog"
override val metricRegistry: MetricRegistry = new MetricRegistry()
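
For context on the two objects losing their Experimental marker above: a static source is just a singleton Source owning a shared MetricRegistry. A minimal sketch of the pattern, with a hypothetical MyCatalogMetrics object and counter name; note that Source is private[spark], so this shape only compiles inside Spark's own packages:

```scala
import com.codahale.metrics.MetricRegistry

import org.apache.spark.metrics.source.Source

// Hypothetical static source following the CodegenMetrics/HiveCatalogMetrics
// pattern: a singleton with named metrics that any class may update without
// holding a reference to the enclosing component.
object MyCatalogMetrics extends Source {
  override val sourceName: String = "MyCatalog"
  override val metricRegistry: MetricRegistry = new MetricRegistry()

  val METRIC_LOOKUP_COUNT = metricRegistry.counter(MetricRegistry.name("lookupCount"))
}

// Anywhere in Spark code:
MyCatalogMetrics.METRIC_LOOKUP_COUNT.inc()
```
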
diff --git a/core/src/main/scala/org/apache/spark/partial/package.scala b/core/src/main/scala/org/apache/spark/partial/package.scala
index 62dc5cd25a164..d9e39cbe8d24a 100644
--- a/core/src/main/scala/org/apache/spark/partial/package.scala
+++ b/core/src/main/scala/org/apache/spark/partial/package.scala
@@ -18,8 +18,6 @@
package org.apache.spark
/**
- * :: Experimental ::
- *
* Support for approximate results. This provides a convenient API and an implementation for
* approximate calculation.
*
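
As a quick illustration of the package this scaladoc introduces: the approximate-count API returns a PartialResult whose bounds tighten as more partitions finish. A minimal sketch, assuming an existing SparkContext named sc; the timeout and confidence values are illustrative:

```scala
import org.apache.spark.partial.{BoundedDouble, PartialResult}

val rdd = sc.parallelize(1 to 1000000, 100)

// Ask for a count within 1000 ms at 95% confidence.
val result: PartialResult[BoundedDouble] = rdd.countApprox(timeout = 1000L, confidence = 0.95)
val bound = result.initialValue   // current estimate with low/high bounds
println(s"count ~ ${bound.mean} in [${bound.low}, ${bound.high}]")
```
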
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 7f8064f01ec45..e23133682360f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -34,7 +34,6 @@ import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewO
import org.apache.spark._
import org.apache.spark.Partitioner.defaultPartitioner
-import org.apache.spark.annotation.Experimental
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.SPECULATION_ENABLED
import org.apache.spark.internal.io._
@@ -52,7 +51,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
extends Logging with Serializable {
/**
- * :: Experimental ::
* Generic function to combine the elements for each key using a custom set of aggregation
* functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C
*
@@ -68,7 +66,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* @note V and C can be different -- for example, one might group an RDD of type
* (Int, Int) into an RDD of type (Int, Seq[Int]).
*/
- @Experimental
def combineByKeyWithClassTag[C](
createCombiner: V => C,
mergeValue: (C, V) => C,
@@ -136,10 +133,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
}
/**
- * :: Experimental ::
* Simplified version of combineByKeyWithClassTag that hash-partitions the output RDD.
*/
- @Experimental
def combineByKeyWithClassTag[C](
createCombiner: V => C,
mergeValue: (C, V) => C,
@@ -616,11 +611,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
}
/**
- * :: Experimental ::
* Simplified version of combineByKeyWithClassTag that hash-partitions the resulting RDD using the
* existing partitioner/parallelism level.
*/
- @Experimental
def combineByKeyWithClassTag[C](
createCombiner: V => C,
mergeValue: (C, V) => C,
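
Since all three overloads above lose their Experimental marker, a short sketch of the generic form may help. It computes a per-key mean, with the combined type C = (Double, Long) deliberately different from the value type V = Double, as the scaladoc notes; sc is an assumed SparkContext:

```scala
val pairs = sc.parallelize(Seq(("a", 1.0), ("a", 3.0), ("b", 2.0)))

val sumCounts = pairs.combineByKeyWithClassTag(
  (v: Double) => (v, 1L),                                  // createCombiner
  (c: (Double, Long), v: Double) => (c._1 + v, c._2 + 1L), // mergeValue
  (c1: (Double, Long), c2: (Double, Long)) =>              // mergeCombiners
    (c1._1 + c2._1, c1._2 + c2._2))

val meanByKey = sumCounts.mapValues { case (sum, n) => sum / n }
```
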
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
index af0752e569ea5..9d5c4d22ea91e 100755
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
+++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
@@ -17,8 +17,6 @@
package org.apache.spark.sql
-import org.apache.spark.annotation.Experimental
-
package object avro {
/**
@@ -31,7 +29,6 @@ package object avro {
*
* @since 2.4.0
*/
- @Experimental
@deprecated("Please use 'org.apache.spark.sql.avro.functions.from_avro' instead.", "3.0.0")
def from_avro(
data: Column,
@@ -45,7 +42,6 @@ package object avro {
*
* @since 2.4.0
*/
- @Experimental
@deprecated("Please use 'org.apache.spark.sql.avro.functions.to_avro' instead.", "3.0.0")
def to_avro(data: Column): Column = org.apache.spark.sql.avro.functions.to_avro(data)
}
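
The deprecated package-level functions above simply forward to org.apache.spark.sql.avro.functions, so migrating is an import change. A sketch, assuming a SparkSession named spark, a DataFrame df with a value column, and an illustrative Avro schema:

```scala
import org.apache.spark.sql.avro.functions.{from_avro, to_avro}
import spark.implicits._

val jsonFormatSchema = """{"type": "string"}"""   // illustrative schema

val roundTripped = df
  .select(to_avro($"value").as("avro"))
  .select(from_avro($"avro", jsonFormatSchema).as("value"))
```
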
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala
index d4a428f45c110..608da0b8bf563 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala
@@ -22,7 +22,6 @@ import scala.reflect.ClassTag
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record
-import org.apache.spark.annotation.Evolving
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
@@ -84,14 +83,12 @@ private[kinesis] class KinesisInputDStream[T: ClassTag](
}
}
-@Evolving
object KinesisInputDStream {
/**
* Builder for [[KinesisInputDStream]] instances.
*
* @since 2.2.0
*/
- @Evolving
class Builder {
// Required params
private var streamingContext: Option[StreamingContext] = None
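
For context, the Builder losing its Evolving tag here is typically composed as below. Method names follow the Kinesis integration guide; the endpoint, region, stream, and app names are placeholders, and ssc is an assumed StreamingContext, so treat this as a sketch rather than canonical usage:

```scala
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Milliseconds

val stream = KinesisInputDStream.builder
  .streamingContext(ssc)
  .streamName("myStream")
  .endpointUrl("https://kinesis.us-east-1.amazonaws.com")
  .regionName("us-east-1")
  .initialPositionInStream(InitialPositionInStream.LATEST)
  .checkpointAppName("myKinesisApp")
  .checkpointInterval(Milliseconds(2000))
  .storageLevel(StorageLevel.MEMORY_AND_DISK_2)
  .build()
```
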
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala
index 7488971e61634..e821adca20d27 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala
@@ -19,7 +19,6 @@ package org.apache.spark.streaming.kinesis
import com.amazonaws.auth._
-import org.apache.spark.annotation.Evolving
import org.apache.spark.internal.Logging
/**
@@ -83,14 +82,12 @@ private[kinesis] final case class STSCredentials(
}
}
-@Evolving
object SparkAWSCredentials {
/**
* Builder for [[SparkAWSCredentials]] instances.
*
* @since 2.2.0
*/
- @Evolving
class Builder {
private var basicCreds: Option[BasicCredentials] = None
private var stsCreds: Option[STSCredentials] = None
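
The credentials builder is used the same way and is handed to the Kinesis stream builder; basicCredentials follows the documented API, and the key values below are placeholders:

```scala
// Sketch: long-lived basic credentials; an STS role can be layered on top.
val credentials = SparkAWSCredentials.builder
  .basicCredentials("accessKeyId", "secretKey")
  .build()
```
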
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
index 5e2ca32b4ceae..78503585261bf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
@@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, OWLQN => BreezeOWLQN}
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.linalg._
@@ -59,8 +59,6 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
}
/**
- * :: Experimental ::
- *
*
* Linear SVM Classifier
*
@@ -69,7 +67,6 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
*
*/
@Since("2.2.0")
-@Experimental
class LinearSVC @Since("2.2.0") (
@Since("2.2.0") override val uid: String)
extends Classifier[Vector, LinearSVC, LinearSVCModel]
@@ -290,11 +287,9 @@ object LinearSVC extends DefaultParamsReadable[LinearSVC] {
}
/**
- * :: Experimental ::
* Linear SVM Model trained by [[LinearSVC]]
*/
@Since("2.2.0")
-@Experimental
class LinearSVCModel private[classification] (
@Since("2.2.0") override val uid: String,
@Since("2.2.0") val coefficients: Vector,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 7790de064e7bd..0997c1e7b38d6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -26,7 +26,7 @@ import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => Bree
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.linalg._
@@ -1349,12 +1349,10 @@ private[ml] class MultiClassSummarizer extends Serializable {
}
/**
- * :: Experimental ::
* Abstraction for logistic regression results for a given model.
*
* Currently, the summary ignores the instance weights.
*/
-@Experimental
sealed trait LogisticRegressionSummary extends Serializable {
/**
@@ -1482,12 +1480,10 @@ sealed trait LogisticRegressionSummary extends Serializable {
}
/**
- * :: Experimental ::
* Abstraction for multiclass logistic regression training results.
* Currently, the training summary ignores the training weights except
* for the objective trace.
*/
-@Experimental
sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary {
/** objective function (scaled loss + regularization) at each iteration. */
@@ -1501,12 +1497,10 @@ sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary
}
/**
- * :: Experimental ::
* Abstraction for binary logistic regression results for a given model.
*
* Currently, the summary ignores the instance weights.
*/
-@Experimental
sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary {
private val sparkSession = predictions.sparkSession
@@ -1590,12 +1584,10 @@ sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary {
}
/**
- * :: Experimental ::
* Abstraction for binary logistic regression training results.
* Currently, the training summary ignores the training weights except
* for the objective trace.
*/
-@Experimental
sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegressionSummary
with LogisticRegressionTrainingSummary
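
The summary traits above are reached through a fitted model. A sketch, assuming a binary training DataFrame:

```scala
import org.apache.spark.ml.classification.LogisticRegression

val model = new LogisticRegression().setMaxIter(10).fit(training)

// Training summary for the binary case; throws if the model is multinomial.
val summary = model.binarySummary
println(s"areaUnderROC = ${summary.areaUnderROC}")
summary.objectiveHistory.foreach(println)   // scaled loss + regularization per iteration
```
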
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 2247880b52414..4ad0cb55b0078 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.clustering
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.param._
@@ -301,7 +301,6 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] {
/**
- * :: Experimental ::
* Summary of BisectingKMeans.
*
* @param predictions `DataFrame` produced by `BisectingKMeansModel.transform()`.
@@ -313,7 +312,6 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] {
* dataset. This is equivalent to sklearn's inertia.
*/
@Since("2.1.0")
-@Experimental
class BisectingKMeansSummary private[clustering] (
predictions: DataFrame,
predictionCol: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
index 7da4c43a1abf3..41718920c197d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
@@ -17,11 +17,10 @@
package org.apache.spark.ml.clustering
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.sql.{DataFrame, Row}
/**
- * :: Experimental ::
* Summary of clustering algorithms.
*
* @param predictions `DataFrame` produced by model.transform().
@@ -30,7 +29,6 @@ import org.apache.spark.sql.{DataFrame, Row}
* @param k Number of clusters.
* @param numIter Number of iterations.
*/
-@Experimental
class ClusteringSummary private[clustering] (
@transient val predictions: DataFrame,
val predictionCol: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 9a51d2f188460..86caa1247e77f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.clustering
import breeze.linalg.{DenseVector => BDV}
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.impl.Utils.EPSILON
@@ -697,7 +697,6 @@ private class ExpectationAggregator(
}
/**
- * :: Experimental ::
* Summary of GaussianMixture.
*
* @param predictions `DataFrame` produced by `GaussianMixtureModel.transform()`.
@@ -710,7 +709,6 @@ private class ExpectationAggregator(
* @param numIter Number of iterations.
*/
@Since("2.0.0")
-@Experimental
class GaussianMixtureSummary private[clustering] (
predictions: DataFrame,
predictionCol: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index b48a9665ec88c..5cc0f38c67e71 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model, PipelineStage}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.param._
@@ -359,7 +359,6 @@ object KMeans extends DefaultParamsReadable[KMeans] {
}
/**
- * :: Experimental ::
* Summary of KMeans.
*
* @param predictions `DataFrame` produced by `KMeansModel.transform()`.
@@ -371,7 +370,6 @@ object KMeans extends DefaultParamsReadable[KMeans] {
* points in the training dataset). This is equivalent to sklearn's inertia.
*/
@Since("2.0.0")
-@Experimental
class KMeansSummary private[clustering] (
predictions: DataFrame,
predictionCol: String,
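
A sketch of reading the summary described above from a fitted model; dataset is an assumed DataFrame of feature vectors, and trainingCost is the inertia-style cost the scaladoc mentions:

```scala
import org.apache.spark.ml.clustering.KMeans

val model = new KMeans().setK(2).setSeed(1L).fit(dataset)

println(s"cluster sizes: ${model.summary.clusterSizes.mkString(", ")}")
println(s"training cost: ${model.summary.trainingCost}")
```
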
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala
index 149e99d2f195a..812a426a062c1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.clustering
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
import org.apache.spark.ml.util._
@@ -95,7 +95,6 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has
}
/**
- * :: Experimental ::
* Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
* Lin and Cohen. From
* the abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power
@@ -108,7 +107,6 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has
* Spectral clustering (Wikipedia)
*/
@Since("2.4.0")
-@Experimental
class PowerIterationClustering private[clustering] (
@Since("2.4.0") override val uid: String)
extends PowerIterationClusteringParams with DefaultParamsWritable {
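
Unlike the other clustering classes touched here, PIC is not an Estimator; its single entry point consumes an edge list. A sketch, with edges an assumed DataFrame holding "src" and "dst" (and optionally "weight") columns:

```scala
import org.apache.spark.ml.clustering.PowerIterationClustering

val pic = new PowerIterationClustering()
  .setK(2)
  .setMaxIter(10)

val assignments = pic.assignClusters(edges)   // returns columns "id" and "cluster"
assignments.show()
```
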
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
index c6b04333885ae..2a7b3c579b078 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared._
@@ -28,13 +28,11 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType
/**
- * :: Experimental ::
* Evaluator for binary classification, which expects two input columns: rawPrediction and label.
* The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label 1)
* or of type vector (length-2 vector of raw predictions, scores, or label probabilities).
*/
@Since("1.2.0")
-@Experimental
class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Evaluator with HasRawPredictionCol with HasLabelCol
with HasWeightCol with DefaultParamsWritable {
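
A usage sketch; predictions is an assumed DataFrame produced by a classifier's transform, with the two columns described above:

```scala
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

val evaluator = new BinaryClassificationEvaluator()
  .setLabelCol("label")
  .setRawPredictionCol("rawPrediction")
  .setMetricName("areaUnderROC")

val auc = evaluator.evaluate(predictions)
```
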
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala
index 4c915e08d2536..868bd2a763f5e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.evaluation
import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.linalg.{BLAS, DenseVector, SparseVector, Vector, Vectors}
@@ -30,8 +30,6 @@ import org.apache.spark.sql.functions.{avg, col, udf}
import org.apache.spark.sql.types.DoubleType
/**
- * :: Experimental ::
- *
* Evaluator for clustering results.
* The metric computes the Silhouette measure using the specified distance measure.
*
@@ -39,7 +37,6 @@ import org.apache.spark.sql.types.DoubleType
* between 1 and -1, where a value close to 1 means that the points in a cluster are close to the
* other points in the same cluster and far from the points of the other clusters.
*/
-@Experimental
@Since("2.3.0")
class ClusteringEvaluator @Since("2.3.0") (@Since("2.3.0") override val uid: String)
extends Evaluator with HasPredictionCol with HasFeaturesCol with DefaultParamsWritable {
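
A sketch of evaluating clustered output; predictions is an assumed DataFrame with "features" and "prediction" columns, and values near 1 indicate tight, well-separated clusters, per the scaladoc above:

```scala
import org.apache.spark.ml.evaluation.ClusteringEvaluator

val silhouette = new ClusteringEvaluator().evaluate(predictions)
println(s"silhouette = $silhouette")
```
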
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
index e5cbe55e1d2ef..85a6138c98a46 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators}
import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils}
@@ -27,11 +27,9 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType
/**
- * :: Experimental ::
* Evaluator for multiclass classification, which expects two input columns: prediction and label.
*/
@Since("1.5.0")
-@Experimental
class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol
with HasWeightCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index 616569bb55e4c..dd667a85fa598 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.evaluation
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators}
import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils}
@@ -27,11 +27,9 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DoubleType, FloatType}
/**
- * :: Experimental ::
* Evaluator for regression, which expects two input columns: prediction and label.
*/
@Since("1.4.0")
-@Experimental
final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Evaluator with HasPredictionCol with HasLabelCol
with HasWeightCol with DefaultParamsWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
index 0554455a66d7f..c074830ec923f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
@@ -22,7 +22,7 @@ import scala.util.Random
import breeze.linalg.normalize
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg._
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.HasSeed
@@ -32,8 +32,6 @@ import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
/**
- * :: Experimental ::
- *
* Params for [[BucketedRandomProjectionLSH]].
*/
private[ml] trait BucketedRandomProjectionLSHParams extends Params {
@@ -56,8 +54,6 @@ private[ml] trait BucketedRandomProjectionLSHParams extends Params {
}
/**
- * :: Experimental ::
- *
* Model produced by [[BucketedRandomProjectionLSH]], where multiple random vectors are stored. The
* vectors are normalized to be unit vectors and each vector is used in a hash function:
* `h_i(x) = floor(r_i.dot(x) / bucketLength)`
@@ -66,7 +62,6 @@ private[ml] trait BucketedRandomProjectionLSHParams extends Params {
*
* @param randUnitVectors An array of random unit vectors. Each vector represents a hash function.
*/
-@Experimental
@Since("2.1.0")
class BucketedRandomProjectionLSHModel private[ml](
override val uid: String,
@@ -114,8 +109,6 @@ class BucketedRandomProjectionLSHModel private[ml](
}
/**
- * :: Experimental ::
- *
* This [[BucketedRandomProjectionLSH]] implements Locality Sensitive Hashing functions for
* Euclidean distance metrics.
*
@@ -131,7 +124,6 @@ class BucketedRandomProjectionLSHModel private[ml](
* 2. Wang, Jingdong et al. "Hashing for similarity search: A survey." arXiv preprint
* arXiv:1408.2927 (2014).
*/
-@Experimental
@Since("2.1.0")
class BucketedRandomProjectionLSH(override val uid: String)
extends LSH[BucketedRandomProjectionLSHModel]
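
A sketch tying the pieces above together: fit the hash family `h_i(x) = floor(r_i.dot(x) / bucketLength)` on one dataset, then run an approximate similarity join within a Euclidean threshold. dfA and dfB are assumed DataFrames with a "features" vector column:

```scala
import org.apache.spark.ml.feature.BucketedRandomProjectionLSH

val brp = new BucketedRandomProjectionLSH()
  .setBucketLength(2.0)
  .setNumHashTables(3)
  .setInputCol("features")
  .setOutputCol("hashes")

val model = brp.fit(dfA)
model.approxSimilarityJoin(dfA, dfB, 1.5, "EuclideanDistance").show()
```
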
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
index dc18e1d34880a..0a4f1b98ef67a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.feature
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.linalg.Vectors
@@ -81,7 +81,6 @@ import org.apache.spark.util.collection.OpenHashMap
* +----+-----+---------+------+------------------------------------------------------+
* }}}
*/
-@Experimental
@Since("2.3.0")
class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transformer
with HasInputCols with HasOutputCol with DefaultParamsWritable {
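
A sketch of the hashing trick described above; there is no fitting step, and the input column names are assumptions:

```scala
import org.apache.spark.ml.feature.FeatureHasher

val hasher = new FeatureHasher()
  .setInputCols("real", "bool", "stringNum", "string")
  .setOutputCol("features")

val hashed = hasher.transform(dataset)   // dataset is an assumed DataFrame
```
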
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
index bdad804083b01..99c0a0df53672 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasInputCols, HasOutputCols}
@@ -81,7 +81,6 @@ private[feature] trait ImputerParams extends Params with HasInputCols with HasOu
}
/**
- * :: Experimental ::
* Imputation estimator for completing missing values, either using the mean or the median
* of the columns in which the missing values are located. The input columns should be of
* numeric type. Currently Imputer does not support categorical features
@@ -95,7 +94,6 @@ private[feature] trait ImputerParams extends Params with HasInputCols with HasOu
* All Null values in the input columns are treated as missing, and so are also imputed. For
* computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001.
*/
-@Experimental
@Since("2.2.0")
class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String)
extends Estimator[ImputerModel] with ImputerParams with DefaultParamsWritable {
@@ -194,13 +192,11 @@ object Imputer extends DefaultParamsReadable[Imputer] {
}
/**
- * :: Experimental ::
* Model fitted by [[Imputer]].
*
* @param surrogateDF a DataFrame containing inputCols and their corresponding surrogates,
* which are used to replace the missing values in the input DataFrame.
*/
-@Experimental
@Since("2.2.0")
class ImputerModel private[ml] (
@Since("2.2.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
index 21cde66d8db6b..da0eaad667ccb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
@@ -21,7 +21,7 @@ import scala.util.Random
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.HasSeed
@@ -29,8 +29,6 @@ import org.apache.spark.ml.util._
import org.apache.spark.sql.types.StructType
/**
- * :: Experimental ::
- *
* Model produced by [[MinHashLSH]], where multiple hash functions are stored. Each hash function
* is picked from the following family of hash functions, where a_i and b_i are randomly chosen
* integers less than prime:
@@ -44,7 +42,6 @@ import org.apache.spark.sql.types.StructType
*
* @param randCoefficients Pairs of random coefficients. Each pair is used by one hash function.
*/
-@Experimental
@Since("2.1.0")
class MinHashLSHModel private[ml](
override val uid: String,
@@ -102,8 +99,6 @@ class MinHashLSHModel private[ml](
}
/**
- * :: Experimental ::
- *
* LSH class for Jaccard distance.
*
* The input can be dense or sparse vectors, but it is more efficient if it is sparse. For example,
@@ -115,7 +110,6 @@ class MinHashLSHModel private[ml](
* References:
* Wikipedia on MinHash
*/
-@Experimental
@Since("2.1.0")
class MinHashLSH(override val uid: String) extends LSH[MinHashLSHModel] with HasSeed {
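
A sketch of the Jaccard-distance workflow: fit the min-hash family, then query approximate nearest neighbors for a key vector. dfA is an assumed DataFrame of sparse binary vectors:

```scala
import org.apache.spark.ml.feature.MinHashLSH
import org.apache.spark.ml.linalg.Vectors

val mh = new MinHashLSH()
  .setNumHashTables(5)
  .setInputCol("features")
  .setOutputCol("hashes")

val model = mh.fit(dfA)
val key = Vectors.sparse(10, Seq((2, 1.0), (5, 1.0)))
model.approxNearestNeighbors(dfA, key, 2).show()
```
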
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index ec8f7031ad3bd..6c0d5fc70ab4e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer
import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer}
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
@@ -124,7 +124,6 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol with
}
/**
- * :: Experimental ::
* Implements the transforms required for fitting a dataset against an R model formula. Currently
* we support a limited subset of the R operators, including '~', '.', ':', '+', '-', '*' and '^'.
* Also see the R formula docs here:
@@ -157,7 +156,6 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol with
* `StringIndexer`. If the label column does not exist in the DataFrame, the output label column
* will be created from the specified response variable in the formula.
*/
-@Experimental
@Since("1.5.0")
class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[RFormulaModel] with RFormulaBase with DefaultParamsWritable {
@@ -329,14 +327,12 @@ object RFormula extends DefaultParamsReadable[RFormula] {
}
/**
- * :: Experimental ::
* Model fitted by [[RFormula]]. Fitting is required to determine the factor levels of
* formula terms.
*
* @param resolvedFormula the fitted R formula.
* @param pipelineModel the fitted feature model, including factor to index mappings.
*/
-@Experimental
@Since("1.5.0")
class RFormulaModel private[feature](
@Since("1.5.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala
index f5947d61fe349..5d787f263a12f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala
@@ -18,7 +18,7 @@
package org.apache.spark.ml.feature
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
@@ -30,14 +30,12 @@ import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StructType
/**
- * :: Experimental ::
* A feature transformer that adds size information to the metadata of a vector column.
* VectorAssembler needs size information for its input columns and cannot be used on streaming
* dataframes without this metadata.
*
* Note: VectorSizeHint modifies `inputCol` to include size metadata and does not have an outputCol.
*/
-@Experimental
@Since("2.3.0")
class VectorSizeHint @Since("2.3.0") (@Since("2.3.0") override val uid: String)
extends Transformer with HasInputCol with HasHandleInvalid with DefaultParamsWritable {
@@ -180,8 +178,6 @@ class VectorSizeHint @Since("2.3.0") (@Since("2.3.0") override val uid: String)
override def copy(extra: ParamMap): this.type = defaultCopy(extra)
}
-/** :: Experimental :: */
-@Experimental
@Since("2.3.0")
object VectorSizeHint extends DefaultParamsReadable[VectorSizeHint] {
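
A sketch of stamping size metadata onto a vector column so VectorAssembler can run on a streaming DataFrame; the column name is an assumption:

```scala
import org.apache.spark.ml.feature.VectorSizeHint

val sizeHint = new VectorSizeHint()
  .setInputCol("userFeatures")
  .setHandleInvalid("skip")   // drop rows whose vectors are missing or mis-sized
  .setSize(3)

val withMetadata = sizeHint.transform(dataset)   // dataset assumed
```
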
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
index 7322815c12ab8..e1c9b927a28c8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path
import org.json4s.{DefaultFormats, JObject}
import org.json4s.JsonDSL._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.HasPredictionCol
@@ -116,7 +116,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol {
}
/**
- * :: Experimental ::
* A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
* Li et al., PFP: Parallel FP-Growth for Query
* Recommendation. PFP distributes computation in such a way that each worker executes an
@@ -128,7 +127,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol {
* Association rule learning (Wikipedia)
*/
@Since("2.2.0")
-@Experimental
class FPGrowth @Since("2.2.0") (
@Since("2.2.0") override val uid: String)
extends Estimator[FPGrowthModel] with FPGrowthParams with DefaultParamsWritable {
@@ -213,13 +211,11 @@ object FPGrowth extends DefaultParamsReadable[FPGrowth] {
}
/**
- * :: Experimental ::
* Model fitted by FPGrowth.
*
* @param freqItemsets frequent itemsets in the format of DataFrame("items"[Array], "freq"[Long])
*/
@Since("2.2.0")
-@Experimental
class FPGrowthModel private[ml] (
@Since("2.2.0") override val uid: String,
@Since("2.2.0") @transient val freqItemsets: DataFrame,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
index b0006a8d4a58e..c9c049248f70c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.fpm
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.ml.util.Instrumentation.instrumented
@@ -27,7 +27,6 @@ import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.{ArrayType, LongType, StructField, StructType}
/**
- * :: Experimental ::
* A parallel PrefixSpan algorithm to mine frequent sequential patterns.
* The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns
* Efficiently by Prefix-Projected Pattern Growth
@@ -39,7 +38,6 @@ import org.apache.spark.sql.types.{ArrayType, LongType, StructField, StructType}
* (Wikipedia)
*/
@Since("2.4.0")
-@Experimental
final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params {
@Since("2.4.0")
@@ -125,7 +123,6 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params
sequenceCol -> "sequence")
/**
- * :: Experimental ::
* Finds the complete set of frequent sequential patterns in the input sequences of itemsets.
*
* @param dataset A dataset or a dataframe containing a sequence column which is
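
Note that PrefixSpan extends plain Params rather than the Estimator/Model pair; the method above is the whole API. A sketch, with df an assumed DataFrame whose "sequence" column holds sequences of itemsets:

```scala
import org.apache.spark.ml.fpm.PrefixSpan

val patterns = new PrefixSpan()
  .setMinSupport(0.5)
  .setMaxPatternLength(5)
  .setSequenceCol("sequence")
  .findFrequentSequentialPatterns(df)

patterns.show()   // DataFrame("sequence", "freq")
```
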
diff --git a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala
index 03136261dd5c9..5efcf0dce68a2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala
@@ -24,16 +24,13 @@ import javax.imageio.ImageIO
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.{Experimental, Since}
-import org.apache.spark.input.PortableDataStream
-import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+import org.apache.spark.annotation.Since
+import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* Defines the image schema and methods to read and manipulate images.
*/
-@Experimental
@Since("2.3.0")
object ImageSchema {
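
For context, images are typically loaded through the built-in image data source, and each row follows the schema this object defines: a single image struct with origin, height, width, nChannels, mode, and data fields. A sketch, assuming a SparkSession named spark and an illustrative path:

```scala
val images = spark.read.format("image").load("data/mllib/images")
images.select("image.origin", "image.width", "image.height").show()
```
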
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index a65592f0e718a..cc1d18d3836c9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS}
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{Estimator, Model}
@@ -120,13 +120,11 @@ private[regression] trait AFTSurvivalRegressionParams extends Params
}
/**
- * :: Experimental ::
* Fit a parametric survival regression model named accelerated failure time (AFT) model
* (see
* Accelerated failure time model (Wikipedia))
* based on the Weibull distribution of the survival time.
*/
-@Experimental
@Since("1.6.0")
class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams
@@ -307,10 +305,8 @@ object AFTSurvivalRegression extends DefaultParamsReadable[AFTSurvivalRegression
}
/**
- * :: Experimental ::
* Model produced by [[AFTSurvivalRegression]].
*/
-@Experimental
@Since("1.6.0")
class AFTSurvivalRegressionModel private[ml] (
@Since("1.6.0") override val uid: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index a8f4ed9096b9e..a226ca49e6deb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -24,7 +24,7 @@ import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.attribute.AttributeGroup
@@ -221,8 +221,6 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
}
/**
- * :: Experimental ::
- *
* Fit a Generalized Linear Model
* (see
* Generalized linear model (Wikipedia))
@@ -238,7 +236,6 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
* - "tweedie" : power link function specified through "linkPower". The default link power in
* the tweedie family is 1 - variancePower.
*/
-@Experimental
@Since("2.0.0")
class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends Regressor[Vector, GeneralizedLinearRegression, GeneralizedLinearRegressionModel]
@@ -991,10 +988,8 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
}
/**
- * :: Experimental ::
* Model produced by [[GeneralizedLinearRegression]].
*/
-@Experimental
@Since("2.0.0")
class GeneralizedLinearRegressionModel private[ml] (
@Since("2.0.0") override val uid: String,
@@ -1155,7 +1150,6 @@ object GeneralizedLinearRegressionModel extends MLReadable[GeneralizedLinearRegr
}
/**
- * :: Experimental ::
* Summary of [[GeneralizedLinearRegression]] model and predictions.
*
* @param dataset Dataset to be summarized.
@@ -1163,7 +1157,6 @@ object GeneralizedLinearRegressionModel extends MLReadable[GeneralizedLinearRegr
* model which cannot be modified from outside.
*/
@Since("2.0.0")
-@Experimental
class GeneralizedLinearRegressionSummary private[regression] (
dataset: Dataset[_],
origModel: GeneralizedLinearRegressionModel) extends Serializable {
@@ -1383,7 +1376,6 @@ class GeneralizedLinearRegressionSummary private[regression] (
}
/**
- * :: Experimental ::
* Summary of [[GeneralizedLinearRegression]] fitting and model.
*
* @param dataset Dataset to be summarized.
@@ -1394,7 +1386,6 @@ class GeneralizedLinearRegressionSummary private[regression] (
* @param solver the solver algorithm used for model training
*/
@Since("2.0.0")
-@Experimental
class GeneralizedLinearRegressionTrainingSummary private[regression] (
dataset: Dataset[_],
origModel: GeneralizedLinearRegressionModel,
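
A sketch of fitting one of the (family, link) pairs listed above and reading the training summary; dataset is assumed:

```scala
import org.apache.spark.ml.regression.GeneralizedLinearRegression

val glr = new GeneralizedLinearRegression()
  .setFamily("poisson")
  .setLink("log")
  .setMaxIter(10)
  .setRegParam(0.3)

val model = glr.fit(dataset)
println(model.summary)   // R glm()-style diagnostics
```
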
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 09f3f94d346b6..abf75d70ea028 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -25,7 +25,7 @@ import breeze.stats.distributions.StudentsT
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkException
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.{PipelineStage, PredictorParams}
import org.apache.spark.ml.feature.Instance
@@ -797,7 +797,6 @@ object LinearRegressionModel extends MLReadable[LinearRegressionModel] {
}
/**
- * :: Experimental ::
* Linear regression training results. Currently, the training summary ignores the
* training weights except for the objective trace.
*
@@ -805,7 +804,6 @@ object LinearRegressionModel extends MLReadable[LinearRegressionModel] {
* @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
*/
@Since("1.5.0")
-@Experimental
class LinearRegressionTrainingSummary private[regression] (
predictions: DataFrame,
predictionCol: String,
@@ -835,7 +833,6 @@ class LinearRegressionTrainingSummary private[regression] (
}
/**
- * :: Experimental ::
* Linear regression results evaluated on a dataset.
*
* @param predictions predictions output by the model's `transform` method.
@@ -845,7 +842,6 @@ class LinearRegressionTrainingSummary private[regression] (
* @param featuresCol Field in "predictions" which gives the features of each instance as a vector.
*/
@Since("1.5.0")
-@Experimental
class LinearRegressionSummary private[regression] (
@transient val predictions: DataFrame,
val predictionCol: String,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala
index 5b38ca73e8014..cc0c2134834d4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala
@@ -17,7 +17,7 @@
package org.apache.spark.ml.stat
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.ml.util.SchemaUtils
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
@@ -28,14 +28,11 @@ import org.apache.spark.sql.functions.col
/**
- * :: Experimental ::
- *
* Chi-square hypothesis testing for categorical data.
*
* See Wikipedia for more information
* on the Chi-squared test.
*/
-@Experimental
@Since("2.2.0")
object ChiSquareTest {
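
A sketch of the test above; df is an assumed DataFrame of categorical "features" vectors and a "label" column, and the single result row carries one entry per feature:

```scala
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.stat.ChiSquareTest

val result = ChiSquareTest.test(df, "features", "label").head
println(s"pValues = ${result.getAs[Vector](0)}")
println(s"degreesOfFreedom = ${result.getSeq[Int](1).mkString("[", ",", "]")}")
println(s"statistics = ${result.getAs[Vector](2)}")
```
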
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
index 8167ea68a7150..bab178b85d5ff 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.stat
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.ml.linalg.{SQLDataTypes, Vector}
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.mllib.stat.{Statistics => OldStatistics}
@@ -33,11 +33,9 @@ import org.apache.spark.sql.types.{StructField, StructType}
* to spark.ml's Vector types.
*/
@Since("2.2.0")
-@Experimental
object Correlation {
/**
- * :: Experimental ::
* Compute the correlation matrix for the input Dataset of Vectors using the specified method.
* Methods currently supported: `pearson` (default), `spearman`.
*
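
A sketch covering both supported methods; df is an assumed DataFrame with a "features" vector column:

```scala
import org.apache.spark.ml.linalg.Matrix
import org.apache.spark.ml.stat.Correlation
import org.apache.spark.sql.Row

val Row(pearson: Matrix) = Correlation.corr(df, "features").head
val Row(spearman: Matrix) = Correlation.corr(df, "features", "spearman").head
```
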
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala
index adf8145726711..f4a6b8b033dbe 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala
@@ -19,17 +19,15 @@ package org.apache.spark.ml.stat
import scala.annotation.varargs
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.api.java.function.Function
import org.apache.spark.ml.util.SchemaUtils
import org.apache.spark.mllib.stat.{Statistics => OldStatistics}
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Dataset, Row}
+import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions.col
/**
- * :: Experimental ::
- *
* Conduct the two-sided Kolmogorov Smirnov (KS) test for data sampled from a
* continuous distribution. By comparing the largest difference between the empirical cumulative
* distribution of the sample data and the theoretical distribution we can provide a test for the
@@ -38,7 +36,6 @@ import org.apache.spark.sql.functions.col
* @see
* Kolmogorov-Smirnov test (Wikipedia)
*/
-@Experimental
@Since("2.4.0")
object KolmogorovSmirnovTest {
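
A sketch of the two-sided test above against a standard normal; df is an assumed DataFrame with a Double "sample" column, and the trailing varargs are the distribution parameters:

```scala
import org.apache.spark.ml.stat.KolmogorovSmirnovTest
import org.apache.spark.sql.Row

val Row(pValue: Double, statistic: Double) =
  KolmogorovSmirnovTest.test(df, "sample", "norm", 0.0, 1.0).head
```
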
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
index ed7d7e0852647..59123a41d16fc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.stat
import java.io._
-import org.apache.spark.annotation.{Experimental, Since}
+import org.apache.spark.annotation.Since
import org.apache.spark.internal.Logging
import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT}
import org.apache.spark.sql.Column
@@ -35,7 +35,6 @@ import org.apache.spark.sql.types._
* Users should not directly create such builders, but instead use one of the methods in
* [[Summarizer]].
*/
-@Experimental
@Since("2.3.0")
sealed abstract class SummaryBuilder {
/**
@@ -78,7 +77,6 @@ sealed abstract class SummaryBuilder {
* Note: Currently, the performance of this interface is about 2x~3x slower than using the RDD
* interface.
*/
-@Experimental
@Since("2.3.0")
object Summarizer extends Logging {
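
A sketch of the builder pattern described above: request several metrics in one pass, or use the single-metric shortcuts. df is an assumed DataFrame with a "features" vector column:

```scala
import org.apache.spark.ml.stat.Summarizer
import org.apache.spark.sql.functions.col

val stats = df.select(
  Summarizer.metrics("mean", "variance").summary(col("features")).as("summary"))

val meanOnly = df.select(Summarizer.mean(col("features")))
```
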
diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
index 620c754a7ba0c..5f1091e438db0 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
@@ -30,7 +30,6 @@ import org.jpmml.model.filters.ImportFilter
*/
private[spark] object PMMLUtils {
/**
- * :: Experimental ::
* Load a PMML model from a string. Note: for testing only, PMML model evaluation is supported
* through external spark-packages.
*/
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 8d2848841c109..d689d1d034bc7 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -663,8 +663,6 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True):
def binaryFiles(self, path, minPartitions=None):
"""
- .. note:: Experimental
-
Read a directory of binary files from HDFS, a local file system
(available on all nodes), or any Hadoop-supported file system URI
as a byte array. Each file is read as a single record and returned
@@ -680,8 +678,6 @@ def binaryFiles(self, path, minPartitions=None):
def binaryRecords(self, path, recordLength):
"""
- .. note:: Experimental
-
Load data from a flat binary file, assuming each record is a set of numbers
with the specified numerical format (see ByteBuffer), and the number of
bytes per record is constant.
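
The same pair of APIs exists on the Scala SparkContext. A sketch (paths and record length are illustrative), noting that binaryFiles yields one record per file and so favors many small files:

```scala
// Each file becomes a single (path, stream) record.
val files = sc.binaryFiles("hdfs://path/to/dir")   // RDD[(String, PortableDataStream)]
val headers = files.mapValues(_.toArray().take(16))

// Flat binary file of fixed-length records: each element is 16 bytes here.
val records = sc.binaryRecords("hdfs://path/to/data.bin", 16)   // RDD[Array[Byte]]
```
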
diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py
index d4470b5bf2900..82ff81c58d3c6 100644
--- a/python/pyspark/ml/base.py
+++ b/python/pyspark/ml/base.py
@@ -98,7 +98,6 @@ def fitMultiple(self, dataset, paramMaps):
using `paramMaps[index]`. `index` values may not be sequential.
.. note:: DeveloperApi
- .. note:: Experimental
"""
estimator = self.copy()
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index a2482bae9ce7e..3ae2338aa77e6 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -69,8 +69,6 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha
HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization,
HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
`Linear SVM Classifier `_
This binary classifier optimizes the Hinge Loss using the OWLQN optimizer.
@@ -160,8 +158,6 @@ def _create_model(self, java_model):
class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by LinearSVC.
.. versionadded:: 2.2.0
@@ -565,8 +561,6 @@ def __repr__(self):
class LogisticRegressionSummary(JavaWrapper):
"""
- .. note:: Experimental
-
Abstraction for Logistic Regression Results for a given model.
.. versionadded:: 2.0.0
@@ -723,8 +717,6 @@ def weightedFMeasure(self, beta=1.0):
@inherit_doc
class LogisticRegressionTrainingSummary(LogisticRegressionSummary):
"""
- .. note:: Experimental
-
Abstraction for multinomial Logistic Regression Training results.
Currently, the training summary ignores the training weights except
for the objective trace.
@@ -753,8 +745,6 @@ def totalIterations(self):
@inherit_doc
class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
"""
- .. note:: Experimental
-
Binary Logistic regression results for a given model.
.. versionadded:: 2.0.0
@@ -850,8 +840,6 @@ def recallByThreshold(self):
class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary,
LogisticRegressionTrainingSummary):
"""
- .. note:: Experimental
-
Binary Logistic regression training results for a given model.
.. versionadded:: 2.0.0
@@ -1904,8 +1892,6 @@ def getClassifier(self):
@inherit_doc
class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Reduction of Multiclass Classification to Binary Classification.
Performs reduction using one against all strategy.
For a multiclass classification with k classes, train k models (one per class).
@@ -2118,8 +2104,6 @@ def _transfer_param_map_from_java(self, javaParamMap):
class OneVsRestModel(Model, OneVsRestParams, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by OneVsRest.
This stores the models resulting from training k binary classifiers: one for each class.
Each example is scored against all k models, and the model with the highest score
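
The Scala counterpart illustrates the one-against-all reduction described above; training and test are assumed DataFrames:

```scala
import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

val ovr = new OneVsRest()
  .setClassifier(new LogisticRegression().setMaxIter(10))

val ovrModel = ovr.fit(training)   // trains k binary models, one per class
ovrModel.transform(test).show()    // each row gets the class with the highest score
```
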
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 6c9cf7b6c829c..9b21aacacd710 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -33,8 +33,6 @@
class ClusteringSummary(JavaWrapper):
"""
- .. note:: Experimental
-
Clustering results for a given model.
.. versionadded:: 2.1.0
@@ -263,8 +261,6 @@ def getK(self):
class GaussianMixtureSummary(ClusteringSummary):
"""
- .. note:: Experimental
-
Gaussian mixture clustering results for a given model.
.. versionadded:: 2.1.0
@@ -297,8 +293,6 @@ def logLikelihood(self):
class KMeansSummary(ClusteringSummary):
"""
- .. note:: Experimental
-
Summary of KMeans.
.. versionadded:: 2.1.0
@@ -671,8 +665,6 @@ def _create_model(self, java_model):
class BisectingKMeansSummary(ClusteringSummary):
"""
- .. note:: Experimental
-
Bisecting KMeans clustering results for a given model.
.. versionadded:: 2.1.0
@@ -1175,8 +1167,6 @@ def getKeepLastCheckpoint(self):
class PowerIterationClustering(HasMaxIter, HasWeightCol, JavaParams, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
`Lin and Cohen `_. From the
abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index f3665d514ea13..d96cdd594a3f3 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -110,8 +110,6 @@ def isLargerBetter(self):
class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Evaluator for binary classification, which expects two input columns: rawPrediction and label.
The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label
1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities).
@@ -194,8 +192,6 @@ def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Evaluator for Regression, which expects input columns prediction, label
and an optional weight column.
@@ -278,8 +274,6 @@ def setParams(self, predictionCol="prediction", labelCol="label",
class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Evaluator for Multiclass Classification, which expects two input
columns: prediction and label.
@@ -497,8 +491,6 @@ def setParams(self, predictionCol="prediction", labelCol="label",
class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Evaluator for Clustering results, which expects two input
columns: prediction and features. The metric computes the Silhouette
measure using the squared Euclidean distance.
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index fe8ac6239a60b..9ab4e4d68691d 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -204,8 +204,6 @@ def approxSimilarityJoin(self, datasetA, datasetB, threshold, distCol="distCol")
class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
LSH class for Euclidean distance metrics.
The input is dense or sparse vectors, each of which represents a point in the Euclidean
distance space. The output will be vectors of configurable dimension. Hash values in the same
@@ -318,8 +316,6 @@ def _create_model(self, java_model):
class BucketedRandomProjectionLSHModel(LSHModel, JavaMLReadable, JavaMLWritable):
r"""
- .. note:: Experimental
-
Model fitted by :py:class:`BucketedRandomProjectionLSH`, where multiple random vectors are
stored. The vectors are normalized to be unit vectors and each vector is used in a hash
function: :math:`h_i(x) = floor(r_i \cdot x / bucketLength)` where :math:`r_i` is the
@@ -796,8 +792,6 @@ def getScalingVec(self):
class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Feature hashing projects a set of categorical or numerical features into a feature vector of
specified dimension (typically substantially smaller than that of the original feature
space). This is done using the hashing trick (https://en.wikipedia.org/wiki/Feature_hashing)
@@ -1083,8 +1077,6 @@ def numDocs(self):
@inherit_doc
class Imputer(JavaEstimator, HasInputCols, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Imputation estimator for completing missing values, either using the mean or the median
of the columns in which the missing values are located. The input columns should be of
DoubleType or FloatType. Currently Imputer does not support categorical features and
@@ -1222,8 +1214,6 @@ def _create_model(self, java_model):
class ImputerModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`Imputer`.
.. versionadded:: 2.2.0
@@ -1373,8 +1363,6 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
LSH class for Jaccard distance.
The input can be dense or sparse vectors, but it is more efficient if it is sparse.
For example, `Vectors.sparse(10, [(2, 1.0), (3, 1.0), (5, 1.0)])` means there are 10 elements
@@ -1452,8 +1440,6 @@ def _create_model(self, java_model):
class MinHashLSHModel(LSHModel, JavaMLReadable, JavaMLWritable):
r"""
- .. note:: Experimental
-
Model produced by :py:class:`MinHashLSH`, where multiple hash functions are stored. Each
hash function is picked from the following family of hash functions, where :math:`a_i` and
:math:`b_i` are randomly chosen integers less than prime:
@@ -1920,8 +1906,6 @@ def getDegree(self):
class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
`QuantileDiscretizer` takes a column with continuous features and outputs a column with binned
categorical features. The number of bins can be set using the :py:attr:`numBuckets` parameter.
It is possible that the number of buckets used will be less than this value, for example, if
@@ -3591,8 +3575,6 @@ def explainedVariance(self):
class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, HasHandleInvalid,
JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Implements the transforms required for fitting a dataset against an
R model formula. Currently we support a limited subset of the R
operators, including '~', '.', ':', '+', '-', '*', and '^'.
@@ -3761,8 +3743,6 @@ def __str__(self):
class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`RFormula`. Fitting is required to determine the
factor levels of formula terms.
@@ -3778,8 +3758,6 @@ def __str__(self):
class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
Chi-Squared feature selection, which selects categorical features to use for predicting a
categorical label.
The selector supports different selection methods: `numTopFeatures`, `percentile`, `fpr`,
@@ -3979,8 +3957,6 @@ def _create_model(self, java_model):
class ChiSqSelectorModel(JavaModel, JavaMLReadable, JavaMLWritable):
"""
- .. note:: Experimental
-
Model fitted by :py:class:`ChiSqSelector`.
.. versionadded:: 2.0.0
@@ -3999,8 +3975,6 @@ def selectedFeatures(self):
class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable,
JavaMLWritable):
"""
- .. note:: Experimental
-
A feature transformer that adds size information to the metadata of a vector column.
VectorAssembler needs size information for its input columns and cannot be used on streaming
dataframes without this metadata.
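As a concrete example of the `QuantileDiscretizer` behavior described above, a minimal sketch in the equivalent Scala API (assuming `spark` and its implicits; the data is illustrative):

{{{
import org.apache.spark.ml.feature.QuantileDiscretizer
import spark.implicits._

val df = Seq(0.0, 1.5, 3.0, 7.2, 9.9, 12.4).toDF("hour")

// Bin the continuous column into (at most) 3 quantile-based buckets.
val discretizer = new QuantileDiscretizer()
  .setInputCol("hour")
  .setOutputCol("bucket")
  .setNumBuckets(3)

discretizer.fit(df).transform(df).show()
}}}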
diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
index ed71fb0c57591..4fc19704ae3d1 100644
--- a/python/pyspark/ml/fpm.py
+++ b/python/pyspark/ml/fpm.py
@@ -125,8 +125,6 @@ def getItemsCol(self):
class FPGrowthModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by FPGrowth.
.. versionadded:: 2.2.0
@@ -159,8 +157,6 @@ class FPGrowth(JavaEstimator, HasItemsCol, HasPredictionCol,
JavaMLWritable, JavaMLReadable):
r"""
- .. note:: Experimental
-
A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
Li et al., PFP: Parallel FP-Growth for Query Recommendation [LI2008]_.
PFP distributes computation in such a way that each worker executes an
@@ -249,8 +245,6 @@ def _create_model(self, java_model):
class PrefixSpan(JavaParams):
"""
- .. note:: Experimental
-
A parallel PrefixSpan algorithm to mine frequent sequential patterns.
The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns
Efficiently by Prefix-Projected Pattern Growth
@@ -313,8 +307,6 @@ def setParams(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=3200
@since("2.4.0")
def findFrequentSequentialPatterns(self, dataset):
"""
- .. note:: Experimental
-
Finds the complete set of frequent sequential patterns in the input sequences of itemsets.
:param dataset: A dataframe containing a sequence column which is
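A minimal sketch of the FP-growth flow described above, in the equivalent Scala API (assuming `spark`; the transactions are illustrative):

{{{
import org.apache.spark.ml.fpm.FPGrowth
import spark.implicits._

val transactions = Seq(
  Seq("a", "b", "c"),
  Seq("a", "b"),
  Seq("a")
).toDF("items")

val model = new FPGrowth()
  .setItemsCol("items")
  .setMinSupport(0.5)
  .setMinConfidence(0.6)
  .fit(transactions)

model.freqItemsets.show()     // frequent itemsets with their frequencies
model.associationRules.show() // antecedent => consequent rules
}}}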
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index b2946a10be4ab..2d1d1272c17f8 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -224,8 +224,6 @@ def evaluate(self, dataset):
class LinearRegressionSummary(JavaWrapper):
"""
- .. note:: Experimental
-
Linear regression results evaluated on a dataset.
.. versionadded:: 2.0.0
@@ -431,8 +429,6 @@ def pValues(self):
@inherit_doc
class LinearRegressionTrainingSummary(LinearRegressionSummary):
"""
- .. note:: Experimental
-
Linear regression training results. Currently, the training summary ignores the
training weights except for the objective trace.
@@ -1473,8 +1469,6 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth,
JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Accelerated Failure Time (AFT) Model Survival Regression
Fit a parametric AFT survival regression model based on the Weibull distribution
@@ -1615,8 +1609,6 @@ def getQuantilesCol(self):
class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Model fitted by :class:`AFTSurvivalRegression`.
.. versionadded:: 1.6.0
@@ -1666,8 +1658,6 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol,
HasSolver, JavaMLWritable, JavaMLReadable):
"""
- .. note:: Experimental
-
Generalized Linear Regression.
Fit a Generalized Linear Model specified by giving a symbolic description of the linear
@@ -1874,8 +1864,6 @@ def getOffsetCol(self):
class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable,
JavaMLReadable, HasTrainingSummary):
"""
- .. note:: Experimental
-
Model fitted by :class:`GeneralizedLinearRegression`.
.. versionadded:: 2.0.0
@@ -1929,8 +1917,6 @@ def evaluate(self, dataset):
class GeneralizedLinearRegressionSummary(JavaWrapper):
"""
- .. note:: Experimental
-
Generalized linear regression results evaluated on a dataset.
.. versionadded:: 2.0.0
@@ -2042,8 +2028,6 @@ def aic(self):
@inherit_doc
class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary):
"""
- .. note:: Experimental
-
Generalized linear regression training results.
.. versionadded:: 2.0.0
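For the GLM training-summary workflow described above, a minimal Scala sketch (assuming `spark`; the training data is illustrative):

{{{
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import spark.implicits._

val train = Seq(
  (1.0, Vectors.dense(0.0, 1.0)),
  (2.0, Vectors.dense(1.0, 2.0)),
  (3.0, Vectors.dense(2.0, 3.0)),
  (4.0, Vectors.dense(3.0, 5.0))
).toDF("label", "features")

val model = new GeneralizedLinearRegression()
  .setFamily("gaussian")
  .setLink("identity")
  .setRegParam(0.3)
  .fit(train)

println(model.coefficients)
println(model.summary.aic) // training summary, as documented above
}}}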
diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py
index 3f421024acdce..a84b67476609b 100644
--- a/python/pyspark/ml/stat.py
+++ b/python/pyspark/ml/stat.py
@@ -26,8 +26,6 @@
class ChiSquareTest(object):
"""
- .. note:: Experimental
-
Conduct Pearson's independence test for every feature against the label. For each feature,
the (feature, label) pairs are converted into a contingency matrix for which the Chi-squared
statistic is computed. All label and feature values must be categorical.
@@ -77,8 +75,6 @@ def test(dataset, featuresCol, labelCol):
class Correlation(object):
"""
- .. note:: Experimental
-
Compute the correlation matrix for the input dataset of Vectors using the specified method.
Methods currently supported: `pearson` (default), `spearman`.
@@ -138,8 +134,6 @@ def corr(dataset, column, method="pearson"):
class KolmogorovSmirnovTest(object):
"""
- .. note:: Experimental
-
Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a continuous
distribution.
@@ -199,8 +193,6 @@ def test(dataset, sampleCol, distName, *params):
class Summarizer(object):
"""
- .. note:: Experimental
-
Tools for vectorized statistics on MLlib Vectors.
The methods in this package provide various statistics for Vectors contained inside DataFrames.
This class lets users pick the statistics they would like to extract for a given column.
@@ -355,8 +347,6 @@ def metrics(*metrics):
class SummaryBuilder(JavaWrapper):
"""
- .. note:: Experimental
-
A builder object that provides summary statistics about a given column.
Users should not directly create such builders, but instead use one of the methods in
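The same statistics are exposed through the Scala `org.apache.spark.ml.stat` package; a minimal sketch of `Correlation` (assuming `spark`; the data is illustrative):

{{{
import org.apache.spark.ml.linalg.{Matrix, Vectors}
import org.apache.spark.ml.stat.Correlation
import org.apache.spark.sql.Row
import spark.implicits._

val data = Seq(
  Vectors.dense(1.0, 0.5),
  Vectors.dense(2.0, 1.1),
  Vectors.dense(3.0, 1.4)
).map(Tuple1.apply).toDF("features")

// Pearson correlation matrix of the vector column, returned as a single Row.
val Row(pearson: Matrix) = Correlation.corr(data, "features").head
println(pearson)
}}}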
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index be7b8da981317..d80d6e8aaa342 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -489,8 +489,6 @@ def _to_java(self):
class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels,
MLReadable, MLWritable):
"""
- .. note:: Experimental
-
Validation for hyper-parameter tuning. Randomly splits the input dataset into train and
validation sets, and uses the evaluation metric on the validation set to select the best model.
Similar to :class:`CrossValidator`, but only splits the set once.
@@ -666,8 +664,6 @@ def _to_java(self):
class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable):
"""
- .. note:: Experimental
-
Model from train validation split.
.. versionadded:: 2.0.0
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 590e8e1e9c07c..16c226f02e633 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2370,8 +2370,6 @@ def _to_java_object_rdd(self):
def countApprox(self, timeout, confidence=0.95):
"""
- .. note:: Experimental
-
Approximate version of count() that returns a potentially incomplete
result within a timeout, even if not all tasks have finished.
@@ -2384,8 +2382,6 @@ def countApprox(self, timeout, confidence=0.95):
def sumApprox(self, timeout, confidence=0.95):
"""
- .. note:: Experimental
-
Approximate operation to return the sum within a timeout
or meet the confidence.
@@ -2401,8 +2397,6 @@ def sumApprox(self, timeout, confidence=0.95):
def meanApprox(self, timeout, confidence=0.95):
"""
- .. note:: Experimental
-
Approximate operation to return the mean within a timeout
or meet the confidence.
@@ -2418,8 +2412,6 @@ def meanApprox(self, timeout, confidence=0.95):
def countApproxDistinct(self, relativeSD=0.05):
"""
- .. note:: Experimental
-
Return approximate number of distinct elements in the RDD.
The algorithm used is based on streamlib's implementation of
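The Scala RDD API exposes the same approximate operations; a minimal sketch, assuming an existing `SparkContext` named `sc`:

{{{
val rdd = sc.parallelize(1 to 1000000, numSlices = 8)

// Partial count: returns whatever estimate is available after 100 ms.
val partial = rdd.countApprox(timeout = 100, confidence = 0.95)
println(partial.initialValue)

// HyperLogLog-based distinct count, within roughly 5% relative error.
println(rdd.countApproxDistinct(relativeSD = 0.05))
}}}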
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 9e5b61f9e00b4..c7ff2882ed95a 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2917,8 +2917,6 @@ def pandas_udf(f=None, returnType=None, functionType=None):
:param functionType: an enum value in :class:`pyspark.sql.functions.PandasUDFType`.
Default: SCALAR.
- .. note:: Experimental
-
The function type of the UDF can be one of the following:
1. SCALAR
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index cc1da8e7c1f72..ec90ba905ef66 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -51,8 +51,6 @@ class GroupedData(object):
A set of methods for aggregations on a :class:`DataFrame`,
created by :func:`DataFrame.groupBy`.
- .. note:: Experimental
-
.. versionadded:: 1.3
"""
@@ -238,8 +236,6 @@ def apply(self, udf):
into memory, so the user should be aware of the potential OOM risk if data is skewed
and certain groups are too large to fit in memory.
- .. note:: Experimental
-
:param udf: a grouped map user-defined function returned by
:func:`pyspark.sql.functions.pandas_udf`.
diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py
index 9e02758a566a2..67c594c539d52 100644
--- a/python/pyspark/sql/window.py
+++ b/python/pyspark/sql/window.py
@@ -46,8 +46,6 @@ class Window(object):
unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined,
a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default.
- .. note:: Experimental
-
.. versionadded:: 1.4
"""
@@ -205,8 +203,6 @@ class WindowSpec(object):
Use the static methods in :class:`Window` to create a :class:`WindowSpec`.
- .. note:: Experimental
-
.. versionadded:: 1.4
"""
diff --git a/python/pyspark/taskcontext.py b/python/pyspark/taskcontext.py
index 790de0b9bf41a..b0c32c15793ac 100644
--- a/python/pyspark/taskcontext.py
+++ b/python/pyspark/taskcontext.py
@@ -24,8 +24,6 @@
class TaskContext(object):
"""
- .. note:: Experimental
-
Contextual information about a task which can be read or mutated during
execution. To access the TaskContext for a running task, use:
:meth:`TaskContext.get`.
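The Scala side has the same entry point; a minimal sketch of reading task metadata inside a closure, assuming a `SparkContext` named `sc`:

{{{
import org.apache.spark.TaskContext

sc.parallelize(1 to 4, 2).foreachPartition { _ =>
  val ctx = TaskContext.get() // the context of the currently running task
  println(s"stage=${ctx.stageId()} partition=${ctx.partitionId()} " +
    s"attempt=${ctx.attemptNumber()}")
}
}}}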
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java
index a54398324fc66..110ed460cc8fa 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java
@@ -20,7 +20,6 @@
import java.lang.annotation.*;
import org.apache.spark.annotation.DeveloperApi;
-import org.apache.spark.annotation.Evolving;
/**
* ::DeveloperApi::
@@ -31,7 +30,6 @@
@DeveloperApi
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
-@Evolving
public @interface SQLUserDefinedType {
/**
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 07d17ee14ce23..541818331a0bd 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -22,7 +22,6 @@
import org.apache.arrow.vector.complex.*;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
-import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.util.ArrowUtils;
import org.apache.spark.sql.types.*;
import org.apache.spark.unsafe.types.UTF8String;
@@ -31,7 +30,6 @@
* A column vector backed by Apache Arrow. Currently calendar interval type and map type are not
* supported.
*/
-@Evolving
public final class ArrowColumnVector extends ColumnVector {
private final ArrowVectorAccessor accessor;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
index 29d81c553ff61..c43a86ad48ec9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
@@ -20,11 +20,9 @@ package org.apache.spark.sql
import scala.annotation.implicitNotFound
import scala.reflect.ClassTag
-import org.apache.spark.annotation.{Evolving, Experimental}
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* Used to convert a JVM object of type `T` to and from the internal Spark SQL representation.
*
* == Scala ==
@@ -65,8 +63,6 @@ import org.apache.spark.sql.types._
*
* @since 1.6.0
*/
-@Experimental
-@Evolving
@implicitNotFound("Unable to find encoder for type ${T}. An implicit Encoder[${T}] is needed to " +
"store ${T} instances in a Dataset. Primitive types (Int, String, etc) and Product types (case " +
"classes) are supported by importing spark.implicits._ Support for serializing other types " +
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
index 055fbc49bdcd7..5d31b5bbf12af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala
@@ -22,7 +22,6 @@ import java.lang.reflect.Modifier
import scala.reflect.{classTag, ClassTag}
import scala.reflect.runtime.universe.TypeTag
-import org.apache.spark.annotation.{Evolving, Experimental}
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast}
@@ -30,13 +29,10 @@ import org.apache.spark.sql.catalyst.expressions.objects.{DecodeUsingSerializer,
import org.apache.spark.sql.types._
/**
- * :: Experimental ::
* Methods for creating an [[Encoder]].
*
* @since 1.6.0
*/
-@Experimental
-@Evolving
object Encoders {
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
index e79c0a4b62c4b..21d773d00a75a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
@@ -17,9 +17,6 @@
package org.apache.spark.sql.types
-import org.apache.spark.annotation.Evolving
-
-@Evolving
object ObjectType extends AbstractDataType {
override private[sql] def defaultConcreteType: DataType =
throw new UnsupportedOperationException(
@@ -36,7 +33,6 @@ object ObjectType extends AbstractDataType {
/**
* Represents a JVM object that is passing through Spark SQL expression evaluation.
*/
-@Evolving
case class ObjectType(cls: Class[_]) extends DataType {
override def defaultSize: Int = 4096
diff --git a/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
index 859b936acdf70..259328d319c8e 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java
@@ -25,7 +25,6 @@
import org.apache.spark.sql.execution.aggregate.TypedSumLong;
/**
- * :: Experimental ::
* Type-safe functions available for {@link org.apache.spark.sql.Dataset} operations in Java.
*
* Scala users should use {@link org.apache.spark.sql.expressions.scalalang.typed}.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 23360df04594b..a2f5f03d797a2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -27,7 +27,7 @@ import scala.util.control.NonFatal
import org.apache.commons.lang3.StringUtils
import org.apache.spark.{SparkException, TaskContext}
-import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental, Stable, Unstable}
+import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.function._
import org.apache.spark.api.python.{PythonRDD, SerDeUtil}
@@ -441,7 +441,6 @@ class Dataset[T] private[sql](
def toDF(): DataFrame = new Dataset[Row](sparkSession, queryExecution, RowEncoder(schema))
/**
- * :: Experimental ::
* Returns a new Dataset where each record has been mapped to the specified type. The
* method used to map columns depends on the type of `U`:
* - When `U` is a class, fields for the class will be mapped to columns of the same name
@@ -461,8 +460,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 1.6.0
*/
- @Experimental
- @Evolving
def as[U : Encoder]: Dataset[U] = Dataset[U](sparkSession, logicalPlan)
/**
@@ -597,7 +594,6 @@ class Dataset[T] private[sql](
* @group streaming
* @since 2.0.0
*/
- @Evolving
def isStreaming: Boolean = logicalPlan.isStreaming
/**
@@ -609,8 +605,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 2.1.0
*/
- @Experimental
- @Evolving
def checkpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = true)
/**
@@ -622,8 +616,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 2.1.0
*/
- @Experimental
- @Evolving
def checkpoint(eager: Boolean): Dataset[T] = checkpoint(eager = eager, reliableCheckpoint = true)
/**
@@ -635,8 +627,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 2.3.0
*/
- @Experimental
- @Evolving
def localCheckpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = false)
/**
@@ -648,8 +638,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 2.3.0
*/
- @Experimental
- @Evolving
def localCheckpoint(eager: Boolean): Dataset[T] = checkpoint(
eager = eager,
reliableCheckpoint = false
@@ -725,7 +713,6 @@ class Dataset[T] private[sql](
* @group streaming
* @since 2.1.0
*/
- @Evolving
// We only accept an existing column name, not a derived column here as a watermark that is
// defined on a derived column cannot be referenced elsewhere in the plan.
def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = withTypedPlan {
@@ -1107,7 +1094,6 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* Joins this Dataset returning a `Tuple2` for each pair where `condition` evaluates to
* true.
*
@@ -1128,8 +1114,6 @@ class Dataset[T] private[sql](
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)] = {
// Creates a Join node and resolve it first, to get join condition resolved, self-join resolved,
// etc.
@@ -1203,7 +1187,6 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* Joins this Dataset with another one using an inner equi-join, returning a `Tuple2` for each pair
* where `condition` evaluates to true.
*
@@ -1213,8 +1196,6 @@ class Dataset[T] private[sql](
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)] = {
joinWith(other, condition, "inner")
}
@@ -1462,7 +1443,6 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* Returns a new Dataset by computing the given [[Column]] expression for each element.
*
* {{{
@@ -1473,8 +1453,6 @@ class Dataset[T] private[sql](
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def select[U1](c1: TypedColumn[T, U1]): Dataset[U1] = {
implicit val encoder = c1.encoder
val project = Project(c1.withInputType(exprEnc, logicalPlan.output).named :: Nil, logicalPlan)
@@ -1501,26 +1479,20 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)] =
selectUntyped(c1, c2).asInstanceOf[Dataset[(U1, U2)]]
/**
- * :: Experimental ::
* Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def select[U1, U2, U3](
c1: TypedColumn[T, U1],
c2: TypedColumn[T, U2],
@@ -1528,14 +1500,11 @@ class Dataset[T] private[sql](
selectUntyped(c1, c2, c3).asInstanceOf[Dataset[(U1, U2, U3)]]
/**
- * :: Experimental ::
* Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def select[U1, U2, U3, U4](
c1: TypedColumn[T, U1],
c2: TypedColumn[T, U2],
@@ -1544,14 +1513,11 @@ class Dataset[T] private[sql](
selectUntyped(c1, c2, c3, c4).asInstanceOf[Dataset[(U1, U2, U3, U4)]]
/**
- * :: Experimental ::
* Returns a new Dataset by computing the given [[Column]] expressions for each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def select[U1, U2, U3, U4, U5](
c1: TypedColumn[T, U1],
c2: TypedColumn[T, U2],
@@ -1713,7 +1679,6 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Scala-specific)
* Reduces the elements of this Dataset using the specified binary function. The given `func`
* must be commutative and associative or the result may be non-deterministic.
@@ -1721,14 +1686,11 @@ class Dataset[T] private[sql](
* @group action
* @since 1.6.0
*/
- @Experimental
- @Evolving
def reduce(func: (T, T) => T): T = withNewRDDExecutionId {
rdd.reduce(func)
}
/**
- * :: Experimental ::
* (Java-specific)
* Reduces the elements of this Dataset using the specified binary function. The given `func`
* must be commutative and associative or the result may be non-deterministic.
@@ -1736,20 +1698,15 @@ class Dataset[T] private[sql](
* @group action
* @since 1.6.0
*/
- @Experimental
- @Evolving
def reduce(func: ReduceFunction[T]): T = reduce(func.call(_, _))
/**
- * :: Experimental ::
* (Scala-specific)
* Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`.
*
* @group typedrel
* @since 2.0.0
*/
- @Experimental
- @Evolving
def groupByKey[K: Encoder](func: T => K): KeyValueGroupedDataset[K, T] = {
val withGroupingKey = AppendColumns(func, logicalPlan)
val executed = sparkSession.sessionState.executePlan(withGroupingKey)
@@ -1763,15 +1720,12 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Java-specific)
* Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`.
*
* @group typedrel
* @since 2.0.0
*/
- @Experimental
- @Evolving
def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T] =
groupByKey(func.call(_))(encoder)
@@ -2584,72 +2538,57 @@ class Dataset[T] private[sql](
def transform[U](t: Dataset[T] => Dataset[U]): Dataset[U] = t(this)
/**
- * :: Experimental ::
* (Scala-specific)
* Returns a new Dataset that only contains elements where `func` returns `true`.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def filter(func: T => Boolean): Dataset[T] = {
withTypedPlan(TypedFilter(func, logicalPlan))
}
/**
- * :: Experimental ::
* (Java-specific)
* Returns a new Dataset that only contains elements where `func` returns `true`.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def filter(func: FilterFunction[T]): Dataset[T] = {
withTypedPlan(TypedFilter(func, logicalPlan))
}
/**
- * :: Experimental ::
* (Scala-specific)
* Returns a new Dataset that contains the result of applying `func` to each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def map[U : Encoder](func: T => U): Dataset[U] = withTypedPlan {
MapElements[T, U](func, logicalPlan)
}
/**
- * :: Experimental ::
* (Java-specific)
* Returns a new Dataset that contains the result of applying `func` to each element.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U] = {
implicit val uEnc = encoder
withTypedPlan(MapElements[T, U](func, logicalPlan))
}
/**
- * :: Experimental ::
* (Scala-specific)
* Returns a new Dataset that contains the result of applying `func` to each partition.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def mapPartitions[U : Encoder](func: Iterator[T] => Iterator[U]): Dataset[U] = {
new Dataset[U](
sparkSession,
@@ -2658,15 +2597,12 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Java-specific)
* Returns a new Dataset that contains the result of applying `f` to each partition.
*
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def mapPartitions[U](f: MapPartitionsFunction[T, U], encoder: Encoder[U]): Dataset[U] = {
val func: (Iterator[T]) => Iterator[U] = x => f.call(x.asJava).asScala
mapPartitions(func)(encoder)
@@ -2705,7 +2641,6 @@ class Dataset[T] private[sql](
}
/**
- * :: Experimental ::
* (Scala-specific)
* Returns a new Dataset by first applying a function to all elements of this Dataset,
* and then flattening the results.
@@ -2713,13 +2648,10 @@ class Dataset[T] private[sql](
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def flatMap[U : Encoder](func: T => TraversableOnce[U]): Dataset[U] =
mapPartitions(_.flatMap(func))
/**
- * :: Experimental ::
* (Java-specific)
* Returns a new Dataset by first applying a function to all elements of this Dataset,
* and then flattening the results.
@@ -2727,8 +2659,6 @@ class Dataset[T] private[sql](
* @group typedrel
* @since 1.6.0
*/
- @Experimental
- @Evolving
def flatMap[U](f: FlatMapFunction[T, U], encoder: Encoder[U]): Dataset[U] = {
val func: (T) => Iterator[U] = x => f.call(x).asScala
flatMap(func)(encoder)
@@ -3224,7 +3154,6 @@ class Dataset[T] private[sql](
* @group basic
* @since 2.0.0
*/
- @Evolving
def writeStream: DataStreamWriter[T] = {
if (!isStreaming) {
logicalPlan.failAnalysis(
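Taken together, the typed operators above compose as follows; a minimal sketch, assuming `spark` and its implicits (the data is illustrative):

{{{
import spark.implicits._

case class Sale(city: String, amount: Double)
val ds = Seq(Sale("NY", 10.0), Sale("NY", 5.0), Sale("SF", 7.5)).toDS()

val big   = ds.filter(_.amount > 6.0)                    // filter(func: T => Boolean)
val cents = ds.map(s => s.copy(amount = s.amount * 100)) // map(func: T => U)
val total = ds.map(_.amount).reduce(_ + _)               // reduce(func: (T, T) => T)

// groupByKey plus a typed per-group aggregation.
val perCity = ds.groupByKey(_.city)
  .mapGroups((city, sales) => (city, sales.map(_.amount).sum))
perCity.show()
}}}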
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
index a0b0a34a01073..27012c471462d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
@@ -17,8 +17,6 @@
package org.apache.spark.sql
-import org.apache.spark.annotation.Evolving
-
/**
* The abstract class for writing custom logic to process data generated by a query.
* This is often used to write the output of a streaming query to arbitrary storage systems.
@@ -101,7 +99,6 @@ import org.apache.spark.annotation.Evolving
*
* @since 2.0.0
*/
-@Evolving
abstract class ForeachWriter[T] extends Serializable {
// TODO: Move this to org.apache.spark.sql.util or consolidate this with batch API.
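A minimal sketch of the writer contract: `open` accepts or skips a partition, `process` handles each row, and `close` runs once per partition per trigger (the sink here just prints):

{{{
import org.apache.spark.sql.ForeachWriter

val consoleWriter = new ForeachWriter[String] {
  def open(partitionId: Long, version: Long): Boolean = true // accept this split
  def process(value: String): Unit = println(value)
  def close(errorOrNull: Throwable): Unit = ()
}

// Usage on a streaming Dataset[String]:
// streamingDs.writeStream.foreach(consoleWriter).start()
}}}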
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
index 0da52d432d25d..89cc9735e4f6a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.{Evolving, Experimental}
import org.apache.spark.api.java.function._
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CreateStruct}
@@ -30,15 +29,12 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout, OutputMode}
/**
- * :: Experimental ::
* A [[Dataset]] that has been logically grouped by a user-specified grouping key. Users should not
* construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupByKey` on
* an existing [[Dataset]].
*
* @since 2.0.0
*/
-@Experimental
-@Evolving
class KeyValueGroupedDataset[K, V] private[sql](
kEncoder: Encoder[K],
vEncoder: Encoder[V],
@@ -221,7 +217,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Scala-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -237,8 +232,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def mapGroupsWithState[S: Encoder, U: Encoder](
func: (K, Iterator[V], GroupState[S]) => U): Dataset[U] = {
val flatMapFunc = (key: K, it: Iterator[V], s: GroupState[S]) => Iterator(func(key, it, s))
@@ -255,7 +248,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Scala-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -272,8 +264,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def mapGroupsWithState[S: Encoder, U: Encoder](
timeoutConf: GroupStateTimeout)(
func: (K, Iterator[V], GroupState[S]) => U): Dataset[U] = {
@@ -291,7 +281,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Java-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -309,8 +298,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def mapGroupsWithState[S, U](
func: MapGroupsWithStateFunction[K, V, S, U],
stateEncoder: Encoder[S],
@@ -321,7 +308,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Java-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -340,8 +326,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def mapGroupsWithState[S, U](
func: MapGroupsWithStateFunction[K, V, S, U],
stateEncoder: Encoder[S],
@@ -353,7 +337,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Scala-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -371,8 +354,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def flatMapGroupsWithState[S: Encoder, U: Encoder](
outputMode: OutputMode,
timeoutConf: GroupStateTimeout)(
@@ -393,7 +374,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
}
/**
- * ::Experimental::
* (Java-specific)
* Applies the given function to each group of data, while maintaining a user-defined per-group
* state. The result Dataset will represent the objects returned by the function.
@@ -413,8 +393,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
* See [[Encoder]] for more details on what types are encodable to Spark SQL.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def flatMapGroupsWithState[S, U](
func: FlatMapGroupsWithStateFunction[K, V, S, U],
outputMode: OutputMode,
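A minimal sketch of the `mapGroupsWithState` contract above: the function sees a key, that key's new values, and its persisted state (the event types and the streaming source are illustrative):

{{{
import org.apache.spark.sql.streaming.GroupState

case class Click(user: String, clicks: Int)
case class Total(user: String, total: Long)

// Keep a running click total per user across triggers.
def updateTotals(
    user: String,
    events: Iterator[Click],
    state: GroupState[Long]): Total = {
  val total = state.getOption.getOrElse(0L) + events.map(_.clicks.toLong).sum
  state.update(total)
  Total(user, total)
}

// With `events: Dataset[Click]` read from a stream:
// val totals = events.groupByKey(_.user).mapGroupsWithState(updateTotals _)
}}}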
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 08b7521de9573..45d0bd4122535 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -23,7 +23,7 @@ import scala.collection.immutable
import scala.reflect.runtime.universe.TypeTag
import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.annotation._
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.ConfigEntry
@@ -85,8 +85,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
* that listen for execution metrics.
*/
- @Experimental
- @Evolving
def listenerManager: ExecutionListenerManager = sparkSession.listenerManager
/**
@@ -231,7 +229,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
// scalastyle:off
// Disable style checker so "implicits" object can start with lowercase i
/**
- * :: Experimental ::
* (Scala-specific) Implicit methods available in Scala for converting
* common Scala objects into `DataFrame`s.
*
@@ -243,35 +240,27 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @group basic
* @since 1.3.0
*/
- @Experimental
- @Evolving
object implicits extends SQLImplicits with Serializable {
protected override def _sqlContext: SQLContext = self
}
// scalastyle:on
/**
- * :: Experimental ::
* Creates a DataFrame from an RDD of Product (e.g. case classes, tuples).
*
* @group dataframes
* @since 1.3.0
*/
- @Experimental
- @Evolving
def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = {
sparkSession.createDataFrame(rdd)
}
/**
- * :: Experimental ::
* Creates a DataFrame from a local Seq of Product.
*
* @group dataframes
* @since 1.3.0
*/
- @Experimental
- @Evolving
def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = {
sparkSession.createDataFrame(data)
}
@@ -319,13 +308,11 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 1.3.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = {
sparkSession.createDataFrame(rowRDD, schema)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -353,14 +340,11 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 2.0.0
* @group dataset
*/
- @Experimental
- @Evolving
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
sparkSession.createDataset(data)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] from an RDD of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -369,13 +353,11 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 2.0.0
* @group dataset
*/
- @Experimental
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
sparkSession.createDataset(data)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -391,8 +373,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 2.0.0
* @group dataset
*/
- @Experimental
- @Evolving
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
sparkSession.createDataset(data)
}
@@ -419,7 +399,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 1.3.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
sparkSession.createDataFrame(rowRDD, schema)
}
@@ -434,7 +413,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 1.6.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = {
sparkSession.createDataFrame(rows, schema)
}
@@ -498,7 +476,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
*
* @since 2.0.0
*/
- @Evolving
def readStream: DataStreamReader = sparkSession.readStream
@@ -614,45 +591,35 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * :: Experimental ::
* Creates a `DataFrame` with a single `LongType` column named `id`, containing elements
* in a range from 0 to `end` (exclusive) with step value 1.
*
* @since 1.4.1
* @group dataframe
*/
- @Experimental
- @Evolving
def range(end: Long): DataFrame = sparkSession.range(end).toDF()
/**
- * :: Experimental ::
* Creates a `DataFrame` with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with step value 1.
*
* @since 1.4.0
* @group dataframe
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long): DataFrame = sparkSession.range(start, end).toDF()
/**
- * :: Experimental ::
* Creates a `DataFrame` with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value.
*
* @since 2.0.0
* @group dataframe
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long, step: Long): DataFrame = {
sparkSession.range(start, end, step).toDF()
}
/**
- * :: Experimental ::
* Creates a `DataFrame` with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value, with partition number
* specified.
@@ -660,8 +627,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
* @since 1.4.0
* @group dataframe
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long, step: Long, numPartitions: Int): DataFrame = {
sparkSession.range(start, end, step, numPartitions).toDF()
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
index c997b7d8e0bfb..71cbc3ab14d97 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
@@ -21,7 +21,6 @@ import scala.collection.Map
import scala.language.implicitConversions
import scala.reflect.runtime.universe.TypeTag
-import org.apache.spark.annotation.Evolving
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
@@ -30,7 +29,6 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
*
* @since 1.6.0
*/
-@Evolving
abstract class SQLImplicits extends LowPrioritySQLImplicits {
protected def _sqlContext: SQLContext
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index e0d0062e976c1..bd2bc1c0ad5d7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -22,18 +22,16 @@ import java.util.concurrent.TimeUnit._
import java.util.concurrent.atomic.AtomicReference
import scala.collection.JavaConverters._
-import scala.collection.mutable
import scala.reflect.runtime.universe.TypeTag
import scala.util.control.NonFatal
import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext, TaskContext}
-import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental, Stable, Unstable}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
import org.apache.spark.sql.catalog.Catalog
-import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Catalogs}
import org.apache.spark.sql.catalyst._
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.encoders._
@@ -182,14 +180,11 @@ class SparkSession private(
@transient lazy val conf: RuntimeConfig = new RuntimeConfig(sessionState.conf)
/**
- * :: Experimental ::
* An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
* that listen for execution metrics.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def listenerManager: ExecutionListenerManager = sessionState.listenerManager
/**
@@ -227,13 +222,11 @@ class SparkSession private(
def udf: UDFRegistration = sessionState.udfRegistration
/**
- * :: Experimental ::
* Returns a `StreamingQueryManager` that allows managing all the
* `StreamingQuery`s active on `this`.
*
* @since 2.0.0
*/
- @Experimental
@Unstable
def streams: StreamingQueryManager = sessionState.streamingQueryManager
@@ -286,26 +279,20 @@ class SparkSession private(
}
/**
- * :: Experimental ::
* Creates a new [[Dataset]] of type T containing zero elements.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def emptyDataset[T: Encoder]: Dataset[T] = {
val encoder = implicitly[Encoder[T]]
new Dataset(self, LocalRelation(encoder.schema.toAttributes), encoder)
}
/**
- * :: Experimental ::
* Creates a `DataFrame` from an RDD of Product (e.g. case classes, tuples).
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = {
SparkSession.setActiveSession(this)
val encoder = Encoders.product[A]
@@ -313,13 +300,10 @@ class SparkSession private(
}
/**
- * :: Experimental ::
* Creates a `DataFrame` from a local Seq of Product.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = {
SparkSession.setActiveSession(this)
val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
@@ -359,7 +343,6 @@ class SparkSession private(
* @since 2.0.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = {
// TODO: use MutableProjection when rowRDD is another DataFrame and the applied
// schema differs from the existing schema on any field data type.
@@ -377,7 +360,6 @@ class SparkSession private(
* @since 2.0.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
createDataFrame(rowRDD.rdd, schema)
}
@@ -391,7 +373,6 @@ class SparkSession private(
* @since 2.0.0
*/
@DeveloperApi
- @Evolving
def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = {
Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala))
}
@@ -453,7 +434,6 @@ class SparkSession private(
* ------------------------------- */
/**
- * :: Experimental ::
* Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -480,8 +460,6 @@ class SparkSession private(
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = {
val enc = encoderFor[T]
val attributes = enc.schema.toAttributes
@@ -491,7 +469,6 @@ class SparkSession private(
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] from an RDD of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -499,14 +476,11 @@ class SparkSession private(
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = {
Dataset[T](self, ExternalRDD(data, self))
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an
* encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
* that is generally created automatically through implicits from a `SparkSession`, or can be
@@ -521,59 +495,45 @@ class SparkSession private(
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = {
createDataset(data.asScala)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements
* in a range from 0 to `end` (exclusive) with step value 1.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def range(end: Long): Dataset[java.lang.Long] = range(0, end)
/**
- * :: Experimental ::
* Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with step value 1.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long): Dataset[java.lang.Long] = {
range(start, end, step = 1, numPartitions = sparkContext.defaultParallelism)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = {
range(start, end, step, numPartitions = sparkContext.defaultParallelism)
}
/**
- * :: Experimental ::
* Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value, with partition number
* specified.
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long] = {
new Dataset(self, Range(start, end, step, numPartitions), Encoders.LONG)
}
@@ -668,7 +628,6 @@ class SparkSession private(
*
* @since 2.0.0
*/
- @Evolving
def readStream: DataStreamReader = new DataStreamReader(self)
/**
@@ -690,7 +649,6 @@ class SparkSession private(
// scalastyle:off
// Disable style checker so "implicits" object can start with lowercase i
/**
- * :: Experimental ::
* (Scala-specific) Implicit methods available in Scala for converting
* common Scala objects into `DataFrame`s.
*
@@ -701,8 +659,6 @@ class SparkSession private(
*
* @since 2.0.0
*/
- @Experimental
- @Evolving
object implicits extends SQLImplicits with Serializable {
protected override def _sqlContext: SQLContext = SparkSession.this.sqlContext
}
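A minimal sketch of the creation methods above, assuming `spark` and its implicits:

{{{
import org.apache.spark.sql.Encoders
import spark.implicits._

// range: a Dataset[java.lang.Long] holding 0, 2, 4, 6, 8.
val ids = spark.range(0, 10, 2)

// createDataFrame from a local Seq of Product (tuples or case classes).
val df = spark.createDataFrame(Seq(("a", 1), ("b", 2)))

// createDataset with an explicitly supplied encoder.
val names = spark.createDataset(Seq("x", "y"))(Encoders.STRING)
}}}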
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 44668610d8052..60738e6d4ef9e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -223,7 +223,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* Creates a table from the given path and returns the corresponding DataFrame.
* It will use the default data source configured by spark.sql.sources.default.
*
@@ -232,8 +231,6 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(tableName: String, path: String): DataFrame
/**
@@ -251,7 +248,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* Creates a table from the given path based on a data source and returns the corresponding
* DataFrame.
*
@@ -260,8 +256,6 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(tableName: String, path: String, source: String): DataFrame
/**
@@ -282,7 +276,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
@@ -291,8 +284,6 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(
tableName: String,
source: String,
@@ -319,7 +310,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* (Scala-specific)
* Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
@@ -329,15 +319,12 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(
tableName: String,
source: String,
options: Map[String, String]): DataFrame
/**
- * :: Experimental ::
* Create a table from the given path based on a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
@@ -356,7 +343,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
@@ -365,8 +351,6 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(
tableName: String,
source: String,
@@ -395,7 +379,6 @@ abstract class Catalog {
}
/**
- * :: Experimental ::
* (Scala-specific)
* Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
@@ -405,8 +388,6 @@ abstract class Catalog {
* the current database.
* @since 2.2.0
*/
- @Experimental
- @Evolving
def createTable(
tableName: String,
source: String,
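A minimal sketch of the path-based overloads above (the table name and path are illustrative, not from the patch):

{{{
// Registers a table over the files at the given path; with the two-argument
// overload the source falls back to spark.sql.sources.default.
val events = spark.catalog.createTable("events", "/data/events", "parquet")
events.printSchema()
}}}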
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
index 6b4def35e1955..878dc0d83f45a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
@@ -17,14 +17,12 @@
package org.apache.spark.sql.expressions
-import org.apache.spark.annotation.{Evolving, Experimental}
import org.apache.spark.sql.{Encoder, TypedColumn}
import org.apache.spark.sql.catalyst.encoders.encoderFor
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete}
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
/**
- * :: Experimental ::
* A base class for user-defined aggregations, which can be used in `Dataset` operations to take
* all of the elements of a group and reduce them to a single value.
*
@@ -50,8 +48,6 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
* @tparam OUT The type of the final output result.
* @since 1.6.0
*/
-@Experimental
-@Evolving
abstract class Aggregator[-IN, BUF, OUT] extends Serializable {
/**
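A minimal sketch of the `Aggregator` contract (a sum over `Double` inputs; the object name is illustrative):

{{{
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

object SumAgg extends Aggregator[Double, Double, Double] {
  def zero: Double = 0.0                                 // initial buffer
  def reduce(buf: Double, in: Double): Double = buf + in // fold in one element
  def merge(b1: Double, b2: Double): Double = b1 + b2    // combine two buffers
  def finish(buf: Double): Double = buf                  // produce the result
  def bufferEncoder: Encoder[Double] = Encoders.scalaDouble
  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

// Usage on a Dataset[Double]:
// ds.select(SumAgg.toColumn)
}}}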
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
index da7ed69e48390..f7591e4d265e0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
@@ -21,7 +21,6 @@ import org.apache.spark.sql._
import org.apache.spark.sql.execution.aggregate._
/**
- * :: Experimental ::
* Type-safe functions available for `Dataset` operations in Scala.
*
* Java users should use [[org.apache.spark.sql.expressions.javalang.typed]].
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index 2369c341762cf..831c19bbe12c4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -17,9 +17,8 @@
package org.apache.spark.sql.internal
import org.apache.spark.SparkConf
-import org.apache.spark.annotation.{Experimental, Unstable}
+import org.apache.spark.annotation.Unstable
import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _}
-import org.apache.spark.sql.catalog.v2.CatalogPlugin
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
@@ -52,7 +51,6 @@ import org.apache.spark.sql.util.ExecutionListenerManager
* state will clone the parent session state's `conf`, `functionRegistry`, `experimentalMethods`
* and `catalog` fields. Note that the state is cloned when `build` is called, and not before.
*/
-@Experimental
@Unstable
abstract class BaseSessionStateBuilder(
val session: SparkSession,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index baa8ebee66108..3740b56cb9cbb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.internal
import scala.reflect.runtime.universe.TypeTag
import scala.util.control.NonFatal
-import org.apache.spark.annotation.Experimental
import org.apache.spark.sql._
import org.apache.spark.sql.catalog.{Catalog, Column, Database, Function, Table}
import org.apache.spark.sql.catalyst.{DefinedByConstructorParams, FunctionIdentifier, TableIdentifier}
@@ -277,34 +276,29 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * :: Experimental ::
* Creates a table from the given path and returns the corresponding DataFrame.
* It will use the default data source configured by spark.sql.sources.default.
*
* @group ddl_ops
* @since 2.2.0
*/
- @Experimental
override def createTable(tableName: String, path: String): DataFrame = {
val dataSourceName = sparkSession.sessionState.conf.defaultDataSourceName
createTable(tableName, path, dataSourceName)
}
/**
- * :: Experimental ::
* Creates a table from the given path based on a data source and returns the corresponding
* DataFrame.
*
* @group ddl_ops
* @since 2.2.0
*/
- @Experimental
override def createTable(tableName: String, path: String, source: String): DataFrame = {
createTable(tableName, source, Map("path" -> path))
}
/**
- * :: Experimental ::
* (Scala-specific)
* Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
@@ -312,7 +306,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
* @group ddl_ops
* @since 2.2.0
*/
- @Experimental
override def createTable(
tableName: String,
source: String,
@@ -321,7 +314,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * :: Experimental ::
* (Scala-specific)
* Creates a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
@@ -329,7 +321,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
* @group ddl_ops
* @since 2.2.0
*/
- @Experimental
override def createTable(
tableName: String,
source: String,
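Since these overloads stay on the public `Catalog` interface after the annotation removal, a minimal usage sketch (table names and paths are hypothetical):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("catalog-example").getOrCreate()

// Uses the data source configured by spark.sql.sources.default.
val events = spark.catalog.createTable("events", "/data/events")

// Names the data source explicitly; per the code above, this is equivalent to
// calling the options-based overload with Map("path" -> "/data/events").
val eventsParquet = spark.catalog.createTable("events_parquet", "/data/events", "parquet")
```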
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index f1cde4fbf090b..a83a0f51ecf11 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -22,8 +22,7 @@ import java.io.File
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
-import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{Experimental, Unstable}
+import org.apache.spark.annotation.Unstable
import org.apache.spark.sql._
import org.apache.spark.sql.catalog.v2.CatalogManager
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
@@ -127,7 +126,6 @@ private[sql] object SessionState {
/**
* Concrete implementation of a [[BaseSessionStateBuilder]].
*/
-@Experimental
@Unstable
class SessionStateBuilder(
session: SparkSession,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 230b43022b02b..a0c6d20f36451 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -21,7 +21,7 @@ import java.sql.{Connection, Date, Timestamp}
import org.apache.commons.lang3.StringUtils
-import org.apache.spark.annotation.{DeveloperApi, Evolving, Since}
+import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.sql.types._
/**
@@ -33,7 +33,6 @@ import org.apache.spark.sql.types._
* send a null value to the database.
*/
@DeveloperApi
-@Evolving
case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int)
/**
@@ -56,7 +55,6 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int)
* for the given Catalyst type.
*/
@DeveloperApi
-@Evolving
abstract class JdbcDialect extends Serializable {
/**
* Check if this dialect instance can handle a certain JDBC URL.
@@ -196,7 +194,6 @@ abstract class JdbcDialect extends Serializable {
* sure to register your dialects first.
*/
@DeveloperApi
-@Evolving
object JdbcDialects {
/**
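`JdbcType`, `JdbcDialect`, and `JdbcDialects` keep only @DeveloperApi here; a sketch of the intended extension point follows (the database URL scheme and type mapping are hypothetical):

```scala
import java.sql.Types

import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType}
import org.apache.spark.sql.types.{DataType, StringType}

// Hypothetical dialect for URLs of the form jdbc:mydb://...
object MyDbDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  // Write Catalyst StringType as CLOB instead of the default mapping.
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case StringType => Some(JdbcType("CLOB", Types.CLOB))
    case _ => None  // fall back to the built-in mapping
  }
}

// Per the scaladoc above, register the dialect before first use.
JdbcDialects.registerDialect(MyDbDialect)
```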
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 6ad054c9f6403..63e57c6804e16 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -17,10 +17,9 @@
package org.apache.spark.sql.sources
-import org.apache.spark.annotation._
+import org.apache.spark.annotation.{Stable, Unstable}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.streaming.{Sink, Source}
import org.apache.spark.sql.streaming.OutputMode
@@ -116,7 +115,6 @@ trait SchemaRelationProvider {
*
* @since 2.0.0
*/
-@Experimental
@Unstable
trait StreamSourceProvider {
@@ -147,7 +145,6 @@ trait StreamSourceProvider {
*
* @since 2.0.0
*/
-@Experimental
@Unstable
trait StreamSinkProvider {
def createSink(
@@ -308,7 +305,6 @@ trait InsertableRelation {
*
* @since 1.3.0
*/
-@Experimental
@Unstable
trait CatalystScan {
def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row]
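`StreamSourceProvider`, `StreamSinkProvider`, and `CatalystScan` drop @Experimental but remain @Unstable. Under that caveat, a minimal sink sketch, assuming the `createSink` and `Sink.addBatch` signatures shown in this file (the provider class is hypothetical):

```scala
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources.StreamSinkProvider
import org.apache.spark.sql.streaming.OutputMode

// Hypothetical sink that just reports the size of each micro-batch.
class CountingSinkProvider extends StreamSinkProvider {
  override def createSink(
      sqlContext: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = new Sink {
    override def addBatch(batchId: Long, data: DataFrame): Unit = {
      println(s"batch $batchId: ${data.count()} rows")
    }
  }
}
```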
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
index 2da8469a0041c..f1fe472afdc2a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.util
import scala.collection.JavaConverters._
-import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental}
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.SparkSession
@@ -29,14 +29,11 @@ import org.apache.spark.sql.internal.StaticSQLConf._
import org.apache.spark.util.{ListenerBus, Utils}
/**
- * :: Experimental ::
* The interface for a query execution listener that can be used to analyze execution metrics.
*
* @note Implementations should guarantee thread-safety as they can be invoked by
* multiple different threads.
*/
-@Experimental
-@Evolving
trait QueryExecutionListener {
/**
@@ -68,12 +65,8 @@ trait QueryExecutionListener {
/**
- * :: Experimental ::
- *
* Manager for [[QueryExecutionListener]]. See `org.apache.spark.sql.SQLContext.listenerManager`.
*/
-@Experimental
-@Evolving
// The `session` is used to indicate which session carries this listener manager, and we only
// catch SQL executions which are launched by the same session.
// The `loadExtensions` flag is used to indicate whether we should load the pre-defined,
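With @Experimental and @Evolving removed, only @DeveloperApi remains on this listener API. A sketch of an implementation and its registration, assuming the `onSuccess`/`onFailure` signatures in this file (the listener class is hypothetical):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

// Thread-safe per the @note above: no mutable state is shared here.
class TimingListener extends QueryExecutionListener {
  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
    println(s"$funcName finished in ${durationNs / 1e6} ms")

  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
    println(s"$funcName failed: ${exception.getMessage}")
}

val spark = SparkSession.builder().getOrCreate()
spark.listenerManager.register(new TimingListener)
```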
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index d68395812a74c..a143c6f77d55d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -17,9 +17,8 @@
package org.apache.spark.sql.hive
-import org.apache.spark.annotation.{Experimental, Unstable}
+import org.apache.spark.annotation.Unstable
import org.apache.spark.sql._
-import org.apache.spark.sql.catalog.v2.CatalogPlugin
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -34,7 +33,6 @@ import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLo
/**
* Builder that produces a Hive-aware `SessionState`.
*/
-@Experimental
@Unstable
class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None)
extends BaseSessionStateBuilder(session, parentState) {
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 48913eaa4a8bf..589dd877c8c97 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark._
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.input.FixedLengthBinaryInputFormat
import org.apache.spark.internal.Logging
@@ -772,11 +772,8 @@ object StreamingContext extends Logging {
}
/**
- * :: Experimental ::
- *
* Get the currently active context, if there is one. Active means started but not stopped.
*/
- @Experimental
def getActive(): Option[StreamingContext] = {
ACTIVATION_LOCK.synchronized {
Option(activeContext.get())
@@ -784,13 +781,10 @@ object StreamingContext extends Logging {
}
/**
- * :: Experimental ::
- *
* Either return the "active" StreamingContext (that is, started but not stopped), or create a
* new StreamingContext by calling the provided `creatingFunc`.
* @param creatingFunc Function to create a new StreamingContext
*/
- @Experimental
def getActiveOrCreate(creatingFunc: () => StreamingContext): StreamingContext = {
ACTIVATION_LOCK.synchronized {
getActive().getOrElse { creatingFunc() }
@@ -798,8 +792,6 @@ object StreamingContext extends Logging {
}
/**
- * :: Experimental ::
- *
* Either get the currently active StreamingContext (that is, started but not stopped),
* OR recreate a StreamingContext from checkpoint data in the given path. If checkpoint data
does not exist in the provided path, then create a new StreamingContext by calling the provided
@@ -813,7 +805,6 @@ object StreamingContext extends Logging {
* error in reading checkpoint data. By default, an exception will be
* thrown on error.
*/
- @Experimental
def getActiveOrCreate(
checkpointPath: String,
creatingFunc: () => StreamingContext,
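The `getActive`/`getActiveOrCreate` pair now ships without @Experimental. A usage sketch for the checkpoint-aware overload (the checkpoint path is hypothetical):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val checkpointPath = "/tmp/streaming-checkpoint"  // hypothetical path

def createContext(): StreamingContext = {
  val ssc = new StreamingContext(new SparkConf().setAppName("example"), Seconds(1))
  ssc.checkpoint(checkpointPath)
  ssc
}

// Resolution order per the scaladoc: the active context, then recovery from
// checkpoint data, then the creating function.
val ssc = StreamingContext.getActiveOrCreate(checkpointPath, createContext _)
```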
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala
index 16c0d6fff8229..bf169ba6ed85d 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala
@@ -17,12 +17,10 @@
package org.apache.spark.streaming.api.java
-import org.apache.spark.annotation.Experimental
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.streaming.dstream.MapWithStateDStream
/**
- * :: Experimental ::
* DStream representing the stream of data generated by the `mapWithState` operation on a
* [[JavaPairDStream]]. It also gives access to the
* stream of state snapshots, that is, the state data of all keys after a batch has updated them.
@@ -32,7 +30,6 @@ import org.apache.spark.streaming.dstream.MapWithStateDStream
* @tparam StateType Class of the state data
* @tparam MappedType Class of the mapped data
*/
-@Experimental
class JavaMapWithStateDStream[KeyType, ValueType, StateType, MappedType] private[streaming](
dstream: MapWithStateDStream[KeyType, ValueType, StateType, MappedType])
extends JavaDStream[MappedType](dstream)(JavaSparkContext.fakeClassTag) {
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index c3c13df651ccd..3f88fe0817c57 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -30,7 +30,6 @@ import org.apache.hadoop.mapred.{JobConf, OutputFormat}
import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.spark.Partitioner
-import org.apache.spark.annotation.Experimental
import org.apache.spark.api.java.{JavaPairRDD, JavaSparkContext, JavaUtils, Optional}
import org.apache.spark.api.java.JavaPairRDD._
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
@@ -431,7 +430,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
}
/**
- * :: Experimental ::
* Return a [[JavaMapWithStateDStream]] by applying a function to every key-value element of
* `this` stream, while maintaining some state data for each unique key. The mapping function
* and other specifications (e.g. partitioners, timeouts, initial state data, etc.) of this
@@ -458,7 +456,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
* @tparam StateType Class type of the state data
* @tparam MappedType Class type of the mapped data
*/
- @Experimental
def mapWithState[StateType, MappedType](spec: StateSpec[K, V, StateType, MappedType]):
JavaMapWithStateDStream[K, V, StateType, MappedType] = {
new JavaMapWithStateDStream(dstream.mapWithState(spec)(
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
index 9512db7d7d757..3368382a55297 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala
@@ -20,7 +20,6 @@ package org.apache.spark.streaming.dstream
import scala.reflect.ClassTag
import org.apache.spark._
-import org.apache.spark.annotation.Experimental
import org.apache.spark.rdd.{EmptyRDD, RDD}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
@@ -28,7 +27,6 @@ import org.apache.spark.streaming.dstream.InternalMapWithStateDStream._
import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord}
/**
- * :: Experimental ::
* DStream representing the stream of data generated by the `mapWithState` operation on a
* [[org.apache.spark.streaming.dstream.PairDStreamFunctions pair DStream]].
* It also gives access to the stream of state snapshots, that is, the state data of
@@ -39,7 +37,6 @@ import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord}
* @tparam StateType Class of the state data
* @tparam MappedType Class of the mapped data
*/
-@Experimental
sealed abstract class MapWithStateDStream[KeyType, ValueType, StateType, MappedType: ClassTag](
ssc: StreamingContext) extends DStream[MappedType](ssc) {
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index dcb51d72fa588..a5bed752bd663 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -25,7 +25,6 @@ import org.apache.hadoop.mapred.{JobConf, OutputFormat}
import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.spark.{HashPartitioner, Partitioner}
-import org.apache.spark.annotation.Experimental
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext.rddToFileName
@@ -352,7 +351,6 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
}
/**
- * :: Experimental ::
* Return a [[MapWithStateDStream]] by applying a function to every key-value element of
* `this` stream, while maintaining some state data for each unique key. The mapping function
* and other specifications (e.g. partitioners, timeouts, initial state data, etc.) of this
@@ -376,7 +374,6 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
* @tparam StateType Class type of the state data
* @tparam MappedType Class type of the mapped data
*/
- @Experimental
def mapWithState[StateType: ClassTag, MappedType: ClassTag](
spec: StateSpec[K, V, StateType, MappedType]
): MapWithStateDStream[K, V, StateType, MappedType] = {
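Finally, `mapWithState` and the `MapWithStateDStream` it returns (including `stateSnapshots`) are no longer @Experimental. A running word-count sketch (host, port, and checkpoint path are hypothetical):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, State, StateSpec, StreamingContext}

val ssc = new StreamingContext(new SparkConf().setAppName("mws"), Seconds(1))
ssc.checkpoint("/tmp/mws-checkpoint")  // mapWithState requires a checkpoint dir

// Hypothetical source: words arriving on a socket, paired with a count of 1.
val pairs = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" ")).map((_, 1))

// StateSpec wraps the mapping function; State.update persists across batches.
val mappingFunc = (word: String, one: Option[Int], state: State[Int]) => {
  val sum = one.getOrElse(0) + state.getOption.getOrElse(0)
  state.update(sum)
  (word, sum)
}

val counts = pairs.mapWithState(StateSpec.function(mappingFunc))
counts.stateSnapshots().print()  // state of every key after each batch

ssc.start()
ssc.awaitTermination()
```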