diff --git a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala index 99ec78633ab75..cb1e6cf0497cc 100644 --- a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala +++ b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala @@ -19,8 +19,6 @@ package org.apache.spark.metrics.source import com.codahale.metrics.MetricRegistry -import org.apache.spark.annotation.Experimental - private[spark] object StaticSources { /** * The set of all static sources. These sources may be reported to from any class, including @@ -30,10 +28,8 @@ private[spark] object StaticSources { } /** - * :: Experimental :: * Metrics for code generation. */ -@Experimental object CodegenMetrics extends Source { override val sourceName: String = "CodeGenerator" override val metricRegistry: MetricRegistry = new MetricRegistry() @@ -62,10 +58,8 @@ object CodegenMetrics extends Source { } /** - * :: Experimental :: * Metrics for access to the hive external catalog. */ -@Experimental object HiveCatalogMetrics extends Source { override val sourceName: String = "HiveExternalCatalog" override val metricRegistry: MetricRegistry = new MetricRegistry() diff --git a/core/src/main/scala/org/apache/spark/partial/package.scala b/core/src/main/scala/org/apache/spark/partial/package.scala index 62dc5cd25a164..d9e39cbe8d24a 100644 --- a/core/src/main/scala/org/apache/spark/partial/package.scala +++ b/core/src/main/scala/org/apache/spark/partial/package.scala @@ -18,8 +18,6 @@ package org.apache.spark /** - * :: Experimental :: - * * Support for approximate results. This provides convenient api and also implementation for * approximate calculation. * diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 7f8064f01ec45..e23133682360f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -34,7 +34,6 @@ import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewO import org.apache.spark._ import org.apache.spark.Partitioner.defaultPartitioner -import org.apache.spark.annotation.Experimental import org.apache.spark.internal.Logging import org.apache.spark.internal.config.SPECULATION_ENABLED import org.apache.spark.internal.io._ @@ -52,7 +51,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) extends Logging with Serializable { /** - * :: Experimental :: * Generic function to combine the elements for each key using a custom set of aggregation * functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C * @@ -68,7 +66,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * @note V and C can be different -- for example, one might group an RDD of type * (Int, Int) into an RDD of type (Int, Seq[Int]). */ - @Experimental def combineByKeyWithClassTag[C]( createCombiner: V => C, mergeValue: (C, V) => C, @@ -136,10 +133,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) } /** - * :: Experimental :: * Simplified version of combineByKeyWithClassTag that hash-partitions the output RDD. 
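 * For example, grouping values per key (an illustrative sketch, not from the original docs;
 * `pairs` is an assumed RDD[(String, Int)]):
 * {{{
 *   val grouped = pairs.combineByKeyWithClassTag[List[Int]](
 *     (v: Int) => List(v),                           // createCombiner
 *     (c: List[Int], v: Int) => v :: c,              // mergeValue
 *     (c1: List[Int], c2: List[Int]) => c1 ::: c2,   // mergeCombiners
 *     numPartitions = 4)
 * }}}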
*/ - @Experimental def combineByKeyWithClassTag[C]( createCombiner: V => C, mergeValue: (C, V) => C, @@ -616,11 +611,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) } /** - * :: Experimental :: * Simplified version of combineByKeyWithClassTag that hash-partitions the resulting RDD using the * existing partitioner/parallelism level. */ - @Experimental def combineByKeyWithClassTag[C]( createCombiner: V => C, mergeValue: (C, V) => C, diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala index af0752e569ea5..9d5c4d22ea91e 100755 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import org.apache.spark.annotation.Experimental - package object avro { /** @@ -31,7 +29,6 @@ package object avro { * * @since 2.4.0 */ - @Experimental @deprecated("Please use 'org.apache.spark.sql.avro.functions.from_avro' instead.", "3.0.0") def from_avro( data: Column, @@ -45,7 +42,6 @@ package object avro { * * @since 2.4.0 */ - @Experimental @deprecated("Please use 'org.apache.spark.sql.avro.functions.to_avro' instead.", "3.0.0") def to_avro(data: Column): Column = org.apache.spark.sql.avro.functions.to_avro(data) } diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index d4a428f45c110..608da0b8bf563 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -22,7 +22,6 @@ import scala.reflect.ClassTag import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.Record -import org.apache.spark.annotation.Evolving import org.apache.spark.rdd.RDD import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.streaming.{Duration, StreamingContext, Time} @@ -84,14 +83,12 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( } } -@Evolving object KinesisInputDStream { /** * Builder for [[KinesisInputDStream]] instances. * * @since 2.2.0 */ - @Evolving class Builder { // Required params private var streamingContext: Option[StreamingContext] = None diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala index 7488971e61634..e821adca20d27 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/SparkAWSCredentials.scala @@ -19,7 +19,6 @@ package org.apache.spark.streaming.kinesis import com.amazonaws.auth._ -import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging /** @@ -83,14 +82,12 @@ private[kinesis] final case class STSCredentials( } } -@Evolving object SparkAWSCredentials { /** * Builder for [[SparkAWSCredentials]] instances. 
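 * A hedged usage sketch (the key strings are placeholders; method names assume the 2.2+
 * builder API):
 * {{{
 *   val credentials = SparkAWSCredentials.builder
 *     .basicCredentials("ACCESS_KEY_ID", "SECRET_KEY")
 *     .build()
 * }}}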
* * @since 2.2.0 */ - @Evolving class Builder { private var basicCreds: Option[BasicCredentials] = None private var stsCreds: Option[STSCredentials] = None diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 5e2ca32b4ceae..78503585261bf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg._ @@ -59,8 +59,6 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR } /** - * :: Experimental :: - * * * Linear SVM Classifier * @@ -69,7 +67,6 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR * */ @Since("2.2.0") -@Experimental class LinearSVC @Since("2.2.0") ( @Since("2.2.0") override val uid: String) extends Classifier[Vector, LinearSVC, LinearSVCModel] @@ -290,11 +287,9 @@ object LinearSVC extends DefaultParamsReadable[LinearSVC] { } /** - * :: Experimental :: * Linear SVM Model trained by [[LinearSVC]] */ @Since("2.2.0") -@Experimental class LinearSVCModel private[classification] ( @Since("2.2.0") override val uid: String, @Since("2.2.0") val coefficients: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 7790de064e7bd..0997c1e7b38d6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -26,7 +26,7 @@ import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => Bree import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg._ @@ -1349,12 +1349,10 @@ private[ml] class MultiClassSummarizer extends Serializable { } /** - * :: Experimental :: * Abstraction for logistic regression results for a given model. * * Currently, the summary ignores the instance weights. */ -@Experimental sealed trait LogisticRegressionSummary extends Serializable { /** @@ -1482,12 +1480,10 @@ sealed trait LogisticRegressionSummary extends Serializable { } /** - * :: Experimental :: * Abstraction for multiclass logistic regression training results. * Currently, the training summary ignores the training weights except * for the objective trace. */ -@Experimental sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { /** objective function (scaled loss + regularization) at each iteration. */ @@ -1501,12 +1497,10 @@ sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary } /** - * :: Experimental :: * Abstraction for binary logistic regression results for a given model. * * Currently, the summary ignores the instance weights. 
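 * A binary summary is typically obtained from a fitted model (illustrative sketch; `training`
 * is an assumed DataFrame with "label" and "features" columns):
 * {{{
 *   val model = new LogisticRegression().fit(training)
 *   val binarySummary = model.binarySummary
 *   println(binarySummary.areaUnderROC)
 * }}}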
*/ -@Experimental sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary { private val sparkSession = predictions.sparkSession @@ -1590,12 +1584,10 @@ sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary { } /** - * :: Experimental :: * Abstraction for binary logistic regression training results. * Currently, the training summary ignores the training weights except * for the objective trace. */ -@Experimental sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegressionSummary with LogisticRegressionTrainingSummary diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 2247880b52414..4ad0cb55b0078 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.clustering import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param._ @@ -301,7 +301,6 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] { /** - * :: Experimental :: * Summary of BisectingKMeans. * * @param predictions `DataFrame` produced by `BisectingKMeansModel.transform()`. @@ -313,7 +312,6 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] { * dataset. This is equivalent to sklearn's inertia. */ @Since("2.1.0") -@Experimental class BisectingKMeansSummary private[clustering] ( predictions: DataFrame, predictionCol: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala index 7da4c43a1abf3..41718920c197d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala @@ -17,11 +17,10 @@ package org.apache.spark.ml.clustering -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.sql.{DataFrame, Row} /** - * :: Experimental :: * Summary of clustering algorithms. * * @param predictions `DataFrame` produced by model.transform(). @@ -30,7 +29,6 @@ import org.apache.spark.sql.{DataFrame, Row} * @param k Number of clusters. * @param numIter Number of iterations. 
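 * Concrete summaries are exposed by the fitted clustering models, for example (illustrative
 * sketch; `dataset` is an assumed DataFrame with a "features" column):
 * {{{
 *   val model = new KMeans().setK(2).fit(dataset)
 *   println(model.summary.clusterSizes.mkString(", "))
 * }}}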
*/ -@Experimental class ClusteringSummary private[clustering] ( @transient val predictions: DataFrame, val predictionCol: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 9a51d2f188460..86caa1247e77f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.clustering import breeze.linalg.{DenseVector => BDV} import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.impl.Utils.EPSILON @@ -697,7 +697,6 @@ private class ExpectationAggregator( } /** - * :: Experimental :: * Summary of GaussianMixture. * * @param predictions `DataFrame` produced by `GaussianMixtureModel.transform()`. @@ -710,7 +709,6 @@ private class ExpectationAggregator( * @param numIter Number of iterations. */ @Since("2.0.0") -@Experimental class GaussianMixtureSummary private[clustering] ( predictions: DataFrame, predictionCol: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index b48a9665ec88c..5cc0f38c67e71 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model, PipelineStage} import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param._ @@ -359,7 +359,6 @@ object KMeans extends DefaultParamsReadable[KMeans] { } /** - * :: Experimental :: * Summary of KMeans. * * @param predictions `DataFrame` produced by `KMeansModel.transform()`. @@ -371,7 +370,6 @@ object KMeans extends DefaultParamsReadable[KMeans] { * points in the training dataset). This is equivalent to sklearn's inertia. */ @Since("2.0.0") -@Experimental class KMeansSummary private[clustering] ( predictions: DataFrame, predictionCol: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala index 149e99d2f195a..812a426a062c1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/PowerIterationClustering.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.clustering -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ @@ -95,7 +95,6 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has } /** - * :: Experimental :: * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by * Lin and Cohen. 
From * the abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power @@ -108,7 +107,6 @@ private[clustering] trait PowerIterationClusteringParams extends Params with Has * Spectral clustering (Wikipedia) */ @Since("2.4.0") -@Experimental class PowerIterationClustering private[clustering] ( @Since("2.4.0") override val uid: String) extends PowerIterationClusteringParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index c6b04333885ae..2a7b3c579b078 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.evaluation -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -28,13 +28,11 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DoubleType /** - * :: Experimental :: * Evaluator for binary classification, which expects two input columns: rawPrediction and label. * The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label 1) * or of type vector (length-2 vector of raw predictions, scores, or label probabilities). */ @Since("1.2.0") -@Experimental class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Evaluator with HasRawPredictionCol with HasLabelCol with HasWeightCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala index 4c915e08d2536..868bd2a763f5e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringEvaluator.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.evaluation import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{BLAS, DenseVector, SparseVector, Vector, Vectors} @@ -30,8 +30,6 @@ import org.apache.spark.sql.functions.{avg, col, udf} import org.apache.spark.sql.types.DoubleType /** - * :: Experimental :: - * * Evaluator for clustering results. * The metric computes the Silhouette measure using the specified distance measure. * @@ -39,7 +37,6 @@ import org.apache.spark.sql.types.DoubleType * between 1 and -1, where a value close to 1 means that the points in a cluster are close to the * other points in the same cluster and far from the points of the other clusters. 
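 * A minimal usage sketch (assumes `predictions` is the output of a clustering model's
 * transform(), containing "features" and "prediction" columns):
 * {{{
 *   val evaluator = new ClusteringEvaluator()
 *   val silhouette = evaluator.evaluate(predictions)
 * }}}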
*/ -@Experimental @Since("2.3.0") class ClusteringEvaluator @Since("2.3.0") (@Since("2.3.0") override val uid: String) extends Evaluator with HasPredictionCol with HasFeaturesCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index e5cbe55e1d2ef..85a6138c98a46 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.evaluation -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} @@ -27,11 +27,9 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DoubleType /** - * :: Experimental :: * Evaluator for multiclass classification, which expects two input columns: prediction and label. */ @Since("1.5.0") -@Experimental class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Evaluator with HasPredictionCol with HasLabelCol with HasWeightCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index 616569bb55e4c..dd667a85fa598 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.evaluation -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} @@ -27,11 +27,9 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, FloatType} /** - * :: Experimental :: * Evaluator for regression, which expects two input columns: prediction and label. 
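 * For example (illustrative sketch; `predictions` is an assumed DataFrame with "prediction"
 * and "label" columns):
 * {{{
 *   val rmse = new RegressionEvaluator().setMetricName("rmse").evaluate(predictions)
 * }}}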
*/ @Since("1.4.0") -@Experimental final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Evaluator with HasPredictionCol with HasLabelCol with HasWeightCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index 0554455a66d7f..c074830ec923f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -22,7 +22,7 @@ import scala.util.Random import breeze.linalg.normalize import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.HasSeed @@ -32,8 +32,6 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.types.StructType /** - * :: Experimental :: - * * Params for [[BucketedRandomProjectionLSH]]. */ private[ml] trait BucketedRandomProjectionLSHParams extends Params { @@ -56,8 +54,6 @@ private[ml] trait BucketedRandomProjectionLSHParams extends Params { } /** - * :: Experimental :: - * * Model produced by [[BucketedRandomProjectionLSH]], where multiple random vectors are stored. The * vectors are normalized to be unit vectors and each vector is used in a hash function: * `h_i(x) = floor(r_i.dot(x) / bucketLength)` @@ -66,7 +62,6 @@ private[ml] trait BucketedRandomProjectionLSHParams extends Params { * * @param randUnitVectors An array of random unit vectors. Each vector represents a hash function. */ -@Experimental @Since("2.1.0") class BucketedRandomProjectionLSHModel private[ml]( override val uid: String, @@ -114,8 +109,6 @@ class BucketedRandomProjectionLSHModel private[ml]( } /** - * :: Experimental :: - * * This [[BucketedRandomProjectionLSH]] implements Locality Sensitive Hashing functions for * Euclidean distance metrics. * @@ -131,7 +124,6 @@ class BucketedRandomProjectionLSHModel private[ml]( * 2. Wang, Jingdong et al. "Hashing for similarity search: A survey." arXiv preprint * arXiv:1408.2927 (2014). 
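 * A hedged usage sketch (`dfA` and `dfB` are assumed DataFrames with a "features" vector column):
 * {{{
 *   val brp = new BucketedRandomProjectionLSH()
 *     .setBucketLength(2.0)
 *     .setNumHashTables(3)
 *     .setInputCol("features")
 *     .setOutputCol("hashes")
 *   val model = brp.fit(dfA)
 *   model.approxSimilarityJoin(dfA, dfB, 1.5, "EuclideanDistance").show()
 * }}}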
*/ -@Experimental @Since("2.1.0") class BucketedRandomProjectionLSH(override val uid: String) extends LSH[BucketedRandomProjectionLSHModel] diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index dc18e1d34880a..0a4f1b98ef67a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.feature import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.Vectors @@ -81,7 +81,6 @@ import org.apache.spark.util.collection.OpenHashMap * +----+-----+---------+------+------------------------------------------------------+ * }}} */ -@Experimental @Since("2.3.0") class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index bdad804083b01..99c0a0df53672 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCols, HasOutputCols} @@ -81,7 +81,6 @@ private[feature] trait ImputerParams extends Params with HasInputCols with HasOu } /** - * :: Experimental :: * Imputation estimator for completing missing values, either using the mean or the median * of the columns in which the missing values are located. The input columns should be of * numeric type. Currently Imputer does not support categorical features @@ -95,7 +94,6 @@ private[feature] trait ImputerParams extends Params with HasInputCols with HasOu * All Null values in the input columns are treated as missing, and so are also imputed. For * computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001. */ -@Experimental @Since("2.2.0") class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) extends Estimator[ImputerModel] with ImputerParams with DefaultParamsWritable { @@ -194,13 +192,11 @@ object Imputer extends DefaultParamsReadable[Imputer] { } /** - * :: Experimental :: * Model fitted by [[Imputer]]. * * @param surrogateDF a DataFrame containing inputCols and their corresponding surrogates, * which are used to replace the missing values in the input DataFrame. 
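 * The model is produced by fitting an [[Imputer]], for example (illustrative sketch; `df` is
 * an assumed DataFrame with numeric columns "a" and "b"):
 * {{{
 *   val model = new Imputer()
 *     .setInputCols(Array("a", "b"))
 *     .setOutputCols(Array("a_imputed", "b_imputed"))
 *     .setStrategy("median")
 *     .fit(df)
 *   model.transform(df)
 * }}}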
*/ -@Experimental @Since("2.2.0") class ImputerModel private[ml] ( @Since("2.2.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index 21cde66d8db6b..da0eaad667ccb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -21,7 +21,7 @@ import scala.util.Random import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.HasSeed @@ -29,8 +29,6 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.types.StructType /** - * :: Experimental :: - * * Model produced by [[MinHashLSH]], where multiple hash functions are stored. Each hash function * is picked from the following family of hash functions, where a_i and b_i are randomly chosen * integers less than prime: @@ -44,7 +42,6 @@ import org.apache.spark.sql.types.StructType * * @param randCoefficients Pairs of random coefficients. Each pair is used by one hash function. */ -@Experimental @Since("2.1.0") class MinHashLSHModel private[ml]( override val uid: String, @@ -102,8 +99,6 @@ class MinHashLSHModel private[ml]( } /** - * :: Experimental :: - * * LSH class for Jaccard distance. * * The input can be dense or sparse vectors, but it is more efficient if it is sparse. For example, @@ -115,7 +110,6 @@ class MinHashLSHModel private[ml]( * References: * Wikipedia on MinHash */ -@Experimental @Since("2.1.0") class MinHashLSH(override val uid: String) extends LSH[MinHashLSHModel] with HasSeed { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index ec8f7031ad3bd..6c0d5fc70ab4e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer} import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{Vector, VectorUDT} @@ -124,7 +124,6 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol with } /** - * :: Experimental :: * Implements the transforms required for fitting a dataset against an R model formula. Currently * we support a limited subset of the R operators, including '~', '.', ':', '+', '-', '*' and '^'. * Also see the R formula docs here: @@ -157,7 +156,6 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol with * `StringIndexer`. If the label column does not exist in the DataFrame, the output label column * will be created from the specified response variable in the formula. */ -@Experimental @Since("1.5.0") class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase with DefaultParamsWritable { @@ -329,14 +327,12 @@ object RFormula extends DefaultParamsReadable[RFormula] { } /** - * :: Experimental :: * Model fitted by [[RFormula]]. 
Fitting is required to determine the factor levels of * formula terms. * * @param resolvedFormula the fitted R formula. * @param pipelineModel the fitted feature model, including factor to index mappings. */ -@Experimental @Since("1.5.0") class RFormulaModel private[feature]( @Since("1.5.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala index f5947d61fe349..5d787f263a12f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSizeHint.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.feature import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{Vector, VectorUDT} @@ -30,14 +30,12 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.StructType /** - * :: Experimental :: * A feature transformer that adds size information to the metadata of a vector column. * VectorAssembler needs size information for its input columns and cannot be used on streaming * dataframes without this metadata. * * Note: VectorSizeHint modifies `inputCol` to include size metadata and does not have an outputCol. */ -@Experimental @Since("2.3.0") class VectorSizeHint @Since("2.3.0") (@Since("2.3.0") override val uid: String) extends Transformer with HasInputCol with HasHandleInvalid with DefaultParamsWritable { @@ -180,8 +178,6 @@ class VectorSizeHint @Since("2.3.0") (@Since("2.3.0") override val uid: String) override def copy(extra: ParamMap): this.type = defaultCopy(extra) } -/** :: Experimental :: */ -@Experimental @Since("2.3.0") object VectorSizeHint extends DefaultParamsReadable[VectorSizeHint] { diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index 7322815c12ab8..e1c9b927a28c8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.HasPredictionCol @@ -116,7 +116,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { } /** - * :: Experimental :: * A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in * Li et al., PFP: Parallel FP-Growth for Query * Recommendation. PFP distributes computation in such a way that each worker executes an @@ -128,7 +127,6 @@ private[fpm] trait FPGrowthParams extends Params with HasPredictionCol { * Association rule learning (Wikipedia) */ @Since("2.2.0") -@Experimental class FPGrowth @Since("2.2.0") ( @Since("2.2.0") override val uid: String) extends Estimator[FPGrowthModel] with FPGrowthParams with DefaultParamsWritable { @@ -213,13 +211,11 @@ object FPGrowth extends DefaultParamsReadable[FPGrowth] { } /** - * :: Experimental :: * Model fitted by FPGrowth. 
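 * For example (illustrative sketch; `dataset` is an assumed DataFrame with an array column
 * "items"):
 * {{{
 *   val model = new FPGrowth().setItemsCol("items").setMinSupport(0.5).fit(dataset)
 *   model.freqItemsets.show()
 *   model.associationRules.show()
 * }}}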
* * @param freqItemsets frequent itemsets in the format of DataFrame("items"[Array], "freq"[Long]) */ @Since("2.2.0") -@Experimental class FPGrowthModel private[ml] ( @Since("2.2.0") override val uid: String, @Since("2.2.0") @transient val freqItemsets: DataFrame, diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala index b0006a8d4a58e..c9c049248f70c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.fpm -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable import org.apache.spark.ml.util.Instrumentation.instrumented @@ -27,7 +27,6 @@ import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{ArrayType, LongType, StructField, StructType} /** - * :: Experimental :: * A parallel PrefixSpan algorithm to mine frequent sequential patterns. * The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns * Efficiently by Prefix-Projected Pattern Growth @@ -39,7 +38,6 @@ import org.apache.spark.sql.types.{ArrayType, LongType, StructField, StructType} * (Wikipedia) */ @Since("2.4.0") -@Experimental final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params { @Since("2.4.0") @@ -125,7 +123,6 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params sequenceCol -> "sequence") /** - * :: Experimental :: * Finds the complete set of frequent sequential patterns in the input sequences of itemsets. * * @param dataset A dataset or a dataframe containing a sequence column which is diff --git a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala index 03136261dd5c9..5efcf0dce68a2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala @@ -24,16 +24,13 @@ import javax.imageio.ImageIO import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Experimental, Since} -import org.apache.spark.input.PortableDataStream -import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.annotation.Since +import org.apache.spark.sql.Row import org.apache.spark.sql.types._ /** - * :: Experimental :: * Defines the image schema and methods to read and manipulate images. 
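 * Image DataFrames matching this schema are typically loaded through the "image" data source
 * (a hedged sketch, not part of this object's API; the path is a placeholder):
 * {{{
 *   val images = spark.read.format("image").load("/path/to/images")
 *   images.select("image.origin", "image.width", "image.height").show()
 * }}}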
*/ -@Experimental @Since("2.3.0") object ImageSchema { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index a65592f0e718a..cc1d18d3836c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS} import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} @@ -120,13 +120,11 @@ private[regression] trait AFTSurvivalRegressionParams extends Params } /** - * :: Experimental :: * Fit a parametric survival regression model named accelerated failure time (AFT) model * (see * Accelerated failure time model (Wikipedia)) * based on the Weibull distribution of the survival time. */ -@Experimental @Since("1.6.0") class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams @@ -307,10 +305,8 @@ object AFTSurvivalRegression extends DefaultParamsReadable[AFTSurvivalRegression } /** - * :: Experimental :: * Model produced by [[AFTSurvivalRegression]]. */ -@Experimental @Since("1.6.0") class AFTSurvivalRegressionModel private[ml] ( @Since("1.6.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index a8f4ed9096b9e..a226ca49e6deb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -24,7 +24,7 @@ import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.attribute.AttributeGroup @@ -221,8 +221,6 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam } /** - * :: Experimental :: - * * Fit a Generalized Linear Model * (see * Generalized linear model (Wikipedia)) @@ -238,7 +236,6 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam * - "tweedie" : power link function specified through "linkPower". The default link power in * the tweedie family is 1 - variancePower. */ -@Experimental @Since("2.0.0") class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends Regressor[Vector, GeneralizedLinearRegression, GeneralizedLinearRegressionModel] @@ -991,10 +988,8 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine } /** - * :: Experimental :: * Model produced by [[GeneralizedLinearRegression]]. 
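 * For example (illustrative sketch; `dataset` is an assumed DataFrame with "label" and
 * "features" columns):
 * {{{
 *   val model = new GeneralizedLinearRegression()
 *     .setFamily("gaussian")
 *     .setLink("identity")
 *     .setRegParam(0.3)
 *     .fit(dataset)
 *   println(model.coefficients)
 * }}}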
*/ -@Experimental @Since("2.0.0") class GeneralizedLinearRegressionModel private[ml] ( @Since("2.0.0") override val uid: String, @@ -1155,7 +1150,6 @@ object GeneralizedLinearRegressionModel extends MLReadable[GeneralizedLinearRegr } /** - * :: Experimental :: * Summary of [[GeneralizedLinearRegression]] model and predictions. * * @param dataset Dataset to be summarized. @@ -1163,7 +1157,6 @@ object GeneralizedLinearRegressionModel extends MLReadable[GeneralizedLinearRegr * model which cannot be modified from outside. */ @Since("2.0.0") -@Experimental class GeneralizedLinearRegressionSummary private[regression] ( dataset: Dataset[_], origModel: GeneralizedLinearRegressionModel) extends Serializable { @@ -1383,7 +1376,6 @@ class GeneralizedLinearRegressionSummary private[regression] ( } /** - * :: Experimental :: * Summary of [[GeneralizedLinearRegression]] fitting and model. * * @param dataset Dataset to be summarized. @@ -1394,7 +1386,6 @@ class GeneralizedLinearRegressionSummary private[regression] ( * @param solver the solver algorithm used for model training */ @Since("2.0.0") -@Experimental class GeneralizedLinearRegressionTrainingSummary private[regression] ( dataset: Dataset[_], origModel: GeneralizedLinearRegressionModel, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 09f3f94d346b6..abf75d70ea028 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -25,7 +25,7 @@ import breeze.stats.distributions.StudentsT import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{PipelineStage, PredictorParams} import org.apache.spark.ml.feature.Instance @@ -797,7 +797,6 @@ object LinearRegressionModel extends MLReadable[LinearRegressionModel] { } /** - * :: Experimental :: * Linear regression training results. Currently, the training summary ignores the * training weights except for the objective trace. * @@ -805,7 +804,6 @@ object LinearRegressionModel extends MLReadable[LinearRegressionModel] { * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. */ @Since("1.5.0") -@Experimental class LinearRegressionTrainingSummary private[regression] ( predictions: DataFrame, predictionCol: String, @@ -835,7 +833,6 @@ class LinearRegressionTrainingSummary private[regression] ( } /** - * :: Experimental :: * Linear regression results evaluated on a dataset. * * @param predictions predictions output by the model's `transform` method. @@ -845,7 +842,6 @@ class LinearRegressionTrainingSummary private[regression] ( * @param featuresCol Field in "predictions" which gives the features of each instance as a vector. 
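 * The summary is usually obtained from a fitted model rather than constructed directly
 * (illustrative sketch; `training` is an assumed DataFrame with "label" and "features" columns):
 * {{{
 *   val model = new LinearRegression().fit(training)
 *   println(model.summary.rootMeanSquaredError)
 *   println(model.summary.r2)
 * }}}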
*/ @Since("1.5.0") -@Experimental class LinearRegressionSummary private[regression] ( @transient val predictions: DataFrame, val predictionCol: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala index 5b38ca73e8014..cc0c2134834d4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/ChiSquareTest.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.stat -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vectors => OldVectors} @@ -28,14 +28,11 @@ import org.apache.spark.sql.functions.col /** - * :: Experimental :: - * * Chi-square hypothesis testing for categorical data. * * See Wikipedia for more information * on the Chi-squared test. */ -@Experimental @Since("2.2.0") object ChiSquareTest { diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala index 8167ea68a7150..bab178b85d5ff 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.stat import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.{SQLDataTypes, Vector} import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.mllib.stat.{Statistics => OldStatistics} @@ -33,11 +33,9 @@ import org.apache.spark.sql.types.{StructField, StructType} * to spark.ml's Vector types. */ @Since("2.2.0") -@Experimental object Correlation { /** - * :: Experimental :: * Compute the correlation matrix for the input Dataset of Vectors using the specified method. * Methods currently supported: `pearson` (default), `spearman`. * diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala index adf8145726711..f4a6b8b033dbe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTest.scala @@ -19,17 +19,15 @@ package org.apache.spark.ml.stat import scala.annotation.varargs -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.api.java.function.Function import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.stat.{Statistics => OldStatistics} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions.col /** - * :: Experimental :: - * * Conduct the two-sided Kolmogorov Smirnov (KS) test for data sampled from a * continuous distribution. 
By comparing the largest difference between the empirical cumulative * distribution of the sample data and the theoretical distribution we can provide a test for the @@ -38,7 +36,6 @@ import org.apache.spark.sql.functions.col * @see * Kolmogorov-Smirnov test (Wikipedia) */ -@Experimental @Since("2.4.0") object KolmogorovSmirnovTest { diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala index ed7d7e0852647..59123a41d16fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.stat import java.io._ -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.sql.Column @@ -35,7 +35,6 @@ import org.apache.spark.sql.types._ * Users should not directly create such builders, but instead use one of the methods in * [[Summarizer]]. */ -@Experimental @Since("2.3.0") sealed abstract class SummaryBuilder { /** @@ -78,7 +77,6 @@ sealed abstract class SummaryBuilder { * Note: Currently, the performance of this interface is about 2x~3x slower than using the RDD * interface. */ -@Experimental @Since("2.3.0") object Summarizer extends Logging { diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala index 620c754a7ba0c..5f1091e438db0 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala @@ -30,7 +30,6 @@ import org.jpmml.model.filters.ImportFilter */ private[spark] object PMMLUtils { /** - * :: Experimental :: * Load a PMML model from a string. Note: for testing only, PMML model evaluation is supported * through external spark-packages. */ diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 8d2848841c109..d689d1d034bc7 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -663,8 +663,6 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True): def binaryFiles(self, path, minPartitions=None): """ - .. note:: Experimental - Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array. Each file is read as a single record and returned @@ -680,8 +678,6 @@ def binaryFiles(self, path, minPartitions=None): def binaryRecords(self, path, recordLength): """ - .. note:: Experimental - Load data from a flat binary file, assuming each record is a set of numbers with the specified numerical format (see ByteBuffer), and the number of bytes per record is constant. diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index d4470b5bf2900..82ff81c58d3c6 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -98,7 +98,6 @@ def fitMultiple(self, dataset, paramMaps): using `paramMaps[index]`. `index` values may not be sequential. .. note:: DeveloperApi - .. 
note:: Experimental """ estimator = self.copy() diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index a2482bae9ce7e..3ae2338aa77e6 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -69,8 +69,6 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - `Linear SVM Classifier `_ This binary classifier optimizes the Hinge Loss using the OWLQN optimizer. @@ -160,8 +158,6 @@ def _create_model(self, java_model): class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by LinearSVC. .. versionadded:: 2.2.0 @@ -565,8 +561,6 @@ def __repr__(self): class LogisticRegressionSummary(JavaWrapper): """ - .. note:: Experimental - Abstraction for Logistic Regression Results for a given model. .. versionadded:: 2.0.0 @@ -723,8 +717,6 @@ def weightedFMeasure(self, beta=1.0): @inherit_doc class LogisticRegressionTrainingSummary(LogisticRegressionSummary): """ - .. note:: Experimental - Abstraction for multinomial Logistic Regression Training results. Currently, the training summary ignores the training weights except for the objective trace. @@ -753,8 +745,6 @@ def totalIterations(self): @inherit_doc class BinaryLogisticRegressionSummary(LogisticRegressionSummary): """ - .. note:: Experimental - Binary Logistic regression results for a given model. .. versionadded:: 2.0.0 @@ -850,8 +840,6 @@ def recallByThreshold(self): class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary): """ - .. note:: Experimental - Binary Logistic regression training results for a given model. .. versionadded:: 2.0.0 @@ -1904,8 +1892,6 @@ def getClassifier(self): @inherit_doc class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. For a multiclass classification with k classes, train k models (one per class). @@ -2118,8 +2104,6 @@ def _transfer_param_map_from_java(self, javaParamMap): class OneVsRestModel(Model, OneVsRestParams, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. Each example is scored against all k models, and the model with the highest score diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 6c9cf7b6c829c..9b21aacacd710 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -33,8 +33,6 @@ class ClusteringSummary(JavaWrapper): """ - .. note:: Experimental - Clustering results for a given model. .. versionadded:: 2.1.0 @@ -263,8 +261,6 @@ def getK(self): class GaussianMixtureSummary(ClusteringSummary): """ - .. note:: Experimental - Gaussian mixture clustering results for a given model. .. versionadded:: 2.1.0 @@ -297,8 +293,6 @@ def logLikelihood(self): class KMeansSummary(ClusteringSummary): """ - .. note:: Experimental - Summary of KMeans. .. versionadded:: 2.1.0 @@ -671,8 +665,6 @@ def _create_model(self, java_model): class BisectingKMeansSummary(ClusteringSummary): """ - .. 
note:: Experimental - Bisecting KMeans clustering results for a given model. .. versionadded:: 2.1.0 @@ -1175,8 +1167,6 @@ def getKeepLastCheckpoint(self): class PowerIterationClustering(HasMaxIter, HasWeightCol, JavaParams, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by `Lin and Cohen `_. From the abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index f3665d514ea13..d96cdd594a3f3 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -110,8 +110,6 @@ def isLargerBetter(self): class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPredictionCol, HasWeightCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Evaluator for binary classification, which expects two input columns: rawPrediction and label. The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label 1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities). @@ -194,8 +192,6 @@ def setParams(self, rawPredictionCol="rawPrediction", labelCol="label", class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Evaluator for Regression, which expects input columns prediction, label and an optional weight column. @@ -278,8 +274,6 @@ def setParams(self, predictionCol="prediction", labelCol="label", class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Evaluator for Multiclass Classification, which expects two input columns: prediction and label. @@ -497,8 +491,6 @@ def setParams(self, predictionCol="prediction", labelCol="label", class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Evaluator for Clustering results, which expects two input columns: prediction and features. The metric computes the Silhouette measure using the squared Euclidean distance. diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index fe8ac6239a60b..9ab4e4d68691d 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -204,8 +204,6 @@ def approxSimilarityJoin(self, datasetA, datasetB, threshold, distCol="distCol") class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - LSH class for Euclidean distance metrics. The input is dense or sparse vectors, each of which represents a point in the Euclidean distance space. The output will be vectors of configurable dimension. Hash values in the same @@ -318,8 +316,6 @@ def _create_model(self, java_model): class BucketedRandomProjectionLSHModel(LSHModel, JavaMLReadable, JavaMLWritable): r""" - .. note:: Experimental - Model fitted by :py:class:`BucketedRandomProjectionLSH`, where multiple random vectors are stored. The vectors are normalized to be unit vectors and each vector is used in a hash function: :math:`h_i(x) = floor(r_i \cdot x / bucketLength)` where :math:`r_i` is the @@ -796,8 +792,6 @@ def getScalingVec(self): class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, JavaMLReadable, JavaMLWritable): """ - .. 
note:: Experimental - Feature hashing projects a set of categorical or numerical features into a feature vector of specified dimension (typically substantially smaller than that of the original feature space). This is done using the hashing trick (https://en.wikipedia.org/wiki/Feature_hashing) @@ -1083,8 +1077,6 @@ def numDocs(self): @inherit_doc class Imputer(JavaEstimator, HasInputCols, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Imputation estimator for completing missing values, either using the mean or the median of the columns in which the missing values are located. The input columns should be of DoubleType or FloatType. Currently Imputer does not support categorical features and @@ -1222,8 +1214,6 @@ def _create_model(self, java_model): class ImputerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`Imputer`. .. versionadded:: 2.2.0 @@ -1373,8 +1363,6 @@ class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - LSH class for Jaccard distance. The input can be dense or sparse vectors, but it is more efficient if it is sparse. For example, `Vectors.sparse(10, [(2, 1.0), (3, 1.0), (5, 1.0)])` means there are 10 elements @@ -1452,8 +1440,6 @@ def _create_model(self, java_model): class MinHashLSHModel(LSHModel, JavaMLReadable, JavaMLWritable): r""" - .. note:: Experimental - Model produced by :py:class:`MinHashLSH`, where where multiple hash functions are stored. Each hash function is picked from the following family of hash functions, where :math:`a_i` and :math:`b_i` are randomly chosen integers less than prime: @@ -1920,8 +1906,6 @@ def getDegree(self): class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - `QuantileDiscretizer` takes a column with continuous features and outputs a column with binned categorical features. The number of bins can be set using the :py:attr:`numBuckets` parameter. It is possible that the number of buckets used will be less than this value, for example, if @@ -3591,8 +3575,6 @@ def explainedVariance(self): class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Implements the transforms required for fitting a dataset against an R model formula. Currently we support a limited subset of the R operators, including '~', '.', ':', '+', '-', '*', and '^'. @@ -3761,8 +3743,6 @@ def __str__(self): class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`RFormula`. Fitting is required to determine the factor levels of formula terms. @@ -3778,8 +3758,6 @@ def __str__(self): class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Chi-Squared feature selection, which selects categorical features to use for predicting a categorical label. The selector supports different selection methods: `numTopFeatures`, `percentile`, `fpr`, @@ -3979,8 +3957,6 @@ def _create_model(self, java_model): class ChiSqSelectorModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`ChiSqSelector`. .. 
versionadded:: 2.0.0 @@ -3999,8 +3975,6 @@ def selectedFeatures(self): class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - A feature transformer that adds size information to the metadata of a vector column. VectorAssembler needs size information for its input columns and cannot be used on streaming dataframes without this metadata. diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index ed71fb0c57591..4fc19704ae3d1 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -125,8 +125,6 @@ def getItemsCol(self): class FPGrowthModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by FPGrowth. .. versionadded:: 2.2.0 @@ -159,8 +157,6 @@ class FPGrowth(JavaEstimator, HasItemsCol, HasPredictionCol, JavaMLWritable, JavaMLReadable): r""" - .. note:: Experimental - A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in Li et al., PFP: Parallel FP-Growth for Query Recommendation [LI2008]_. PFP distributes computation in such a way that each worker executes an @@ -249,8 +245,6 @@ def _create_model(self, java_model): class PrefixSpan(JavaParams): """ - .. note:: Experimental - A parallel PrefixSpan algorithm to mine frequent sequential patterns. The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth @@ -313,8 +307,6 @@ def setParams(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=3200 @since("2.4.0") def findFrequentSequentialPatterns(self, dataset): """ - .. note:: Experimental - Finds the complete set of frequent sequential patterns in the input sequences of itemsets. :param dataset: A dataframe containing a sequence column which is diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index b2946a10be4ab..2d1d1272c17f8 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -224,8 +224,6 @@ def evaluate(self, dataset): class LinearRegressionSummary(JavaWrapper): """ - .. note:: Experimental - Linear regression results evaluated on a dataset. .. versionadded:: 2.0.0 @@ -431,8 +429,6 @@ def pValues(self): @inherit_doc class LinearRegressionTrainingSummary(LinearRegressionSummary): """ - .. note:: Experimental - Linear regression training results. Currently, the training summary ignores the training weights except for the objective trace. @@ -1473,8 +1469,6 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Accelerated Failure Time (AFT) Model Survival Regression Fit a parametric AFT survival regression model based on the Weibull distribution @@ -1615,8 +1609,6 @@ def getQuantilesCol(self): class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by :class:`AFTSurvivalRegression`. .. versionadded:: 1.6.0 @@ -1666,8 +1658,6 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol, HasSolver, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Generalized Linear Regression. 
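The FP-growth classes above lose only their Experimental note; the estimator behind the Python wrapper is unchanged. A minimal sketch of the backing Scala estimator, assuming an active `spark` session and hypothetical toy baskets:
{{{
import org.apache.spark.ml.fpm.FPGrowth

// Hypothetical transactions; each row is a basket of items.
val baskets = spark.createDataFrame(Seq(
  (0, Array("a", "b", "c")),
  (1, Array("a", "b")),
  (2, Array("a", "c"))
)).toDF("id", "items")

val model = new FPGrowth()
  .setItemsCol("items")
  .setMinSupport(0.5)
  .setMinConfidence(0.6)
  .fit(baskets)

model.freqItemsets.show()        // frequent itemsets with their frequencies
model.associationRules.show()    // antecedent => consequent rules
model.transform(baskets).show()  // per-row predicted consequents
}}}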
Fit a Generalized Linear Model specified by giving a symbolic description of the linear @@ -1874,8 +1864,6 @@ def getOffsetCol(self): class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable, HasTrainingSummary): """ - .. note:: Experimental - Model fitted by :class:`GeneralizedLinearRegression`. .. versionadded:: 2.0.0 @@ -1929,8 +1917,6 @@ def evaluate(self, dataset): class GeneralizedLinearRegressionSummary(JavaWrapper): """ - .. note:: Experimental - Generalized linear regression results evaluated on a dataset. .. versionadded:: 2.0.0 @@ -2042,8 +2028,6 @@ def aic(self): @inherit_doc class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary): """ - .. note:: Experimental - Generalized linear regression training results. .. versionadded:: 2.0.0 diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 3f421024acdce..a84b67476609b 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -26,8 +26,6 @@ class ChiSquareTest(object): """ - .. note:: Experimental - Conduct Pearson's independence test for every feature against the label. For each feature, the (feature, label) pairs are converted into a contingency matrix for which the Chi-squared statistic is computed. All label and feature values must be categorical. @@ -77,8 +75,6 @@ def test(dataset, featuresCol, labelCol): class Correlation(object): """ - .. note:: Experimental - Compute the correlation matrix for the input dataset of Vectors using the specified method. Methods currently supported: `pearson` (default), `spearman`. @@ -138,8 +134,6 @@ def corr(dataset, column, method="pearson"): class KolmogorovSmirnovTest(object): """ - .. note:: Experimental - Conduct the two-sided Kolmogorov Smirnov (KS) test for data sampled from a continuous distribution. @@ -199,8 +193,6 @@ def test(dataset, sampleCol, distName, *params): class Summarizer(object): """ - .. note:: Experimental - Tools for vectorized statistics on MLlib Vectors. The methods in this package provide various statistics for Vectors contained inside DataFrames. This class lets users pick the statistics they would like to extract for a given column. @@ -355,8 +347,6 @@ def metrics(*metrics): class SummaryBuilder(JavaWrapper): """ - .. note:: Experimental - A builder object that provides summary statistics about a given column. Users should not directly create such builders, but instead use one of the methods in diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index be7b8da981317..d80d6e8aaa342 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -489,8 +489,6 @@ def _to_java(self): class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels, MLReadable, MLWritable): """ - .. note:: Experimental - Validation for hyper-parameter tuning. Randomly splits the input dataset into train and validation sets, and uses evaluation metric on the validation set to select the best model. Similar to :class:`CrossValidator`, but only splits the set once. @@ -666,8 +664,6 @@ def _to_java(self): class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable): """ - .. note:: Experimental - Model from train validation split. .. 
versionadded:: 2.0.0 diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 590e8e1e9c07c..16c226f02e633 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2370,8 +2370,6 @@ def _to_java_object_rdd(self): def countApprox(self, timeout, confidence=0.95): """ - .. note:: Experimental - Approximate version of count() that returns a potentially incomplete result within a timeout, even if not all tasks have finished. @@ -2384,8 +2382,6 @@ def countApprox(self, timeout, confidence=0.95): def sumApprox(self, timeout, confidence=0.95): """ - .. note:: Experimental - Approximate operation to return the sum within a timeout or meet the confidence. @@ -2401,8 +2397,6 @@ def sumApprox(self, timeout, confidence=0.95): def meanApprox(self, timeout, confidence=0.95): """ - .. note:: Experimental - Approximate operation to return the mean within a timeout or meet the confidence. @@ -2418,8 +2412,6 @@ def meanApprox(self, timeout, confidence=0.95): def countApproxDistinct(self, relativeSD=0.05): """ - .. note:: Experimental - Return approximate number of distinct elements in the RDD. The algorithm used is based on streamlib's implementation of diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 9e5b61f9e00b4..c7ff2882ed95a 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2917,8 +2917,6 @@ def pandas_udf(f=None, returnType=None, functionType=None): :param functionType: an enum value in :class:`pyspark.sql.functions.PandasUDFType`. Default: SCALAR. - .. note:: Experimental - The function type of the UDF can be one of the following: 1. SCALAR diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index cc1da8e7c1f72..ec90ba905ef66 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -51,8 +51,6 @@ class GroupedData(object): A set of methods for aggregations on a :class:`DataFrame`, created by :func:`DataFrame.groupBy`. - .. note:: Experimental - .. versionadded:: 1.3 """ @@ -238,8 +236,6 @@ def apply(self, udf): into memory, so the user should be aware of the potential OOM risk if data is skewed and certain groups are too large to fit in memory. - .. note:: Experimental - :param udf: a grouped map user-defined function returned by :func:`pyspark.sql.functions.pandas_udf`. diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 9e02758a566a2..67c594c539d52 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -46,8 +46,6 @@ class Window(object): unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined, a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default. - .. note:: Experimental - .. versionadded:: 1.4 """ @@ -205,8 +203,6 @@ class WindowSpec(object): Use the static methods in :class:`Window` to create a :class:`WindowSpec`. - .. note:: Experimental - .. versionadded:: 1.4 """ diff --git a/python/pyspark/taskcontext.py b/python/pyspark/taskcontext.py index 790de0b9bf41a..b0c32c15793ac 100644 --- a/python/pyspark/taskcontext.py +++ b/python/pyspark/taskcontext.py @@ -24,8 +24,6 @@ class TaskContext(object): """ - .. note:: Experimental - Contextual information about a task which can be read or mutated during execution. To access the TaskContext for a running task, use: :meth:`TaskContext.get`. 
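The approximate RDD operations above (`countApprox`, `sumApprox`, `meanApprox`, `countApproxDistinct`) are thin wrappers over the same methods on the JVM RDD. A small sketch on the Scala side, with arbitrary numbers and tolerances:
{{{
// Arbitrary example data; the timeout and tolerances are illustrative.
val nums = spark.sparkContext.parallelize(1L to 1000000L, numSlices = 100)

// Best-effort count within 200 ms at 95% confidence; returns a PartialResult.
val partial = nums.countApprox(timeout = 200, confidence = 0.95)
println(partial.initialValue)                  // a BoundedDouble estimate with low/high bounds

// HyperLogLog-based approximate distinct count (~5% relative standard deviation).
println(nums.countApproxDistinct(relativeSD = 0.05))
}}}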
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java index a54398324fc66..110ed460cc8fa 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/SQLUserDefinedType.java @@ -20,7 +20,6 @@ import java.lang.annotation.*; import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.annotation.Evolving; /** * ::DeveloperApi:: @@ -31,7 +30,6 @@ @DeveloperApi @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.TYPE) -@Evolving public @interface SQLUserDefinedType { /** diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java index 07d17ee14ce23..541818331a0bd 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.vector.complex.*; import org.apache.arrow.vector.holders.NullableVarCharHolder; -import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.util.ArrowUtils; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.types.UTF8String; @@ -31,7 +30,6 @@ * A column vector backed by Apache Arrow. Currently calendar interval type and map type are not * supported. */ -@Evolving public final class ArrowColumnVector extends ColumnVector { private final ArrowVectorAccessor accessor; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala index 29d81c553ff61..c43a86ad48ec9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala @@ -20,11 +20,9 @@ package org.apache.spark.sql import scala.annotation.implicitNotFound import scala.reflect.ClassTag -import org.apache.spark.annotation.{Evolving, Experimental} import org.apache.spark.sql.types._ /** - * :: Experimental :: * Used to convert a JVM object of type `T` to and from the internal Spark SQL representation. * * == Scala == @@ -65,8 +63,6 @@ import org.apache.spark.sql.types._ * * @since 1.6.0 */ -@Experimental -@Evolving @implicitNotFound("Unable to find encoder for type ${T}. An implicit Encoder[${T}] is needed to " + "store ${T} instances in a Dataset. 
Primitive types (Int, String, etc) and Product types (case " + "classes) are supported by importing spark.implicits._ Support for serializing other types " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 055fbc49bdcd7..5d31b5bbf12af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -22,7 +22,6 @@ import java.lang.reflect.Modifier import scala.reflect.{classTag, ClassTag} import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.annotation.{Evolving, Experimental} import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast} @@ -30,13 +29,10 @@ import org.apache.spark.sql.catalyst.expressions.objects.{DecodeUsingSerializer, import org.apache.spark.sql.types._ /** - * :: Experimental :: * Methods for creating an [[Encoder]]. * * @since 1.6.0 */ -@Experimental -@Evolving object Encoders { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala index e79c0a4b62c4b..21d773d00a75a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala @@ -17,9 +17,6 @@ package org.apache.spark.sql.types -import org.apache.spark.annotation.Evolving - -@Evolving object ObjectType extends AbstractDataType { override private[sql] def defaultConcreteType: DataType = throw new UnsupportedOperationException( @@ -36,7 +33,6 @@ object ObjectType extends AbstractDataType { /** * Represents a JVM object that is passing through Spark SQL expression evaluation. */ -@Evolving case class ObjectType(cls: Class[_]) extends DataType { override def defaultSize: Int = 4096 diff --git a/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java index 859b936acdf70..259328d319c8e 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java +++ b/sql/core/src/main/java/org/apache/spark/sql/expressions/javalang/typed.java @@ -25,7 +25,6 @@ import org.apache.spark.sql.execution.aggregate.TypedSumLong; /** - * :: Experimental :: * Type-safe functions available for {@link org.apache.spark.sql.Dataset} operations in Java. * * Scala users should use {@link org.apache.spark.sql.expressions.scalalang.typed}. 
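`Encoders` keeps exactly the same factory methods after the annotation removal; for reference, a small sketch of obtaining explicit encoders and passing one where an implicit is expected (the `Person` case class is hypothetical):
{{{
import org.apache.spark.sql.{Encoder, Encoders}

case class Person(name: String, age: Int)

val stringEnc: Encoder[String]        = Encoders.STRING
val pairEnc:   Encoder[(String, Int)] = Encoders.tuple(Encoders.STRING, Encoders.scalaInt)
val personEnc: Encoder[Person]        = Encoders.product[Person]

// An explicit encoder can always be supplied where an implicit one is expected.
val people = spark.createDataset(Seq(Person("ann", 34), Person("bo", 29)))(personEnc)
people.show()
}}}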
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 23360df04594b..a2f5f03d797a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils import org.apache.spark.{SparkException, TaskContext} -import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental, Stable, Unstable} +import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ import org.apache.spark.api.python.{PythonRDD, SerDeUtil} @@ -441,7 +441,6 @@ class Dataset[T] private[sql]( def toDF(): DataFrame = new Dataset[Row](sparkSession, queryExecution, RowEncoder(schema)) /** - * :: Experimental :: * Returns a new Dataset where each record has been mapped on to the specified type. The * method used to map columns depend on the type of `U`: * - When `U` is a class, fields for the class will be mapped to columns of the same name @@ -461,8 +460,6 @@ class Dataset[T] private[sql]( * @group basic * @since 1.6.0 */ - @Experimental - @Evolving def as[U : Encoder]: Dataset[U] = Dataset[U](sparkSession, logicalPlan) /** @@ -597,7 +594,6 @@ class Dataset[T] private[sql]( * @group streaming * @since 2.0.0 */ - @Evolving def isStreaming: Boolean = logicalPlan.isStreaming /** @@ -609,8 +605,6 @@ class Dataset[T] private[sql]( * @group basic * @since 2.1.0 */ - @Experimental - @Evolving def checkpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = true) /** @@ -622,8 +616,6 @@ class Dataset[T] private[sql]( * @group basic * @since 2.1.0 */ - @Experimental - @Evolving def checkpoint(eager: Boolean): Dataset[T] = checkpoint(eager = eager, reliableCheckpoint = true) /** @@ -635,8 +627,6 @@ class Dataset[T] private[sql]( * @group basic * @since 2.3.0 */ - @Experimental - @Evolving def localCheckpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = false) /** @@ -648,8 +638,6 @@ class Dataset[T] private[sql]( * @group basic * @since 2.3.0 */ - @Experimental - @Evolving def localCheckpoint(eager: Boolean): Dataset[T] = checkpoint( eager = eager, reliableCheckpoint = false @@ -725,7 +713,6 @@ class Dataset[T] private[sql]( * @group streaming * @since 2.1.0 */ - @Evolving // We only accept an existing column name, not a derived column here as a watermark that is // defined on a derived column cannot referenced elsewhere in the plan. def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = withTypedPlan { @@ -1107,7 +1094,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * Joins this Dataset returning a `Tuple2` for each pair where `condition` evaluates to * true. * @@ -1128,8 +1114,6 @@ class Dataset[T] private[sql]( * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)] = { // Creates a Join node and resolve it first, to get join condition resolved, self-join resolved, // etc. @@ -1203,7 +1187,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * Using inner equi-join to join this Dataset returning a `Tuple2` for each pair * where `condition` evaluates to true. 
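The `as[U]`, `checkpoint` and `localCheckpoint` operators above keep their behaviour; only the stability markers move. A minimal sketch, with a made-up `Sale` type and a placeholder checkpoint directory:
{{{
import spark.implicits._

case class Sale(id: Long, amount: Double)

val sales = Seq((1L, 9.5), (2L, 3.0)).toDF("id", "amount").as[Sale]  // map columns by name

// Reliable checkpointing needs a checkpoint directory; localCheckpoint does not.
spark.sparkContext.setCheckpointDir("/tmp/spark-checkpoints")
val pinned = sales.checkpoint()                 // eager, reliable
val cached = sales.localCheckpoint(eager = false)
}}}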
* @@ -1213,8 +1196,6 @@ class Dataset[T] private[sql]( * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)] = { joinWith(other, condition, "inner") } @@ -1462,7 +1443,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * Returns a new Dataset by computing the given [[Column]] expression for each element. * * {{{ @@ -1473,8 +1453,6 @@ class Dataset[T] private[sql]( * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def select[U1](c1: TypedColumn[T, U1]): Dataset[U1] = { implicit val encoder = c1.encoder val project = Project(c1.withInputType(exprEnc, logicalPlan.output).named :: Nil, logicalPlan) @@ -1501,26 +1479,20 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * Returns a new Dataset by computing the given [[Column]] expressions for each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)] = selectUntyped(c1, c2).asInstanceOf[Dataset[(U1, U2)]] /** - * :: Experimental :: * Returns a new Dataset by computing the given [[Column]] expressions for each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def select[U1, U2, U3]( c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], @@ -1528,14 +1500,11 @@ class Dataset[T] private[sql]( selectUntyped(c1, c2, c3).asInstanceOf[Dataset[(U1, U2, U3)]] /** - * :: Experimental :: * Returns a new Dataset by computing the given [[Column]] expressions for each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def select[U1, U2, U3, U4]( c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], @@ -1544,14 +1513,11 @@ class Dataset[T] private[sql]( selectUntyped(c1, c2, c3, c4).asInstanceOf[Dataset[(U1, U2, U3, U4)]] /** - * :: Experimental :: * Returns a new Dataset by computing the given [[Column]] expressions for each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def select[U1, U2, U3, U4, U5]( c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], @@ -1713,7 +1679,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * (Scala-specific) * Reduces the elements of this Dataset using the specified binary function. The given `func` * must be commutative and associative or the result may be non-deterministic. @@ -1721,14 +1686,11 @@ class Dataset[T] private[sql]( * @group action * @since 1.6.0 */ - @Experimental - @Evolving def reduce(func: (T, T) => T): T = withNewRDDExecutionId { rdd.reduce(func) } /** - * :: Experimental :: * (Java-specific) * Reduces the elements of this Dataset using the specified binary function. The given `func` * must be commutative and associative or the result may be non-deterministic. @@ -1736,20 +1698,15 @@ class Dataset[T] private[sql]( * @group action * @since 1.6.0 */ - @Experimental - @Evolving def reduce(func: ReduceFunction[T]): T = reduce(func.call(_, _)) /** - * :: Experimental :: * (Scala-specific) * Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`. 
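As documented above, `joinWith` differs from `join` in that it keeps both sides as typed objects in a `Tuple2` instead of flattening them into columns. A sketch with made-up case classes:
{{{
import org.apache.spark.sql.Dataset
import spark.implicits._

case class User(id: Long, name: String)
case class Order(userId: Long, total: Double)

val users  = Seq(User(1, "ann"), User(2, "bo")).toDS()
val orders = Seq(Order(1, 9.5), Order(1, 3.0), Order(2, 7.0)).toDS()

val pairs: Dataset[(User, Order)] =
  users.joinWith(orders, users("id") === orders("userId"), "inner")

// The two-argument overload defaults to an inner join:
val samePairs = users.joinWith(orders, users("id") === orders("userId"))
}}}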
* * @group typedrel * @since 2.0.0 */ - @Experimental - @Evolving def groupByKey[K: Encoder](func: T => K): KeyValueGroupedDataset[K, T] = { val withGroupingKey = AppendColumns(func, logicalPlan) val executed = sparkSession.sessionState.executePlan(withGroupingKey) @@ -1763,15 +1720,12 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * (Java-specific) * Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`. * * @group typedrel * @since 2.0.0 */ - @Experimental - @Evolving def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T] = groupByKey(func.call(_))(encoder) @@ -2584,72 +2538,57 @@ class Dataset[T] private[sql]( def transform[U](t: Dataset[T] => Dataset[U]): Dataset[U] = t(this) /** - * :: Experimental :: * (Scala-specific) * Returns a new Dataset that only contains elements where `func` returns `true`. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def filter(func: T => Boolean): Dataset[T] = { withTypedPlan(TypedFilter(func, logicalPlan)) } /** - * :: Experimental :: * (Java-specific) * Returns a new Dataset that only contains elements where `func` returns `true`. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def filter(func: FilterFunction[T]): Dataset[T] = { withTypedPlan(TypedFilter(func, logicalPlan)) } /** - * :: Experimental :: * (Scala-specific) * Returns a new Dataset that contains the result of applying `func` to each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def map[U : Encoder](func: T => U): Dataset[U] = withTypedPlan { MapElements[T, U](func, logicalPlan) } /** - * :: Experimental :: * (Java-specific) * Returns a new Dataset that contains the result of applying `func` to each element. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U] = { implicit val uEnc = encoder withTypedPlan(MapElements[T, U](func, logicalPlan)) } /** - * :: Experimental :: * (Scala-specific) * Returns a new Dataset that contains the result of applying `func` to each partition. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def mapPartitions[U : Encoder](func: Iterator[T] => Iterator[U]): Dataset[U] = { new Dataset[U]( sparkSession, @@ -2658,15 +2597,12 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * (Java-specific) * Returns a new Dataset that contains the result of applying `f` to each partition. * * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def mapPartitions[U](f: MapPartitionsFunction[T, U], encoder: Encoder[U]): Dataset[U] = { val func: (Iterator[T]) => Iterator[U] = x => f.call(x.asJava).asScala mapPartitions(func)(encoder) @@ -2705,7 +2641,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * (Scala-specific) * Returns a new Dataset by first applying a function to all elements of this Dataset, * and then flattening the results. @@ -2713,13 +2648,10 @@ class Dataset[T] private[sql]( * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def flatMap[U : Encoder](func: T => TraversableOnce[U]): Dataset[U] = mapPartitions(_.flatMap(func)) /** - * :: Experimental :: * (Java-specific) * Returns a new Dataset by first applying a function to all elements of this Dataset, * and then flattening the results. 
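The typed `filter`/`map`/`mapPartitions`/`flatMap`/`groupByKey` family above changes only in its annotations. A compact sketch of the Scala-specific variants over toy data:
{{{
import spark.implicits._

val lines = Seq("spark keeps these apis", "only the annotations change").toDS()

val counts = lines
  .flatMap(_.split("\\s+"))        // Dataset[String] of words
  .map(_.toLowerCase)
  .filter(_.nonEmpty)
  .groupByKey(identity)            // KeyValueGroupedDataset[String, String]
  .count()                         // Dataset[(String, Long)]

counts.show()
}}}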
@@ -2727,8 +2659,6 @@ class Dataset[T] private[sql]( * @group typedrel * @since 1.6.0 */ - @Experimental - @Evolving def flatMap[U](f: FlatMapFunction[T, U], encoder: Encoder[U]): Dataset[U] = { val func: (T) => Iterator[U] = x => f.call(x).asScala flatMap(func)(encoder) @@ -3224,7 +3154,6 @@ class Dataset[T] private[sql]( * @group basic * @since 2.0.0 */ - @Evolving def writeStream: DataStreamWriter[T] = { if (!isStreaming) { logicalPlan.failAnalysis( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala index a0b0a34a01073..27012c471462d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import org.apache.spark.annotation.Evolving - /** * The abstract class for writing custom logic to process data generated by a query. * This is often used to write the output of a streaming query to arbitrary storage systems. @@ -101,7 +99,6 @@ import org.apache.spark.annotation.Evolving * * @since 2.0.0 */ -@Evolving abstract class ForeachWriter[T] extends Serializable { // TODO: Move this to org.apache.spark.sql.util or consolidate this with batch API. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 0da52d432d25d..89cc9735e4f6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Evolving, Experimental} import org.apache.spark.api.java.function._ import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CreateStruct} @@ -30,15 +29,12 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout, OutputMode} /** - * :: Experimental :: * A [[Dataset]] has been logically grouped by a user specified grouping key. Users should not * construct a [[KeyValueGroupedDataset]] directly, but should instead call `groupByKey` on * an existing [[Dataset]]. * * @since 2.0.0 */ -@Experimental -@Evolving class KeyValueGroupedDataset[K, V] private[sql]( kEncoder: Encoder[K], vEncoder: Encoder[V], @@ -221,7 +217,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Scala-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. @@ -237,8 +232,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. * @since 2.2.0 */ - @Experimental - @Evolving def mapGroupsWithState[S: Encoder, U: Encoder]( func: (K, Iterator[V], GroupState[S]) => U): Dataset[U] = { val flatMapFunc = (key: K, it: Iterator[V], s: GroupState[S]) => Iterator(func(key, it, s)) @@ -255,7 +248,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Scala-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. 
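`ForeachWriter`, whose Evolving tag is dropped above, remains the extension point for `writeStream.foreach(...)`. A minimal implementation sketch; the println "sink" is purely illustrative:
{{{
import org.apache.spark.sql.ForeachWriter

class PrintlnSink extends ForeachWriter[String] {
  override def open(partitionId: Long, epochId: Long): Boolean = true
  override def process(value: String): Unit = println(value)
  override def close(errorOrNull: Throwable): Unit = ()
}

// Assuming `lines` is a streaming Dataset[String]:
// lines.writeStream.foreach(new PrintlnSink).start()
}}}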
@@ -272,8 +264,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. * @since 2.2.0 */ - @Experimental - @Evolving def mapGroupsWithState[S: Encoder, U: Encoder]( timeoutConf: GroupStateTimeout)( func: (K, Iterator[V], GroupState[S]) => U): Dataset[U] = { @@ -291,7 +281,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Java-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. @@ -309,8 +298,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. * @since 2.2.0 */ - @Experimental - @Evolving def mapGroupsWithState[S, U]( func: MapGroupsWithStateFunction[K, V, S, U], stateEncoder: Encoder[S], @@ -321,7 +308,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Java-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. @@ -340,8 +326,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. * @since 2.2.0 */ - @Experimental - @Evolving def mapGroupsWithState[S, U]( func: MapGroupsWithStateFunction[K, V, S, U], stateEncoder: Encoder[S], @@ -353,7 +337,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Scala-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. @@ -371,8 +354,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. * @since 2.2.0 */ - @Experimental - @Evolving def flatMapGroupsWithState[S: Encoder, U: Encoder]( outputMode: OutputMode, timeoutConf: GroupStateTimeout)( @@ -393,7 +374,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( } /** - * ::Experimental:: * (Java-specific) * Applies the given function to each group of data, while maintaining a user-defined per-group * state. The result Dataset will represent the objects returned by the function. @@ -413,8 +393,6 @@ class KeyValueGroupedDataset[K, V] private[sql]( * See [[Encoder]] for more details on what types are encodable to Spark SQL. 
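`mapGroupsWithState` keeps per-group state across invocations of the user function. A sketch of the Scala-specific overload with an explicit timeout configuration, using a hypothetical `Event` type and toy data:
{{{
import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout}
import spark.implicits._

case class Event(user: String, clicks: Int)

// Toy batch data; in practice this would usually be a streaming Dataset.
val events = Seq(Event("ann", 3), Event("ann", 2), Event("bo", 5)).toDS()

val totals = events
  .groupByKey(_.user)
  .mapGroupsWithState(GroupStateTimeout.NoTimeout()) {
    (user: String, batch: Iterator[Event], state: GroupState[Int]) =>
      val total = state.getOption.getOrElse(0) + batch.map(_.clicks).sum
      state.update(total)
      (user, total)                // one output row per group
  }

totals.show()
}}}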
* @since 2.2.0 */ - @Experimental - @Evolving def flatMapGroupsWithState[S, U]( func: FlatMapGroupsWithStateFunction[K, V, S, U], outputMode: OutputMode, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 08b7521de9573..45d0bd4122535 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -23,7 +23,7 @@ import scala.collection.immutable import scala.reflect.runtime.universe.TypeTag import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.annotation._ +import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry @@ -85,8 +85,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s * that listen for execution metrics. */ - @Experimental - @Evolving def listenerManager: ExecutionListenerManager = sparkSession.listenerManager /** @@ -231,7 +229,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) // scalastyle:off // Disable style checker so "implicits" object can start with lowercase i /** - * :: Experimental :: * (Scala-specific) Implicit methods available in Scala for converting * common Scala objects into `DataFrame`s. * @@ -243,35 +240,27 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @group basic * @since 1.3.0 */ - @Experimental - @Evolving object implicits extends SQLImplicits with Serializable { protected override def _sqlContext: SQLContext = self } // scalastyle:on /** - * :: Experimental :: * Creates a DataFrame from an RDD of Product (e.g. case classes, tuples). * * @group dataframes * @since 1.3.0 */ - @Experimental - @Evolving def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { sparkSession.createDataFrame(rdd) } /** - * :: Experimental :: * Creates a DataFrame from a local Seq of Product. * * @group dataframes * @since 1.3.0 */ - @Experimental - @Evolving def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { sparkSession.createDataFrame(data) } @@ -319,13 +308,11 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 1.3.0 */ @DeveloperApi - @Evolving def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { sparkSession.createDataFrame(rowRDD, schema) } /** - * :: Experimental :: * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -353,14 +340,11 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 2.0.0 * @group dataset */ - @Experimental - @Evolving def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = { sparkSession.createDataset(data) } /** - * :: Experimental :: * Creates a [[Dataset]] from an RDD of a given type. 
This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -369,13 +353,11 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 2.0.0 * @group dataset */ - @Experimental def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = { sparkSession.createDataset(data) } /** - * :: Experimental :: * Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -391,8 +373,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 2.0.0 * @group dataset */ - @Experimental - @Evolving def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = { sparkSession.createDataset(data) } @@ -419,7 +399,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 1.3.0 */ @DeveloperApi - @Evolving def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { sparkSession.createDataFrame(rowRDD, schema) } @@ -434,7 +413,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 1.6.0 */ @DeveloperApi - @Evolving def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = { sparkSession.createDataFrame(rows, schema) } @@ -498,7 +476,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * * @since 2.0.0 */ - @Evolving def readStream: DataStreamReader = sparkSession.readStream @@ -614,45 +591,35 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * :: Experimental :: * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from 0 to `end` (exclusive) with step value 1. * * @since 1.4.1 * @group dataframe */ - @Experimental - @Evolving def range(end: Long): DataFrame = sparkSession.range(end).toDF() /** - * :: Experimental :: * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with step value 1. * * @since 1.4.0 * @group dataframe */ - @Experimental - @Evolving def range(start: Long, end: Long): DataFrame = sparkSession.range(start, end).toDF() /** - * :: Experimental :: * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 * @group dataframe */ - @Experimental - @Evolving def range(start: Long, end: Long, step: Long): DataFrame = { sparkSession.range(start, end, step).toDF() } /** - * :: Experimental :: * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in an range from `start` to `end` (exclusive) with an step value, with partition number * specified. 
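As the bodies above show, the SQLContext `createDataset` variants simply delegate to SparkSession. The usual entry points look like this, with a made-up `Point` type:
{{{
import spark.implicits._

case class Point(x: Double, y: Double)

// From a local Seq: the encoder comes from the implicits in scope.
val ds1 = spark.createDataset(Seq(Point(0.0, 0.0), Point(1.0, 2.0)))

// From an RDD of the same type.
val rdd = spark.sparkContext.parallelize(Seq(Point(3.0, 4.0)))
val ds2 = spark.createDataset(rdd)

// Product types can also go straight to a DataFrame.
val df = spark.createDataFrame(Seq(Point(5.0, 6.0)))
}}}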
@@ -660,8 +627,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) * @since 1.4.0 * @group dataframe */ - @Experimental - @Evolving def range(start: Long, end: Long, step: Long, numPartitions: Int): DataFrame = { sparkSession.range(start, end, step, numPartitions).toDF() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index c997b7d8e0bfb..71cbc3ab14d97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -21,7 +21,6 @@ import scala.collection.Map import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.annotation.Evolving import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -30,7 +29,6 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder * * @since 1.6.0 */ -@Evolving abstract class SQLImplicits extends LowPrioritySQLImplicits { protected def _sqlContext: SQLContext diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index e0d0062e976c1..bd2bc1c0ad5d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -22,18 +22,16 @@ import java.util.concurrent.TimeUnit._ import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConverters._ -import scala.collection.mutable import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext, TaskContext} -import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental, Stable, Unstable} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.catalog.Catalog -import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Catalogs} import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders._ @@ -182,14 +180,11 @@ class SparkSession private( @transient lazy val conf: RuntimeConfig = new RuntimeConfig(sessionState.conf) /** - * :: Experimental :: * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s * that listen for execution metrics. * * @since 2.0.0 */ - @Experimental - @Evolving def listenerManager: ExecutionListenerManager = sessionState.listenerManager /** @@ -227,13 +222,11 @@ class SparkSession private( def udf: UDFRegistration = sessionState.udfRegistration /** - * :: Experimental :: * Returns a `StreamingQueryManager` that allows managing all the * `StreamingQuery`s active on `this`. * * @since 2.0.0 */ - @Experimental @Unstable def streams: StreamingQueryManager = sessionState.streamingQueryManager @@ -286,26 +279,20 @@ class SparkSession private( } /** - * :: Experimental :: * Creates a new [[Dataset]] of type T containing zero elements. 
* * @return 2.0.0 */ - @Experimental - @Evolving def emptyDataset[T: Encoder]: Dataset[T] = { val encoder = implicitly[Encoder[T]] new Dataset(self, LocalRelation(encoder.schema.toAttributes), encoder) } /** - * :: Experimental :: * Creates a `DataFrame` from an RDD of Product (e.g. case classes, tuples). * * @since 2.0.0 */ - @Experimental - @Evolving def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { SparkSession.setActiveSession(this) val encoder = Encoders.product[A] @@ -313,13 +300,10 @@ class SparkSession private( } /** - * :: Experimental :: * Creates a `DataFrame` from a local Seq of Product. * * @since 2.0.0 */ - @Experimental - @Evolving def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { SparkSession.setActiveSession(this) val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] @@ -359,7 +343,6 @@ class SparkSession private( * @since 2.0.0 */ @DeveloperApi - @Evolving def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { // TODO: use MutableProjection when rowRDD is another DataFrame and the applied // schema differs from the existing schema on any field data type. @@ -377,7 +360,6 @@ class SparkSession private( * @since 2.0.0 */ @DeveloperApi - @Evolving def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { createDataFrame(rowRDD.rdd, schema) } @@ -391,7 +373,6 @@ class SparkSession private( * @since 2.0.0 */ @DeveloperApi - @Evolving def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = { Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala)) } @@ -453,7 +434,6 @@ class SparkSession private( * ------------------------------- */ /** - * :: Experimental :: * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -480,8 +460,6 @@ class SparkSession private( * * @since 2.0.0 */ - @Experimental - @Evolving def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = { val enc = encoderFor[T] val attributes = enc.schema.toAttributes @@ -491,7 +469,6 @@ class SparkSession private( } /** - * :: Experimental :: * Creates a [[Dataset]] from an RDD of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -499,14 +476,11 @@ class SparkSession private( * * @since 2.0.0 */ - @Experimental - @Evolving def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = { Dataset[T](self, ExternalRDD(data, self)) } /** - * :: Experimental :: * Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be @@ -521,59 +495,45 @@ class SparkSession private( * * @since 2.0.0 */ - @Experimental - @Evolving def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = { createDataset(data.asScala) } /** - * :: Experimental :: * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from 0 to `end` (exclusive) with step value 1. 
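`emptyDataset` and the `range` overloads are unchanged apart from the annotations; for reference, assuming an active `spark` session:
{{{
import spark.implicits._

val empty = spark.emptyDataset[String]            // zero rows, schema from the encoder

val ids     = spark.range(1000)                   // 0 until 1000, step 1
val sliced  = spark.range(10, 1000)               // explicit start
val stepped = spark.range(0, 1000, 10)            // explicit step
val split   = spark.range(0, 1000, 10, numPartitions = 8)

split.selectExpr("count(*)", "min(id)", "max(id)").show()
}}}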
* * @since 2.0.0 */ - @Experimental - @Evolving def range(end: Long): Dataset[java.lang.Long] = range(0, end) /** - * :: Experimental :: * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with step value 1. * * @since 2.0.0 */ - @Experimental - @Evolving def range(start: Long, end: Long): Dataset[java.lang.Long] = { range(start, end, step = 1, numPartitions = sparkContext.defaultParallelism) } /** - * :: Experimental :: * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 */ - @Experimental - @Evolving def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = { range(start, end, step, numPartitions = sparkContext.defaultParallelism) } /** - * :: Experimental :: * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value, with partition number * specified. * * @since 2.0.0 */ - @Experimental - @Evolving def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long] = { new Dataset(self, Range(start, end, step, numPartitions), Encoders.LONG) } @@ -668,7 +628,6 @@ class SparkSession private( * * @since 2.0.0 */ - @Evolving def readStream: DataStreamReader = new DataStreamReader(self) /** @@ -690,7 +649,6 @@ class SparkSession private( // scalastyle:off // Disable style checker so "implicits" object can start with lowercase i /** - * :: Experimental :: * (Scala-specific) Implicit methods available in Scala for converting * common Scala objects into `DataFrame`s. * @@ -701,8 +659,6 @@ class SparkSession private( * * @since 2.0.0 */ - @Experimental - @Evolving object implicits extends SQLImplicits with Serializable { protected override def _sqlContext: SQLContext = SparkSession.this.sqlContext } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index 44668610d8052..60738e6d4ef9e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -223,7 +223,6 @@ abstract class Catalog { } /** - * :: Experimental :: * Creates a table from the given path and returns the corresponding DataFrame. * It will use the default data source configured by spark.sql.sources.default. * @@ -232,8 +231,6 @@ abstract class Catalog { * the current database. * @since 2.2.0 */ - @Experimental - @Evolving def createTable(tableName: String, path: String): DataFrame /** @@ -251,7 +248,6 @@ abstract class Catalog { } /** - * :: Experimental :: * Creates a table from the given path based on a data source and returns the corresponding * DataFrame. * @@ -260,8 +256,6 @@ abstract class Catalog { * the current database. * @since 2.2.0 */ - @Experimental - @Evolving def createTable(tableName: String, path: String, source: String): DataFrame /** @@ -282,7 +276,6 @@ abstract class Catalog { } /** - * :: Experimental :: * Creates a table based on the dataset in a data source and a set of options. * Then, returns the corresponding DataFrame. * @@ -291,8 +284,6 @@ abstract class Catalog { * the current database. 
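The `createTable` overloads on `Catalog` register existing data as a table and hand it back as a DataFrame. A sketch with placeholder paths and table names:
{{{
// Default data source (spark.sql.sources.default, typically parquet):
val events = spark.catalog.createTable("events", "/data/events")

// Explicit source plus options:
val clicks = spark.catalog.createTable(
  "clicks",
  source = "csv",
  options = Map("path" -> "/data/clicks.csv", "header" -> "true"))
}}}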
* @since 2.2.0 */ - @Experimental - @Evolving def createTable( tableName: String, source: String, @@ -319,7 +310,6 @@ abstract class Catalog { } /** - * :: Experimental :: * (Scala-specific) * Creates a table based on the dataset in a data source and a set of options. * Then, returns the corresponding DataFrame. @@ -329,15 +319,12 @@ abstract class Catalog { * the current database. * @since 2.2.0 */ - @Experimental - @Evolving def createTable( tableName: String, source: String, options: Map[String, String]): DataFrame /** - * :: Experimental :: * Create a table from the given path based on a data source, a schema and a set of options. * Then, returns the corresponding DataFrame. * @@ -356,7 +343,6 @@ abstract class Catalog { } /** - * :: Experimental :: * Create a table based on the dataset in a data source, a schema and a set of options. * Then, returns the corresponding DataFrame. * @@ -365,8 +351,6 @@ abstract class Catalog { * the current database. * @since 2.2.0 */ - @Experimental - @Evolving def createTable( tableName: String, source: String, @@ -395,7 +379,6 @@ abstract class Catalog { } /** - * :: Experimental :: * (Scala-specific) * Create a table based on the dataset in a data source, a schema and a set of options. * Then, returns the corresponding DataFrame. @@ -405,8 +388,6 @@ abstract class Catalog { * the current database. * @since 2.2.0 */ - @Experimental - @Evolving def createTable( tableName: String, source: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala index 6b4def35e1955..878dc0d83f45a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala @@ -17,14 +17,12 @@ package org.apache.spark.sql.expressions -import org.apache.spark.annotation.{Evolving, Experimental} import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression /** - * :: Experimental :: * A base class for user-defined aggregations, which can be used in `Dataset` operations to take * all of the elements of a group and reduce them to a single value. * @@ -50,8 +48,6 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression * @tparam OUT The type of the final output result. * @since 1.6.0 */ -@Experimental -@Evolving abstract class Aggregator[-IN, BUF, OUT] extends Serializable { /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala index da7ed69e48390..f7591e4d265e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql._ import org.apache.spark.sql.execution.aggregate._ /** - * :: Experimental :: * Type-safe functions available for `Dataset` operations in Scala. * * Java users should use [[org.apache.spark.sql.expressions.javalang.typed]]. 
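`Aggregator` is the typed building block described above; a classic sketch is a typed average over a `Dataset[Double]` (the object name is illustrative):
{{{
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

object TypedAvg extends Aggregator[Double, (Double, Long), Double] {
  def zero: (Double, Long) = (0.0, 0L)
  def reduce(b: (Double, Long), a: Double): (Double, Long) = (b._1 + a, b._2 + 1)
  def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) =
    (b1._1 + b2._1, b1._2 + b2._2)
  def finish(r: (Double, Long)): Double = if (r._2 == 0) 0.0 else r._1 / r._2
  def bufferEncoder: Encoder[(Double, Long)] =
    Encoders.tuple(Encoders.scalaDouble, Encoders.scalaLong)
  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

// ds.select(TypedAvg.toColumn) computes the average of a Dataset[Double].
}}}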
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 2369c341762cf..831c19bbe12c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.internal import org.apache.spark.SparkConf -import org.apache.spark.annotation.{Experimental, Unstable} +import org.apache.spark.annotation.Unstable import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _} -import org.apache.spark.sql.catalog.v2.CatalogPlugin import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer.Optimizer @@ -52,7 +51,6 @@ import org.apache.spark.sql.util.ExecutionListenerManager * state will clone the parent sessions state's `conf`, `functionRegistry`, `experimentalMethods` * and `catalog` fields. Note that the state is cloned when `build` is called, and not before. */ -@Experimental @Unstable abstract class BaseSessionStateBuilder( val session: SparkSession, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index baa8ebee66108..3740b56cb9cbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.internal import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal -import org.apache.spark.annotation.Experimental import org.apache.spark.sql._ import org.apache.spark.sql.catalog.{Catalog, Column, Database, Function, Table} import org.apache.spark.sql.catalyst.{DefinedByConstructorParams, FunctionIdentifier, TableIdentifier} @@ -277,34 +276,29 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { } /** - * :: Experimental :: * Creates a table from the given path and returns the corresponding DataFrame. * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops * @since 2.2.0 */ - @Experimental override def createTable(tableName: String, path: String): DataFrame = { val dataSourceName = sparkSession.sessionState.conf.defaultDataSourceName createTable(tableName, path, dataSourceName) } /** - * :: Experimental :: * Creates a table from the given path and returns the corresponding * DataFrame. * * @group ddl_ops * @since 2.2.0 */ - @Experimental override def createTable(tableName: String, path: String, source: String): DataFrame = { createTable(tableName, source, Map("path" -> path)) } /** - * :: Experimental :: * (Scala-specific) * Creates a table based on the dataset in a data source and a set of options. * Then, returns the corresponding DataFrame. @@ -312,7 +306,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { * @group ddl_ops * @since 2.2.0 */ - @Experimental override def createTable( tableName: String, source: String, @@ -321,7 +314,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { } /** - * :: Experimental :: * (Scala-specific) * Creates a table based on the dataset in a data source, a schema and a set of options. * Then, returns the corresponding DataFrame. 
@@ -329,7 +321,6 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { * @group ddl_ops * @since 2.2.0 */ - @Experimental override def createTable( tableName: String, source: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index f1cde4fbf090b..a83a0f51ecf11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -22,8 +22,7 @@ import java.io.File import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.SparkContext -import org.apache.spark.annotation.{Experimental, Unstable} +import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ import org.apache.spark.sql.catalog.v2.CatalogManager import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} @@ -127,7 +126,6 @@ private[sql] object SessionState { /** * Concrete implementation of a [[BaseSessionStateBuilder]]. */ -@Experimental @Unstable class SessionStateBuilder( session: SparkSession, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 230b43022b02b..a0c6d20f36451 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -21,7 +21,7 @@ import java.sql.{Connection, Date, Timestamp} import org.apache.commons.lang3.StringUtils -import org.apache.spark.annotation.{DeveloperApi, Evolving, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.sql.types._ /** @@ -33,7 +33,6 @@ import org.apache.spark.sql.types._ * send a null value to the database. */ @DeveloperApi -@Evolving case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) /** @@ -56,7 +55,6 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) * for the given Catalyst type. */ @DeveloperApi -@Evolving abstract class JdbcDialect extends Serializable { /** * Check if this dialect instance can handle a certain jdbc url. @@ -196,7 +194,6 @@ abstract class JdbcDialect extends Serializable { * sure to register your dialects first. 
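Custom dialects still plug in through `JdbcDialects.registerDialect`; a sketch of a minimal dialect for a hypothetical "jdbc:mydb" URL prefix:
{{{
import java.sql.Types

import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType}
import org.apache.spark.sql.types._

object MyDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  // Map Catalyst StringType to a database-specific column definition.
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case StringType => Some(JdbcType("TEXT", Types.VARCHAR))
    case _ => None
  }
}

// Per the note above, register the dialect before reading or writing with it.
JdbcDialects.registerDialect(MyDialect)
}}}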
*/ @DeveloperApi -@Evolving object JdbcDialects { /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 6ad054c9f6403..63e57c6804e16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.sources -import org.apache.spark.annotation._ +import org.apache.spark.annotation.{Stable, Unstable} import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.streaming.OutputMode @@ -116,7 +115,6 @@ trait SchemaRelationProvider { * * @since 2.0.0 */ -@Experimental @Unstable trait StreamSourceProvider { @@ -147,7 +145,6 @@ trait StreamSourceProvider { * * @since 2.0.0 */ -@Experimental @Unstable trait StreamSinkProvider { def createSink( @@ -308,7 +305,6 @@ trait InsertableRelation { * * @since 1.3.0 */ -@Experimental @Unstable trait CatalystScan { def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala index 2da8469a0041c..f1fe472afdc2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.util import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{DeveloperApi, Evolving, Experimental} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} import org.apache.spark.sql.SparkSession @@ -29,14 +29,11 @@ import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.util.{ListenerBus, Utils} /** - * :: Experimental :: * The interface of query execution listener that can be used to analyze execution metrics. * * @note Implementations should guarantee thread-safety as they can be invoked by * multiple different threads. */ -@Experimental -@Evolving trait QueryExecutionListener { /** @@ -68,12 +65,8 @@ trait QueryExecutionListener { /** - * :: Experimental :: - * * Manager for [[QueryExecutionListener]]. See `org.apache.spark.sql.SQLContext.listenerManager`. */ -@Experimental -@Evolving // The `session` is used to indicate which session carries this listener manager, and we only // catch SQL executions which are launched by the same session. 
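A `QueryExecutionListener` is registered through the `listenerManager` seen earlier in this patch; a minimal sketch in which the println logging is purely illustrative:
{{{
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

val listener = new QueryExecutionListener {
  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
    println(s"$funcName finished in ${durationNs / 1e6} ms")

  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
    println(s"$funcName failed: ${exception.getMessage}")
}

spark.listenerManager.register(listener)
}}}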
// The `loadExtensions` flag is used to indicate whether we should load the pre-defined, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index d68395812a74c..a143c6f77d55d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.hive -import org.apache.spark.annotation.{Experimental, Unstable} +import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ -import org.apache.spark.sql.catalog.v2.CatalogPlugin import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -34,7 +33,6 @@ import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLo /** * Builder that produces a Hive-aware `SessionState`. */ -@Experimental @Unstable class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None) extends BaseSessionStateBuilder(session, parentState) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 48913eaa4a8bf..589dd877c8c97 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.spark._ -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.input.FixedLengthBinaryInputFormat import org.apache.spark.internal.Logging @@ -772,11 +772,8 @@ object StreamingContext extends Logging { } /** - * :: Experimental :: - * * Get the currently active context, if there is one. Active means started but not stopped. */ - @Experimental def getActive(): Option[StreamingContext] = { ACTIVATION_LOCK.synchronized { Option(activeContext.get()) @@ -784,13 +781,10 @@ object StreamingContext extends Logging { } /** - * :: Experimental :: - * * Either return the "active" StreamingContext (that is, started but not stopped), or create a * new StreamingContext that is * @param creatingFunc Function to create a new StreamingContext */ - @Experimental def getActiveOrCreate(creatingFunc: () => StreamingContext): StreamingContext = { ACTIVATION_LOCK.synchronized { getActive().getOrElse { creatingFunc() } @@ -798,8 +792,6 @@ object StreamingContext extends Logging { } /** - * :: Experimental :: - * * Either get the currently active StreamingContext (that is, started but not stopped), * OR recreate a StreamingContext from checkpoint data in the given path. If checkpoint data * does not exist in the provided, then create a new StreamingContext by calling the provided @@ -813,7 +805,6 @@ object StreamingContext extends Logging { * error in reading checkpoint data. By default, an exception will be * thrown on error. 
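(Usage sketch for StreamingContext.getActiveOrCreate as documented above; the socket source, master URL, and batch interval are placeholder values, not taken from this patch.)

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

def createContext(): StreamingContext = {
  val conf = new SparkConf().setMaster("local[2]").setAppName("get-active-or-create-demo")
  val ssc = new StreamingContext(conf, Seconds(1))
  // Define the DStream graph inside the creating function.
  ssc.socketTextStream("localhost", 9999).print()
  ssc
}

// Reuses the already-started context if one exists, otherwise builds a new one.
val ssc = StreamingContext.getActiveOrCreate(createContext _)
ssc.start()
ssc.awaitTermination()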
*/ - @Experimental def getActiveOrCreate( checkpointPath: String, creatingFunc: () => StreamingContext, diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala index 16c0d6fff8229..bf169ba6ed85d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaMapWithStateDStream.scala @@ -17,12 +17,10 @@ package org.apache.spark.streaming.api.java -import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.streaming.dstream.MapWithStateDStream /** - * :: Experimental :: * DStream representing the stream of data generated by `mapWithState` operation on a * [[JavaPairDStream]]. Additionally, it also gives access to the * stream of state snapshots, that is, the state data of all keys after a batch has updated them. @@ -32,7 +30,6 @@ import org.apache.spark.streaming.dstream.MapWithStateDStream * @tparam StateType Class of the state data * @tparam MappedType Class of the mapped data */ -@Experimental class JavaMapWithStateDStream[KeyType, ValueType, StateType, MappedType] private[streaming]( dstream: MapWithStateDStream[KeyType, ValueType, StateType, MappedType]) extends JavaDStream[MappedType](dstream)(JavaSparkContext.fakeClassTag) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index c3c13df651ccd..3f88fe0817c57 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -30,7 +30,6 @@ import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.spark.Partitioner -import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.{JavaPairRDD, JavaSparkContext, JavaUtils, Optional} import org.apache.spark.api.java.JavaPairRDD._ import org.apache.spark.api.java.JavaSparkContext.fakeClassTag @@ -431,7 +430,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( } /** - * :: Experimental :: * Return a [[JavaMapWithStateDStream]] by applying a function to every key-value element of * `this` stream, while maintaining some state data for each unique key. The mapping function * and other specification (e.g. partitioners, timeouts, initial state data, etc.) 
of this @@ -458,7 +456,6 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @tparam StateType Class type of the state data * @tparam MappedType Class type of the mapped data */ - @Experimental def mapWithState[StateType, MappedType](spec: StateSpec[K, V, StateType, MappedType]): JavaMapWithStateDStream[K, V, StateType, MappedType] = { new JavaMapWithStateDStream(dstream.mapWithState(spec)( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala index 9512db7d7d757..3368382a55297 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala @@ -20,7 +20,6 @@ package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark._ -import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.{EmptyRDD, RDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ @@ -28,7 +27,6 @@ import org.apache.spark.streaming.dstream.InternalMapWithStateDStream._ import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord} /** - * :: Experimental :: * DStream representing the stream of data generated by `mapWithState` operation on a * [[org.apache.spark.streaming.dstream.PairDStreamFunctions pair DStream]]. * Additionally, it also gives access to the stream of state snapshots, that is, the state data of @@ -39,7 +37,6 @@ import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord} * @tparam StateType Class of the state data * @tparam MappedType Class of the mapped data */ -@Experimental sealed abstract class MapWithStateDStream[KeyType, ValueType, StateType, MappedType: ClassTag]( ssc: StreamingContext) extends DStream[MappedType](ssc) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index dcb51d72fa588..a5bed752bd663 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.spark.{HashPartitioner, Partitioner} -import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.streaming._ import org.apache.spark.streaming.StreamingContext.rddToFileName @@ -352,7 +351,6 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) } /** - * :: Experimental :: * Return a [[MapWithStateDStream]] by applying a function to every key-value element of * `this` stream, while maintaining some state data for each unique key. The mapping function * and other specification (e.g. partitioners, timeouts, initial state data, etc.) of this @@ -376,7 +374,6 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) * @tparam StateType Class type of the state data * @tparam MappedType Class type of the mapped data */ - @Experimental def mapWithState[StateType: ClassTag, MappedType: ClassTag]( spec: StateSpec[K, V, StateType, MappedType] ): MapWithStateDStream[K, V, StateType, MappedType] = {
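(Usage sketch for the mapWithState operation documented above: a running word count where the per-key state is an Int. The socket source, checkpoint path, and trackCount function are placeholders for illustration, not part of this patch.)

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, State, StateSpec, StreamingContext}

val conf = new SparkConf().setMaster("local[2]").setAppName("map-with-state-demo")
val ssc = new StreamingContext(conf, Seconds(1))
ssc.checkpoint("/tmp/map-with-state-demo")   // mapWithState requires a checkpoint directory

val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
val wordCounts = words.map((_, 1)).reduceByKey(_ + _)

// Running count per word, carried across batches as Int state.
def trackCount(word: String, one: Option[Int], state: State[Int]): (String, Int) = {
  val sum = one.getOrElse(0) + state.getOption.getOrElse(0)
  state.update(sum)
  (word, sum)
}

val mapped = wordCounts.mapWithState(StateSpec.function(trackCount _))
mapped.print()                    // one mapped record per input key per batch
mapped.stateSnapshots().print()   // the full (word, count) state after each batch

ssc.start()
ssc.awaitTermination()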