From 830d812ee924a8d08a801898bd6752eb3add1467 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 7 Mar 2017 10:53:51 -0800 Subject: [PATCH 1/3] made keyword_only decorator thread-safe by saving _input_kwargs to self --- python/pyspark/__init__.py | 10 ++++++---- python/pyspark/ml/pipeline.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index ec1687415a7f6..87d2082aed52b 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -89,13 +89,15 @@ def keyword_only(func): """ A decorator that forces keyword arguments in the wrapped method and saves actual input keyword arguments in `_input_kwargs`. + + .. note:: Should only be used to wrap a method where first arg is `self` """ @wraps(func) - def wrapper(*args, **kwargs): - if len(args) > 1: + def wrapper(self, *args, **kwargs): + if len(args) > 0: raise TypeError("Method %s forces keyword arguments." % func.__name__) - wrapper._input_kwargs = kwargs - return func(*args, **kwargs) + self._input_kwargs = kwargs + return func(self, **kwargs) return wrapper diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index a48f4bb2ad1ba..a86362c3d8e69 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -60,7 +60,7 @@ def __init__(self, stages=None): if stages is None: stages = [] super(Pipeline, self).__init__() - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @since("1.3.0") @@ -90,7 +90,7 @@ def setParams(self, stages=None): """ if stages is None: stages = [] - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _fit(self, dataset): From 2e5a9e141b711a94f329a93b8f62de4559f678a4 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 7 Mar 2017 11:32:45 -0800 Subject: [PATCH 2/3] updated _input_kwargs usage in pyspark-ml --- python/pyspark/ml/classification.py | 28 +++---- python/pyspark/ml/clustering.py | 16 ++-- python/pyspark/ml/evaluation.py | 12 +-- python/pyspark/ml/feature.py | 112 ++++++++++++++-------------- python/pyspark/ml/recommendation.py | 4 +- python/pyspark/ml/regression.py | 28 +++---- python/pyspark/ml/tests.py | 8 +- python/pyspark/ml/tuning.py | 8 +- 8 files changed, 108 insertions(+), 108 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 3c4af90acac85..bfeda7c2c6cb9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -116,7 +116,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.LogisticRegression", self.uid) self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) self._checkThresholdConsistency() @@ -134,7 +134,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre Sets params for logistic regression. If the threshold and thresholds Params are both set, they must be equivalent. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) self._checkThresholdConsistency() return self @@ -569,7 +569,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -587,7 +587,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre seed=None) Sets params for the DecisionTreeClassifier. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -697,7 +697,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", numTrees=20, featureSubsetStrategy="auto") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -715,7 +715,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre impurity="gini", numTrees=20, featureSubsetStrategy="auto") Sets params for linear classification. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -836,7 +836,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -852,7 +852,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre lossType="logistic", maxIter=20, stepSize=0.1, seed=None) Sets params for Gradient Boosted Tree Classification. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -978,7 +978,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.NaiveBayes", self.uid) self._setDefault(smoothing=1.0, modelType="multinomial") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -992,7 +992,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre modelType="multinomial", thresholds=None) Sets params for Naive Bayes. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1135,7 +1135,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid) self._setDefault(maxIter=100, tol=1E-4, blockSize=128, stepSize=0.03, solver="l-bfgs") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1149,7 +1149,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre solver="l-bfgs", initialWeights=None) Sets params for MultilayerPerceptronClassifier. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1321,7 +1321,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred classifier=None) """ super(OneVsRest, self).__init__() - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @keyword_only @@ -1331,7 +1331,7 @@ def setParams(self, featuresCol=None, labelCol=None, predictionCol=None, classif setParams(self, featuresCol=None, labelCol=None, predictionCol=None, classifier=None): Sets params for OneVsRest. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _fit(self, dataset): diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 4dab83362a0a4..90afd056cf3b9 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -147,7 +147,7 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.GaussianMixture", self.uid) self._setDefault(k=2, tol=0.01, maxIter=100) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) def _create_model(self, java_model): @@ -163,7 +163,7 @@ def setParams(self, featuresCol="features", predictionCol="prediction", k=2, Sets params for GaussianMixture. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -262,7 +262,7 @@ def __init__(self, featuresCol="features", predictionCol="prediction", k=2, super(KMeans, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid) self._setDefault(k=2, initMode="k-means||", initSteps=5, tol=1e-4, maxIter=20) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) def _create_model(self, java_model): @@ -278,7 +278,7 @@ def setParams(self, featuresCol="features", predictionCol="prediction", k=2, Sets params for KMeans. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -413,7 +413,7 @@ def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=2 self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.BisectingKMeans", self.uid) self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -425,7 +425,7 @@ def setParams(self, featuresCol="features", predictionCol="prediction", maxIter= seed=None, k=4, minDivisibleClusterSize=1.0) Sets params for BisectingKMeans. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -735,7 +735,7 @@ def __init__(self, featuresCol="features", maxIter=20, seed=None, checkpointInte k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51, subsamplingRate=0.05, optimizeDocConcentration=True, topicDistributionCol="topicDistribution", keepLastCheckpoint=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) def _create_model(self, java_model): @@ -760,7 +760,7 @@ def setParams(self, featuresCol="features", maxIter=20, seed=None, checkpointInt Sets params for LDA. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 1fe8772da772a..db95576c5c36e 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -141,7 +141,7 @@ def __init__(self, rawPredictionCol="rawPrediction", labelCol="label", "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid) self._setDefault(rawPredictionCol="rawPrediction", labelCol="label", metricName="areaUnderROC") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @since("1.4.0") @@ -167,7 +167,7 @@ def setParams(self, rawPredictionCol="rawPrediction", labelCol="label", metricName="areaUnderROC") Sets params for binary classification evaluator. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -213,7 +213,7 @@ def __init__(self, predictionCol="prediction", labelCol="label", "org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid) self._setDefault(predictionCol="prediction", labelCol="label", metricName="rmse") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @since("1.4.0") @@ -239,7 +239,7 @@ def setParams(self, predictionCol="prediction", labelCol="label", metricName="rmse") Sets params for regression evaluator. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -280,7 +280,7 @@ def __init__(self, predictionCol="prediction", labelCol="label", "org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator", self.uid) self._setDefault(predictionCol="prediction", labelCol="label", metricName="f1") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @since("1.5.0") @@ -306,7 +306,7 @@ def setParams(self, predictionCol="prediction", labelCol="label", metricName="f1") Sets params for multiclass classification evaluator. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) if __name__ == "__main__": diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 2881380152c8d..80c26b18199c8 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -92,7 +92,7 @@ def __init__(self, threshold=0.0, inputCol=None, outputCol=None): super(Binarizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid) self._setDefault(threshold=0.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -102,7 +102,7 @@ def setParams(self, threshold=0.0, inputCol=None, outputCol=None): setParams(self, threshold=0.0, inputCol=None, outputCol=None) Sets params for this Binarizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -165,7 +165,7 @@ def __init__(self, splits=None, inputCol=None, outputCol=None): """ super(Bucketizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -175,7 +175,7 @@ def setParams(self, splits=None, inputCol=None, outputCol=None): setParams(self, splits=None, inputCol=None, outputCol=None) Sets params for this Bucketizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -265,7 +265,7 @@ def __init__(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, binary=False, inputC self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.CountVectorizer", self.uid) self._setDefault(minTF=1.0, minDF=1.0, vocabSize=1 << 18, binary=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -277,7 +277,7 @@ def setParams(self, minTF=1.0, minDF=1.0, vocabSize=1 << 18, binary=False, input outputCol=None) Set the params for the CountVectorizer """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -397,7 +397,7 @@ def __init__(self, inverse=False, inputCol=None, outputCol=None): super(DCT, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid) self._setDefault(inverse=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -407,7 +407,7 @@ def setParams(self, inverse=False, inputCol=None, outputCol=None): setParams(self, inverse=False, inputCol=None, outputCol=None) Sets params for this DCT. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -461,7 +461,7 @@ def __init__(self, scalingVec=None, inputCol=None, outputCol=None): super(ElementwiseProduct, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ElementwiseProduct", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -471,7 +471,7 @@ def setParams(self, scalingVec=None, inputCol=None, outputCol=None): setParams(self, scalingVec=None, inputCol=None, outputCol=None) Sets params for this ElementwiseProduct. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -531,7 +531,7 @@ def __init__(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=N super(HashingTF, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid) self._setDefault(numFeatures=1 << 18, binary=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -541,7 +541,7 @@ def setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol= setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None) Sets params for this HashingTF. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -604,7 +604,7 @@ def __init__(self, minDocFreq=0, inputCol=None, outputCol=None): super(IDF, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid) self._setDefault(minDocFreq=0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -614,7 +614,7 @@ def setParams(self, minDocFreq=0, inputCol=None, outputCol=None): setParams(self, minDocFreq=0, inputCol=None, outputCol=None) Sets params for this IDF. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -696,7 +696,7 @@ def __init__(self, inputCol=None, outputCol=None): super(MaxAbsScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MaxAbsScaler", self.uid) self._setDefault() - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -706,7 +706,7 @@ def setParams(self, inputCol=None, outputCol=None): setParams(self, inputCol=None, outputCol=None) Sets params for this MaxAbsScaler. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -792,7 +792,7 @@ def __init__(self, min=0.0, max=1.0, inputCol=None, outputCol=None): super(MinMaxScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinMaxScaler", self.uid) self._setDefault(min=0.0, max=1.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -802,7 +802,7 @@ def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None): setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None) Sets params for this MinMaxScaler. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -910,7 +910,7 @@ def __init__(self, n=2, inputCol=None, outputCol=None): super(NGram, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.NGram", self.uid) self._setDefault(n=2) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -920,7 +920,7 @@ def setParams(self, n=2, inputCol=None, outputCol=None): setParams(self, n=2, inputCol=None, outputCol=None) Sets params for this NGram. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -974,7 +974,7 @@ def __init__(self, p=2.0, inputCol=None, outputCol=None): super(Normalizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid) self._setDefault(p=2.0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -984,7 +984,7 @@ def setParams(self, p=2.0, inputCol=None, outputCol=None): setParams(self, p=2.0, inputCol=None, outputCol=None) Sets params for this Normalizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1054,7 +1054,7 @@ def __init__(self, dropLast=True, inputCol=None, outputCol=None): super(OneHotEncoder, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid) self._setDefault(dropLast=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1064,7 +1064,7 @@ def setParams(self, dropLast=True, inputCol=None, outputCol=None): setParams(self, dropLast=True, inputCol=None, outputCol=None) Sets params for this OneHotEncoder. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1120,7 +1120,7 @@ def __init__(self, degree=2, inputCol=None, outputCol=None): self._java_obj = self._new_java_obj( "org.apache.spark.ml.feature.PolynomialExpansion", self.uid) self._setDefault(degree=2) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1130,7 +1130,7 @@ def setParams(self, degree=2, inputCol=None, outputCol=None): setParams(self, degree=2, inputCol=None, outputCol=None) Sets params for this PolynomialExpansion. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1203,7 +1203,7 @@ def __init__(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0. self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer", self.uid) self._setDefault(numBuckets=2, relativeError=0.001) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1213,7 +1213,7 @@ def setParams(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0 setParams(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001) Set the params for the QuantileDiscretizer """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") @@ -1311,7 +1311,7 @@ def __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, super(RegexTokenizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid) self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+", toLowercase=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1323,7 +1323,7 @@ def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None, toLowercase=True) Sets params for this RegexTokenizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1414,7 +1414,7 @@ def __init__(self, statement=None): """ super(SQLTransformer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1424,7 +1424,7 @@ def setParams(self, statement=None): setParams(self, statement=None) Sets params for this SQLTransformer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -1493,7 +1493,7 @@ def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None): super(StandardScaler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid) self._setDefault(withMean=False, withStd=True) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1503,7 +1503,7 @@ def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) Sets params for this StandardScaler. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -1609,7 +1609,7 @@ def __init__(self, inputCol=None, outputCol=None, handleInvalid="error"): super(StringIndexer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid) self._setDefault(handleInvalid="error") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1619,7 +1619,7 @@ def setParams(self, inputCol=None, outputCol=None, handleInvalid="error"): setParams(self, inputCol=None, outputCol=None, handleInvalid="error") Sets params for this StringIndexer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1667,7 +1667,7 @@ def __init__(self, inputCol=None, outputCol=None, labels=None): super(IndexToString, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1677,7 +1677,7 @@ def setParams(self, inputCol=None, outputCol=None, labels=None): setParams(self, inputCol=None, outputCol=None, labels=None) Sets params for this IndexToString. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -1730,7 +1730,7 @@ def __init__(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive= self.uid) self._setDefault(stopWords=StopWordsRemover.loadDefaultStopWords("english"), caseSensitive=False) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1740,7 +1740,7 @@ def setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=false) Sets params for this StopWordRemover. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -1823,7 +1823,7 @@ def __init__(self, inputCol=None, outputCol=None): """ super(Tokenizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1833,7 +1833,7 @@ def setParams(self, inputCol=None, outputCol=None): setParams(self, inputCol=None, outputCol=None) Sets params for this Tokenizer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -1867,7 +1867,7 @@ def __init__(self, inputCols=None, outputCol=None): """ super(VectorAssembler, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1877,7 +1877,7 @@ def setParams(self, inputCols=None, outputCol=None): setParams(self, inputCols=None, outputCol=None) Sets params for this VectorAssembler. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -1965,7 +1965,7 @@ def __init__(self, maxCategories=20, inputCol=None, outputCol=None): super(VectorIndexer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid) self._setDefault(maxCategories=20) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1975,7 +1975,7 @@ def setParams(self, maxCategories=20, inputCol=None, outputCol=None): setParams(self, maxCategories=20, inputCol=None, outputCol=None) Sets params for this VectorIndexer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -2080,7 +2080,7 @@ def __init__(self, inputCol=None, outputCol=None, indices=None, names=None): super(VectorSlicer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSlicer", self.uid) self._setDefault(indices=[], names=[]) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2090,7 +2090,7 @@ def setParams(self, inputCol=None, outputCol=None, indices=None, names=None): setParams(self, inputCol=None, outputCol=None, indices=None, names=None): Sets params for this VectorSlicer. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.6.0") @@ -2203,7 +2203,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, windowSize=5, maxSentenceLength=1000) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2215,7 +2215,7 @@ def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000) Sets params for this Word2Vec. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -2363,7 +2363,7 @@ def __init__(self, k=None, inputCol=None, outputCol=None): """ super(PCA, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.PCA", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2373,7 +2373,7 @@ def setParams(self, k=None, inputCol=None, outputCol=None): setParams(self, k=None, inputCol=None, outputCol=None) Set params for this PCA. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -2496,7 +2496,7 @@ def __init__(self, formula=None, featuresCol="features", labelCol="label"): """ super(RFormula, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RFormula", self.uid) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2506,7 +2506,7 @@ def setParams(self, formula=None, featuresCol="features", labelCol="label"): setParams(self, formula=None, featuresCol="features", labelCol="label") Sets params for RFormula. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.5.0") @@ -2595,7 +2595,7 @@ def __init__(self, numTopFeatures=50, featuresCol="features", outputCol=None, la super(ChiSqSelector, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ChiSqSelector", self.uid) self._setDefault(numTopFeatures=50) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -2607,7 +2607,7 @@ def setParams(self, numTopFeatures=50, featuresCol="features", outputCol=None, labelCol="labels") Sets params for this ChiSqSelector. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index e28d38bd19f80..ee9916f472712 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -146,7 +146,7 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB ratingCol="rating", nonnegative=False, checkpointInterval=10, intermediateStorageLevel="MEMORY_AND_DISK", finalStorageLevel="MEMORY_AND_DISK") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -164,7 +164,7 @@ def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItem finalStorageLevel="MEMORY_AND_DISK") Sets params for ALS. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index d88dc75353598..f2e70f1127c5e 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -105,7 +105,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.LinearRegression", self.uid) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -119,7 +119,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre standardization=True, solver="auto", weightCol=None) Sets params for linear regression. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -457,7 +457,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.IsotonicRegression", self.uid) self._setDefault(isotonic=True, featureIndex=0) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -468,7 +468,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre weightCol=None, isotonic=True, featureIndex=0): Set the params for IsotonicRegression. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -695,7 +695,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -711,7 +711,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre impurity="variance", seed=None, varianceCol=None) Sets params for the DecisionTreeRegressor. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -884,7 +884,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", subsamplingRate=1.0, numTrees=20, featureSubsetStrategy="auto") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -902,7 +902,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre featureSubsetStrategy="auto") Sets params for linear regression. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1008,7 +1008,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, impurity="variance") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1026,7 +1026,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre impurity="variance") Sets params for Gradient Boosted Tree Regression. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1156,7 +1156,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred self._setDefault(censorCol="censor", quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], maxIter=100, tol=1E-6) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1171,7 +1171,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \ quantilesCol=None): """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): @@ -1349,7 +1349,7 @@ def __init__(self, labelCol="label", featuresCol="features", predictionCol="pred self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.GeneralizedLinearRegression", self.uid) self._setDefault(family="gaussian", maxIter=25, tol=1e-6, regParam=0.0, solver="irls") - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -1363,7 +1363,7 @@ def setParams(self, labelCol="label", featuresCol="features", predictionCol="pre regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None) Sets params for generalized linear regression. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) def _create_model(self, java_model): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index ae95f177b1f5d..3c346b9d5cb9e 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -239,7 +239,7 @@ class TestParams(HasMaxIter, HasInputCol, HasSeed): def __init__(self, seed=None): super(TestParams, self).__init__() self._setDefault(maxIter=10) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -248,7 +248,7 @@ def setParams(self, seed=None): setParams(self, seed=None) Sets params for this test. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @@ -260,7 +260,7 @@ class OtherTestParams(HasMaxIter, HasInputCol, HasSeed): def __init__(self, seed=None): super(OtherTestParams, self).__init__() self._setDefault(maxIter=10) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self.setParams(**kwargs) @keyword_only @@ -269,7 +269,7 @@ def setParams(self, seed=None): setParams(self, seed=None) Sets params for this test. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index e17d13d4d2bd2..f8be48cb3ab70 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -180,7 +180,7 @@ def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numF """ super(CrossValidator, self).__init__() self._setDefault(numFolds=3) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @keyword_only @@ -192,7 +192,7 @@ def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, num seed=None): Sets params for cross validator. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("1.4.0") @@ -337,7 +337,7 @@ def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trai """ super(TrainValidationSplit, self).__init__() self._setDefault(trainRatio=0.75) - kwargs = self.__init__._input_kwargs + kwargs = self._input_kwargs self._set(**kwargs) @since("2.0.0") @@ -349,7 +349,7 @@ def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, tra seed=None): Sets params for the train validation split. """ - kwargs = self.setParams._input_kwargs + kwargs = self._input_kwargs return self._set(**kwargs) @since("2.0.0") From 2f3838caf088bae1264107e5db24f928109ab7dd Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 7 Mar 2017 10:55:22 -0800 Subject: [PATCH 3/3] added keyword_only tests, including a regression test --- python/pyspark/tests.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 53de08b60082a..0b00683e9bc82 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -58,6 +58,7 @@ from StringIO import StringIO +from pyspark import keyword_only from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.rdd import RDD @@ -2020,6 +2021,44 @@ def test_memory_conf(self): sc.stop() +class KeywordOnlyTests(unittest.TestCase): + class Wrapped(object): + @keyword_only + def set(self, x=None, y=None): + if "x" in self._input_kwargs: + self._x = self._input_kwargs["x"] + if "y" in self._input_kwargs: + self._y = self._input_kwargs["y"] + return x, y + + def test_keywords(self): + w = self.Wrapped() + x, y = w.set(y=1) + self.assertEqual(y, 1) + self.assertEqual(y, w._y) + self.assertIsNone(x) + self.assertFalse(hasattr(w, "_x")) + + def test_non_keywords(self): + w = self.Wrapped() + self.assertRaises(TypeError, lambda: w.set(0, y=1)) + + def test_kwarg_ownership(self): + # test _input_kwargs is owned by each class instance and not a shared static variable + class Setter(object): + @keyword_only + def set(self, x=None, other=None, other_x=None): + if "other" in self._input_kwargs: + self._input_kwargs["other"].set(x=self._input_kwargs["other_x"]) + self._x = self._input_kwargs["x"] + + a = Setter() + b = Setter() + a.set(x=1, other=b, other_x=2) + self.assertEqual(a._x, 1) + self.assertEqual(b._x, 2) + + @unittest.skipIf(not _have_scipy, "SciPy not installed") class SciPyTests(PySparkTestCase):