From cde592a29c21f752cb977934ee56809fdbb5f8b8 Mon Sep 17 00:00:00 2001
From: noelsmith
Date: Sun, 6 Sep 2015 22:18:25 +0100
Subject: [PATCH 1/4] Added @since to mllib.feature

---
 python/pyspark/__init__.py      | 20 ++++++++++++
 python/pyspark/mllib/feature.py | 58 ++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 8475dfb1c6ad0..76ee3b1895e9f 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -67,6 +67,26 @@ def deco(f):
 # for back compatibility
 from pyspark.sql import SQLContext, HiveContext, SchemaRDD, Row
 
+
+def since(version):
+    """
+    A decorator that annotates a function to append the version of Spark in which the function was added.
+    """
+    import re
+    indent_p = re.compile(r'\n( +)')
+
+    def deco(f):
+        if f.__doc__ is None:
+            f.__doc__ = ".. versionadded:: {}".format(version)
+            return f
+        else:
+            indents = indent_p.findall(f.__doc__)
+            indent = ' ' * (min(len(m) for m in indents) if indents else 0)
+            f.__doc__ = "{}\n\n{}.. versionadded:: {}".format(f.__doc__.rstrip(), indent, version)
+            return f
+    return deco
+
+
 __all__ = [
     "SparkConf", "SparkContext", "SparkFiles", "RDD", "StorageLevel", "Broadcast",
     "Accumulator", "AccumulatorParam", "MarshalSerializer", "PickleSerializer",
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index f921e3ad1a314..7b077b058c3fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -30,7 +30,7 @@
 
 from py4j.protocol import Py4JJavaError
 
-from pyspark import SparkContext
+from pyspark import SparkContext, since
 from pyspark.rdd import RDD, ignore_unicode_prefix
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
 from pyspark.mllib.linalg import (
@@ -84,11 +84,14 @@ class Normalizer(VectorTransformer):
     >>> nor2 = Normalizer(float("inf"))
     >>> nor2.transform(v)
     DenseVector([0.0, 0.5, 1.0])
+
+    .. versionadded:: 1.2.0
     """
     def __init__(self, p=2.0):
         assert p >= 1.0, "p should be greater than 1.0"
         self.p = float(p)
 
+    @since('1.2.0')
     def transform(self, vector):
         """
         Applies unit length normalization on a vector.
@@ -133,7 +136,11 @@ class StandardScalerModel(JavaVectorTransformer):
     .. note:: Experimental
 
     Represents a StandardScaler model that can transform vectors.
+
+    .. versionadded:: 1.2.0
     """
+
+    @since('1.2.0')
     def transform(self, vector):
         """
         Applies standardization transformation on a vector.
@@ -149,6 +156,7 @@ def transform(self, vector):
         """
         return JavaVectorTransformer.transform(self, vector)
 
+    @since('1.4.0')
     def setWithMean(self, withMean):
         """
         Setter of the boolean which decides
@@ -157,6 +165,7 @@ def setWithMean(self, withMean):
         self.call("setWithMean", withMean)
         return self
 
+    @since('1.4.0')
     def setWithStd(self, withStd):
         """
         Setter of the boolean which decides
@@ -189,6 +198,8 @@ class StandardScaler(object):
     >>> for r in result.collect(): r
     DenseVector([-0.7071, 0.7071, -0.7071])
     DenseVector([0.7071, -0.7071, 0.7071])
+
+    .. versionadded:: 1.2.0
     """
     def __init__(self, withMean=False, withStd=True):
         if not (withMean or withStd):
@@ -196,6 +207,7 @@ def __init__(self, withMean=False, withStd=True):
         self.withMean = withMean
         self.withStd = withStd
 
+    @since('1.2.0')
     def fit(self, dataset):
         """
         Computes the mean and variance and stores as a model to be used
@@ -215,7 +227,11 @@ class ChiSqSelectorModel(JavaVectorTransformer):
     .. note:: Experimental
 
     Represents a Chi Squared selector model.
+
+    .. versionadded:: 1.4.0
     """
+
+    @since('1.4.0')
     def transform(self, vector):
         """
         Applies transformation on a vector.
@@ -245,10 +261,13 @@ class ChiSqSelector(object):
     SparseVector(1, {0: 6.0})
     >>> model.transform(DenseVector([8.0, 9.0, 5.0]))
     DenseVector([5.0])
+
+    .. versionadded:: 1.4.0
     """
     def __init__(self, numTopFeatures):
         self.numTopFeatures = int(numTopFeatures)
 
+    @since('1.4.0')
     def fit(self, data):
         """
         Returns a ChiSquared feature selector.
@@ -265,6 +284,8 @@ def fit(self, data):
 class PCAModel(JavaVectorTransformer):
     """
     Model fitted by [[PCA]] that can project vectors to a low-dimensional space using PCA.
+
+    .. versionadded:: 1.5.0
     """
 
 
@@ -281,6 +302,8 @@ class PCA(object):
     1.648...
     >>> pcArray[1]
     -4.013...
+
+    .. versionadded:: 1.5.0
     """
     def __init__(self, k):
         """
@@ -288,6 +311,7 @@ def __init__(self, k):
         """
         self.k = int(k)
 
+    @since('1.5.0')
     def fit(self, data):
         """
         Computes a [[PCAModel]] that contains the principal components of the input vectors.
@@ -312,14 +336,18 @@ class HashingTF(object):
     >>> doc = "a a b b c d".split(" ")
     >>> htf.transform(doc)
     SparseVector(100, {...})
+
+    .. versionadded:: 1.2.0
     """
     def __init__(self, numFeatures=1 << 20):
         self.numFeatures = numFeatures
 
+    @since('1.2.0')
     def indexOf(self, term):
         """ Returns the index of the input term. """
         return hash(term) % self.numFeatures
 
+    @since('1.2.0')
     def transform(self, document):
         """
         Transforms the input document (list of terms) to term frequency
@@ -339,7 +367,10 @@ def transform(self, document):
 class IDFModel(JavaVectorTransformer):
     """
     Represents an IDF model that can transform term frequency vectors.
+
+    .. versionadded:: 1.2.0
     """
+    @since('1.2.0')
     def transform(self, x):
         """
         Transforms term frequency (TF) vectors to TF-IDF vectors.
@@ -358,6 +389,7 @@ def transform(self, x):
         """
         return JavaVectorTransformer.transform(self, x)
 
+    @since('1.4.0')
     def idf(self):
         """
         Returns the current IDF vector.
@@ -401,10 +433,13 @@ class IDF(object):
     DenseVector([0.0, 0.0, 1.3863, 0.863])
     >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0)))
     SparseVector(4, {1: 0.0, 3: 0.5754})
+
+    .. versionadded:: 1.2.0
     """
     def __init__(self, minDocFreq=0):
         self.minDocFreq = minDocFreq
 
+    @since('1.2.0')
     def fit(self, dataset):
         """
         Computes the inverse document frequency.
@@ -420,7 +455,10 @@ def fit(self, dataset):
 class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader):
     """
     class for Word2Vec model
+
+    .. versionadded:: 1.2.0
     """
+    @since('1.2.0')
     def transform(self, word):
         """
         Transforms a word to its vector representation
@@ -435,6 +473,7 @@ def transform(self, word):
         except Py4JJavaError:
             raise ValueError("%s not found" % word)
 
+    @since('1.2.0')
     def findSynonyms(self, word, num):
         """
         Find synonyms of a word
@@ -450,6 +489,7 @@ def findSynonyms(self, word, num):
         words, similarity = self.call("findSynonyms", word, num)
         return zip(words, similarity)
 
+    @since('1.4.0')
     def getVectors(self):
         """
         Returns a map of words to their vector representations.
@@ -457,7 +497,11 @@ def getVectors(self):
         return self.call("getVectors")
 
     @classmethod
+    @since('1.5.0')
     def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
         jmodel = sc._jvm.org.apache.spark.mllib.feature \
             .Word2VecModel.load(sc._jsc.sc(), path)
         return Word2VecModel(jmodel)
@@ -507,6 +551,8 @@ class Word2Vec(object):
     ...     rmtree(path)
     ... except OSError:
     ...     pass
+
+    .. versionadded:: 1.2.0
     """
     def __init__(self):
         """
@@ -519,6 +565,7 @@ def __init__(self):
         self.seed = random.randint(0, sys.maxsize)
         self.minCount = 5
 
+    @since('1.2.0')
     def setVectorSize(self, vectorSize):
         """
         Sets vector size (default: 100).
@@ -526,6 +573,7 @@ def setVectorSize(self, vectorSize):
         self.vectorSize = vectorSize
         return self
 
+    @since('1.2.0')
     def setLearningRate(self, learningRate):
         """
         Sets initial learning rate (default: 0.025).
@@ -533,6 +581,7 @@ def setLearningRate(self, learningRate):
         self.learningRate = learningRate
         return self
 
+    @since('1.2.0')
     def setNumPartitions(self, numPartitions):
         """
         Sets number of partitions (default: 1). Use a small number for
@@ -541,6 +590,7 @@ def setNumPartitions(self, numPartitions):
         self.numPartitions = numPartitions
         return self
 
+    @since('1.2.0')
     def setNumIterations(self, numIterations):
         """
         Sets number of iterations (default: 1), which should be smaller
@@ -549,6 +599,7 @@ def setNumIterations(self, numIterations):
         self.numIterations = numIterations
         return self
 
+    @since('1.2.0')
     def setSeed(self, seed):
         """
         Sets random seed.
@@ -556,6 +607,7 @@ def setSeed(self, seed):
         self.seed = seed
         return self
 
+    @since('1.4.0')
     def setMinCount(self, minCount):
         """
         Sets minCount, the minimum number of times a token must appear
@@ -564,6 +616,7 @@ def setMinCount(self, minCount):
         self.minCount = minCount
         return self
 
+    @since('1.2.0')
     def fit(self, data):
         """
         Computes the vector representation of each word in vocabulary.
@@ -596,10 +649,13 @@ class ElementwiseProduct(VectorTransformer):
     >>> rdd = sc.parallelize([a, b])
     >>> eprod.transform(rdd).collect()
     [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])]
+
+    .. versionadded:: 1.5.0
     """
     def __init__(self, scalingVector):
         self.scalingVector = _convert_to_vector(scalingVector)
 
+    @since('1.5.0')
     def transform(self, vector):
         """
         Computes the Hadamard product of the vector.

From e6a4c471e8ec90a5da66d0c5e21176fac6df485c Mon Sep 17 00:00:00 2001
From: noelsmith
Date: Wed, 9 Sep 2015 20:51:15 +0100
Subject: [PATCH 2/4] Removed duplicate since decorator

---
 python/pyspark/__init__.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 76ee3b1895e9f..8475dfb1c6ad0 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -67,26 +67,6 @@ def deco(f):
 # for back compatibility
 from pyspark.sql import SQLContext, HiveContext, SchemaRDD, Row
 
-
-def since(version):
-    """
-    A decorator that annotates a function to append the version of Spark in which the function was added.
-    """
-    import re
-    indent_p = re.compile(r'\n( +)')
-
-    def deco(f):
-        if f.__doc__ is None:
-            f.__doc__ = ".. versionadded:: {}".format(version)
-            return f
-        else:
-            indents = indent_p.findall(f.__doc__)
-            indent = ' ' * (min(len(m) for m in indents) if indents else 0)
-            f.__doc__ = "{}\n\n{}.. versionadded:: {}".format(f.__doc__.rstrip(), indent, version)
-            return f
-    return deco
-
-
 __all__ = [
     "SparkConf", "SparkContext", "SparkFiles", "RDD", "StorageLevel", "Broadcast",
     "Accumulator", "AccumulatorParam", "MarshalSerializer", "PickleSerializer",

From bcf77a3a59b489c743ebcac2639170278f36162a Mon Sep 17 00:00:00 2001
From: noelsmith
Date: Thu, 10 Sep 2015 21:28:21 +0100
Subject: [PATCH 3/4] Removed maintenance number from versions

---
 python/pyspark/mllib/feature.py | 76 ++++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 7b077b058c3fd..8c82cf6b90e61 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -85,13 +85,13 @@ class Normalizer(VectorTransformer):
     >>> nor2.transform(v)
     DenseVector([0.0, 0.5, 1.0])
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
     def __init__(self, p=2.0):
         assert p >= 1.0, "p should be greater than 1.0"
         self.p = float(p)
 
-    @since('1.2.0')
+    @since(1.2)
     def transform(self, vector):
         """
         Applies unit length normalization on a vector.
@@ -137,10 +137,10 @@ class StandardScalerModel(JavaVectorTransformer):
 
     Represents a StandardScaler model that can transform vectors.
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
 
-    @since('1.2.0')
+    @since(1.2)
     def transform(self, vector):
         """
         Applies standardization transformation on a vector.
@@ -156,7 +156,7 @@ def transform(self, vector):
         """
         return JavaVectorTransformer.transform(self, vector)
 
-    @since('1.4.0')
+    @since(1.4)
     def setWithMean(self, withMean):
         """
         Setter of the boolean which decides
@@ -165,7 +165,7 @@ def setWithMean(self, withMean):
         self.call("setWithMean", withMean)
         return self
 
-    @since('1.4.0')
+    @since(1.4)
     def setWithStd(self, withStd):
         """
         Setter of the boolean which decides
@@ -199,7 +199,7 @@ class StandardScaler(object):
     DenseVector([-0.7071, 0.7071, -0.7071])
     DenseVector([0.7071, -0.7071, 0.7071])
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
     def __init__(self, withMean=False, withStd=True):
         if not (withMean or withStd):
@@ -207,7 +207,7 @@ def __init__(self, withMean=False, withStd=True):
         self.withMean = withMean
         self.withStd = withStd
 
-    @since('1.2.0')
+    @since(1.2)
     def fit(self, dataset):
         """
         Computes the mean and variance and stores as a model to be used
@@ -228,10 +228,10 @@ class ChiSqSelectorModel(JavaVectorTransformer):
 
     Represents a Chi Squared selector model.
 
-    .. versionadded:: 1.4.0
+    .. versionadded:: 1.4
     """
 
-    @since('1.4.0')
+    @since(1.4)
     def transform(self, vector):
         """
         Applies transformation on a vector.
@@ -262,12 +262,12 @@ class ChiSqSelector(object):
     >>> model.transform(DenseVector([8.0, 9.0, 5.0]))
     DenseVector([5.0])
 
-    .. versionadded:: 1.4.0
+    .. versionadded:: 1.4
     """
     def __init__(self, numTopFeatures):
         self.numTopFeatures = int(numTopFeatures)
 
-    @since('1.4.0')
+    @since(1.4)
     def fit(self, data):
         """
         Returns a ChiSquared feature selector.
@@ -285,7 +285,7 @@ class PCAModel(JavaVectorTransformer):
     """
     Model fitted by [[PCA]] that can project vectors to a low-dimensional space using PCA.
 
-    .. versionadded:: 1.5.0
+    .. versionadded:: 1.5
     """
 
 
@@ -303,7 +303,7 @@ class PCA(object):
     >>> pcArray[1]
     -4.013...
 
-    .. versionadded:: 1.5.0
+    .. versionadded:: 1.5
     """
     def __init__(self, k):
         """
@@ -311,7 +311,7 @@ def __init__(self, k):
         """
         self.k = int(k)
 
-    @since('1.5.0')
+    @since(1.5)
     def fit(self, data):
         """
         Computes a [[PCAModel]] that contains the principal components of the input vectors.
@@ -337,17 +337,17 @@ class HashingTF(object):
     >>> htf.transform(doc)
     SparseVector(100, {...})
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
     def __init__(self, numFeatures=1 << 20):
         self.numFeatures = numFeatures
 
-    @since('1.2.0')
+    @since(1.2)
     def indexOf(self, term):
         """ Returns the index of the input term. """
         return hash(term) % self.numFeatures
 
-    @since('1.2.0')
+    @since(1.2)
     def transform(self, document):
         """
         Transforms the input document (list of terms) to term frequency
@@ -368,9 +368,9 @@ class IDFModel(JavaVectorTransformer):
     """
     Represents an IDF model that can transform term frequency vectors.
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
-    @since('1.2.0')
+    @since(1.2)
     def transform(self, x):
         """
         Transforms term frequency (TF) vectors to TF-IDF vectors.
@@ -389,7 +389,7 @@ def transform(self, x):
         """
         return JavaVectorTransformer.transform(self, x)
 
-    @since('1.4.0')
+    @since(1.4)
     def idf(self):
         """
         Returns the current IDF vector.
@@ -434,12 +434,12 @@ class IDF(object):
     >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0)))
     SparseVector(4, {1: 0.0, 3: 0.5754})
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
     def __init__(self, minDocFreq=0):
         self.minDocFreq = minDocFreq
 
-    @since('1.2.0')
+    @since(1.2)
     def fit(self, dataset):
         """
         Computes the inverse document frequency.
@@ -456,9 +456,9 @@ class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader):
     """
     class for Word2Vec model
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
-    @since('1.2.0')
+    @since(1.2)
    def transform(self, word):
         """
         Transforms a word to its vector representation
@@ -473,7 +473,7 @@ def transform(self, word):
         except Py4JJavaError:
             raise ValueError("%s not found" % word)
 
-    @since('1.2.0')
+    @since(1.2)
     def findSynonyms(self, word, num):
         """
         Find synonyms of a word
@@ -489,7 +489,7 @@ def findSynonyms(self, word, num):
         words, similarity = self.call("findSynonyms", word, num)
         return zip(words, similarity)
 
-    @since('1.4.0')
+    @since(1.4)
     def getVectors(self):
         """
         Returns a map of words to their vector representations.
@@ -497,7 +497,7 @@ def getVectors(self):
         return self.call("getVectors")
 
     @classmethod
-    @since('1.5.0')
+    @since(1.5)
     def load(cls, sc, path):
         """
         Load a model from the given path.
@@ -552,7 +552,7 @@ class Word2Vec(object):
     ... except OSError:
     ...     pass
 
-    .. versionadded:: 1.2.0
+    .. versionadded:: 1.2
     """
     def __init__(self):
         """
@@ -565,7 +565,7 @@ def __init__(self):
         self.seed = random.randint(0, sys.maxsize)
         self.minCount = 5
 
-    @since('1.2.0')
+    @since(1.2)
     def setVectorSize(self, vectorSize):
         """
         Sets vector size (default: 100).
@@ -573,7 +573,7 @@ def setVectorSize(self, vectorSize):
         self.vectorSize = vectorSize
         return self
 
-    @since('1.2.0')
+    @since(1.2)
     def setLearningRate(self, learningRate):
         """
         Sets initial learning rate (default: 0.025).
@@ -581,7 +581,7 @@ def setLearningRate(self, learningRate):
         self.learningRate = learningRate
         return self
 
-    @since('1.2.0')
+    @since(1.2)
     def setNumPartitions(self, numPartitions):
         """
         Sets number of partitions (default: 1). Use a small number for
@@ -590,7 +590,7 @@ def setNumPartitions(self, numPartitions):
         self.numPartitions = numPartitions
         return self
 
-    @since('1.2.0')
+    @since(1.2)
     def setNumIterations(self, numIterations):
         """
         Sets number of iterations (default: 1), which should be smaller
@@ -599,7 +599,7 @@ def setNumIterations(self, numIterations):
         self.numIterations = numIterations
         return self
 
-    @since('1.2.0')
+    @since(1.2)
     def setSeed(self, seed):
         """
         Sets random seed.
@@ -607,7 +607,7 @@ def setSeed(self, seed):
         self.seed = seed
         return self
 
-    @since('1.4.0')
+    @since(1.4)
     def setMinCount(self, minCount):
         """
         Sets minCount, the minimum number of times a token must appear
@@ -616,7 +616,7 @@ def setMinCount(self, minCount):
         self.minCount = minCount
         return self
 
-    @since('1.2.0')
+    @since(1.2)
     def fit(self, data):
         """
         Computes the vector representation of each word in vocabulary.
@@ -650,12 +650,12 @@ class ElementwiseProduct(VectorTransformer):
     >>> eprod.transform(rdd).collect()
     [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])]
 
-    .. versionadded:: 1.5.0
+    .. versionadded:: 1.5
     """
     def __init__(self, scalingVector):
         self.scalingVector = _convert_to_vector(scalingVector)
 
-    @since('1.5.0')
+    @since(1.5)
     def transform(self, vector):
         """
         Computes the Hadamard product of the vector.

From 7853215a80a06fd1d1deb45e8f3340fa9bb54129 Mon Sep 17 00:00:00 2001
From: noelsmith
Date: Mon, 14 Sep 2015 22:08:41 +0100
Subject: [PATCH 4/4] Reinstated 3-part version numbers

---
 python/pyspark/mllib/feature.py | 76 ++++++++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 8c82cf6b90e61..7b077b058c3fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -85,13 +85,13 @@ class Normalizer(VectorTransformer):
     >>> nor2.transform(v)
     DenseVector([0.0, 0.5, 1.0])
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
     def __init__(self, p=2.0):
         assert p >= 1.0, "p should be greater than 1.0"
         self.p = float(p)
 
-    @since(1.2)
+    @since('1.2.0')
     def transform(self, vector):
         """
         Applies unit length normalization on a vector.
@@ -137,10 +137,10 @@ class StandardScalerModel(JavaVectorTransformer):
 
     Represents a StandardScaler model that can transform vectors.
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
 
-    @since(1.2)
+    @since('1.2.0')
     def transform(self, vector):
         """
         Applies standardization transformation on a vector.
@@ -156,7 +156,7 @@ def transform(self, vector):
         """
         return JavaVectorTransformer.transform(self, vector)
 
-    @since(1.4)
+    @since('1.4.0')
     def setWithMean(self, withMean):
         """
         Setter of the boolean which decides
@@ -165,7 +165,7 @@ def setWithMean(self, withMean):
         self.call("setWithMean", withMean)
         return self
 
-    @since(1.4)
+    @since('1.4.0')
     def setWithStd(self, withStd):
         """
         Setter of the boolean which decides
@@ -199,7 +199,7 @@ class StandardScaler(object):
     DenseVector([-0.7071, 0.7071, -0.7071])
     DenseVector([0.7071, -0.7071, 0.7071])
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
     def __init__(self, withMean=False, withStd=True):
         if not (withMean or withStd):
@@ -207,7 +207,7 @@ def __init__(self, withMean=False, withStd=True):
         self.withMean = withMean
         self.withStd = withStd
 
-    @since(1.2)
+    @since('1.2.0')
     def fit(self, dataset):
         """
         Computes the mean and variance and stores as a model to be used
@@ -228,10 +228,10 @@ class ChiSqSelectorModel(JavaVectorTransformer):
 
     Represents a Chi Squared selector model.
 
-    .. versionadded:: 1.4
+    .. versionadded:: 1.4.0
     """
 
-    @since(1.4)
+    @since('1.4.0')
     def transform(self, vector):
         """
         Applies transformation on a vector.
@@ -262,12 +262,12 @@ class ChiSqSelector(object):
     >>> model.transform(DenseVector([8.0, 9.0, 5.0]))
     DenseVector([5.0])
 
-    .. versionadded:: 1.4
+    .. versionadded:: 1.4.0
     """
     def __init__(self, numTopFeatures):
         self.numTopFeatures = int(numTopFeatures)
 
-    @since(1.4)
+    @since('1.4.0')
     def fit(self, data):
         """
         Returns a ChiSquared feature selector.
@@ -285,7 +285,7 @@ class PCAModel(JavaVectorTransformer):
     """
     Model fitted by [[PCA]] that can project vectors to a low-dimensional space using PCA.
 
-    .. versionadded:: 1.5
+    .. versionadded:: 1.5.0
     """
 
 
@@ -303,7 +303,7 @@ class PCA(object):
     >>> pcArray[1]
     -4.013...
 
-    .. versionadded:: 1.5
+    .. versionadded:: 1.5.0
     """
     def __init__(self, k):
         """
@@ -311,7 +311,7 @@ def __init__(self, k):
         """
         self.k = int(k)
 
-    @since(1.5)
+    @since('1.5.0')
     def fit(self, data):
         """
         Computes a [[PCAModel]] that contains the principal components of the input vectors.
@@ -337,17 +337,17 @@ class HashingTF(object):
     >>> htf.transform(doc)
     SparseVector(100, {...})
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
     def __init__(self, numFeatures=1 << 20):
         self.numFeatures = numFeatures
 
-    @since(1.2)
+    @since('1.2.0')
     def indexOf(self, term):
         """ Returns the index of the input term. """
         return hash(term) % self.numFeatures
 
-    @since(1.2)
+    @since('1.2.0')
     def transform(self, document):
         """
         Transforms the input document (list of terms) to term frequency
@@ -368,9 +368,9 @@ class IDFModel(JavaVectorTransformer):
     """
     Represents an IDF model that can transform term frequency vectors.
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
-    @since(1.2)
+    @since('1.2.0')
     def transform(self, x):
         """
         Transforms term frequency (TF) vectors to TF-IDF vectors.
@@ -389,7 +389,7 @@ def transform(self, x):
         """
         return JavaVectorTransformer.transform(self, x)
 
-    @since(1.4)
+    @since('1.4.0')
     def idf(self):
         """
         Returns the current IDF vector.
@@ -434,12 +434,12 @@ class IDF(object):
     >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0)))
     SparseVector(4, {1: 0.0, 3: 0.5754})
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
     def __init__(self, minDocFreq=0):
         self.minDocFreq = minDocFreq
 
-    @since(1.2)
+    @since('1.2.0')
     def fit(self, dataset):
         """
         Computes the inverse document frequency.
@@ -456,9 +456,9 @@ class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader):
     """
     class for Word2Vec model
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
-    @since(1.2)
+    @since('1.2.0')
     def transform(self, word):
         """
         Transforms a word to its vector representation
@@ -473,7 +473,7 @@ def transform(self, word):
         except Py4JJavaError:
             raise ValueError("%s not found" % word)
 
-    @since(1.2)
+    @since('1.2.0')
     def findSynonyms(self, word, num):
         """
         Find synonyms of a word
@@ -489,7 +489,7 @@ def findSynonyms(self, word, num):
         words, similarity = self.call("findSynonyms", word, num)
         return zip(words, similarity)
 
-    @since(1.4)
+    @since('1.4.0')
     def getVectors(self):
         """
         Returns a map of words to their vector representations.
@@ -497,7 +497,7 @@ def getVectors(self):
         return self.call("getVectors")
 
     @classmethod
-    @since(1.5)
+    @since('1.5.0')
     def load(cls, sc, path):
         """
         Load a model from the given path.
@@ -552,7 +552,7 @@ class Word2Vec(object):
     ... except OSError:
     ...     pass
 
-    .. versionadded:: 1.2
+    .. versionadded:: 1.2.0
     """
     def __init__(self):
         """
@@ -565,7 +565,7 @@ def __init__(self):
         self.seed = random.randint(0, sys.maxsize)
         self.minCount = 5
 
-    @since(1.2)
+    @since('1.2.0')
     def setVectorSize(self, vectorSize):
         """
         Sets vector size (default: 100).
@@ -573,7 +573,7 @@ def setVectorSize(self, vectorSize):
         self.vectorSize = vectorSize
         return self
 
-    @since(1.2)
+    @since('1.2.0')
     def setLearningRate(self, learningRate):
         """
         Sets initial learning rate (default: 0.025).
@@ -581,7 +581,7 @@ def setLearningRate(self, learningRate):
         self.learningRate = learningRate
         return self
 
-    @since(1.2)
+    @since('1.2.0')
     def setNumPartitions(self, numPartitions):
         """
         Sets number of partitions (default: 1). Use a small number for
@@ -590,7 +590,7 @@ def setNumPartitions(self, numPartitions):
         self.numPartitions = numPartitions
         return self
 
-    @since(1.2)
+    @since('1.2.0')
     def setNumIterations(self, numIterations):
         """
         Sets number of iterations (default: 1), which should be smaller
@@ -599,7 +599,7 @@ def setNumIterations(self, numIterations):
         self.numIterations = numIterations
         return self
 
-    @since(1.2)
+    @since('1.2.0')
     def setSeed(self, seed):
         """
         Sets random seed.
@@ -607,7 +607,7 @@ def setSeed(self, seed):
         self.seed = seed
         return self
 
-    @since(1.4)
+    @since('1.4.0')
     def setMinCount(self, minCount):
         """
         Sets minCount, the minimum number of times a token must appear
@@ -616,7 +616,7 @@ def setMinCount(self, minCount):
         self.minCount = minCount
         return self
 
-    @since(1.2)
+    @since('1.2.0')
     def fit(self, data):
         """
         Computes the vector representation of each word in vocabulary.
@@ -650,12 +650,12 @@ class ElementwiseProduct(VectorTransformer):
     >>> eprod.transform(rdd).collect()
     [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])]
 
-    .. versionadded:: 1.5
+    .. versionadded:: 1.5.0
    """
     def __init__(self, scalingVector):
         self.scalingVector = _convert_to_vector(scalingVector)
 
-    @since(1.5)
+    @since('1.5.0')
     def transform(self, vector):
         """
         Computes the Hadamard product of the vector.
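
For reference, the effect the series relies on can be seen in isolation. The sketch below reuses the since() decorator body from PATCH 1 together with a hypothetical standalone transform() stub (illustrative only, not part of the patches). It also shows why PATCH 4 reinstates quoted three-part versions: since() simply formats the value into the docstring, so a float argument cannot carry a maintenance digit, and 1.10 would even collapse to "1.1".

# Minimal, standalone sketch of the since() decorator added in PATCH 1.
# The transform() stub is hypothetical and only here to show the effect.
import re


def since(version):
    indent_p = re.compile(r'\n( +)')

    def deco(f):
        if f.__doc__ is None:
            f.__doc__ = ".. versionadded:: {}".format(version)
            return f
        else:
            # Match the docstring's own indentation so Sphinx renders the
            # directive as part of the same block.
            indents = indent_p.findall(f.__doc__)
            indent = ' ' * (min(len(m) for m in indents) if indents else 0)
            f.__doc__ = "{}\n\n{}.. versionadded:: {}".format(
                f.__doc__.rstrip(), indent, version)
            return f
    return deco


@since('1.2.0')
def transform(vector):
    """
    Applies unit length normalization on a vector.
    """


print(transform.__doc__)
# Original docstring, a blank line, then the directive at matching indent:
#
#     Applies unit length normalization on a vector.
#
#     .. versionadded:: 1.2.0

# Why strings rather than floats (PATCH 3 vs. PATCH 4): the version is only
# ever formatted into text, and a float loses the maintenance digit.
print(".. versionadded:: {}".format(1.10))     # .. versionadded:: 1.1
print(".. versionadded:: {}".format('1.1.0'))  # .. versionadded:: 1.1.0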