From 6f7cd5f3b37b56b35826f81f1b04908ef3571cb9 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Sun, 6 Sep 2015 14:01:49 +0800 Subject: [PATCH 1/2] add python example for StopWordsRemover --- docs/ml-features.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/ml-features.md b/docs/ml-features.md index 90654d1e5a248..3548cd2cdfc45 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -512,6 +512,25 @@ DataFrame dataset = jsql.createDataFrame(rdd, schema); remover.transform(dataset).show(); {% endhighlight %} + +
+[`StopWordsRemover`](api/python/pyspark.ml.html#pyspark.ml.feature.StopWordsRemover) +takes an input column name, an output column name, a list of stop words, +and a boolean indicating if the matches should be case sensitive (false +by default). + +{% highlight python %} +from pyspark.ml.feature import StopWordsRemover + +sentenceData = sqlContext.createDataFrame([ + (0, ["I", "saw", "the", "red", "balloon"]), + (1, ["Mary", "had", "a", "little", "lamb"]) +], ["label", "raw"]) + +remover = StopWordsRemover(inputCol="raw", outputCol="filtered") +filtered = remover.transform(sentenceData).show() +{% endhighlight %} +
## $n$-gram From 28f179e1db33a68d71648e1c03000132a83511c6 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Wed, 9 Sep 2015 13:18:44 +0800 Subject: [PATCH 2/2] fix issue according to comment --- docs/ml-features.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index 3548cd2cdfc45..58b31a5a5cc47 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -528,7 +528,7 @@ sentenceData = sqlContext.createDataFrame([ ], ["label", "raw"]) remover = StopWordsRemover(inputCol="raw", outputCol="filtered") -filtered = remover.transform(sentenceData).show() +remover.transform(sentenceData).show(truncate=False) {% endhighlight %}