Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Spark-8308] [MLlib] add missing save load for python example #6760

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/mllib-collaborative-filtering.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>

// Build the recommendation model using ALS
val rank = 10
val numIterations = 20
val numIterations = 10
val model = ALS.train(ratings, rank, numIterations, 0.01)

// Evaluate the model on rating data
Expand Down Expand Up @@ -149,7 +149,7 @@ public class CollaborativeFiltering {

// Build the recommendation model using ALS
int rank = 10;
int numIterations = 20;
int numIterations = 10;
MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);

// Evaluate the model on rating data
Expand Down Expand Up @@ -210,7 +210,7 @@ ratings = data.map(lambda l: l.split(',')).map(lambda l: Rating(int(l[0]), int(l

# Build the recommendation model using Alternating Least Squares
rank = 10
numIterations = 20
numIterations = 10
model = ALS.train(ratings, rank, numIterations)

# Evaluate the model on training data
Expand Down
12 changes: 10 additions & 2 deletions docs/mllib-linear-methods.md
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ Note that the Python API does not yet support multiclass classification and mode
will in the future.

{% highlight python %}
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel
from pyspark.mllib.regression import LabeledPoint
from numpy import array

Expand All @@ -518,6 +518,10 @@ model = LogisticRegressionWithLBFGS.train(parsedData)
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))

# Save and load model
model.save(sc, "myModelPath")
sameModel = LogisticRegressionModel.load(sc, "myModelPath")
{% endhighlight %}
</div>
</div>
Expand Down Expand Up @@ -668,7 +672,7 @@ values. We compute the mean squared error at the end to evaluate
Note that the Python API supports model save/load, as shown at the end of the example below.

{% highlight python %}
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from numpy import array

# Load and parse the data
Expand All @@ -686,6 +690,10 @@ model = LinearRegressionWithSGD.train(parsedData)
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds.map(lambda (v, p): (v - p)**2).reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))

# Save and load model
model.save(sc, "myModelPath")
sameModel = LinearRegressionModel.load(sc, "myModelPath")
{% endhighlight %}
</div>
</div>
Expand Down
6 changes: 5 additions & 1 deletion docs/mllib-naive-bayes.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ used for evaluation and prediction.
Note that the Python API supports model save/load, as shown at the end of the example below.

{% highlight python %}
from pyspark.mllib.classification import NaiveBayes
from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.regression import LabeledPoint

Expand All @@ -140,6 +140,10 @@ model = NaiveBayes.train(training, 1.0)
# Make prediction and test accuracy.
predictionAndLabel = test.map(lambda p : (model.predict(p.features), p.label))
accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()

# Save and load model
model.save(sc, "myModelPath")
sameModel = NaiveBayesModel.load(sc, "myModelPath")
{% endhighlight %}

</div>
Expand Down