In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
from pyspark.sql import Row
from pyspark.ml.evaluation import RegressionEvaluator


# start spark session
spark = SparkSession.builder.appName('rec').getOrCreate()

# read csv file
rdd_orig   = spark.read.option("header", True).csv('movies.csv').rdd
ratingsRDD = rdd_orig.map(lambda p: Row(userId=int(p[2]), movieId=int(p[0]),
                                     rating=int(p[1])))

# create training and test data
train_split      = 0.8
test_split       = 0.2
ratings          = spark.createDataFrame(ratingsRDD)
(training, test) = ratings.randomSplit([train_split, test_split])

# instantiating model 
als   = ALS(userCol="userId", itemCol="movieId", ratingCol="rating", coldStartStrategy="drop")

# # hyperparameter search
parameters=ParamGridBuilder() \
                .addGrid(als.maxIter, [5, 10]) \
                .addGrid(als.rank, [5]) \
                .addGrid(als.regParam, [0.1]) \
                .build()
                

# # evaluation metric
evaluator   = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")

# build train, validation split
trainvs = TrainValidationSplit(
                estimator=als,
                estimatorParamMaps=parameters,
                evaluator=evaluator
                )

# fit model on to training data and evaluate using validation data
model = trainvs.fit(training)
model_path = 'rec-model-v01/'
model.write().overwrite().save(model_path)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/02/13 16:13:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


                                                                                

23/02/13 16:14:14 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
23/02/13 16:14:14 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS
23/02/13 16:14:15 WARN InstanceBuilder$NativeLAPACK: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK


                                                                                

In [2]:
print('The validation metrics are: ', model.validationMetrics)

The validation metrics are:  [1.4729006701862974, 1.2818269734669225]


In [9]:
print(model.bestModel.rank)
print(model.bestModel.params)

5


AttributeError: 'list' object has no attribute 'maxIter'