In [1]:
from pyspark.sql import SparkSession

In [2]:
# Obtain a Spark session for this notebook: getOrCreate() returns the already
# active session when one exists, otherwise it builds one named 'recommender'.
spark = (
    SparkSession.builder
    .appName('recommender')
    .getOrCreate()
)

In [3]:
# Read the ratings CSV into a DataFrame. The first line is treated as the
# header and column types are inferred from the data.
df = spark.read.csv(
    'FileStore/tables/movielens_ratings.csv',
    header=True,
    inferSchema=True,
)

In [4]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [5]:
# Print the column names and inferred types of the loaded ratings DataFrame.
df.printSchema()

In [6]:
# Show the first row of the DataFrame as the cell output.
df.head()

In [7]:
# Render the first five rows as a text table.
df.show(n=5)

In [8]:
# Summary statistics for the columns of the ratings data, shown as a text table.
summary_stats = df.describe()
summary_stats.show()

In [9]:
# Hold out roughly 20% of the ratings for evaluation (the weights are target
# proportions, not exact row counts). The fixed seed makes the split repeatable
# across kernel restarts, so the reported metrics are comparable between runs.
df_train, df_test = df.randomSplit([0.8, 0.2], seed=42)

In [10]:
# Alternating Least Squares collaborative-filtering estimator.
#
# coldStartStrategy='drop': rows whose user or movie was not present when the
# model was fitted are removed from the transform output. With the default
# ('nan') those rows receive NaN predictions, and a single NaN makes the RMSE
# computed later in this notebook NaN as well.
#
# seed: pins the random initialisation of the latent factors so that refitting
# on the same data gives the same model.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol='userId',
    itemCol='movieId',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)

In [11]:
# Fit the ALS estimator on the training split.
model = als.fit(df_train)

In [12]:
# Score the held-out split with the fitted model.
predictions = model.transform(df_test)

In [13]:
# Inspect a sample of the scored test rows.
predictions.show()

In [14]:
# Evaluator that compares the 'prediction' column against the true 'rating'
# column using root-mean-squared error.
evaluator = RegressionEvaluator(
    labelCol='rating',
    predictionCol='prediction',
    metricName='rmse',
)

In [15]:
# Compute the evaluator's metric (RMSE) over the test-set predictions.
rmse = evaluator.evaluate(predictions)

In [16]:
# Display the RMSE value as the cell output.
rmse

In [17]:
# Pick out the held-out rows belonging to user 11 and keep only the two id
# columns, i.e. the (movie, user) pairs to score for that user.
is_user_11 = df_test['userId'] == 11
single_user = df_test.where(is_user_11).select('movieId', 'userId')

In [18]:
# Show the (movieId, userId) pairs selected for the single user.
single_user.show()

In [19]:
# Score the selected (movieId, userId) pairs with the fitted model.
recommendations = model.transform(single_user)

In [20]:
# List the user's scored movies from highest to lowest predicted rating.
by_prediction_desc = recommendations['prediction'].desc()
recommendations.orderBy(by_prediction_desc).show()