In [1]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.4.0.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.0-py2.py3-none-any.whl size=311317130 sha256=3eda3959eebacf6d44abd747bda0fd4f89d381ee7761ee7afa5b0d80550a3f32
  Stored in directory: /root/.cache/pip/wheels/7b/1b/4b/3363a1d04368e7ff0d408e57ff57966fcdf00583774e761327
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.0


In [2]:
import pyspark
from pyspark.sql import SparkSession, SQLContext
# Create the notebook-wide Spark session, or reuse one that is already running.
spark = (
    SparkSession.builder
    .appName('Movie Lens Recommendation')
    .getOrCreate()
)

In [3]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
from pyspark.sql import Row

import math

# Fixed seed so the train/test split and the ALS factor initialisation are
# reproducible across "Restart & Run All".
SEED = 42

# Each line of ratings.dat is split on "::" into userId, movieId, rating, timestamp,
# all parsed as integers.
lines = spark.read.text("/content/ratings.dat").rdd
parts = lines.map(lambda row: row.value.split("::"))
ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]),
                                     rating=int(p[2]), timestamp=int(p[3])))
ratings = spark.createDataFrame(ratingsRDD)
(training, test) = ratings.randomSplit([0.8, 0.2], seed=SEED)

# Build the recommendation model using ALS on the training data.
# coldStartStrategy="drop" removes test rows whose user or movie was not seen
# during training; without it those rows get NaN predictions and any aggregate
# evaluation metric becomes NaN.
als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating",
          coldStartStrategy="drop", seed=SEED)
model = als.fit(training)

# Score the held-out ratings and preview the first rows.
predictions = model.transform(test)
predictions.show()

# Evaluate the model by computing the RMSE on the test data.
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
                                predictionCol="prediction")
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))

# Per-row squared errors. The NaN filter is kept as a defensive no-op now that
# cold-start rows are dropped by the model itself.
result = predictions.rdd.map(lambda row: row['prediction'] - row['rating']).map(lambda x: x*x).filter(lambda x: not math.isnan(x))
# NOTE: despite its name, `mse` holds the SUM of squared errors (not the mean);
# the name and value are kept unchanged for any later cell that relies on them.
# Divide by result.count() to obtain the mean squared error.
mse = result.reduce(lambda x,y: x+y)

+------+-------+------+---------+----------+
|userId|movieId|rating|timestamp|prediction|
+------+-------+------+---------+----------+
|     1|    260|     4|978300760| 3.9904704|
|     1|    531|     4|978302149|  3.693758|
|     1|   1029|     5|978302205| 3.8508663|
|     1|   1197|     3|978302268| 3.9899428|
|     1|   1207|     4|978300719| 5.2481565|
|     1|   1246|     4|978302091| 4.4040008|
|     1|   1270|     5|978300055|  4.077993|
|     1|   1907|     4|978824330| 3.8529975|
|     1|   2018|     4|978301777| 4.2558813|
|     1|   2355|     5|978824291| 4.0124564|
|     1|   2687|     3|978824268| 3.6171255|
|     1|   2692|     4|978301570| 4.1419935|
|     1|   2804|     5|978300719|  4.344903|
|     1|   3186|     4|978300019|  5.009645|
|     1|   3408|     4|978300275|  4.161335|
|     2|    265|     4|978299026| 3.3188462|
|     2|    292|     3|978300123| 3.3927314|
|     2|    498|     3|978299418| 1.6427406|
|     2|    589|     4|978299773| 4.0933514|
|     2|  