# Recommender
## Platform: Spark, colab.research.google.com

In [0]:
# install Spark
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q http://www-us.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz
!tar xf spark-2.3.1-bin-hadoop2.7.tgz
!pip install -q findspark

In [0]:
# init Spark
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.3.1-bin-hadoop2.7"
import findspark
findspark.init()
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[*]").getOrCreate()

In [0]:
import pandas as pd
import time

from pyspark.ml.recommendation import ALS

In [4]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [5]:
ratings = spark.read.csv("/content/gdrive/My Drive/Colab Notebooks/SparkAzureTutorial/data/ratings.csv", header=True, inferSchema=True)
movies = spark.read.csv("/content/gdrive/My Drive/Colab Notebooks/SparkAzureTutorial/data/movies.csv", header=True, inferSchema=True)
ratings.join(movies, "movieId").show(5, False)

+-------+------+------+----------+----------------------------------------------+--------------------------------+
|movieId|userId|rating|timestamp |title                                         |genres                          |
+-------+------+------+----------+----------------------------------------------+--------------------------------+
|31     |1     |2.5   |1260759144|Dangerous Minds (1995)                        |Drama                           |
|1029   |1     |3.0   |1260759179|Dumbo (1941)                                  |Animation|Children|Drama|Musical|
|1061   |1     |3.0   |1260759182|Sleepers (1996)                               |Thriller                        |
|1129   |1     |2.0   |1260759185|Escape from New York (1981)                   |Action|Adventure|Sci-Fi|Thriller|
|1172   |1     |4.0   |1260759205|Cinema Paradiso (Nuovo cinema Paradiso) (1989)|Drama                           |
+-------+------+------+----------+----------------------------------------------

In [6]:
data = ratings.select("userId", "movieId", "rating")
splits = data.randomSplit([0.7, 0.3])
train = splits[0].withColumnRenamed("rating", "label")
test = splits[1].withColumnRenamed("rating", "trueLabel")
train_rows = train.count()
test_rows = test.count()
print("Training Rows:", train_rows, " Testing Rows:", test_rows)

Training Rows: 69848  Testing Rows: 30156


In [7]:
als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="label")
model = als.fit(train)
prediction = model.transform(test)
prediction.join(movies, "movieId").select("userId", "title", "prediction", "trueLabel").show(100, truncate=False)

+------+--------------------------------+----------+---------+
|userId|title                           |prediction|trueLabel|
+------+--------------------------------+----------+---------+
|575   |Awfully Big Adventure, An (1995)|NaN       |4.0      |
|380   |Guilty as Sin (1993)            |3.1161027 |3.0      |
|242   |Guilty as Sin (1993)            |3.7684715 |4.0      |
|30    |Guilty as Sin (1993)            |2.9855494 |4.0      |
|548   |Hudsucker Proxy, The (1994)     |3.2737086 |4.0      |
|285   |Hudsucker Proxy, The (1994)     |4.027282  |5.0      |
|292   |Hudsucker Proxy, The (1994)     |4.140784  |3.5      |
|306   |Hudsucker Proxy, The (1994)     |3.8750286 |3.0      |
|491   |Hudsucker Proxy, The (1994)     |3.4373    |3.0      |
|92    |Hudsucker Proxy, The (1994)     |3.6796966 |4.0      |
|309   |Hudsucker Proxy, The (1994)     |4.5950522 |4.0      |
|659   |Hudsucker Proxy, The (1994)     |3.9481053 |4.0      |
|649   |Hudsucker Proxy, The (1994)     |4.873652  |3.0