In [1]:
import pyspark
import os
import numpy
from pyspark.sql import SparkSession
import pyspark.sql.functions as sql_fun
from pyspark.ml.recommendation import ALSModel

In [2]:
# Load the dataset
DATAPATH = r'/home/daniel/Desktop/programming/pythondatascience/datascience/dataengineering/datasets/movieLens25m/'
movies = os.path.join(DATAPATH, 'movies.csv')
ratings = os.path.join(DATAPATH, 'ratings.csv')

spark = SparkSession.builder.appName('recommend').getOrCreate()
movies = spark.read.csv(movies, inferSchema = True, header = True)
ratings = spark.read.csv(ratings, inferSchema = True, header = True)
sc = spark.sparkContext

# We will only work with 5 million records
ratings = ratings.limit(5000000)

# Load the movies and ratings dataset in memory to improve performance
ratings=ratings.repartition(10).cache()
movies=movies.repartition(10).cache()

22/06/12 17:07:55 WARN Utils: Your hostname, daniel-X555LJ resolves to a loopback address: 127.0.1.1; using 10.0.3.1 instead (on interface lxcbr0)
22/06/12 17:07:55 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/06/12 17:07:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
                                                                                

In [26]:
# Load the recommender model
model = ALSModel.load("modelV1.model")

def recommendMovies(model, num_recommends, user):
    rel_col = ['movieId', 'userId']
    print(f"Recommendations for UserId: {user}")
    # Create a Spark DataFrame with the specified user and all the movies listed in the ratings DataFrame
    dataSet = ratings.select("movieId").distinct().withColumn("userId", sql_fun.lit(user))

    # Create a Spark DataFrame with the movies that have already been rated by this user
    moviesAlreadyRated = ratings.filter(ratings.userId==user).select(*rel_col)

    # Apply the recommender system to the dataset without the already rated movies to predict the ratings
    predictions = model.transform(dataSet.subtract(moviesAlreadyRated)).dropna()\
    .orderBy("prediction", ascending=False).limit(num_recommends).select("movieId", "prediction")

    # Join with the movies DataFrame to get the movie titles and genres
    recommendations = predictions.join(movies, predictions.movieId==movies.movieId)\
    .select(predictions.movieId, movies.title, movies.genres, predictions.prediction)

    return recommendations.show(truncate=False)

In [27]:
recommendMovies(model, num_recommends=10, user=147)

Recommendations for UserId: 147


                                                                                

+-------+---------------------------------+---------------------------------+----------+
|movieId|title                            |genres                           |prediction|
+-------+---------------------------------+---------------------------------+----------+
|161662 |Pufnstuf (1970)                  |Children|Comedy|Fantasy          |4.9076037 |
|152711 |Who Killed Chea Vichea? (2010)   |Documentary                      |4.9389124 |
|173651 |Rammstein: Live aus Berlin (1999)|(no genres listed)               |5.2215548 |
|192261 |Don't Laugh at My Romance (2008) |Comedy|Drama                     |5.097601  |
|139128 |Genius Party Beyond (2008)       |Animation                        |4.921399  |
|127019 |Line of Sight (2012)             |Documentary                      |5.310632  |
|176875 |Daniel, the Wizard (2004)        |Comedy|Crime|Drama|Fantasy|Horror|4.914569  |
|166472 |Feyzo, the Polite One (1978)     |Comedy                           |5.26434   |
|127256 |The Old Gun 

In [28]:
recommendMovies(model, num_recommends=10, user=22386)

Recommendations for UserId: 22386


                                                                                

+-------+---------------------------------------+---------------------------------+----------+
|movieId|title                                  |genres                           |prediction|
+-------+---------------------------------------+---------------------------------+----------+
|113045 |Boogie (Boogie, el aceitoso) (2009)    |Action|Animation|Crime           |4.5893526 |
|127252 |The Veil of Twilight (2014)            |Crime|Fantasy|Mystery            |4.5574656 |
|126086 |Norm MacDonald: Me Doing Standup (2011)|Comedy                           |4.462449  |
|117352 |A Kind of America 2 (2008)             |Comedy                           |4.480094  |
|179985 |Romeo Is Bleeding (2017)               |Documentary                      |4.8404074 |
|132492 |Sinatra: All or Nothing at All (2015)  |Documentary                      |4.9653397 |
|194332 |Muchas gracias de nada (1980)          |Comedy                           |4.4846635 |
|88099  |Streets of Laredo (1995)               |D

In [29]:
recommendMovies(model, num_recommends=10, user=26130)

Recommendations for UserId: 26130


                                                                                

+-------+-----------------------------------------------------------------+---------------------------------+----------+
|movieId|title                                                            |genres                           |prediction|
+-------+-----------------------------------------------------------------+---------------------------------+----------+
|208112 |Rudolph the Red-Nosed Reindeer & the Island of Misfit Toys (2001)|Animation|Children               |5.1672997 |
|113045 |Boogie (Boogie, el aceitoso) (2009)                              |Action|Animation|Crime           |5.4515486 |
|127252 |The Veil of Twilight (2014)                                      |Crime|Fantasy|Mystery            |5.5586348 |
|126086 |Norm MacDonald: Me Doing Standup (2011)                          |Comedy                           |5.1668897 |
|117352 |A Kind of America 2 (2008)                                       |Comedy                           |5.181432  |
|127019 |Line of Sight (2012)   