In [2]:
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
from pyspark.sql import SparkSession
from pyspark.sql import functions as f
from pyspark.ml.recommendation import ALS, ALSModel


# Start (or reuse) a local Spark session on all available cores, requesting
# 4g of driver memory.
spark = (
    SparkSession.builder
    .master('local[*]')
    .config("spark.driver.memory", "4g")
    .appName('movieRecommendationPySpark')
    .getOrCreate()
)

# Load the ALS model previously saved under ../modelrecommendation.
model = ALSModel.load("../modelrecommendation")
# Read the ratings CSV with an explicit schema, keep only the
# (userId, movieId, rating) columns and cache the result for reuse.
ratings = (
    spark.read
    .schema("userId INT, movieId INT, rating DOUBLE, timestamp INT")
    .option("sep", ",")
    .option("header", True)
    .option("quote", '"')
    .csv("../data/ml-25m/test.csv")
    .select("userId", "movieId", "rating")
    .cache()
)
ratings.show(10)
# Read the movie catalogue (id, title, pipe-separated genres) with an
# explicit schema, then preview five rows without truncating long values.
movies = (
    spark.read
    .schema("movieId INT, title STRING, genres STRING")
    .option("sep", ",")
    .option("header", True)
    .option("quote", '"')
    .csv("../data/ml-25m/movies.csv")
)
movies.show(5, truncate=False)



+------+-------+------+
|userId|movieId|rating|
+------+-------+------+
|     1|    296|   5.0|
|     1|    306|   3.5|
|     1|    307|   5.0|
|     1|    665|   5.0|
|     2|    899|   3.5|
|     3|   1088|   4.0|
+------+-------+------+

+-------+----------------------------------+-------------------------------------------+
|movieId|title                             |genres                                     |
+-------+----------------------------------+-------------------------------------------+
|1      |Toy Story (1995)                  |Adventure|Animation|Children|Comedy|Fantasy|
|2      |Jumanji (1995)                    |Adventure|Children|Fantasy                 |
|3      |Grumpier Old Men (1995)           |Comedy|Romance                             |
|4      |Waiting to Exhale (1995)          |Comedy|Drama|Romance                       |
|5      |Father of the Bride Part II (1995)|Comedy                                     |
+-------+----------------------------------+---

In [3]:
# Movie ids that user 1 has already rated, collected to the driver as a list.
ratedMovies = [
    row[0]
    for row in ratings.where(f.col('userId') == 1).select('movieId').collect()
]

# Every other distinct movie id present in `ratings`, paired with user 1 so
# the model can score each (user, movie) combination.
movies_to_be_rated = (
    ratings
    .where(~f.col('movieId').isin(ratedMovies))
    .select('movieId')
    .distinct()
    .withColumn('userId', f.lit(1))
)
movies_to_be_rated.show()

# Score the candidates, discard rows whose prediction is NaN, and show the
# five highest predictions.
user_movie_predictions = model.transform(movies_to_be_rated)
(
    user_movie_predictions
    .where(~f.isnan('prediction'))
    .orderBy(f.col('prediction').desc())
    .show(5)
)

                                                                                

+-------+------+
|movieId|userId|
+-------+------+
|   1088|     1|
|    899|     1|
+-------+------+



                                                                                

+-------+------+----------+
|movieId|userId|prediction|
+-------+------+----------+
|    899|     1| 3.7065353|
|   1088|     1|  2.545693|
+-------+------+----------+



In [4]:
# Top-5 recommendations for every user, cached for reuse.
# NOTE(review): only one user is inspected below; ALSModel.recommendForUserSubset
# would avoid scoring every user. `rec_all_users` is kept in case later cells
# rely on it.
rec_all_users = model.recommendForAllUsers(5).cache()

# NOTE: despite the "91" in the variable names, the filter selects userId 1.
# The names are left unchanged so that any later cell using them keeps working.
recommendations_for_user91 = rec_all_users.filter(f.col('userId')==1)

# One row per recommended movie id (column `movie`).
movieid = (
    recommendations_for_user91
    .withColumn('movie', f.explode('recommendations'))
    .withColumn('movie', f.col('movie.movieId'))
    .select('movie')
)

# Parse the release year from the trailing "(YYYY)" of the title only.
# Splitting on the first "(" breaks for titles that contain other parentheses
# (e.g. "(500) Days of Summer (2009)" or a parenthesised alternate title).
# Raw strings avoid the invalid "\(" escape sequence.
trailing_year = r"\((\d{4})\)\s*$"
year_text = f.regexp_extract(f.col("title"), trailing_year, 1)

movie_for_user91 = (
    movies.join(movieid, movies.movieId == movieid.movie, 'inner')
    # regexp_extract returns "" when nothing matches; `when` without an
    # `otherwise` turns that into null, so titles without a year keep a null year.
    .withColumn("year", f.when(f.length(year_text) > 0, year_text))
    # Strip only the trailing "(YYYY)" and the whitespace around it, so the
    # title has no trailing space and keeps any other parenthesised text.
    .withColumn("titlestring", f.regexp_replace(f.col("title"), r"\s*\(\d{4}\)\s*$", ""))
    .select('movieId', 'titlestring', 'year')
)
movie_for_user91.show(5,False)



+-------+--------------------------------------------+----+
|movieId|titlestring                                 |year|
+-------+--------------------------------------------+----+
|183947 |NOFX Backstage Passport 2                   |null|
|200930 |C'est quoi la vie?                          |1999|
|192089 |National Theatre Live: One Man, Two Guvnors |2011|
|203086 |Truth and Justice                           |2019|
|200872 |School of Babel                             |2014|
+-------+--------------------------------------------+----+



                                                                                