In [1]:
import os
os.environ['SPARK_HOME'] = "C:\Spark\spark-3.5.3-bin-hadoop3"

In [2]:
import os
import sys

os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

In [3]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

# set up Spark config
conf = SparkConf() \
    .setAppName("ALS") \
    .setMaster("spark://192.168.0.136:7077") \
    .set("spark.driver.memory", "8g") \
    .set("spark.executor.memory", "8g")

spark = SparkSession.builder.config(conf=conf).getOrCreate()

df = spark.read.csv('dataset.csv', header=True, inferSchema=True)

In [4]:
df

DataFrame[User_Id: int, Movie_Name: string, Rating: double, Genre: string, Year: double, Movie_Id: int]

In [5]:
from pyspark.sql.functions import col

df = df.withColumn("User_Id", col("User_Id").cast("integer"))
df = df.withColumn("Rating", col("Rating").cast("float"))

In [6]:
(training, test) = df.randomSplit([0.8, 0.2], seed=42)

In [7]:
training = training.na.drop(subset=["Movie_Id", "User_Id"])

In [8]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

als = ALS(
    maxIter=10,
    regParam=0.1,
    userCol="User_Id",
    itemCol="Movie_Id",
    ratingCol="Rating",
    coldStartStrategy="drop"
)

model = als.fit(training)

In [9]:
model.save("als_model_1")

In [10]:
evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="Rating",
    predictionCol="prediction"
)

predictions = model.transform(test)
rmse = evaluator.evaluate(predictions)

print(f"Root-mean-square error: {rmse}")

Root-mean-square error: 0.8182747915480578


In [11]:
from pyspark.sql.functions import col, lit

# assume 'user_id' is the ID of the user we want to recommend movies to
user_id = 69

# generate a DataFrame with all possible user-movie combinations for the given user
user_movies = (
    training.select('Movie_Id').distinct()
    .withColumn('User_Id', lit(user_id))
)

# predict ratings for these user-movie combinations
predictions = model.transform(user_movies)

# filter out the movies the user has already rated
rated_movies = training.filter(col('User_Id') == user_id).select('Movie_Id')
predictions = predictions.join(rated_movies, 'Movie_Id', 'left_anti')

top_5_recommendations = predictions.orderBy(col('prediction').desc()).limit(5)

# join with the original dataset to get movie details
top_5_details = top_5_recommendations.join(training.select('Movie_Id', 'Movie_Name', 'Year').distinct(), on='Movie_Id')

# show the top 5 recommended movies with their names and years
top_5_details.select('Movie_Name', 'Year', 'prediction').show()

+--------------------+------+----------+
|          Movie_Name|  Year|prediction|
+--------------------+------+----------+
|Year of the Hare,...|1977.0| 4.8107734|
|            Bob Funk|2009.0| 4.8844776|
|Doggiewoggiez! Po...|2012.0| 4.7515864|
|   Myra Breckinridge|1970.0|  5.275251|
|          Bad Ronald|1974.0|  5.313532|
+--------------------+------+----------+



In [12]:
from pyspark.ml.recommendation import ALSModel

# load the model from the specified directory
loaded_model = ALSModel.load("../experiments/als_model")

# example DataFrame for inference
new_data = spark.createDataFrame([
    (1, 10),  # (User_Id, Movie_ID)
    (1, 20),
    (2, 10)
], ["User_Id", "Movie_Id"])

# make predictions using the loaded model
predictions = loaded_model.transform(new_data)
predictions.show()

+-------+--------+----------+
|User_Id|Movie_Id|prediction|
+-------+--------+----------+
|      1|      10|  4.148014|
|      1|      20| 3.7615376|
|      2|      10| 4.9369745|
+-------+--------+----------+



In [13]:
all_movies = df.select("Movie_Id", "Movie_Name", "Year").distinct()

In [14]:
df.show()

+-------+--------------------+------+--------------------+------+--------+
|User_Id|          Movie_Name|Rating|               Genre|  Year|Movie_Id|
+-------+--------------------+------+--------------------+------+--------+
|      1|             Jumanji|   3.5|Adventure|Childre...|1995.0|       1|
|      1|City of Lost Chil...|   3.5|Adventure|Drama|F...|1995.0|       2|
|      1|Twelve Monkeys (a...|   3.5|Mystery|Sci-Fi|Th...|1995.0|       3|
|      1|Seven (a.k.a. Se7en)|   3.5|    Mystery|Thriller|1995.0|       4|
|      1| Usual Suspects, The|   3.5|Crime|Mystery|Thr...|1995.0|       5|
|      1|Rumble in the Bro...|   3.5|Action|Adventure|...|1995.0|       6|
|      1|             Rob Roy|   4.0|Action|Drama|Roma...|1995.0|       7|
|      1|              Clerks|   4.0|              Comedy|1994.0|       8|
|      1|Interview with th...|   4.0|        Drama|Horror|1994.0|       9|
|      1|Star Wars: Episod...|   4.0|Action|Adventure|...|1977.0|      10|
|      1|Léon: The Profes

In [15]:
all_movies.show()

+--------+--------------------+------+
|Movie_Id|          Movie_Name|  Year|
+--------+--------------------+------+
|      60|                Jaws|1975.0|
|     480|   Conspiracy Theory|1997.0|
|     581|  Autumn in New York|2000.0|
|     741|    Inspector Gadget|1999.0|
|    1209|     Lethal Weapon 3|1992.0|
|    1463|            Ref, The|1994.0|
|    1661|Home for the Holi...|1995.0|
|    2202|            Fog, The|1980.0|
|    2417|      Bodyguard, The|1992.0|
|    2434|Postcards From th...|1990.0|
|    2576|Love and Death on...|1997.0|
|    2583|Things to Do in D...|1995.0|
|    2625|     American Hustle|2013.0|
|    2774|Skin I Live In, T...|2011.0|
|    3071|Andromeda Strain,...|1971.0|
|    3109|    My Friend Flicka|1943.0|
|    3185|      My Man Godfrey|1957.0|
|    3913|              Capote|2005.0|
|    4157|               Buddy|1997.0|
|    4270|             Country|1984.0|
+--------+--------------------+------+
only showing top 20 rows

