# Recommendation System with ALS

In [3]:
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

In [4]:
from pyspark.sql import SparkSession
from pyspark.mllib.recommendation import ALS, Rating

# Obtain the SparkSession used by every later cell; getOrCreate() hands back
# an already-running session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("MovieLensRecommendation")
    .getOrCreate()
)


In [5]:
# Load the MovieLens 25M ratings file. The first line is treated as a header,
# so every field is exposed as a named column.
data = spark.read.option("header", "true").csv("ratings.csv")

# Each element of data.rdd is a Row keyed by the CSV header, not a raw text
# line, so there is no `.value` to split: read the fields by column name and
# cast them explicitly to the types Rating expects.
ratings = data.rdd.map(
    lambda row: Rating(int(row["userId"]), int(row["movieId"]), float(row["rating"]))
)


In [6]:
# Keep only the three columns ALS needs and convert each row to an MLlib Rating.
# The RDD is cached because it is traversed repeatedly: by the iterative ALS
# training below and again whenever the ratings are reused afterwards.
ratings = (
    data.select("userId", "movieId", "rating")
    .rdd
    .map(lambda row: Rating(int(row.userId), int(row.movieId), float(row.rating)))
    .cache()
)

# Build the recommendation model using ALS
rank = 10           # number of latent factors in the factorisation
numIterations = 10  # number of ALS iterations to run
seed = 42           # fixed so the random initialisation is the same on every run
model = ALS.train(ratings, rank, numIterations, seed=seed)

In [7]:
# Evaluate the model on training data: predict a score for every
# (user, product) pair that already has a known rating, then compare.
testdata = ratings.map(lambda known: (known.user, known.product))
predictions = model.predictAll(testdata).map(
    lambda pred: ((pred.user, pred.product), pred.rating)
)

# Pair each known rating with its prediction via the shared (user, product) key.
actual_by_pair = ratings.map(
    lambda known: ((known.user, known.product), known.rating)
)
ratesAndPreds = actual_by_pair.join(predictions)

# Mean of the squared (actual - predicted) differences over all joined pairs.
MSE = ratesAndPreds.values().map(lambda pair: (pair[0] - pair[1]) ** 2).mean()
print("Mean Squared Error = " + str(MSE))


Mean Squared Error = 0.47958724590974827


In [8]:
# Persist the trained model, then read it back from the same location.
# NOTE(review): save() presumably refuses to overwrite an existing directory,
# which would make this cell fail on a second run — confirm and clean up if so.
model_path = "target/tmp/myCollaborativeFilter"
sc = spark.sparkContext
model.save(sc, model_path)
sameModel = MatrixFactorizationModel.load(sc, model_path)

## Build the recommendation model using ALS with implicit feedback


In [9]:
# Train an ALS model that treats the ratings as implicit-feedback signals.
# This rebinds `model`: from here on `model` is the implicit-feedback model,
# while a copy of the explicit-feedback model loaded from disk remains
# available as `sameModel`.
# The seed is fixed so the random initialisation is the same on every run.
model = ALS.trainImplicit(ratings, rank, numIterations, alpha=0.01, seed=42)

In [14]:
user_id = 123
top_n = 10  # single knob for how many recommendations are requested and shown

# Ask the model for this user's top_n highest-scoring products. Using top_n
# here (rather than a separate literal) keeps the request and the display in
# step when the value is changed.
all_recommendations = model.recommendProducts(user_id, top_n)

# Order by score, highest first, so the display below does not depend on the
# order in which the model returned the items.
sorted_recommendations = sorted(all_recommendations, key=lambda x: x.rating, reverse=True)

# Display the top N recommendations
for recommendation in sorted_recommendations[:top_n]:
    print(f"Item ID: {recommendation.product}, Score: {recommendation.rating}")

Item ID: 780, Score: 0.5159844888222065
Item ID: 589, Score: 0.48177784795337475
Item ID: 733, Score: 0.46783176842841345
Item ID: 648, Score: 0.4618629639947861
Item ID: 457, Score: 0.4482309971176398
Item ID: 380, Score: 0.44359031858646353
Item ID: 480, Score: 0.4413461975980634
Item ID: 736, Score: 0.43905081933755336
Item ID: 377, Score: 0.43535410787109874
Item ID: 32, Score: 0.41443134907794255
