In [1]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.1.tar.gz (317.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.0/317.0 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.1-py2.py3-none-any.whl size=317488491 sha256=3978e49f4bafee688131a122994c28b96ece1988569057c0ec9890be3be683d1
  Stored in directory: /root/.cache/pip/wheels/80/1d/60/2c256ed38dddce2fdd93be545214a63e02fbd8d74fb0b7f3a6
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.1


In [2]:
from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

# Initialize SparkContext
sc = SparkContext(appName="PythonCollaborativeFilteringExample")

# Load and parse the data
data = sc.textFile("/content/u.data")

# Convert data to (UserID, MovieID, rating) format
ratings = data.map(lambda l: l.strip().split('\t'))\
              .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))

# Display the first few ratings to verify the data
ratings.take(5)

[Rating(user=196, product=242, rating=3.0),
 Rating(user=186, product=302, rating=3.0),
 Rating(user=22, product=377, rating=1.0),
 Rating(user=244, product=51, rating=2.0),
 Rating(user=166, product=346, rating=1.0)]

In [3]:
rank = 10
numIterations = 10
model = ALS.train(ratings, rank, numIterations)

# Evaluate the model on training data
testdata = ratings.map(lambda p: (p[0], p[1]))
predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))

# Join input rating ((user, product), rate1) with predicted rating
# ((user, product), rate2) to create ((user, product), (rate1, rate2))
ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
print("Mean Squared Error = " + str(MSE))

# Save and load model
model.save(sc, "/content/myCollaborativeFilter")
sameModel = MatrixFactorizationModel.load(sc, "/content/myCollaborativeFilter")

Mean Squared Error = 0.4808234959187335


In [5]:
user_id = 196
user_ratings = ratings.filter(lambda r: r[0] == user_id).collect()

print(f"Ratings given by user {user_id}:")
for r in user_ratings:
    print(f"Movie ID: {r.product}, Rating: {r.rating}")

# Generate top 10 movie recommendations for a specific user
recommendations = sameModel.recommendProducts(user_id, 10)

print(f"\nTop 10 recommendations for user {user_id}:")
for r in recommendations:
    print(f"Movie ID: {r.product}, Predicted Rating: {r.rating}")
"""
print(f"Ratings given by user {user_id}: {user_ratings}")

# Generate top 10 movie recommendations for a specific user
recommendations = sameModel.recommendProducts(user_id, 10)
print(f"Top 10 recommendations for user {user_id}: {recommendations}")
"""



Ratings given by user 196:
Movie ID: 242, Rating: 3.0
Movie ID: 393, Rating: 4.0
Movie ID: 381, Rating: 4.0
Movie ID: 251, Rating: 3.0
Movie ID: 655, Rating: 5.0
Movie ID: 67, Rating: 5.0
Movie ID: 306, Rating: 4.0
Movie ID: 238, Rating: 4.0
Movie ID: 663, Rating: 5.0
Movie ID: 111, Rating: 4.0
Movie ID: 580, Rating: 2.0
Movie ID: 25, Rating: 4.0
Movie ID: 286, Rating: 5.0
Movie ID: 94, Rating: 3.0
Movie ID: 692, Rating: 5.0
Movie ID: 8, Rating: 5.0
Movie ID: 428, Rating: 4.0
Movie ID: 1118, Rating: 4.0
Movie ID: 70, Rating: 3.0
Movie ID: 66, Rating: 3.0
Movie ID: 257, Rating: 2.0
Movie ID: 108, Rating: 4.0
Movie ID: 202, Rating: 3.0
Movie ID: 340, Rating: 3.0
Movie ID: 287, Rating: 3.0
Movie ID: 116, Rating: 3.0
Movie ID: 382, Rating: 4.0
Movie ID: 285, Rating: 5.0
Movie ID: 1241, Rating: 3.0
Movie ID: 1007, Rating: 4.0
Movie ID: 411, Rating: 4.0
Movie ID: 153, Rating: 5.0
Movie ID: 13, Rating: 2.0
Movie ID: 762, Rating: 3.0
Movie ID: 173, Rating: 2.0
Movie ID: 1022, Rating: 4.0
Movie

'\nprint(f"Ratings given by user {user_id}: {user_ratings}")\n\n# Generate top 10 movie recommendations for a specific user\nrecommendations = sameModel.recommendProducts(user_id, 10)\nprint(f"Top 10 recommendations for user {user_id}: {recommendations}")\n'