In [1]:
import pandas as pd
import numpy as np

In [None]:
rating_data = pd.read_csv(r"..\data\ratings.csv")

In [10]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# 1. Prepare the data
reader = Reader(rating_scale=(rating_data['rating'].min(), rating_data['rating'].max()))
data = Dataset.load_from_df(rating_data[['userId', 'movieId', 'rating']], reader)

# 2. Split into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# 3. Train the SVD model
svd = SVD()
svd.fit(trainset)

# 4. Evaluate performance on test set
predictions = svd.test(testset)
print('RMSE:', accuracy.rmse(predictions))
print('MAE:', accuracy.mae(predictions))

# 5. Predict rating for a user-movie pair
def predict_rating(user_id, movie_id):
    pred = svd.predict(user_id, movie_id)
    return pred.est

for user_id in range(1, 11):
    pred_rating = predict_rating(user_id, 10)
    
    # Get original rating if exists
    original_rating = rating_data[
        (rating_data['userId'] == user_id) & (rating_data['movieId'] == 10)
    ]['rating']
    
    original_rating = original_rating.values[0] if not original_rating.empty else None
    
    print(f"User {user_id}: Original rating = {original_rating}, Predicted rating = {pred_rating:.2f}")

    



RMSE: 0.8793
RMSE: 0.879338835505816
MAE:  0.6746
MAE: 0.6745733942544827
User 1: Original rating = None, Predicted rating = 4.12
User 2: Original rating = None, Predicted rating = 3.93
User 3: Original rating = None, Predicted rating = 2.74
User 4: Original rating = None, Predicted rating = 3.58
User 5: Original rating = None, Predicted rating = 3.26
User 6: Original rating = 3.0, Predicted rating = 3.78
User 7: Original rating = None, Predicted rating = 3.35
User 8: Original rating = 2.0, Predicted rating = 3.07
User 9: Original rating = None, Predicted rating = 3.08
User 10: Original rating = None, Predicted rating = 3.30


In [11]:
import pickle


with open(r'..\data\svd_model.pkl', 'wb') as f:
    pickle.dump(svd, f)