In [4]:
import pandas as pd
from surprise import Dataset, Reader, SVD
import pickle

# --- Ratings joined with movie titles ----------------------------------------
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv('../data/ml-100k/u.data', sep='\t', names=column_names)

# u.item is read without a header row; only its first two columns are kept
# and given the names used by the rest of the notebook.
movies = (
    pd.read_csv('../data/ml-100k/u.item', sep='|', encoding='latin-1', header=None)
    .loc[:, [0, 1]]
    .rename(columns={0: 'item_id', 1: 'title'})
)
data_merged = ratings.merge(movies, on='item_id')

# --- Surprise dataset built from the (user, item, rating) columns ------------
reader = Reader(rating_scale=(1, 5))
rating_triples = data_merged[['user_id', 'item_id', 'rating']]
data = Dataset.load_from_df(rating_triples, reader)

# --- Previously saved model --------------------------------------------------
# pickle.load can execute arbitrary code: only point this at a file you produced yourself.
with open('../models/svd_model.pkl', 'rb') as f:
    model = pickle.load(f)


In [5]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Return the ``n`` highest-estimated items for every user.

    Args:
        predictions: Iterable of 5-field records that unpack as
            ``(uid, iid, true_r, est, details)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        n: Maximum number of items to keep per user. ``None`` keeps every
            item (still sorted). Must not be negative.

    Returns:
        A ``defaultdict(list)`` mapping each uid to a list of ``(iid, est)``
        tuples ordered by ``est`` descending and at most ``n`` long. Items
        with equal estimates keep their input order. Because the result is a
        defaultdict, indexing it with an unknown uid inserts and returns an
        empty list; use ``.get(uid, [])`` to avoid that.

    Raises:
        ValueError: If ``n`` is negative. A negative slice bound would
            otherwise silently drop the lowest-rated items instead of
            selecting the top ones.
    """
    import heapq  # local import so this cell does not depend on another cell's imports

    if n is not None and n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    def by_estimate(pair):
        return pair[1]

    # Group (item, estimate) pairs by user.
    per_user = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        per_user[uid].append((iid, est))

    # Keep only the best n per user. heapq.nlargest is documented as equivalent
    # to sorted(..., reverse=True)[:n] but avoids fully sorting long lists.
    # Reassigning existing keys while iterating is safe: the dict size is unchanged.
    for uid, candidates in per_user.items():
        if n is None:
            per_user[uid] = sorted(candidates, key=by_estimate, reverse=True)
        else:
            per_user[uid] = heapq.nlargest(n, candidates, key=by_estimate)

    return per_user


In [6]:
# Build a trainset from all ratings in `data` (no hold-out split) and fit the model on it.
# NOTE(review): `model` was unpickled as an already-trained model in the loading cell;
# fit() here presumably retrains it on this trainset — confirm the refit is intended,
# and consider fixing a random seed if reproducible recommendations matter.
trainset = data.build_full_trainset()
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2345dbcf9a0>

In [7]:
# Per the Surprise docs, build_anti_testset() returns the (user, item) pairs that have
# no rating in the trainset, so `predictions` holds estimates for items each user has not rated.
# NOTE(review): that is roughly users x items rows — expect this cell to be slow and memory-heavy.
testset = trainset.build_anti_testset()
predictions = model.test(testset)


In [8]:
# Reduce the full prediction list to the 10 highest-estimated items per user.
top_n = get_top_n(predictions, n=10)


In [9]:
# Lookup table from item_id to movie title, built from the (item_id, title) row pairs.
# Repeated item_ids simply overwrite their own entry.
item_id_to_title = dict(
    data_merged[['item_id', 'title']].itertuples(index=False, name=None)
)


In [12]:
user_id = 10

# Use .get() rather than top_n[user_id]: top_n is a defaultdict, so plain indexing
# with an unknown user would silently insert an empty entry into it.
user_recommendations = top_n.get(user_id, [])

print(f"Top 10 movie recommendations for user {user_id}:\n")
if not user_recommendations:
    # Make the "nothing to show" case explicit instead of printing a bare header.
    print(f"No recommendations available for user {user_id}.")
for iid, rating in user_recommendations:
    # `rating` is the model's estimated rating for this item, not an observed one.
    print(f"{item_id_to_title[int(iid)]} — predicted rating: {rating:.2f}")


Top 10 movie recommendations for user 10:

Schindler's List (1993) — predicted rating: 5.00
Godfather: Part II, The (1974) — predicted rating: 4.87
To Kill a Mockingbird (1962) — predicted rating: 4.85
Titanic (1997) — predicted rating: 4.82
Arsenic and Old Lace (1944) — predicted rating: 4.74
Wrong Trousers, The (1993) — predicted rating: 4.70
Good Will Hunting (1997) — predicted rating: 4.68
His Girl Friday (1940) — predicted rating: 4.67
Raise the Red Lantern (1991) — predicted rating: 4.64
Mr. Smith Goes to Washington (1939) — predicted rating: 4.64
