SASRec and SVD Hybrid Recommender

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import ndcg_score, precision_score, recall_score
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

# --- Load data -------------------------------------------------------------
# Both CSV files are read relative to the current working directory.
ratings = pd.read_csv('ratings.csv')
movies_df = pd.read_csv('movies.csv')

# Divide every rating by 5.
# NOTE(review): presumably this maps a 5-star scale onto (0, 1] — confirm against the data.
ratings['rating'] = ratings['rating'] / 5

# --- User-item matrix ------------------------------------------------------
# One row per userId, one column per movieId; pairs with no rating become 0.
user_item_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# --- Truncated SVD ---------------------------------------------------------
# 50 latent components; fixed random_state keeps the factorization repeatable.
svd = TruncatedSVD(n_components=50, random_state=42)
user_factors = svd.fit_transform(user_item_matrix)   # one row per user
item_factors = svd.components_.T                     # one row per item
# Low-rank approximation of the user-item matrix (users x items).
reconstructed = user_factors @ item_factors.T

# Recommend top-N items for user 1
target_user_id = 1
# Look the row up by its userId label: row 0 only belongs to userId 1 when the
# ids happen to start at 1, and a missing user should fail loudly (KeyError)
# instead of silently recommending for somebody else.
user_index = user_item_matrix.index.get_loc(target_user_id)
user_ratings = user_item_matrix.iloc[user_index].values
# Copy the row. Integer-indexing a NumPy array returns a view, so masking it in
# place would write -inf into `reconstructed` itself and corrupt every later
# use of that matrix.
reconstructed_scores = reconstructed[user_index].copy()

# Filter out already seen movies
seen = user_ratings > 0
reconstructed_scores[seen] = -np.inf

# Get top-N recommendations (highest predicted score first)
top_n = 10
top_indices = np.argsort(reconstructed_scores)[-top_n:][::-1]
# If the user has fewer than top_n unseen movies, the tail of the ranking is
# made of masked (seen) entries; drop them rather than recommending them.
top_indices = top_indices[~seen[top_indices]]
top_movie_ids = user_item_matrix.columns[top_indices]
movie_titles = dict(zip(movies_df['movieId'], movies_df['title']))

print(f"SVD Recommendations for user {target_user_id}:")
for i, mid in enumerate(top_movie_ids, 1):
    # Fall back to the raw id when the movie is missing from movies.csv.
    print(f"{i}. {movie_titles.get(mid, f'Unknown ({mid})')}")

# Evaluate SVD model

def _svd_holdout_metrics(user_item_matrix, reconstructed, top_k=10, sample_users=100):
    """Compute leave-one-out ranking metrics and return them as a DataFrame.

    Private helper of `evaluate_svd`; see that function for the parameter
    descriptions. Returns a DataFrame with columns 'Metric' and 'Score'.
    """
    # Only users with more than 5 rated items take part in the evaluation.
    rated_counts = user_item_matrix.gt(0).sum(axis=1)
    users_with_enough = user_item_matrix[rated_counts > 5]
    sampled_users = users_with_enough.sample(
        n=min(sample_users, len(users_with_enough)), random_state=42
    )

    hit, ndcg = [], []
    for user_id, row in sampled_users.iterrows():
        rated = row.to_numpy() > 0
        # Hold out the last rated item in column order (not in time order).
        held_out_idx = int(np.flatnonzero(rated)[-1])
        rated[held_out_idx] = False

        # Map the userId label to its row position; `user_id - 1` is only
        # correct when the ids are exactly 1..N with no gaps.
        row_pos = user_item_matrix.index.get_loc(user_id)
        scores = np.array(reconstructed[row_pos], dtype=float)  # private copy
        scores[rated] = -np.inf  # never recommend items the user already rated

        top_preds = np.argsort(scores)[::-1][:top_k]
        found_at = np.flatnonzero(top_preds == held_out_idx)

        if found_at.size:
            hit.append(1)
            # With a single relevant item the ideal DCG is 1, so NDCG reduces
            # to the discount at the (0-based) rank where the item was found.
            ndcg.append(1.0 / np.log2(found_at[0] + 2))
        else:
            hit.append(0)
            ndcg.append(0.0)

    if hit:
        hit_rate = float(np.mean(hit))
        mean_ndcg = float(np.mean(ndcg))
    else:
        # No user qualified: report NaN explicitly instead of letting
        # np.mean([]) emit a RuntimeWarning.
        hit_rate = mean_ndcg = float('nan')

    # Exactly one relevant item per user: recall equals the hit rate and
    # precision is the hit rate divided by the list length.
    return pd.DataFrame({
        'Metric': [f'Hit@{top_k}', f'NDCG@{top_k}', f'Precision@{top_k}', f'Recall@{top_k}'],
        'Score': [hit_rate, mean_ndcg, hit_rate / top_k, hit_rate],
    })


def evaluate_svd(user_item_matrix, reconstructed, top_k=10, sample_users=100):
    """Print leave-one-out ranking metrics for an SVD reconstruction.

    For each sampled user, the last rated item (in column order) is held out,
    every other rated item is masked, and the remaining items are ranked by
    their reconstructed score. Hit, NDCG, Precision and Recall at `top_k` are
    averaged over the sampled users and printed as a table.

    Parameters
    ----------
    user_item_matrix : pandas.DataFrame
        Users in rows, items in columns; a value > 0 means "rated".
    reconstructed : array-like
        Predicted scores, indexed as ``reconstructed[row_position]`` with rows
        and columns in the same order as `user_item_matrix`.
    top_k : int, default 10
        Length of the recommendation list that is scored.
    sample_users : int, default 100
        Maximum number of users (with more than 5 rated items) to evaluate;
        sampling uses a fixed random_state of 42.

    Returns
    -------
    None
        The metrics table is written to stdout.

    Notes
    -----
    NOTE(review): if `reconstructed` was fitted on a matrix that still contains
    the held-out ratings, these metrics are optimistic — confirm against the
    caller, or fit on a train split that excludes them.
    """
    results = _svd_holdout_metrics(user_item_matrix, reconstructed, top_k, sample_users)
    print("\nEvaluation Metrics Table:")
    print(results.to_string(index=False))

# Score the SVD reconstruction: top-10 lists, at most 100 sampled users.
evaluate_svd(
    user_item_matrix=user_item_matrix,
    reconstructed=reconstructed,
    top_k=10,
    sample_users=100,
)


Metric    Score

Hit@10 0.150000

NDCG@10 0.120666

Precision@10 0.015000

Recall@10 0.150000