In [1]:
# === Imports ===
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import KFold, cross_validate, train_test_split

In [2]:
# === Load the raw ratings file ===
# u.data is read as tab-separated text; because `names` is supplied, pandas
# uses these labels as the header and treats every line of the file as data.
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    '../data/ml-100k/u.data',
    sep='\t',
    names=column_names,
)

In [3]:
# === Define Reader for Surprise ===
# The Reader declares the rating scale (lowest, highest) that Surprise should
# assume for this data — here 1 through 5.
reader = Reader(rating_scale=(1, 5))

In [4]:
# === Wrap the ratings in a Surprise Dataset ===
# load_from_df expects exactly three columns in the order user, item, rating,
# so the timestamp column is left out.
data = Dataset.load_from_df(
    ratings[['user_id', 'item_id', 'rating']],
    reader,
)

In [5]:
# === Hold out 20% of the ratings for testing ===
# random_state pins the shuffle so the same split is produced on every run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [6]:
# === Initialize SVD Model ===
# Seed the model's RNG so the random initialisation of the factors — and
# therefore the fitted model — is identical on every Restart & Run All.
# 42 matches the seed already used for the train/test split.
model = SVD(random_state=42)

In [7]:
# === Train Model ===
# Fit the SVD on the training split only (the 20% test split is held out).
# fit() returns the fitted algorithm object; as the cell's last expression,
# its repr is echoed as the cell output.
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x170dd9da7a0>

In [8]:
# Status message shown once the fit cell has finished.
print("✅ Model training complete!")

✅ Model training complete!


In [9]:
# === Cross-validate SVD (5-fold) ===
# cross_validate fits the algorithm on each fold, so a separate, unfitted
# instance is used here and `model` (fitted on the train split) is untouched.
# Both the model and the fold assignment are seeded; without that, the
# reported RMSE/MAE change on every run.
svd_model = SVD(random_state=42)
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)

# verbose=True prints the per-fold table; the returned dict of per-fold
# scores and timings is the cell's last expression, so it is displayed too.
cross_validate(svd_model, data, measures=['RMSE', 'MAE'], cv=cv_folds, verbose=True)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9405  0.9404  0.9341  0.9370  0.9354  0.9375  0.0026  
MAE (testset)     0.7399  0.7416  0.7335  0.7361  0.7416  0.7386  0.0032  
Fit time          2.68    2.70    2.67    2.71    2.69    2.69    0.01    
Test time         0.30    0.40    0.29    0.38    0.27    0.33    0.05    


{'test_rmse': array([0.94054339, 0.94038228, 0.93405158, 0.93704768, 0.93544163]),
 'test_mae': array([0.7398703 , 0.74162067, 0.7335333 , 0.73612041, 0.74162602]),
 'fit_time': (2.6800949573516846,
  2.70141339302063,
  2.6742055416107178,
  2.7099320888519287,
  2.685656785964966),
 'test_time': (0.29901671409606934,
  0.39967846870422363,
  0.291290283203125,
  0.3790395259857178,
  0.27028536796569824)}