# Notebook 01: Collaborative Filtering using Surprise SVD
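This notebook trains a collaborative filtering model (SVD matrix factorization) with the Surprise library on the MovieLens 100k dataset, evaluates it by RMSE on a held-out test set, and saves the test-set rating predictions for use in a hybrid model.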

In [None]:
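# Install Surprise if needed (use %pip, not !pip, so the package is installed
# into the environment this kernel is running in):
# %pip install scikit-surprise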
from pathlib import Path

import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.accuracy import rmse
from surprise.model_selection import train_test_split

## Load and Prepare Data

In [None]:
# Read the comma-separated ratings file (first row is the header), derive the
# integer userID / itemID columns from user_id / item_id, cast rating to int,
# and keep only the three columns used downstream, in that order.
ratings_df = (
    pd.read_csv("u_data.csv", sep=",", header=0)
    .assign(
        userID=lambda frame: frame["user_id"].astype(int),
        itemID=lambda frame: frame["item_id"].astype(int),
        rating=lambda frame: frame["rating"].astype(int),
    )
    .loc[:, ["userID", "itemID", "rating"]]
)

## Prepare Data for Surprise

In [None]:
# Columns handed to Surprise, in the order user, item, rating.
surprise_columns = ["userID", "itemID", "rating"]

# Declare the rating scale as 1 to 5 and wrap the DataFrame in a Surprise Dataset.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[surprise_columns], reader)

# Split with test_size=0.2; the fixed random_state keeps the split repeatable.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

## Train SVD Model

In [None]:
# SVD matrix-factorization model with 50 latent factors, trained for 20 epochs
# with a learning rate of 0.005 and a regularization term of 0.02 applied to
# all parameters.
# random_state seeds the random initialisation of the model so that a
# Restart & Run All reproduces the same fitted model, RMSE and saved
# predictions (42 is the same seed value used for the train/test split).
svd_model = SVD(
    n_factors=50,
    lr_all=0.005,
    reg_all=0.02,
    n_epochs=20,
    random_state=42,
)
svd_model.fit(trainset)

## Evaluate RMSE on Test Set

In [None]:
# Predict every (user, item) pair in the held-out test set.
predictions = svd_model.test(testset)

# rmse() prints the score (verbose output) and also returns it; leaving the
# value as the last expression displays it as the cell output.
test_rmse = rmse(predictions, verbose=True)
test_rmse

## Save Predictions for Hybrid Model

In [None]:
# Convert predictions into a DataFrame: one row per test-set prediction with
# the integer user and item ids and the estimated rating rounded to 4 decimals.
# Passing `columns` pins the schema, so the CSV still gets its header row even
# if `predictions` is empty.
pred_df = pd.DataFrame(
    [
        {
            "userID": int(pred.uid),
            "itemID": int(pred.iid),
            "cf_pred": round(pred.est, 4),
        }
        for pred in predictions
    ],
    columns=["userID", "itemID", "cf_pred"],
)

# Save for use in hybrid model.
# Create the output directory first: to_csv does not create missing parent
# directories and would otherwise fail when models/ does not exist yet.
cf_predictions_path = Path("models") / "cf_predictions.csv"
cf_predictions_path.parent.mkdir(parents=True, exist_ok=True)
pred_df.to_csv(cf_predictions_path, index=False)
# as_posix() keeps the message identical on every platform.
print(f"Saved CF predictions to {cf_predictions_path.as_posix()}")