# Project 2 Notebook # 


### Part 1: Training the Model with SGD ###

In [1]:
import copy
import os

# %pip install autograd 

import autograd.numpy as ag_np
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from CollabFilterOneVectorPerItem import CollabFilterOneVectorPerItem
from train_valid_test_loader import load_train_valid_test_datasets

DATA_DIR = './data_movie_lens_100k'
 

In [None]:
# Sweep the number of latent factors k with regularization disabled.
# For every k: train, keep the parameter snapshot with the lowest validation
# MSE seen across the loop iterations, then report validation/test MAE for
# that snapshot.
train_tuple, valid_tuple, test_tuple, n_users, n_items = load_train_valid_test_datasets()

# The evaluation splits never change, so unpack them once instead of on
# every epoch / every k.
user_id_valid, item_id_valid, y_valid = valid_tuple
user_id_test, item_id_test, y_test = test_tuple

# Hyperparameters
n_epochs = 25
batch_size = 10000
step_size = 0.1
k_vals = [2, 10, 50]
alpha = 0.0  # No regularization

results = []

for k in k_vals:
    print(f"\nTraining model with k = {k}, alpha = {alpha}")
    model = CollabFilterOneVectorPerItem(
        n_epochs=n_epochs,
        batch_size=batch_size,
        step_size=step_size,
        n_factors=k,
        alpha=alpha,
    )
    model.init_parameter_dict(n_users, n_items, train_tuple)

    best_mse_valid = float('inf')
    best_epoch = 0  # 0-based loop index (the progress line below prints epoch + 1)
    best_params = None

    for epoch in range(n_epochs):
        # NOTE(review): the model is constructed with n_epochs=n_epochs, so
        # each fit() call may run all of those epochs rather than a single
        # one -- confirm against CollabFilterOneVectorPerItem.fit.
        model.fit(train_tuple, valid_tuple)

        # Validation metrics
        y_pred_valid = model.predict(user_id_valid, item_id_valid)
        mse_valid = ag_np.mean((y_pred_valid - y_valid) ** 2)

        if mse_valid < best_mse_valid:
            best_mse_valid = mse_valid
            best_epoch = epoch
            # dict.copy() is shallow: the snapshot would share its values
            # with the live model and follow any later in-place updates.
            # A deep copy keeps the best parameters truly frozen.
            best_params = copy.deepcopy(model.param_dict)

        print(f"Epoch {epoch + 1}/{n_epochs} - Validation MSE: {mse_valid:.4f}")

    # Restore the best snapshot before computing the final metrics
    model.param_dict = best_params

    # Compute final MAE metrics with the restored parameters
    y_pred_test = model.predict(user_id_test, item_id_test)
    mae_test = ag_np.mean(ag_np.abs(y_pred_test - y_test))

    mae_valid = ag_np.mean(ag_np.abs(model.predict(user_id_valid, item_id_valid) - y_valid))

    # Save results
    results.append({
        "k": k,
        "alpha": alpha,
        "mae_valid": mae_valid,
        "mae_test": mae_test,
        "best_epoch": best_epoch,
        "params": best_params
    })

    print(f"Results for k={k}, alpha={alpha}:")
    print(f"  Best epoch: {best_epoch}")
    print(f"  Validation MAE: {mae_valid:.4f}")
    print(f"  Test MAE: {mae_test:.4f}")

# Final results summary (one line per k)
for res in results:
    print(f"k={res['k']}, Validation MAE={res['mae_valid']:.4f}, Test MAE={res['mae_test']:.4f}")


In [None]:
## Training with different regularization strengths (alpha) at fixed k ##
# For every alpha: train, keep the parameter snapshot with the lowest
# validation MSE seen across the loop iterations, then report
# validation/test MAE for that snapshot.

train_tuple, valid_tuple, test_tuple, n_users, n_items = load_train_valid_test_datasets()

# The evaluation splits never change, so unpack them once instead of on
# every epoch / every alpha.
user_id_valid, item_id_valid, y_valid = valid_tuple
user_id_test, item_id_test, y_test = test_tuple

# Hyperparameters
n_epochs = 25
batch_size = 10000
step_size = 0.1
alpha_vals = [0.01, 0.1, 1, 10, 100]
k = 50

results = []

for alpha in alpha_vals:
    print(f"\nTraining model with k = {k}, alpha = {alpha}")
    model = CollabFilterOneVectorPerItem(
        n_epochs=n_epochs,
        batch_size=batch_size,
        step_size=step_size,
        n_factors=k,
        alpha=alpha,
    )
    model.init_parameter_dict(n_users, n_items, train_tuple)

    best_mse_valid = float('inf')
    best_epoch = 0  # 0-based loop index (the progress line below prints epoch + 1)
    best_params = None

    for epoch in range(n_epochs):
        # NOTE(review): the model is constructed with n_epochs=n_epochs, so
        # each fit() call may run all of those epochs rather than a single
        # one -- confirm against CollabFilterOneVectorPerItem.fit.
        model.fit(train_tuple, valid_tuple)

        # Validation metrics
        y_pred_valid = model.predict(user_id_valid, item_id_valid)
        mse_valid = ag_np.mean((y_pred_valid - y_valid) ** 2)

        if mse_valid < best_mse_valid:
            best_mse_valid = mse_valid
            best_epoch = epoch
            # dict.copy() is shallow: the snapshot would share its values
            # with the live model and follow any later in-place updates.
            # A deep copy keeps the best parameters truly frozen.
            best_params = copy.deepcopy(model.param_dict)

        print(f"Epoch {epoch + 1}/{n_epochs} - Validation MSE: {mse_valid:.4f}")

    # Restore the best snapshot before computing the final metrics
    model.param_dict = best_params

    # Compute final MAE metrics with the restored parameters
    y_pred_test = model.predict(user_id_test, item_id_test)
    mae_test = ag_np.mean(ag_np.abs(y_pred_test - y_test))

    mae_valid = ag_np.mean(ag_np.abs(model.predict(user_id_valid, item_id_valid) - y_valid))

    # Save results
    results.append({
        "k": k,
        "alpha": alpha,
        "mae_valid": mae_valid,
        "mae_test": mae_test,
        "best_epoch": best_epoch,
        "params": best_params
    })

    print(f"Results for k={k}, alpha={alpha}:")
    print(f"  Best epoch: {best_epoch}")
    print(f"  Validation MAE: {mae_valid:.4f}")
    print(f"  Test MAE: {mae_test:.4f}")

# Final results summary. k is constant in this sweep, so alpha is printed
# as well; otherwise the rows could not be told apart.
for res in results:
    print(f"k={res['k']}, alpha={res['alpha']}, Validation MAE={res['mae_valid']:.4f}, Test MAE={res['mae_test']:.4f}")
