In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from pathlib import Path

from recommender.collaborative import CollaborativeRecommender
from recommender.hybrid import HybridRecommender
from recommender.data_loader import load_ratings
from recommender.evaluation import rmse, mae, precision_at_k


In [None]:
# Load the ratings table and split its rows into train / test partitions.
# The seed is fixed so that re-running the notebook yields the same split.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

ratings = load_ratings()
train, test = train_test_split(ratings, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

# Preview the first rows of each partition (shown as a tuple of two frames).
(train.head(), test.head())


In [None]:
# Populate the collaborative recommender by hand from the training split only,
# so that no test rating feeds into the similarity matrix or the NMF factors.
# This is a workaround: a cleaner fix would be to let CollaborativeRecommender
# accept a ratings DataFrame directly. Here we'll inject the pieces manually.
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

from recommender.data_loader import create_user_item_matrix

collab = CollaborativeRecommender(cf_mode="user", use_nmf=True)
collab.ratings = train

# Load the item table only if the recommender does not already hold one.
# An explicit `is None` test is required: `collab.items or ...` raises
# ValueError when `items` is a DataFrame (its truth value is ambiguous).
if getattr(collab, "items", None) is None:
    collab.items = pd.read_csv(Path("..") / "data" / "items.csv")

# Build the user-item matrix from the training rows; unrated cells are then
# filled with 0 so the matrix is dense for the two computations below.
collab.user_item_matrix = create_user_item_matrix(train)
matrix_filled = collab.user_item_matrix.fillna(0).values

# Pairwise cosine similarity between matrix rows.
# NOTE(review): rows are presumably users (matches cf_mode="user") - confirm
# against create_user_item_matrix.
collab.similarity_matrix = cosine_similarity(matrix_filled)  # user-based

# NMF part: factorise the same zero-filled matrix (no need to rebuild it).
nmf_input = matrix_filled
collab.nmf_model = NMF(
    n_components=20,
    init="random",
    random_state=42,
    max_iter=200,
)
collab.user_factors = collab.nmf_model.fit_transform(nmf_input)
collab.item_factors = collab.nmf_model.components_


In [None]:
def predict_cf(row):
    """Return the neighbourhood-CF prediction for one row.

    ``row`` must expose ``"user_id"`` and ``"item_id"``; the result is whatever
    ``collab.predict_rating_cf`` returns for that pair.
    """
    user, item = row["user_id"], row["item_id"]
    return collab.predict_rating_cf(user, item)

def predict_nmf(row):
    """Return the NMF prediction for one row.

    ``row`` must expose ``"user_id"`` and ``"item_id"``; the result is whatever
    ``collab.predict_rating_nmf`` returns for that pair.
    """
    user, item = row["user_id"], row["item_id"]
    return collab.predict_rating_nmf(user, item)

def predict_hybrid(row, alpha=0.6):
    """Blend the CF and NMF predictions for one row.

    Args:
        row: mapping-like object exposing ``"user_id"`` and ``"item_id"``.
        alpha: weight given to the CF prediction; the NMF prediction gets
            ``1 - alpha``.

    Returns:
        The weighted blend when both predictors return a value, the single
        available value when exactly one of them returns ``None``, and
        ``None`` when both do.
    """
    user, item = row["user_id"], row["item_id"]
    cf_score = collab.predict_rating_cf(user, item)
    nmf_score = collab.predict_rating_nmf(user, item)

    if cf_score is not None and nmf_score is not None:
        return alpha * cf_score + (1 - alpha) * nmf_score

    # At most one score is available here: fall back to it (or to None).
    return nmf_score if cf_score is None else cf_score


In [None]:
# Score every held-out row with each predictor, one output column per model.
# Work on a re-indexed copy so the original `test` frame is left untouched.
test_sample = test.copy().reset_index(drop=True)

prediction_columns = {
    "pred_cf": predict_cf,
    "pred_nmf": predict_nmf,
    "pred_hybrid": predict_hybrid,
}
for pred_col, predictor in prediction_columns.items():
    test_sample[pred_col] = test_sample.apply(predictor, axis=1)

# Keep a row as soon as at least one model produced a prediction for it;
# only rows where every prediction is missing are discarded.
mask_valid = ~test_sample[list(prediction_columns)].isna().all(axis=1)
test_sample = test_sample.loc[mask_valid]

test_sample.head()


In [None]:
# Compute RMSE and MAE for each model's prediction column.
# `test_sample` keeps any row where at least ONE model produced a prediction,
# so an individual column can still contain missing values. Each model is
# therefore scored only on the rows where that model has a prediction, instead
# of passing NaNs to the metric helpers.
# NOTE(review): as a consequence the models may be scored on different row
# subsets - compare row counts before reading too much into small gaps.
y_true = test_sample["rating"].values

metrics = {}

for name, col in [
    ("CF", "pred_cf"),
    ("NMF", "pred_nmf"),
    ("Hybrid", "pred_hybrid")
]:
    scored_rows = test_sample[test_sample[col].notna()]

    if scored_rows.empty:
        # Nothing to score for this model: report NaN rather than calling the
        # metric helpers on empty arrays.
        metrics[name] = {"RMSE": float("nan"), "MAE": float("nan")}
        continue

    actual = scored_rows["rating"].values
    # Force a float array in case the column was stored with object dtype.
    preds = scored_rows[col].to_numpy(dtype=float)
    metrics[name] = {
        "RMSE": rmse(actual, preds),
        "MAE": mae(actual, preds)
    }

metrics


In [None]:
def precision_at_k_for_model(df, model_col, k=10, threshold=4.0):
    """Average per-user precision@k for one prediction column.

    Args:
        df: frame with ``"user_id"``, ``"item_id"``, ``"rating"`` and the
            ``model_col`` column.
        model_col: name of the column holding the model's predicted scores.
        k: cut-off forwarded to ``precision_at_k``.
        threshold: a row counts as relevant when its ``"rating"`` is greater
            than or equal to this value.

    Returns:
        The mean of the per-user precision values as a ``float``. Users with
        no relevant item are left out of the mean; ``0.0`` is returned when no
        user contributes a value.
    """
    per_user_precision = []

    for _, user_rows in df.groupby("user_id"):
        # Ground truth: the items this user rated at or above the threshold.
        is_relevant = user_rows["rating"] >= threshold
        liked_items = user_rows.loc[is_relevant, "item_id"].tolist()

        # Ranking: the user's items ordered by predicted score, best first.
        ranked_rows = user_rows.sort_values(by=model_col, ascending=False)
        ranked_items = ranked_rows["item_id"].tolist()

        if len(liked_items) == 0:
            continue  # nothing relevant for this user, so no precision value

        per_user_precision.append(precision_at_k(ranked_items, liked_items, k=k))

    return float(np.mean(per_user_precision)) if per_user_precision else 0.0

# Precision@5 for each model, keyed by display name.
precision_results = {
    label: precision_at_k_for_model(test_sample, pred_column, k=5)
    for label, pred_column in (
        ("CF", "pred_cf"),
        ("NMF", "pred_nmf"),
        ("Hybrid", "pred_hybrid"),
    )
}

# Individual names kept alongside the dict for convenient access.
prec_cf = precision_results["CF"]
prec_nmf = precision_results["NMF"]
prec_hybrid = precision_results["Hybrid"]

precision_results


In [None]:
# One row per model, combining the error metrics with Precision@5.
summary = [
    {
        "Model": label,
        "RMSE": metrics[label]["RMSE"],
        "MAE": metrics[label]["MAE"],
        "Precision@5": precision_results[label],
    }
    for label in ("CF", "NMF", "Hybrid")
]

summary_df = pd.DataFrame(summary)
summary_df
