# Notebook 05: Evaluation and Visualisation
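This notebook evaluates and compares the performance of the CF, CBF, and hybrid stacked models using two metrics: RMSE, and Recall@3.5 (the fraction of items whose true rating is at least 3.5 that are also predicted to be at least 3.5).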

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import joblib

## Load Predictions and True Ratings

In [None]:
# Read the two per-model prediction tables and the meta-features table.
cf_df, cbf_df, meta_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "models/cf_predictions.csv",
        "models/cbf_predictions.csv",
        "models/meta_features.csv",
    )
)
# The ground-truth ratings are taken from the meta-features "rating" column.
y_true = meta_df["rating"].values

## Evaluate RMSE

In [None]:
# RMSE of the two base models, read from their respective prediction columns.
cf_rmse, cbf_rmse = (
    np.sqrt(mean_squared_error(y_true, frame[column]))
    for frame, column in ((cf_df, "cf_pred"), (cbf_df, "cbf_pred"))
)

# The hybrid model predicts from every meta-feature column except the target.
meta_model = joblib.load("models/meta_model.pkl")
X_meta = meta_df.drop(columns=["rating"])
hybrid_preds = meta_model.predict(X_meta)
hybrid_rmse = np.sqrt(mean_squared_error(y_true, hybrid_preds))

# One line per model, four decimal places.
print(
    f"CF RMSE: {cf_rmse:.4f}",
    f"CBF RMSE: {cbf_rmse:.4f}",
    f"Hybrid RMSE: {hybrid_rmse:.4f}",
    sep="\n",
)

## Evaluate Recall@3.5

In [None]:
def recall_at_threshold(y_true, y_pred, threshold=3.5):
    """Return the recall of predicted ratings at a rating threshold.

    A rating counts as positive when it is ``>= threshold``. Recall is the
    fraction of truly positive entries whose prediction is also positive.

    Args:
        y_true: Array-like of true ratings (list, NumPy array, or Series).
        y_pred: Array-like of predicted ratings with as many entries as
            ``y_true``; entries are matched by position.
        threshold: Minimum rating treated as positive.

    Returns:
        Recall as a Python float in [0, 1], or 0.0 when ``y_true`` contains
        no positive entries.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` differ in number of entries.
    """
    # Flatten to plain 1-D arrays so that lists are accepted, pandas objects
    # are compared by position instead of being aligned on index labels, and
    # an (n, 1) prediction column cannot broadcast against (n,) truths.
    true_arr = np.asarray(y_true).ravel()
    pred_arr = np.asarray(y_pred).ravel()
    if true_arr.size != pred_arr.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of entries, "
            f"got {true_arr.size} and {pred_arr.size}"
        )

    actual_pos = true_arr >= threshold
    # tp + fn is simply the number of truly positive entries.
    n_actual_pos = int(actual_pos.sum())
    if n_actual_pos == 0:
        return 0.0

    # NaN predictions compare False against the threshold, so they count as
    # misses (false negatives).
    n_hits = int((actual_pos & (pred_arr >= threshold)).sum())
    return n_hits / n_actual_pos

# Score each model's predictions against the true ratings, using the
# function's default threshold.
cf_recall, cbf_recall, hybrid_recall = (
    recall_at_threshold(y_true, preds)
    for preds in (cf_df["cf_pred"], cbf_df["cbf_pred"], hybrid_preds)
)

# One line per model, four decimal places.
print(
    f"CF Recall@3.5: {cf_recall:.4f}",
    f"CBF Recall@3.5: {cbf_recall:.4f}",
    f"Hybrid Recall@3.5: {hybrid_recall:.4f}",
    sep="\n",
)

## Visualise RMSE and Recall

In [None]:
# Draw one bar chart per metric (RMSE first, then recall), each shown as its
# own figure with the three models on the x-axis.
for chart_title, axis_label, scores in (
    ("RMSE Comparison", "RMSE", [cf_rmse, cbf_rmse, hybrid_rmse]),
    ("Recall@3.5 Comparison", "Recall", [cf_recall, cbf_recall, hybrid_recall]),
):
    plt.bar(["CF", "CBF", "Hybrid"], scores)
    plt.title(chart_title)
    plt.ylabel(axis_label)
    plt.show()