In [6]:
# imports
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

import numpy as np
from pathlib import Path
import sys

from sklearn.model_selection import train_test_split

# Append <parent of the current working directory>/src to sys.path, the list
# Python searches when importing modules. This has to run before the
# data_preprocessing / collaborative_filtering imports below, which
# presumably live in that folder (not visible from this notebook - confirm).
sys.path.append(str(Path().resolve().parent / "src"))

from data_preprocessing import load_data
from collaborative_filtering import (
    predict_rating_knn_item,
    evaluate_model,
    get_top_n_recommendations
    )

In [7]:
# Locate the dataset files: data/ is looked up in the parent of the
# current working directory.
project_root = Path().resolve().parent
data_dir = project_root / "data"
movies_file = data_dir / "u.item"
ratings_file = data_dir / "u.data"

movie_genres, ratings, user_movie_matrix, movies = load_data(
    movies_path=str(movies_file),
    ratings_path=str(ratings_file),
)

# Hold out 20% of the individual rating rows (the split is per rating, not
# per user); the fixed random_state keeps the split reproducible.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)

print(f"Train: {len(train_df)} notes | Test: {len(test_df)} notes")

# Rating matrix built from the training rows only: one row per user_id,
# one column per title, cells hold the rating (NaN where there is none).
train_matrix = train_df.pivot_table(
    values="rating",
    index="user_id",
    columns="title",
)

Taux de remplissage (density) : 0.0635
Sparsité (sparsity) : 0.9365
Train: 80000 notes | Test: 20000 notes


In [8]:
# cosine_similarity compares the rows of its input, so transpose the
# user x title matrix to get one row per title. Missing ratings are
# replaced by 0 because the function cannot work with NaN.
item_vectors = train_matrix.T.fillna(0)
titles = train_matrix.columns

# Square title x title table, labelled with the titles on both axes.
item_item_similarity = pd.DataFrame(
    cosine_similarity(item_vectors),
    index=titles,
    columns=titles,
)

In [9]:
def predict_with_item_knn(uid, title, matrix):
    """Three-argument adapter passed to evaluate_model.

    Forwards its arguments to predict_rating_knn_item, adding the
    item-item similarity table and a fixed k=10.
    """
    return predict_rating_knn_item(uid, title, matrix, item_item_similarity, k=10)


# evaluate_model receives the adapter, the held-out ratings and the training
# matrix; its two return values are unpacked as RMSE and MAE.
rmse_knn, mae_knn = evaluate_model(predict_with_item_knn, test_df, train_matrix)

print(f"RMSE (KNN Item): {rmse_knn:.4f}")
print(f"MAE  (KNN Item): {mae_knn:.4f}")

RMSE (KNN Item): 0.9639
MAE  (KNN Item): 0.7514


In [10]:
# Personalised recommendations for a single user
user_id = 42  # example user; change the id to try another one

top_n_df = get_top_n_recommendations(
    user_id=user_id,
    ratings_matrix=train_matrix,
    similarity_matrix=item_item_similarity,
    k=10,
    N=10,
)

# Render the table when it has rows; otherwise tell the reader nothing
# could be recommended.
if top_n_df.empty:
    print("There is no recommendation valid for that user.")
else:
    display(top_n_df)

Unnamed: 0,Film recommandé,Note prédite (estimation d'appréciation)
0,Cinderella (1950),4.91
1,"Sword in the Stone, The (1963)",4.81
2,Oliver & Company (1988),4.81
3,Mr. Smith Goes to Washington (1939),4.8
4,12 Angry Men (1957),4.8
5,Speed (1994),4.79
6,"Great Escape, The (1963)",4.78
7,"Little Princess, A (1995)",4.72
8,Alice in Wonderland (1951),4.72
9,Tomorrow Never Dies (1997),4.71
