# User-Based Collaborative Filtering

## Import Libraries

In [104]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

from src.user_cf import get_top_k_recommendations

## Load Train & Test Data

In [105]:
# Read the training ratings
train = pd.read_csv("../data/curated/train.csv")

# Keep only users that have at least 10 ratings in the training file
active_users = (
    train["user_id"]
    .value_counts()
    .loc[lambda counts: counts >= 10]
    .index
)
train = train.loc[train["user_id"].isin(active_users)]

print(f"Filtered train to {len(train)} ratings from {len(active_users)} active users.")

# Read the test ratings
test = pd.read_csv("../data/curated/test.csv")
print(f"Test size: {test.shape}")

Filtered train to 80367 ratings from 943 active users.
Test size: (19633, 3)


## Build User-Item Matrix & Cosine Similarity

In [106]:
# One row per user, one column per item; (user, item) pairs without a rating are filled with 0
user_item_matrix = (
    train
    .pivot(index="user_id", columns="item_id", values="rating")
    .fillna(0)
)

# Pairwise cosine similarity between user rows, re-labelled with user_id on both axes
similarity = cosine_similarity(user_item_matrix)
similarity_matrix = pd.DataFrame(
    similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

print(f"User Item Matrix Size: {user_item_matrix.shape}")
print(similarity_matrix.iloc[:5, :5])

User Item Matrix Size: (943, 1651)
user_id         1         2         3         4         5
user_id                                                  
1        1.000000  0.150846  0.047732  0.021228  0.298226
2        0.150846  1.000000  0.101018  0.082677  0.049558
3        0.047732  0.101018  1.000000  0.363461  0.026743
4        0.021228  0.082677  0.363461  1.000000  0.011819
5        0.298226  0.049558  0.026743  0.011819  1.000000


## Pick a Target User

In [107]:
# Keep only test users that are also in the train matrix, so every
# remaining test user has a row in user_item_matrix / similarity_matrix
test = test[test["user_id"].isin(user_item_matrix.index)]
print(f"Filtered test set to {len(test)} entries with known users.")

# Fixed seed so the same target user is drawn on every Restart & Run All;
# an unseeded sample() makes all downstream outputs change between runs.
# Note: sampling is over test rows, so users with more test ratings are
# proportionally more likely to be picked.
RANDOM_SEED = 42
target_user_id = test["user_id"].sample(1, random_state=RANDOM_SEED).iloc[0]
print(f"Target user: {target_user_id}")

Filtered test set to 19633 entries with known users.
Target user: 589


## Generate Top-K Recommendations

In [108]:
# Re-execute src.user_cf so edits made to the module after its first import
# are picked up, then rebind the function name to the refreshed definition.
import importlib

import src.user_cf

# importlib.reload returns the reloaded module object
get_top_k_recommendations = importlib.reload(src.user_cf).get_top_k_recommendations

In [109]:
# Number of items to recommend; reused later as the K of the evaluation
k_movies = 1

top_k_recs = get_top_k_recommendations(
    # who to recommend for, and how many items / neighbours to use
    user_id=target_user_id,
    k=k_movies,
    top_n_neighbors=50,
    # data the recommender works from
    ratings=train,
    user_item_matrix=user_item_matrix,
    similarity_matrix=similarity_matrix,
)

print(f"Top {k_movies} recommended items for user {target_user_id}:")
top_k_recs

Top 1 recommended items for user 589:


[(11, 5.0)]

## Evaluate Precision@K

In [110]:
from src.evaluation import evaluate_precision_at_k

# Score the recommender against the filtered test set, using the same
# K (k_movies) that was used when generating recommendations.
# NOTE(review): presumably this averages precision over test users — confirm
# against src.evaluation.
precision = evaluate_precision_at_k(
    test_df=test,
    train_df=train,
    user_item_matrix=user_item_matrix,
    similarity_matrix=similarity_matrix,
    k=k_movies,
)

# Derive the label from k_movies: the previous hard-coded "@5" mislabelled
# a metric that is computed with k=k_movies.
print(f"Average Precision@{k_movies}: {precision:.4f}")

Average Precision@5: 0.0021
