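# User-Based Collaborative Filtering

This notebook builds a user-item rating matrix from the training split, computes user-user cosine similarity, generates top-K recommendations for one sampled test user, and reports average Precision@K against the test split.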

## Import Libraries

In [40]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

from src.evaluation import evaluate_precision_at_k
from src.user_cf import (
    get_top_k_recommendations
)

## Load Train & Test Data

In [41]:
# Read the curated train/test rating splits from CSV (paths are relative to
# the notebook's working directory).
train = pd.read_csv("../data/curated/train.csv")
test = pd.read_csv("../data/curated/test.csv")

# Quick sanity check on the (rows, columns) of each split.
print(f"Train size: {train.shape}, Test size: {test.shape}")

Train size: (80367, 3), Test size: (19633, 3)


## Build User-Item Matrix & Cosine Similarity

In [42]:
# Reshape the long ratings table into a wide one: one row per user_id, one
# column per item_id, cell = rating. Pairs with no rating come out of the
# pivot as NaN and are replaced by 0 so the similarity computation sees
# plain numbers.
user_item_matrix = (
    train
    .pivot(index="user_id", columns="item_id", values="rating")
    .fillna(0)
)

# Pairwise cosine similarity between the user rows, then wrapped in a
# DataFrame labelled by user_id on both axes so it can be indexed by user.
similarity = cosine_similarity(user_item_matrix)
similarity_matrix = pd.DataFrame(
    data=similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

print(f"User Item Matrix Size: {user_item_matrix.shape}")
# Only the top-left 5x5 corner is shown to keep the output readable.
print(similarity_matrix.iloc[:5, :5])

User Item Matrix Size: (943, 1651)
user_id         1         2         3         4         5
user_id                                                  
1        1.000000  0.150846  0.047732  0.021228  0.298226
2        0.150846  1.000000  0.101018  0.082677  0.049558
3        0.047732  0.101018  1.000000  0.363461  0.026743
4        0.021228  0.082677  0.363461  1.000000  0.011819
5        0.298226  0.049558  0.026743  0.011819  1.000000


## Pick a Target User

In [43]:
# Pick one user that appears in the test split to demo recommendations for.
# The draw is over test rows, so users with more test ratings are more likely
# to be chosen. A fixed random_state makes the draw repeatable across
# Restart & Run All; without it every run selects a different user and the
# recommendations shown further down cannot be reproduced.
target_user_id = test["user_id"].sample(1, random_state=42).iloc[0]
print(f"Target user: {target_user_id}")

Target user: 8


## Generate Top-K Recommendations

In [44]:
# Number of recommendations to request. Kept in one place so the value passed
# to the recommender and the label printed below can never disagree.
TOP_K = 5

# Ask the user-based CF helper (src.user_cf) for recommendations for the
# sampled user, based on the training ratings and the user-user similarities.
# Judging by the recorded output, the result is a list of
# (item_id, score) pairs - confirm against src.user_cf.
top_k_recs = get_top_k_recommendations(
    user_id=target_user_id,
    ratings=train,
    user_item_matrix=user_item_matrix,
    similarity_matrix=similarity_matrix,
    k=TOP_K,
)

print(f"Top {TOP_K} recommended items for user {target_user_id}:")
top_k_recs

Top 5 recommended items for user 8:


[(1189, 5.000000000000001),
 (1628, 5.000000000000001),
 (1642, 5.000000000000001),
 (814, 5.0),
 (850, 5.0)]

## Evaluate Precision@K

In [45]:
# Cutoff K for the metric. Defined once so the value passed to the evaluator
# and the label in the printed result stay in sync.
EVAL_K = 5

# Score the recommender with evaluate_precision_at_k (implemented in
# src.evaluation and imported in the notebook's import cell), using the
# train/test splits and the user-item and similarity matrices.
precision = evaluate_precision_at_k(
    test_df=test,
    train_df=train,
    user_item_matrix=user_item_matrix,
    similarity_matrix=similarity_matrix,
    k=EVAL_K,
)

print(f"Average Precision@{EVAL_K}: {precision:.4f}")

Average Precision@5: 0.0011
