# Item-Based Collaborative Filtering

## Import Libraries

In [33]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

## Load Data

In [35]:
# Read the curated train and test rating splits from CSV (train first).
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/curated/train.csv", "../data/curated/test.csv")
)

# Preview the first rows of each split, one after the other.
print(train_df.head(), test_df.head(), sep="\n")

   user_id  item_id  rating
0        1      170       5
1        1      175       5
2        1      119       5
3        1      181       5
4        1      109       5
   user_id  item_id  rating
0        1       69       3
1        1      101       2
2        1      149       2
3        1      145       2
4        1       27       2


## Build Item Similarity Matrix

In [36]:
# Users-by-items rating table built from the training split; a (user, item)
# pair with no rating is filled with 0.
user_item_matrix = (
    train_df
    .pivot(index="user_id", columns="item_id", values="rating")
    .fillna(0)
)

# Transposing puts items on the rows, so the similarity is computed item-to-item.
item_similarity = cosine_similarity(user_item_matrix.transpose())

# Wrap the raw array in a DataFrame labelled on both axes by item_id.
item_similarity_matrix = pd.DataFrame(
    data=item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

# Show only the top-left 5x5 corner as a sanity check.
print(
    "Sample Item-Item Similarity Matrix:",
    item_similarity_matrix.iloc[0:5, 0:5],
    sep="\n",
)

Sample Item-Item Similarity Matrix:
item_id         1         2         3         4         5
item_id                                                  
1        1.000000  0.352007  0.261793  0.393437  0.273510
2        0.352007  1.000000  0.227923  0.430580  0.280015
3        0.261793  0.227923  1.000000  0.228298  0.184168
4        0.393437  0.430580  0.228298  1.000000  0.279786
5        0.273510  0.280015  0.184168  0.279786  1.000000


## Import Recommendation Function (Item-Based)

In [37]:
from src.item_cf import recommend_for_user_itemcf

## Generate Sample Recommendation

In [38]:
# Keep only test users that also have a row in the training user-item matrix,
# then pick one of them to demonstrate the recommender.
test_df = test_df[test_df["user_id"].isin(user_item_matrix.index)]

# Fixed seed so the same target user is selected on every Restart & Run All.
target_user_id = test_df["user_id"].sample(1, random_state=42).iloc[0]
print(f"Target user: {target_user_id}")

# Number of recommendations to request per user. The evaluation cells further
# down reuse this value as K, and the recorded outputs were produced with 5.
k_movies = 5

# Get the top-k_movies recommendations for the sampled user.
# The result is iterated below as (item, predicted score) pairs.
top_k = recommend_for_user_itemcf(
    user_id=target_user_id,
    ratings_df=train_df,
    user_item_matrix=user_item_matrix,
    item_similarity_matrix=item_similarity_matrix,
    k=k_movies
)

# Report the actual K rather than a hard-coded 5 so the label never goes stale.
print(f"Top {k_movies} item-based recommendations for user {target_user_id}:")
for item, score in top_k:
    print(f"Item {item} → Predicted Score: {score:.2f}")


Target user: 840
Top 5 item-based recommendations for user 840:
Item 788 → Predicted Score: 5.00
Item 1175 → Predicted Score: 4.88
Item 1304 → Predicted Score: 4.74
Item 1309 → Predicted Score: 4.74
Item 1310 → Predicted Score: 4.74


## Model Evaluation

### Imports

In [39]:
import numpy as np

### Precision@K

In [40]:
# Precision@K: for each test user, the share of the k_movies recommended items
# that also appear among that user's test items; then averaged over users.
precs = []

# Group the test split once instead of boolean-masking the whole frame for
# every user. sort=False keeps users in first-appearance order, the same order
# test_df["user_id"].unique() would give.
for user_id, user_rows in test_df.groupby("user_id", sort=False):
    # Users without a row in the training matrix are skipped.
    if user_id not in user_item_matrix.index:
        continue

    # NOTE(review): every test item counts as relevant regardless of its
    # rating value — confirm that this is the intended definition.
    actual_items = set(user_rows["item_id"])

    top_k = recommend_for_user_itemcf(
        user_id=user_id,
        ratings_df=train_df,
        user_item_matrix=user_item_matrix,
        item_similarity_matrix=item_similarity_matrix,
        k=k_movies
    )

    # Count recommended items that the user actually has in the test split.
    hit_count = sum(item in actual_items for item, _ in top_k)

    # Denominator is always K (not the number of items returned); the guard
    # avoids dividing by zero when k_movies is 0.
    user_precision = hit_count / k_movies if k_movies else 0
    precs.append(user_precision)

# Mean over evaluated users; 0.0 when no user could be evaluated.
precision = np.mean(precs) if precs else 0.0

print(f"Average Precision@{k_movies}: {precision * 100:.4f}%")


Average Precision@5: 0.1060%


### Recall@K

In [41]:
# Recall@K: for each test user, the share of that user's test items that show
# up among the k_movies recommended items; then averaged over users.
recs = []

# Group the test split once instead of boolean-masking the whole frame for
# every user. sort=False keeps users in first-appearance order, the same order
# test_df["user_id"].unique() would give.
for user_id, user_rows in test_df.groupby("user_id", sort=False):
    # Users without a row in the training matrix are skipped.
    if user_id not in user_item_matrix.index:
        continue

    # NOTE(review): every test item counts as relevant regardless of its
    # rating value — confirm that this is the intended definition.
    actual_items = set(user_rows["item_id"])

    top_k = recommend_for_user_itemcf(
        user_id=user_id,
        ratings_df=train_df,
        user_item_matrix=user_item_matrix,
        item_similarity_matrix=item_similarity_matrix,
        k=k_movies
    )

    # Distinct recommended items that are also in the user's test items.
    predicted_items = {item for item, _ in top_k}
    hit_count = len(predicted_items & actual_items)

    # Denominator is the number of distinct test items for this user; the
    # guard covers an empty set.
    user_recall = hit_count / len(actual_items) if actual_items else 0
    recs.append(user_recall)

# Mean over evaluated users; 0.0 when no user could be evaluated.
recall = np.mean(recs) if recs else 0.0

print(f"Average Recall@{k_movies}: {recall * 100:.4f}%")

Average Recall@5: 0.0360%
