# Product Recommendation System - MovieLens (Evaluation)

We compare:
- Popularity baseline
- Item-based collaborative filtering

Metrics:
- Precision@K
- Recall@K
- MAP@K

Evaluation uses a **time-based split per user**.

In [6]:
import sys

from pathlib import Path

# Absolute path of the notebook's parent directory (presumably the repository
# root that contains the `src` package -- confirm against the project layout).
PROJECT_ROOT = Path('..').resolve()

# Prepend it to sys.path once, so re-running this cell does not add duplicates.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))


In [17]:
import pandas as pd
import numpy as np

from src.data import load_movielens, time_based_split
from src.metrics import evaluate_topK
from src.baselines import PopularityRecommender
from src.filtering import fit_item_cf, recommend_for_user_item_cf

In [20]:
# Read the MovieLens tables from the sibling `data` directory.
DATA_DIR = Path('..', 'data')
ratings, movies = load_movielens(DATA_DIR)

# Split the ratings into train/test with the project helper.
# NOTE(review): test_ratio=0.2 presumably holds out each user's latest 20% of
# ratings and min_ratings_per_user=5 presumably filters sparse users --
# confirm against src.data.time_based_split.
train, test = time_based_split(ratings, test_ratio=0.2, min_ratings_per_user=5)

# Dump both partitions, each preceded by its banner line.
print(
    '--------------- Train -------------',
    train,
    '--------------- Test -------------',
    test,
    sep='\n',
)

--------------- Train -------------
        userId  movieId  rating   timestamp
43           1      804     4.0   964980499
73           1     1210     5.0   964980499
120          1     2018     5.0   964980523
171          1     2628     4.0   964980523
183          1     2826     4.0   964980523
...        ...      ...     ...         ...
100272     610    55067     3.5  1493848671
100629     610   103219     3.5  1493848674
100231     610    51666     2.0  1493848680
100699     610   112727     3.0  1493848682
100407     610    71732     3.5  1493848688

[80672 rows x 4 columns]
--------------- Test -------------
        userId  movieId  rating   timestamp
76           1     1219     2.0   964983393
91           1     1348     4.0   964983393
174          1     2644     4.0   964983393
176          1     2654     5.0   964983393
83           1     1258     3.0   964983414
...        ...      ...     ...         ...
100612     610   101739     3.5  1495959269
99540      610       70

## Prepare relevance mapping (from test set)

For each user:
- Relevant items = movies rated in the test period

In [21]:
# Ground truth for evaluation: userId -> list of movieIds (as plain ints) that
# the user rated in the test split.
user_to_relevant = {
    user_id: [int(movie_id) for movie_id in movie_ids.tolist()]
    for user_id, movie_ids in test.groupby('userId')['movieId']
}

# Number of users that have at least one test rating.
len(user_to_relevant)


610

## Popularity baseline evaluation

In [25]:
# Build the popularity baseline from the training split only.
# NOTE(review): min_ratings=50 is presumably the minimum number of ratings a
# movie needs to be considered -- confirm in src.baselines.
pop_model = (
    PopularityRecommender(min_ratings=50)
    .fit(train)
)

def eval_popularity(k: int):
    """Evaluate the popularity baseline at cutoff ``k``.

    Requests ``k`` recommendations from ``pop_model`` for every user in
    ``user_to_relevant``, discards users whose recommendation result is
    empty/falsy, and hands the remaining recommendations together with the
    relevance mapping to ``evaluate_topK``.

    Args:
        k: Number of recommendations requested per user and the cutoff
            forwarded to ``evaluate_topK``.

    Returns:
        Whatever ``evaluate_topK`` returns for these inputs.
    """
    # Lazily pair each user with their recommendations, in mapping order.
    per_user = (
        (uid, pop_model.recommend(uid, train_df=train, k=k))
        for uid in user_to_relevant
    )

    # Keep only users for whom the recommender produced something.
    user_to_recommended = {uid: recs for uid, recs in per_user if recs}

    return evaluate_topK(user_to_recommended, user_to_relevant, k)

# Print the popularity baseline metrics at each cutoff.
for k in (5, 10, 20):
    print(f'K - {k}', eval_popularity(k))
          

K - 5 {'precision@K': 0.08721311475409836, 'recall@K': 0.02403020489241711, 'map@K': 0.057685792349726775, 'num_users': 610.0}
K - 10 {'precision@K': 0.07360655737704917, 'recall@K': 0.03893487264922297, 'map@K': 0.042133675328054725, 'num_users': 610.0}
K - 20 {'precision@K': 0.06549180327868853, 'recall@K': 0.06781060143312596, 'map@K': 0.03567173976378464, 'num_users': 610.0}


## Item-based collaborative filtering evaluation

In [27]:
# Fit the item-based CF model on the training split only.
# NOTE(review): use_implicit=True presumably treats ratings as binary
# interactions, and shrinkage=0.0 presumably disables similarity shrinkage --
# confirm in src.filtering.fit_item_cf.
item_cf_model = fit_item_cf(train_ratings=train, use_implicit=True, shrinkage=0.0)

def eval_item_cf(k: int):
    """Evaluate the item-based CF model at cutoff ``k``.

    Requests ``k`` recommendations from ``recommend_for_user_item_cf`` (with a
    fixed ``candidate_pool`` of 200) for every user in ``user_to_relevant``,
    discards users whose recommendation result is empty/falsy, and hands the
    remaining recommendations together with the relevance mapping to
    ``evaluate_topK``.

    Args:
        k: Number of recommendations requested per user and the cutoff
            forwarded to ``evaluate_topK``.

    Returns:
        Whatever ``evaluate_topK`` returns for these inputs.
    """
    def _recommend(uid):
        # Single place that fixes the model, training data and pool size.
        return recommend_for_user_item_cf(
            model=item_cf_model,
            user_id=uid,
            train_ratings=train,
            k=k,
            candidate_pool=200,
        )

    # Lazily pair each user with their recommendations, in mapping order.
    per_user = ((uid, _recommend(uid)) for uid in user_to_relevant)

    # Keep only users for whom the recommender produced something.
    user_to_recommended = {uid: recs for uid, recs in per_user if recs}

    return evaluate_topK(user_to_recommended, user_to_relevant, k)

# Print the item-based CF metrics at each cutoff.
for k in (5, 10, 20):
    print(f'k = {k}', eval_item_cf(k))

k = 5 {'precision@K': 0.10426229508196723, 'recall@K': 0.028497259905331127, 'map@K': 0.06400000000000002, 'num_users': 610.0}
k = 10 {'precision@K': 0.0980327868852459, 'recall@K': 0.0561115699916786, 'map@K': 0.05054400161704025, 'num_users': 610.0}
k = 20 {'precision@K': 0.08754098360655739, 'recall@K': 0.09800880588852393, 'map@K': 0.04564617368851854, 'num_users': 610.0}


## Summary

- Popularity is a strong baseline but non-personalized
- Item-based CF improves personalization and recall
- This mirrors real-world recommender system evaluation:
  baseline -> collaborative filtering -> (future) hybrid models