# Evaluation — Popularity vs Content vs ALS vs Hybrid

In [None]:

import pandas as pd
import numpy as np

from src.data_load import load_movielens
from src.preprocess import filter_min_interactions, last_item_test_split
from src.recommenders.popularity import fit_popularity, recommend as recommend_pop
from src.recommenders.content_based import fit_content_model, recommend_content
from src.recommenders.collaborative import fit_als, recommend_als
from src.recommenders.hybrid import hybrid_recommend
from src.eval import precision_at_k, recall_at_k, hit_rate_at_k

# Offline evaluation: fit every recommender on the training split only, then
# score each one's top-K list against the items held out for each user.

# 1) Load + prep
ratings, movies = load_movielens('data/raw')
ratings = filter_min_interactions(ratings, min_user=3, min_item=10)

# 2) Chronological last-item split per user
train, test = last_item_test_split(ratings)
# Held-out movie ids per user (a list per userId); this is the ground truth.
test_truth = test.groupby('userId')['movieId'].apply(list)

# 3) Fit models on train only
pop_table = fit_popularity(train, m=50)
content_model = fit_content_model(movies)
try:
    als_bundle = fit_als(train, factors=32, reg=0.05, iterations=10)
except Exception as e:
    # Deliberate best effort: an ALS failure is reported and the remaining
    # models are still evaluated.
    # NOTE(review): the 'ALS' rows are then scored on empty recommendation
    # lists, so its averages presumably come out as 0 rather than "not
    # available" -- confirm how the metric helpers treat an empty list.
    print("ALS kurulamadı:", e)
    als_bundle = None

# 4) Evaluate
K = 10
# Derive the column labels from K so they cannot drift from the cutoff that is
# actually passed to the metric functions.
precision_col = f'precision@{K}'
recall_col = f'recall@{K}'
hit_col = f'hit@{K}'
metric_cols = [precision_col, recall_col, hit_col]

# Build every user's set of training items in one grouped pass instead of
# boolean-masking the whole training frame once per user inside the loop.
seen_by_user = {
    uid: set(movie_ids.tolist())
    for uid, movie_ids in train.groupby('userId')['movieId']
}

users = sorted(train['userId'].unique().tolist())
rows = []
for u in users:
    gt = test_truth.get(u, [])
    if not gt:
        # No held-out item for this user, so there is nothing to score against.
        continue
    seen = seen_by_user.get(u, set())

    pop_ids = recommend_pop(pop_table, k=K, exclude_ids=seen)
    # `or []` normalises a falsy result (e.g. None) to an empty list.
    c_ids = recommend_content(u, train, movies, content_model, k=K, min_like=4.0) or []
    als_ids = recommend_als(u, als_bundle, k=K, exclude_ids=seen) if als_bundle else []
    hyb_ids = hybrid_recommend(u, train, movies, pop_table, content_model, recommend_content,
                               als_bundle, recommend_als, k=K, min_like=4.0, w_cf=0.6, w_content=0.4)

    # One result row per (algorithm, user) pair.
    for name, recs in [('Popularity', pop_ids), ('Content', c_ids), ('ALS', als_ids), ('Hybrid', hyb_ids)]:
        rows.append({
            'algo': name,
            'userId': u,
            precision_col: precision_at_k(recs, gt, k=K),
            recall_col: recall_at_k(recs, gt, k=K),
            hit_col: hit_rate_at_k(recs, gt, k=K),
        })

res = pd.DataFrame(rows)
# Mean of each metric per algorithm, best hit rate first.
summary = res.groupby('algo')[metric_cols].mean().sort_values(hit_col, ascending=False)
summary


In [None]:

import os

# Persist the per-algorithm summary table as CSV under the reports directory.
report_dir = "reports"
report_path = "reports/eval_results.csv"

# Create the output directory if it is missing; a pre-existing one is fine.
os.makedirs(report_dir, exist_ok=True)
summary.to_csv(report_path)

# Trailing expression so the notebook cell displays the path that was written.
report_path
