# Model Analysis

Evaluate the content-based recommendation model: leave-one-out hit rate and MRR, rating–similarity correlation, a taste-profile summary, and a spot check of the top recommendations.

In [None]:
import pandas as pd
import numpy as np
from src.data.merge import load_rated_movies
from src.features.vectorize import MovieVectorizer
from src.features.profile import build_taste_profile, profile_summary
from src.model.evaluate import leave_one_out_eval, rating_correlation
from src.model.recommend import generate_recommendations
from src.data.tmdb import fetch_popular_movies

# Load the rated movies; `df` is the input to every evaluation cell below.
df = load_rated_movies()
n_rated = len(df)
print(f"Rated movies: {n_rated}")

# Fit the vectorizer on the rated set and report the shape of its output.
vectorizer = MovieVectorizer()
vectors = vectorizer.fit_transform(df)
vector_shape = vectors.shape
print(f"Feature vector shape: {vector_shape}")

In [None]:
# Leave-one-out evaluation
# The evaluation settings live in named variables so the printed header is
# derived from the same values that are passed to the evaluator, instead of
# repeating the rating threshold as a hard-coded literal in the label.
LOO_MIN_RATING = 4.0
LOO_TOP_K = 50

loo = leave_one_out_eval(
    df,
    vectorizer,
    min_rating=LOO_MIN_RATING,
    top_k=LOO_TOP_K,
)

# The result is read as a mapping; 'top_k' is echoed back from the result
# rather than from the local setting, as in the original cell.
print(f"Leave-one-out ({LOO_MIN_RATING}+ movies, top-{loo['top_k']}):")
print(f"  Hit rate: {loo['hit_rate']:.2%}")
print(f"  MRR: {loo['mean_reciprocal_rank']:.4f}")
print(f"  Evaluated: {loo['n_evaluated']} movies")

In [None]:
# Rating correlation
# The result is read as a mapping with 'spearman_r' and 'p_value' entries.
corr = rating_correlation(df, vectorizer)
print("Rating-similarity correlation:")
print(f"  Spearman r: {corr['spearman_r']:.4f}")
# General format with 3 significant digits: a fixed six-decimal format would
# render any p-value below 5e-7 as "0.000000", hiding how small it really is.
print(f"  p-value: {corr['p_value']:.3g}")

In [None]:
# Profile summary
# The summary is read as a mapping; 'top_genres' and 'top_directors' are
# joined with ", " so they are expected to be iterables of strings
# (NOTE(review): confirm against profile_summary's return value).
summary = profile_summary(df)
print("\nTaste Profile:")  # plain literal: no placeholders, so no f-prefix
print(f"  Total rated: {summary['total_rated']}")
print(f"  Avg rating: {summary['avg_rating']}")
print(f"  Top genres: {', '.join(summary['top_genres'])}")
print(f"  Top directors: {', '.join(summary['top_directors'])}")

In [None]:
# Spot-check recommendations
# Fetch a candidate pool, score it against the rated movies, and print the
# top results with their similarity scores and any attached explanations.
candidates = fetch_popular_movies(pages=10)  # smaller pool for quick check
candidates_df = pd.DataFrame(candidates)
recs = generate_recommendations(df, candidates_df, vectorizer, top_n=10)

print("\nTop 10 Recommendations:")
# Number the rows from 1 for display; the DataFrame index itself is ignored.
for rank, (_, row) in enumerate(recs.iterrows(), start=1):
    # The separator is a real em dash (the original held the mojibake of a
    # UTF-8 em dash decoded as cp1252). 'year' falls back to '?' when the
    # column is absent.
    print(f"  {rank}. {row['title']} ({row.get('year', '?')}) — {row['similarity_score']:.4f}")
    # 'explanation' is optional; when the column is absent nothing is printed.
    for reason in row.get('explanation', []):
        print(f"     {reason}")