# 04: Ablation Study

This notebook runs the ablation study to measure the contribution
of each component:

1. NeuMF (baseline)
2. NeuMF + Genre
3. NeuMF + Synopsis
4. NeuMF + Genre + Synopsis
5. NeuMF + Genre + Synopsis + Gated Fusion (Final)

In [None]:
# Imports
import json
import pickle
import shutil
import sys
from pathlib import Path

# Make the parent directory importable so the `src` package resolves
sys.path.append('..')

import numpy as np
import pandas as pd

from src.config import config
from src.models.neumf_plus import create_neumf_plus_variant
from src.models.neumf_plus import NeuMFPlus
from src.train import train_model
from src.evaluate import evaluate_model, evaluate_cold_start
from src.negative_sampling import build_user_history

# Let the config pick the compute device, then report which one it chose
config._set_device()
active_device = config.train.DEVICE
print(f"Using device: {active_device}")

## Load Data

In [None]:
# Load the pre-split interaction tables.
# NOTE(review): pd.read_pickle / pickle.load can execute arbitrary code while
# unpickling; only point these paths at artifacts produced by this project.
train_df = pd.read_pickle(config.paths.train_path)
val_df = pd.read_pickle(config.paths.val_path)
test_df = pd.read_pickle(config.paths.test_path)
cold_start_df = pd.read_pickle(config.paths.cold_start_test_path)

# Load mappings (dataset cardinalities used to size the models)
with open(config.paths.mappings_path, 'rb') as f:
    mappings = pickle.load(f)

num_users = mappings['num_users']
num_items = mappings['num_items']
num_genres = mappings['num_genres']

# Prepare data: plain arrays of user / item ids for each split
train_users = train_df['userId'].values
train_items = train_df['movieId'].values
val_users = val_df['userId'].values
val_items = val_df['movieId'].values
test_users = test_df['userId'].values
test_items = test_df['movieId'].values

# Build user history from the training interactions only
user_history = build_user_history(train_users, train_items)

# Prepare content features: one stacked row per interaction of the given split
genre_features = np.stack(train_df['genre_features'].values)
val_genre_features = np.stack(val_df['genre_features'].values)

# For simplicity, we'll use random embeddings for synopsis
# (In production, you would pre-compute these with Sentence-BERT)
# The generator is seeded so that Restart & Run All reproduces the same
# placeholder embeddings; previously they changed on every kernel restart.
# NOTE(review): random vectors carry no content signal, so the "synopsis"
# variants cannot be interpreted until real embeddings are plugged in.
SYNOPSIS_PLACEHOLDER_SEED = 42
synopsis_rng = np.random.default_rng(SYNOPSIS_PLACEHOLDER_SEED)
synopsis_embeddings = synopsis_rng.standard_normal(
    (num_items, config.model.SYNOPSIS_EMBEDDING_DIM),
    dtype=np.float32,
)

print(f"Dataset: {num_users:,} users, {num_items:,} items")
print(f"Train: {len(train_df):,} ratings")
print(f"Test: {len(test_df):,} ratings")
print(f"Cold-start test: {len(cold_start_df):,} ratings")

## Run Ablation Study

In [None]:
# Define variants (same order as the list in the notebook introduction)
variants = [
    'neumf',
    'neumf_genre',
    'neumf_synopsis',
    'neumf_genre_synopsis',
    'neumf_plus',
]

# variant name -> training history, best validation HR@10, checkpoint path
ablation_results = {}

for variant in variants:
    print("\n" + "="*60)
    print(f"TRAINING: {variant.upper()}")
    print("="*60)

    # Create model
    model = create_neumf_plus_variant(
        variant=variant,
        num_users=num_users,
        num_items=num_items,
        num_genres=num_genres,
    )

    print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # 'neumf_plus' is the final Genre + Synopsis + Gated Fusion model, but its
    # name contains neither keyword, so a plain substring test would silently
    # train it without any content features. Opt it in explicitly.
    uses_genre = 'genre' in variant or variant == 'neumf_plus'
    uses_synopsis = 'synopsis' in variant or variant == 'neumf_plus'

    # Prepare kwargs for training / validation
    train_kwargs = {}
    val_kwargs = {}

    if uses_genre:
        train_kwargs['genre_features'] = genre_features
        val_kwargs['genre_features'] = val_genre_features

    if uses_synopsis:
        train_kwargs['synopsis_embeddings'] = synopsis_embeddings
        val_kwargs['synopsis_embeddings'] = synopsis_embeddings

    # Train
    # train_kwargs used to be built and then dropped, so no variant ever saw
    # its training-side content features.
    # NOTE(review): assumes train_model accepts genre_features /
    # synopsis_embeddings keyword arguments like evaluate_model does — confirm.
    history = train_model(
        model=model,
        train_users=train_users,
        train_items=train_items,
        val_data={
            'users': val_users,
            'items': val_items,
            **val_kwargs,
        },
        num_items=num_items,
        num_epochs=config.train.NUM_EPOCHS,
        batch_size=config.train.BATCH_SIZE,
        learning_rate=config.train.LEARNING_RATE,
        weight_decay=config.train.WEIGHT_DECAY,
        num_negatives=config.train.NUM_NEGATIVES,
        device=config.train.DEVICE,
        save_dir=config.paths.TRAINED_MODELS_DIR,
        early_stopping_patience=config.train.EARLY_STOPPING_PATIENCE,
        log_dir=f"{config.paths.TENSORBOARD_LOG_DIR}/{variant}",
        **train_kwargs,
    )

    # Every variant trains into the same save_dir, and the evaluation cells
    # look for "<ModelClass>_best.pt" there, so a later variant would replace
    # an earlier one's best checkpoint. Keep a copy named after the variant.
    # NOTE(review): presumes train_model names its best checkpoint after the
    # model class — confirm against src.train.
    shared_ckpt = Path(config.paths.TRAINED_MODELS_DIR) / f"{type(model).__name__}_best.pt"
    variant_ckpt = shared_ckpt.with_name(f"{variant}_best.pt")
    if shared_ckpt.exists():
        shutil.copyfile(shared_ckpt, variant_ckpt)
    else:
        print(f"WARNING: expected checkpoint {shared_ckpt} not found; no per-variant copy saved")
        variant_ckpt = None

    # Store results
    val_metrics = history['val_metrics'] or []
    ablation_results[variant] = {
        'history': history,
        'best_val_metric': max((m.get('hr@10', 0) for m in val_metrics), default=0),
        'checkpoint': str(variant_ckpt) if variant_ckpt is not None else None,
    }

## Evaluate All Variants

In [None]:
# Load and evaluate all models on the held-out test split
final_results = {}

models_dir = Path(config.paths.TRAINED_MODELS_DIR)

for variant in variants:
    print(f"\nEvaluating {variant}...")

    # Load best model.
    # Prefer a checkpoint named after the variant. The class-named file is the
    # same path for every variant, so falling back to it means every row of
    # the ablation table is computed from the same weights.
    model_path = models_dir / f"{variant}_best.pt"
    if not model_path.exists():
        model_path = models_dir / f"{NeuMFPlus.__name__}_best.pt"
        print(f"  WARNING: no checkpoint specific to '{variant}'; using shared {model_path.name}")
    # NOTE(review): confirm NeuMFPlus.load rebuilds the variant's architecture
    # (not the default full model) before trusting per-variant numbers.
    model = NeuMFPlus.load(str(model_path), NeuMFPlus, num_users=num_users, num_items=num_items, num_genres=num_genres)

    # 'neumf_plus' is the full Genre + Synopsis + Gated Fusion model, but its
    # name contains neither keyword, so it must be opted in explicitly.
    uses_genre = 'genre' in variant or variant == 'neumf_plus'
    uses_synopsis = 'synopsis' in variant or variant == 'neumf_plus'

    # Evaluate on test set
    # NOTE(review): genre_features is stacked from train_df rows while the
    # users/items here come from test_df — confirm evaluate_model expects
    # that layout.
    test_metrics = evaluate_model(
        model=model,
        users=test_users,
        items=test_items,
        k_values=config.eval.K_VALUES,
        device=config.train.DEVICE,
        num_items=num_items,
        user_history=user_history,
        genre_features=genre_features if uses_genre else None,
        synopsis_embeddings=synopsis_embeddings if uses_synopsis else None,
    )

    final_results[variant] = test_metrics

    print(f"  HR@10: {test_metrics['hr@10']:.4f}")
    print(f"  NDCG@10: {test_metrics['ndcg@10']:.4f}")
    print(f"  AUC: {test_metrics['auc']:.4f}")

## Cold-Start Evaluation

In [None]:
# Evaluate on cold-start set
cs_users = cold_start_df['userId'].values
cs_items = cold_start_df['movieId'].values
cs_is_cold_item = cold_start_df['is_cold_item'].values

cold_start_results = {}

cs_models_dir = Path(config.paths.TRAINED_MODELS_DIR)

for variant in variants:
    print(f"\nEvaluating {variant} on cold-start...")

    # Load model.
    # Prefer a checkpoint named after the variant. The class-named file is the
    # same path for every variant, so falling back to it means all variants
    # are scored with the same weights.
    model_path = cs_models_dir / f"{variant}_best.pt"
    if not model_path.exists():
        model_path = cs_models_dir / f"{NeuMFPlus.__name__}_best.pt"
        print(f"  WARNING: no checkpoint specific to '{variant}'; using shared {model_path.name}")
    # NOTE(review): confirm NeuMFPlus.load rebuilds the variant's architecture
    # (not the default full model) before trusting per-variant numbers.
    model = NeuMFPlus.load(str(model_path), NeuMFPlus, num_users=num_users, num_items=num_items, num_genres=num_genres)

    # 'neumf_plus' is the full Genre + Synopsis + Gated Fusion model, but its
    # name contains neither keyword, so it must be opted in explicitly.
    uses_genre = 'genre' in variant or variant == 'neumf_plus'
    uses_synopsis = 'synopsis' in variant or variant == 'neumf_plus'

    # Evaluate cold-start vs warm
    # NOTE(review): genre_features is stacked from train_df rows while the
    # users/items here come from cold_start_df — confirm evaluate_cold_start
    # expects that layout.
    results = evaluate_cold_start(
        model=model,
        test_users=cs_users,
        test_items=cs_items,
        cold_start_mask=cs_is_cold_item,
        k_values=[10],
        device=config.train.DEVICE,
        num_items=num_items,
        user_history=user_history,
        genre_features=genre_features if uses_genre else None,
        synopsis_embeddings=synopsis_embeddings if uses_synopsis else None,
    )

    cold_start_results[variant] = results

    print(f"  Warm HR@10: {results['warm']['hr@10']:.4f}")
    # The cold-start bucket is optional in the returned dict
    if 'cold_start' in results:
        print(f"  Cold-start HR@10: {results['cold_start']['hr@10']:.4f}")

## Save Results

In [None]:
# Save ablation results
def _json_default(value):
    """Fallback encoder for values the ``json`` module cannot serialise.

    Converts NumPy scalars to the matching Python scalar and NumPy arrays to
    nested lists; anything else raises ``TypeError`` as ``json`` would.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# The metric dicts are nested (e.g. results['warm']['hr@10']), so a one-level
# float() conversion misses NumPy scalars deeper down; the `default` hook is
# applied by json at every nesting level instead.
results_to_save = {
    'final_results': final_results,
    'cold_start_results': cold_start_results,
}

# Make sure the output directory exists before writing
experiments_dir = Path(config.paths.EXPERIMENTS_DIR)
experiments_dir.mkdir(parents=True, exist_ok=True)

with open(experiments_dir / "ablation_results.json", 'w') as f:
    json.dump(results_to_save, f, indent=2, default=_json_default)

print("\nResults saved!")

## Summary

In [None]:
# Tabulate the test metrics: one row per variant, one column per metric
rule = "="*60
results_df = pd.DataFrame(final_results).transpose()

# Results banner followed by the table itself
print("\n" + rule, "ABLATION STUDY RESULTS", rule, results_df, sep="\n")

# Closing banner
print("\n" + rule, "ABLATION STUDY COMPLETE!", rule, sep="\n")