# Model Inference Testing

This notebook tests the trained NeuMF+ models.

**Features:**
- Load trained model from checkpoint
- Verify model configuration (genre/synopsis)
- Predict scores for user-item pairs
- Generate top-K recommendations
- Test genre-only vs genre+synopsis

## Setup

In [7]:
import sys
sys.path.insert(0, '.')

import os
import pickle
import numpy as np
import pandas as pd
import torch

from inference import load_model, load_mappings, load_features, predict_score, recommend

print("✓ Imports loaded!")

ModuleNotFoundError: No module named 'inference'

import sys
import os

# Add project root to Python path
# Handle both local and Colab environments
if os.path.exists('inference.py'):
    # Running from project root
    sys.path.insert(0, '.')
elif os.path.exists('../inference.py'):
    # Running from notebooks directory
    sys.path.insert(0, '..')
else:
    # Try absolute path
    project_root = os.path.abspath('..')
    if os.path.exists(os.path.join(project_root, 'inference.py')):
        sys.path.insert(0, project_root)

import pickle
import numpy as np
import pandas as pd
import torch

# Check if inference module is available
try:
    from inference import load_model, load_mappings, load_features, predict_score, recommend
    print("✓ Inference module loaded!")
    print(f"  Working directory: {os.getcwd()}")
except ModuleNotFoundError as e:
    print(f"❌ Error: {e}")
    print(f"\nCurrent working directory: {os.getcwd()}")
    print(f"\nPlease check that inference.py exists in the project root.")
    print("You may need to navigate to the project directory first.")

In [8]:
# Load model
checkpoint_path = 'experiments/trained_models/NeuMFPlus_genre_synopsis_best.pt'

if not os.path.exists(checkpoint_path):
    print(f"❌ Checkpoint not found: {checkpoint_path}")
    print("\nAvailable checkpoints:")
    trained_dir = 'experiments/trained_models'
    if os.path.exists(trained_dir):
        for f in os.listdir(trained_dir):
            if f.endswith('.pt'):
                print(f"  - {f}")
else:
    model, checkpoint = load_model(checkpoint_path, device='cpu')
    config = checkpoint['model_config']
    
    print("\n" + "="*70)
    print("MODEL CONFIGURATION")
    print("="*70)
    print(f"use_genre: {config.get('use_genre')}")
    print(f"use_synopsis: {config.get('use_synopsis')}")
    print(f"use_gated_fusion: {config.get('use_gated_fusion')}")
    print(f"\nParameters: {sum(p.numel() for p in model.parameters()):,}")
    
    if 'metrics' in checkpoint:
        metrics = checkpoint['metrics']
        print("\nValidation Metrics:")
        for k, v in metrics.items():
            print(f"  {k}: {v:.4f}")

❌ Checkpoint not found: experiments/trained_models/NeuMFPlus_genre_synopsis_best.pt

Available checkpoints:


In [None]:
# Load mappings and features
data = load_mappings()
features = load_features()

print("\n" + "="*70)
print("DATA LOADED")
print("="*70)
print(f"\nUsers: {data['num_users']:,}")
print(f"Items: {data['num_items']:,}")
print(f"Genres: {len(data['genre_names'])}")
print(f"\nGenre names: {data['genre_names']}")

if 'genre_features' in features:
    print(f"\n✓ Genre features loaded: {features['genre_features'].shape}")
else:
    print("\n⚠️  Genre features not loaded")

if 'synopsis_embeddings' in features:
    print(f"✓ Synopsis embeddings loaded: {features['synopsis_embeddings'].shape}")
else:
    print("⚠️  Synopsis embeddings not loaded")

## Test 1: Single User-Item Prediction

In [None]:
# Predict score for a specific user-item pair
user_id = 100
item_id = 500

print("="*70)
print(f"PREDICTION: User {user_id} -> Item {item_id}")
print("="*70)

# Prepare inputs based on model config
genre_vector = None
synopsis_embedding = None

if config.get('use_genre') and 'genre_features' in features:
    genre_vector = features['genre_features'][item_id]
    print(f"\nGenre vector (first 10): {genre_vector[:10]}")

if config.get('use_synopsis') and 'synopsis_embeddings' in features:
    synopsis_embedding = features['synopsis_embeddings'][item_id]
    print(f"Synopsis embedding (first 5): {synopsis_embedding[:5]}")

# Predict
score = predict_score(
    model,
    user_id=user_id,
    item_id=item_id,
    genre_vector=genre_vector,
    synopsis_embedding=synopsis_embedding,
    device='cpu'
)

print(f"\n✅ Predicted score: {score:.4f}")
print(f"   (Probability user will like this item)")

## Test 2: Top-K Recommendations

In [None]:
# Get top-K recommendations for a user
user_id = 100
k = 10

print("="*70)
print(f"TOP-{k} RECOMMENDATIONS FOR USER {user_id}")
print("="*70)

recommendations = recommend(
    model,
    user_id=user_id,
    k=k,
    item_genre_features=features.get('genre_features'),
    item_synopsis_embeddings=features.get('synopsis_embeddings'),
    seen_items=None,
    device='cpu'
)

print(f"\n{'Rank':<6} {'Item ID':<10} {'Score':<10}")
print("-" * 30)
for rec in recommendations:
    print(f"{rec['rank']:<6} {rec['item_id']:<10} {rec['score']:.4f}")

## Test 3: Genre-Only vs Full Model

In [None]:
# Compare genre-only vs genre+synopsis
if config.get('use_synopsis'):
    print("="*70)
    print("COMPARISON: Genre-Only vs Genre+Synopsis")
    print("="*70)
    
    test_user = 100
    test_item = 500
    
    # Genre only (synopsis = None or zeros)
    score_genre_only = predict_score(
        model,
        user_id=test_user,
        item_id=test_item,
        genre_vector=features['genre_features'][test_item],
        synopsis_embedding=None,  # No synopsis
        device='cpu'
    )
    
    # Genre + Synopsis
    score_full = predict_score(
        model,
        user_id=test_user,
        item_id=test_item,
        genre_vector=features['genre_features'][test_item],
        synopsis_embedding=features['synopsis_embeddings'][test_item],
        device='cpu'
    )
    
    print(f"\nUser {test_user}, Item {test_item}:")
    print(f"  Genre only:      {score_genre_only:.4f}")
    print(f"  Genre+Synopsis:  {score_full:.4f}")
    print(f"  Difference:      {score_full - score_genre_only:+.4f}")
else:
    print("\n⚠️  Model not trained with synopsis - skipping comparison")

## Test 4: Multiple Users Batch Prediction

In [None]:
# Test predictions for multiple users
test_users = [0, 50, 100, 500, 1000]
test_item = 500

print("="*70)
print(f"BATCH PREDICTION: Item {test_item} for Multiple Users")
print("="*70)

print(f"\n{'User ID':<10} {'Score':<10}")
print("-" * 22)

for user_id in test_users:
    if user_id >= data['num_users']:
        print(f"{user_id:<10} (invalid user)")
        continue
    
    score = predict_score(
        model,
        user_id=user_id,
        item_id=test_item,
        genre_vector=features.get('genre_features', [None])[test_item] if 'genre_features' in features else None,
        synopsis_embedding=features.get('synopsis_embeddings', [None])[test_item] if 'synopsis_embeddings' in features else None,
        device='cpu'
    )
    print(f"{user_id:<10} {score:.4f}")

## Summary

**Tests completed:**
1. ✅ Single user-item prediction
2. ✅ Top-K recommendations
3. ✅ Genre-only vs full model comparison
4. ✅ Batch prediction for multiple users

**Next steps:**
- Test with different users/items
- Analyze recommendation quality
- Compare different model checkpoints