# Lab B.2 Solutions: Neural Collaborative Filtering

This notebook contains complete solutions to the exercises from Lab B.2.

---

In [None]:
# Setup: make the lab's helper scripts importable, import dependencies,
# seed the random number generators and pick the compute device.
import sys
from pathlib import Path

# Use the parent of the working directory as the module root when the working
# directory path contains 'solutions'; otherwise use the working directory.
# NOTE(review): this is a substring test on the full path string, so an
# ancestor directory whose name contains 'solutions' also selects the parent
# branch -- confirm that is intended.
module_dir = Path.cwd().parent if 'solutions' in str(Path.cwd()) else Path.cwd()
# Put <module_dir>/scripts at the front of sys.path before the local imports
# below (presumably where data_utils and models live -- verify).
sys.path.insert(0, str(module_dir / 'scripts'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm

from data_utils import download_movielens, leave_one_out_split
from models import NeuMF

# Seed NumPy's global RNG (NCFDataset draws negatives with np.random.randint)
# and PyTorch's RNG.
np.random.seed(42)
torch.manual_seed(42)
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the '100k' ratings and split them with the lab's leave-one-out helper
ratings_df, movies_df = download_movielens('100k')
train_df, test_df = leave_one_out_split(ratings_df, by_time=True)

# Number of distinct user and item IDs across the full ratings table
num_users, num_items = (ratings_df[col].nunique() for col in ('user_id', 'item_id'))

# user_id -> set of item_ids that user has in the training split
train_items_by_user = train_df.groupby('user_id')['item_id']
user_positive_items = train_items_by_user.apply(set).to_dict()

---

## Exercise 1 Solution: Different Numbers of Negative Samples

In [None]:
class NCFDataset(Dataset):
    """Training dataset pairing each observed interaction with random negatives.

    Indexing returns one positive ``(user, item)`` pair taken from
    ``interactions_df`` followed by ``num_negatives`` item IDs drawn uniformly
    from ``[0, num_items)`` that are not in the user's positive set.

    Args:
        interactions_df: DataFrame with ``user_id`` and ``item_id`` columns;
            one row per positive interaction.
        num_items: Exclusive upper bound of the item-ID range that negatives
            are sampled from.
        user_positive_items: Mapping ``user_id -> set`` of item IDs that must
            not be sampled as negatives for that user. Users missing from the
            mapping have nothing excluded.
        num_negatives: Number of negatives sampled per positive.
    """

    def __init__(self, interactions_df, num_items, user_positive_items, num_negatives=4):
        self.users = interactions_df['user_id'].values
        self.items = interactions_df['item_id'].values
        self.num_items = num_items
        self.user_positive_items = user_positive_items
        self.num_negatives = num_negatives

    def __len__(self):
        """Return the number of positive interactions."""
        return len(self.users)

    def __getitem__(self, idx):
        """Return ``(users, items, labels)`` tensors for interaction ``idx``.

        Each tensor has ``1 + num_negatives`` entries: the positive pair first
        (label 1.0), then the sampled negatives (label 0.0).

        Raises:
            ValueError: If negatives are requested but every ID in
                ``[0, num_items)`` is in the user's positive set.
        """
        user = self.users[idx]
        pos_item = self.items[idx]
        positives = self.user_positive_items.get(user, set())

        # Rejection sampling below would spin forever if no ID in the range is
        # eligible. The cheap length comparison short-circuits the exhaustive
        # scan for every user who still has at least one free ID.
        if (
            self.num_negatives > 0
            and len(positives) >= self.num_items
            and all(item in positives for item in range(self.num_items))
        ):
            raise ValueError(
                f"Cannot sample negatives for user {user}: every item id in "
                f"[0, {self.num_items}) is in the user's positive set"
            )

        # Draw with replacement until enough non-positive items are collected;
        # the same negative may therefore appear more than once.
        neg_items = []
        while len(neg_items) < self.num_negatives:
            neg_item = np.random.randint(0, self.num_items)
            if neg_item not in positives:
                neg_items.append(neg_item)

        users = [user] * (1 + self.num_negatives)
        items = [pos_item] + neg_items
        labels = [1.0] + [0.0] * self.num_negatives

        return (
            torch.tensor(users, dtype=torch.long),
            torch.tensor(items, dtype=torch.long),
            torch.tensor(labels, dtype=torch.float32),
        )


def collate_fn(batch):
    """Merge per-sample ``(users, items, labels)`` triples into batch tensors.

    For each of the three positions, the tensors of all samples are
    concatenated along the first dimension with ``torch.cat``.

    Returns:
        Tuple ``(users, items, labels)`` of concatenated tensors.
    """
    return tuple(
        torch.cat([sample[position] for sample in batch])
        for position in range(3)
    )

In [None]:
def train_and_evaluate_ncf(num_negatives, epochs=5):
    """Train NeuMF with specified number of negatives and return HR@10.

    Trains a fresh ``NeuMF(num_users, num_items)`` on ``train_df`` with Adam
    (lr=0.001), ``batch_size=256`` and binary cross-entropy, then evaluates on
    ``test_df``.

    Args:
        num_negatives: Negatives sampled per positive by ``NCFDataset``.
        epochs: Number of passes over the training loader.

    Returns:
        Fraction of test users whose test item is among the 10 highest-scored
        items after that user's training items have been masked out.
    """
    # Create dataset
    dataset = NCFDataset(train_df, num_items, user_positive_items, num_negatives)
    loader = DataLoader(dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)

    # Create model
    model = NeuMF(num_users, num_items).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training
    for _ in range(epochs):
        model.train()
        for users, items, labels in loader:
            users, items, labels = users.to(device), items.to(device), labels.to(device)
            # Outputs go straight into binary_cross_entropy, so the model is
            # presumably expected to emit probabilities -- confirm in models.py.
            predictions = model(users, items)
            loss = F.binary_cross_entropy(predictions, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    hits = 0

    # Resolve each user's test item once instead of re-filtering test_df with
    # a boolean mask per user. Keeping the first row per user in original row
    # order yields the same (user, item) pairs, in the same order, as
    # iterating over unique() and taking .values[0] of the filtered frame.
    held_out = test_df.drop_duplicates(subset='user_id')
    test_users = held_out['user_id'].values
    test_items = held_out['item_id'].values

    with torch.no_grad():
        for user_id, test_item in zip(test_users, test_items):
            # presumably a 1-D score tensor indexable by item_id -- verify
            scores = model.predict_all_items(user_id)

            # Exclude items already seen in training from the ranking
            train_items = list(user_positive_items.get(user_id, set()))
            scores[train_items] = -float('inf')

            _, top_10 = torch.topk(scores, 10)
            if test_item in top_10.cpu().numpy():
                hits += 1

    return hits / len(test_users)


# Sweep over the number of negatives sampled per positive
negative_counts = [1, 4, 8, 16]
results = {}

for num_neg in negative_counts:
    print(f"\nTesting num_negatives = {num_neg}...")
    results[num_neg] = hr = train_and_evaluate_ncf(num_neg, epochs=5)
    print(f"  HR@10: {hr:.4f}")

# Summary table, ordered by negative count
banner = "=" * 40
print("\n" + banner)
print("RESULTS SUMMARY")
print(banner)
for num_neg in sorted(results):
    hr = results[num_neg]
    print(f"  {num_neg} negatives: HR@10 = {hr:.4f}")

In [None]:
# Visualize HR@10 for each negative-sample count (bars follow the insertion
# order of `results`)
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    [str(n) for n in results],
    list(results.values()),  # materialize the dict view before plotting
    color='steelblue',
    edgecolor='black',
)
ax.set_xlabel('Number of Negative Samples')
ax.set_ylabel('Hit Rate @ 10')
ax.set_title('Effect of Negative Sampling on HR@10')
ax.grid(axis='y', alpha=0.3)
plt.show()

# The emoji below was stored as mis-decoded UTF-8 bytes; restored to the
# intended character.
print("\n📊 Observations:")
print("   - Too few negatives (1): Easy training, weaker discrimination")
print("   - Moderate (4-8): Good balance of difficulty and learning")
print("   - Many negatives (16): Harder but potentially better generalization")

---

## Exercise 2 Solution: Different MLP Architectures

In [None]:
def train_architecture(mlp_layers, epochs=5):
    """Train NeuMF with specified MLP architecture.

    Builds ``NeuMF`` with ``gmf_dim=32``, ``mlp_dim=64`` and the given
    ``mlp_layers``, trains it on ``train_df`` (4 negatives per positive,
    ``batch_size=256``, Adam with lr=0.001, binary cross-entropy) and
    evaluates on ``test_df``.

    Args:
        mlp_layers: Value forwarded to ``NeuMF``'s ``mlp_layers`` argument
            (the callers in this notebook pass lists of layer widths).
        epochs: Number of passes over the training loader.

    Returns:
        Tuple ``(hr_at_10, num_params)``: the fraction of test users whose
        test item is among the 10 highest-scored items after their training
        items are masked out, and the total element count of the model's
        parameters.
    """
    dataset = NCFDataset(train_df, num_items, user_positive_items, num_negatives=4)
    loader = DataLoader(dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)

    model = NeuMF(
        num_users, num_items,
        gmf_dim=32, mlp_dim=64,
        mlp_layers=mlp_layers
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for _ in range(epochs):
        model.train()
        for users, items, labels in loader:
            users, items, labels = users.to(device), items.to(device), labels.to(device)
            predictions = model(users, items)
            loss = F.binary_cross_entropy(predictions, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate
    model.eval()
    hits = 0

    # Resolve each user's test item once instead of re-filtering test_df with
    # a boolean mask per user. Keeping the first row per user in original row
    # order yields the same (user, item) pairs, in the same order, as
    # iterating over unique() and taking .values[0] of the filtered frame.
    held_out = test_df.drop_duplicates(subset='user_id')
    test_users = held_out['user_id'].values
    test_items = held_out['item_id'].values

    with torch.no_grad():
        for user_id, test_item in zip(test_users, test_items):
            # presumably a 1-D score tensor indexable by item_id -- verify
            scores = model.predict_all_items(user_id)

            # Exclude items already seen in training from the ranking
            train_items = list(user_positive_items.get(user_id, set()))
            scores[train_items] = -float('inf')

            _, top_10 = torch.topk(scores, 10)
            if test_item in top_10.cpu().numpy():
                hits += 1

    params = sum(p.numel() for p in model.parameters())
    return hits / len(test_users), params


# Candidate MLP towers; each display label is the family name followed by the
# layer widths, e.g. 'Shallow [64, 32]'
layer_configs = [
    ('Shallow', [64, 32]),
    ('Standard', [128, 64, 32]),
    ('Deep', [256, 128, 64, 32]),
    ('Wide', [512, 256]),
]
architectures = {f'{label} {widths}': widths for label, widths in layer_configs}

arch_results = {}

for name, layers in architectures.items():
    print(f"\nTesting {name}...")
    hr, params = train_architecture(layers, epochs=5)
    arch_results[name] = dict(hr=hr, params=params)
    print(f"  HR@10: {hr:.4f}, Parameters: {params:,}")

In [None]:
# Visualize architecture comparison: HR@10 (left) and parameter count (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

names = list(arch_results.keys())
hrs = [arch_results[n]['hr'] for n in names]
params = [arch_results[n]['params'] for n in names]

# HR comparison
axes[0].barh(names, hrs, color='steelblue', edgecolor='black')
axes[0].set_xlabel('Hit Rate @ 10')
axes[0].set_title('Architecture Performance')
axes[0].grid(axis='x', alpha=0.3)

# Parameters comparison (scaled to thousands to match the axis label)
axes[1].barh(names, [p/1000 for p in params], color='coral', edgecolor='black')
axes[1].set_xlabel('Parameters (thousands)')
axes[1].set_title('Model Size')
axes[1].grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()

# The emoji below was stored as mis-decoded UTF-8 bytes; restored to the
# intended character.
print("\n📊 Observations:")
print("   - Deeper networks can learn more complex patterns")
print("   - But more parameters = more overfitting risk")
print("   - For small datasets, simpler architectures often win")

---

## Bonus: Pre-training GMF and MLP

In [None]:
# The original NeuMF paper suggests pre-training:
# 1. Train GMF alone
# 2. Train MLP alone
# 3. Initialize NeuMF with their weights
# 4. Fine-tune the combined model

from models import GMF, MLP

def pretrain_gmf(epochs=10):
    """Train a standalone GMF model on the training interactions.

    Uses 4 sampled negatives per positive, ``batch_size=256``, Adam with
    lr=0.001 and binary cross-entropy on the model outputs.

    Args:
        epochs: Number of passes over the training loader.

    Returns:
        The trained ``GMF`` model (on ``device``).
    """
    train_loader = DataLoader(
        NCFDataset(train_df, num_items, user_positive_items, num_negatives=4),
        batch_size=256,
        shuffle=True,
        collate_fn=collate_fn,
    )

    gmf = GMF(num_users, num_items, embedding_dim=32).to(device)
    adam = optim.Adam(gmf.parameters(), lr=0.001)

    for _ in range(epochs):
        gmf.train()
        for batch in train_loader:
            user_ids, item_ids, targets = (t.to(device) for t in batch)
            bce = F.binary_cross_entropy(gmf(user_ids, item_ids), targets)
            adam.zero_grad()
            bce.backward()
            adam.step()

    return gmf


def pretrain_mlp(epochs=10):
    """Train a standalone MLP model on the training interactions.

    Uses 4 sampled negatives per positive, ``batch_size=256``, Adam with
    lr=0.001 and binary cross-entropy on the model outputs. The model is built
    with ``embedding_dim=64`` and ``hidden_layers=[128, 64, 32]``.

    Args:
        epochs: Number of passes over the training loader.

    Returns:
        The trained ``MLP`` model (on ``device``).
    """
    train_loader = DataLoader(
        NCFDataset(train_df, num_items, user_positive_items, num_negatives=4),
        batch_size=256,
        shuffle=True,
        collate_fn=collate_fn,
    )

    mlp = MLP(num_users, num_items, embedding_dim=64, hidden_layers=[128, 64, 32]).to(device)
    adam = optim.Adam(mlp.parameters(), lr=0.001)

    for _ in range(epochs):
        mlp.train()
        for batch in train_loader:
            user_ids, item_ids, targets = (t.to(device) for t in batch)
            bce = F.binary_cross_entropy(mlp(user_ids, item_ids), targets)
            adam.zero_grad()
            bce.backward()
            adam.step()

    return mlp


# Pre-train both towers separately (5 epochs each)
print("Pre-training GMF...")
pretrained_gmf = pretrain_gmf(epochs=5)

print("Pre-training MLP...")
pretrained_mlp = pretrain_mlp(epochs=5)

# The check mark below was stored as mis-decoded UTF-8 bytes; restored to the
# intended character.
print("\n✅ Pre-training complete!")
print("\nNext steps would be:")
print("  1. Copy GMF embeddings to NeuMF's GMF path")
print("  2. Copy MLP embeddings to NeuMF's MLP path")
print("  3. Fine-tune the combined model with smaller learning rate")

---

## Key Takeaways

1. **Negative sampling**: 4-8 negatives per positive is typically optimal

2. **MLP depth**: Deeper isn't always better, especially for small datasets

3. **Pre-training**: Can help but adds complexity. Usually worth it for large datasets.

4. **Trade-offs**: Always balance model complexity vs dataset size