# 🎯 Advanced Recommender Systems

Neural collaborative filtering and session-based recommendations.

**Level**: Advanced  
**Time**: ~60 minutes

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

## 1. Generate Sample Data

In [None]:
# Seed NumPy's global generator so the synthetic interactions are reproducible.
np.random.seed(42)

n_users = 1000
n_items = 500
n_interactions = 50000

# Draw user ids, item ids and ratings independently and uniformly at random.
# Ratings are integers 1..5 (randint's upper bound is exclusive).
users = np.random.randint(0, n_users, size=n_interactions)
items = np.random.randint(0, n_items, size=n_interactions)
ratings = np.random.randint(1, 6, size=n_interactions)

# Keep only the first occurrence of each (user, item) pair and renumber the rows.
df = (
    pd.DataFrame({'user_id': users, 'item_id': items, 'rating': ratings})
    .drop_duplicates(subset=['user_id', 'item_id'])
    .reset_index(drop=True)
)

print(f"Interactions: {len(df)}")
df.head()

## 2. Neural Collaborative Filtering (NCF)

In [None]:
class NCF(nn.Module):
    """Neural collaborative filtering model with a GMF branch and an MLP branch.

    Each branch owns its own user and item embedding tables. The GMF branch
    multiplies the user and item embeddings element-wise; the MLP branch
    concatenates them and feeds the result through a stack of
    Linear -> ReLU -> Dropout(0.2) layers. The two branch outputs are
    concatenated and mapped to a single score by a final linear layer.

    Args:
        n_users: Number of rows in each user embedding table.
        n_items: Number of rows in each item embedding table.
        embedding_dim: Width of every embedding vector.
        mlp_layers: Output width of each hidden MLP layer, in order. May be
            empty, in which case the MLP branch passes the concatenated
            embeddings through unchanged.
    """

    def __init__(self, n_users, n_items, embedding_dim=64, mlp_layers=(128, 64)):
        super().__init__()

        # GMF embeddings
        self.gmf_user = nn.Embedding(n_users, embedding_dim)
        self.gmf_item = nn.Embedding(n_items, embedding_dim)

        # MLP embeddings
        self.mlp_user = nn.Embedding(n_users, embedding_dim)
        self.mlp_item = nn.Embedding(n_items, embedding_dim)

        # MLP layers: the first layer consumes the concatenated user+item
        # embeddings, each later layer consumes the previous layer's output.
        mlp_modules = []
        input_dim = embedding_dim * 2
        for dim in mlp_layers:
            mlp_modules.extend([nn.Linear(input_dim, dim), nn.ReLU(), nn.Dropout(0.2)])
            input_dim = dim
        self.mlp = nn.Sequential(*mlp_modules)

        # Final prediction. `input_dim` now holds the MLP branch's output
        # width, which also stays correct when `mlp_layers` is empty.
        self.output = nn.Linear(embedding_dim + input_dim, 1)

    def forward(self, user, item):
        """Score user/item pairs.

        Args:
            user: 1-D integer tensor of user indices, shape (batch,).
            item: 1-D integer tensor of item indices, same shape as `user`.

        Returns:
            Tensor of shape (batch,) holding one raw score per pair.
        """
        # GMF path
        gmf = self.gmf_user(user) * self.gmf_item(item)

        # MLP path
        mlp = torch.cat([self.mlp_user(user), self.mlp_item(item)], dim=1)
        mlp = self.mlp(mlp)

        # Combine
        x = torch.cat([gmf, mlp], dim=1)
        # Drop only the trailing size-1 dimension so a batch of one still
        # yields shape (1,) instead of collapsing to a 0-dim scalar.
        return self.output(x).squeeze(-1)

# Build the network over the full user/item id ranges, then move it to the selected device.
model = NCF(n_users, n_items)
model = model.to(device)
print(f"NCF Parameters: {sum(p.numel() for p in model.parameters()):,}")

## 3. Training

In [None]:
class RatingDataset(Dataset):
    """Map-style dataset of (user index, item index, rating) triples.

    Args:
        df: DataFrame with `user_id`, `item_id` and `rating` columns. The
            columns are copied into tensors once, at construction time.
    """

    def __init__(self, df):
        # torch.tensor with an explicit dtype replaces the legacy
        # LongTensor/FloatTensor constructors: ids become int64 (as
        # nn.Embedding lookups expect) and ratings become float32.
        self.users = torch.tensor(df['user_id'].values, dtype=torch.long)
        self.items = torch.tensor(df['item_id'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        """Return the number of interactions."""
        return len(self.users)

    def __getitem__(self, idx):
        """Return the (user, item, rating) tensors stored at position `idx`."""
        return self.users[idx], self.items[idx], self.ratings[idx]

# Hold out 20% of the interactions; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Only the training batches are reshuffled each epoch. The test loader is
# built here but not consumed by the loop below.
train_loader = DataLoader(RatingDataset(train_df), batch_size=256, shuffle=True)
test_loader = DataLoader(RatingDataset(test_df), batch_size=256)

# Regress the predicted scores onto the observed ratings with mean-squared error.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    model.train()  # training mode, so the model's dropout layers are active
    total_loss = 0.0
    for batch in train_loader:
        # Each batch is (users, items, ratings); move all three to the model's device.
        user, item, rating = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        pred = model(user, item)
        loss = criterion(pred, rating)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}: Loss = {total_loss/len(train_loader):.4f}")

## 4. Recommendations

In [None]:
def recommend_for_user(model, user_id, n_items, top_k=10):
    """Get top-k recommendations for a user.

    Scores every item index in ``range(n_items)`` for ``user_id`` and returns
    the indices with the highest scores. Items the user has already
    interacted with are not filtered out.

    Args:
        model: Module called as ``model(user, item)`` with two equal-length
            1-D index tensors; expected to return one score per pair.
        user_id: Index of the user to score for.
        n_items: Number of candidate items; indices 0..n_items-1 are scored.
        top_k: Maximum number of item indices to return.

    Returns:
        NumPy array of at most ``top_k`` item indices, highest score first.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()
    # Build the inputs on the model's own device instead of relying on a
    # module-level `device` global that may not match where the model lives.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')
    with torch.no_grad():
        # Constant user-id vector without materialising an n_items-long list.
        user = torch.full((n_items,), int(user_id), dtype=torch.long, device=target)
        items = torch.arange(n_items, device=target)
        # Flatten so a model that squeezes a single score to 0-dim still
        # produces a 1-D tensor that can be sorted and sliced.
        scores = model(user, items).reshape(-1)
        top_items = scores.argsort(descending=True)[:top_k]
    return top_items.cpu().numpy()

# Get recommendations for user 0: score all n_items candidates and keep the
# ten highest-scoring item indices.
recs = recommend_for_user(model, user_id=0, n_items=n_items, top_k=10)
# The emoji was stored as mis-decoded UTF-8 bytes; print the intended character.
print(f"\n🎯 Top 10 recommendations for User 0: {recs}")

## 5. Model Comparison

In [None]:
# Reference table comparing recommender families. The figures are hard-coded
# in this cell; nothing here is computed from the model trained above.
comparison = pd.DataFrame({
    'Model': ['Matrix Factorization', 'NCF', 'AutoEncoder', 'GNN-based', 'Transformer'],
    'RMSE': [0.92, 0.88, 0.87, 0.85, 0.84],
    'Recall@10': [0.15, 0.18, 0.19, 0.21, 0.22],
    'Complexity': ['Low', 'Medium', 'Medium', 'High', 'High'],
    'Best For': ['Sparse data', 'General', 'Dense data', 'Social', 'Sequential']
})

# The emoji was stored as mis-decoded UTF-8 bytes; print the intended character.
print("📊 Recommender Model Comparison:")
try:
    # `display` is provided by IPython/Jupyter sessions only.
    display(comparison)
except NameError:
    # Plain Python interpreter: fall back to a text rendering of the table.
    print(comparison.to_string(index=False))

## 🎯 Key Takeaways
1. NCF combines MF and deep learning
2. Negative sampling improves training
3. Evaluate with ranking metrics (NDCG, Recall@K)
4. Consider cold-start solutions