# Neural Collaborative Filtering for Rating Prediction

## Motivation

The original NCF (He et al., 2017) was designed for **implicit feedback** (binary interactions: clicked/not clicked). It uses:
- **Binary Cross-Entropy loss** (BCEWithLogitsLoss)
- **Negative sampling** to create (user, item, 0/1) pairs
- Output passed through **sigmoid** → probability of interaction

However, for **explicit feedback** (ratings 1-5), we need to:
1. Replace BCE loss with **MSE loss** (regression task)
2. Remove negative sampling (use actual ratings as targets)
3. Scale the output to the rating range [1, 5] (this notebook uses a scaled sigmoid) or leave it unbounded

## Key Modifications

| Component | Original NCF (Implicit) | Rating NCF (Explicit) |
|-----------|------------------------|----------------------|
| Loss | BCEWithLogitsLoss | MSELoss |
| Output | Logits → Sigmoid → [0,1] | Logit → scaled sigmoid → [1,5] |
| Data | Binary labels (0/1) | Actual ratings (1-5) |
| Sampling | Negative sampling | No negative sampling |
| Evaluation | HR@K, NDCG@K | RMSE, MAE |

## Step 1: Setup and Imports

In [None]:
import os
import sys
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from sklearn.model_selection import train_test_split

# Make the project's "src" directory importable: climb from the working
# directory towards the filesystem root until a directory named "src" is met.
current_dir = os.getcwd()
path = current_dir
while os.path.basename(path) != "src":
    parent = os.path.dirname(path)
    if parent == path:
        # Reached the root without meeting "src"; sys.path is left untouched.
        break
    path = parent

if os.path.basename(path) == "src" and path not in sys.path:
    sys.path.insert(0, path)

# Project-local helper; expected to be importable once "src" is on sys.path.
from helpers import download_ml1m_dataset

# Fixed seeds for the PyTorch and NumPy random generators.
torch.manual_seed(42)
np.random.seed(42)

# Run on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

## Step 2: Hyperparameters

In [None]:
# Paths
# Both directories are built from the parent of the current working directory
# followed by '..', and are created on the spot if they do not exist.
# NOTE(review): the result depends on where the notebook is launched from —
# confirm that this is the intended location for data/ and models/.
DATA_DIR = os.path.join(os.path.dirname(os.getcwd()), '..', 'data')
MODEL_PATH = os.path.join(os.path.dirname(os.getcwd()), '..', 'models')
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(MODEL_PATH, exist_ok=True)

# Hyperparameters
# Step size handed to the optimizer.
LEARNING_RATE = 0.001
# Dropout probability used inside the model's MLP tower.
DROPOUT_RATE = 0.2
# Number of samples per DataLoader batch (train and test).
BATCH_SIZE = 256
# Upper bound on training epochs; early stopping may end training sooner.
EPOCHS = 50
FACTOR_NUM = 32         # Embedding dimension
NUM_LAYERS = 3          # MLP depth
# Number of consecutive epochs without an RMSE improvement tolerated before
# training stops.
EARLY_STOPPING_PATIENCE = 10
TEST_RATIO = 0.2        # 80/20 train/test split

# Rating range for ML-1M
# These bounds define the interval the model's sigmoid output is rescaled to.
RATING_MIN = 1.0
RATING_MAX = 5.0

## Step 3: Load and Prepare Data with Ratings

In [None]:
# Download dataset
# download_ml1m_dataset is a project helper; its return value is used below as
# the path of the ratings file (presumably it fetches ML-1M into DATA_DIR when
# it is missing — verify in helpers).
ratings_file = download_ml1m_dataset(DATA_DIR)

# Load ratings with actual values
# The file is '::'-delimited; a multi-character separator requires the Python
# parsing engine. Column names are supplied explicitly through `names`.
ratings_df = pd.read_csv(
    ratings_file,
    sep='::',
    engine='python',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
    encoding='latin-1'
)

# Sanity report: row count, distinct users/items, and how many ratings fall on
# each rating value (sorted by rating).
print(f"Total ratings: {len(ratings_df):,}")
print(f"Users: {ratings_df['user_id'].nunique():,}")
print(f"Items: {ratings_df['item_id'].nunique():,}")
print(f"Rating distribution:\n{ratings_df['rating'].value_counts().sort_index()}")

In [None]:
# Build contiguous 0-based indices for users and items so they can be used
# directly as embedding row numbers. Ids are numbered in the order in which
# unique() returns them.
user_ids = ratings_df['user_id'].unique()
item_ids = ratings_df['item_id'].unique()
num_users, num_items = len(user_ids), len(item_ids)

# raw id -> dense index
user_to_idx = dict(zip(user_ids, range(num_users)))
item_to_idx = dict(zip(item_ids, range(num_items)))

# Store the dense indices next to the raw ids.
ratings_df['user_idx'] = ratings_df['user_id'].map(user_to_idx)
ratings_df['item_idx'] = ratings_df['item_id'].map(item_to_idx)

print(f"Indexed users: {num_users}, items: {num_items}")

In [None]:
# Train/Test split (random 80/20)
# Rows are assigned to the two parts at random, irrespective of user or
# timestamp; random_state pins the shuffle so the split is repeatable.
train_df, test_df = train_test_split(
    ratings_df,
    test_size=TEST_RATIO,
    random_state=42
)

# Report the resulting sizes.
print(f"Train samples: {len(train_df):,}")
print(f"Test samples: {len(test_df):,}")

## Step 4: PyTorch Dataset for Rating Prediction

**Key difference from original NCF**: No negative sampling. We use actual ratings as targets.

In [None]:
class RatingDataset(data.Dataset):
    """
    Map-style dataset of explicit-feedback samples.

    Every sample is a (user index, item index, rating) triple taken from one
    DataFrame row. The rating itself is the regression target, so there are
    no binary labels and no sampled negatives.
    """

    def __init__(self, df):
        """
        Args:
            df: DataFrame with columns ['user_idx', 'item_idx', 'rating']
        """
        # Indices are stored as int64 (embedding lookups), ratings as float32
        # (regression targets).
        self.users = torch.as_tensor(df['user_idx'].values, dtype=torch.long)
        self.items = torch.as_tensor(df['item_idx'].values, dtype=torch.long)
        self.ratings = torch.as_tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        """Number of samples (one per DataFrame row)."""
        return self.ratings.shape[0]

    def __getitem__(self, idx):
        """Return the (user, item, rating) tensors stored at position ``idx``."""
        user = self.users[idx]
        item = self.items[idx]
        target = self.ratings[idx]
        return user, item, target


# Create datasets and loaders
# One RatingDataset per split.
train_dataset = RatingDataset(train_df)
test_dataset = RatingDataset(test_df)

# Training batches are reshuffled on every pass; test batches keep a fixed
# order.
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Train batches: {len(train_loader)}, Test batches: {len(test_loader)}")

## Step 5: NCF Model Modified for Rating Prediction

**Modification**: Output layer maps to rating range [1, 5] using scaled sigmoid.

In [None]:
class NCFRating(nn.Module):
    """
    NeuMF-style collaborative filtering model that regresses explicit ratings.

    Compared with the implicit-feedback NCF, the final score is passed through
    a sigmoid and rescaled into [rating_min, rating_max], so the network is
    trained as a regressor rather than as a binary classifier.

    Branches, selected by ``model_name``:
    - 'GMF': element-wise product of user and item embeddings only
    - 'MLP': multi-layer perceptron over concatenated embeddings only
    - any other value (e.g. 'NeuMF-end'): both branches, concatenated
    """

    def __init__(self, num_users, num_items, factor_num, num_layers,
                 dropout, model_name='NeuMF-end', rating_min=1.0, rating_max=5.0):
        """
        Args:
            num_users: Number of rows in the user embedding tables.
            num_items: Number of rows in the item embedding tables.
            factor_num: GMF embedding size; also the output width of the MLP.
            num_layers: Number of Dropout -> Linear -> ReLU stages in the MLP.
            dropout: Dropout probability applied before every MLP Linear.
            model_name: 'GMF', 'MLP', or anything else for the combined model.
            rating_min: Lower bound of the predicted rating interval.
            rating_max: Upper bound of the predicted rating interval.
        """
        super().__init__()

        self.model_name = model_name
        self.dropout = dropout
        self.rating_min = rating_min
        self.rating_max = rating_max

        has_gmf = model_name != 'MLP'
        has_mlp = model_name != 'GMF'

        # GMF branch: one factor_num-wide embedding per user and per item.
        if has_gmf:
            self.embed_user_GMF = nn.Embedding(num_users, factor_num)
            self.embed_item_GMF = nn.Embedding(num_items, factor_num)

        # MLP branch: wider embeddings whose concatenation feeds a tower that
        # halves its width at every stage and ends at factor_num units.
        if has_mlp:
            tower_embed_dim = factor_num * (2 ** (num_layers - 1))
            self.embed_user_MLP = nn.Embedding(num_users, tower_embed_dim)
            self.embed_item_MLP = nn.Embedding(num_items, tower_embed_dim)

            tower = []
            for depth in range(num_layers):
                width_in = factor_num * (2 ** (num_layers - depth))
                tower += [
                    nn.Dropout(p=dropout),
                    nn.Linear(width_in, width_in // 2),
                    nn.ReLU(),
                ]
            self.MLP_layers = nn.Sequential(*tower)

        # The head sees factor_num features from a single branch, or twice
        # that when both branches are concatenated.
        predict_size = factor_num * 2 if (has_gmf and has_mlp) else factor_num
        self.predict_layer = nn.Linear(predict_size, 1)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise parameters by name: N(0, 0.01) for embeddings,
        Xavier-uniform for the remaining weights, zeros for biases."""
        for name, param in self.named_parameters():
            if 'embed' in name:
                nn.init.normal_(param, std=0.01)
                continue
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
                continue
            if 'bias' in name:
                nn.init.zeros_(param)

    def forward(self, user, item):
        """
        Predict one rating per (user, item) pair.

        Args:
            user: Tensor of user indices.
            item: Tensor of item indices, paired element-wise with ``user``.

        Returns:
            Flattened tensor of predictions inside [rating_min, rating_max].
        """
        branches = []

        # GMF branch: element-wise product of the two embeddings.
        if self.model_name != 'MLP':
            branches.append(self.embed_user_GMF(user) * self.embed_item_GMF(item))

        # MLP branch: concatenated embeddings pushed through the tower.
        if self.model_name != 'GMF':
            pair = torch.cat(
                [self.embed_user_MLP(user), self.embed_item_MLP(item)], dim=-1
            )
            branches.append(self.MLP_layers(pair))

        if len(branches) == 1:
            features = branches[0]
        else:
            features = torch.cat(branches, dim=-1)

        logits = self.predict_layer(features).view(-1)

        # sigmoid gives a value in [0, 1]; stretch and shift it onto the
        # configured rating interval.
        span = self.rating_max - self.rating_min
        return torch.sigmoid(logits) * span + self.rating_min

## Step 6: Evaluation Metrics (RMSE, MAE)

In [None]:
def evaluate(model, data_loader, device):
    """
    Evaluate a rating model with RMSE and MAE over every sample of a loader.

    RMSE (Root Mean Squared Error): sqrt(mean((pred - actual)^2))
    MAE (Mean Absolute Error): mean(|pred - actual|)

    Args:
        model: Module called as ``model(user, item)`` that returns one
            prediction per pair.
        data_loader: Iterable of ``(user, item, rating)`` tensor batches.
        device: Device the user/item tensors are moved to before the call.

    Returns:
        Tuple ``(rmse, mae)``. Both are NaN when the loader yields no samples.

    Side effects:
        Puts ``model`` into eval mode and leaves it there.
    """
    model.eval()
    batch_predictions = []
    batch_actuals = []

    with torch.no_grad():
        for user, item, rating in data_loader:
            user, item = user.to(device), item.to(device)
            pred = model(user, item)
            # Keep whole per-batch arrays (flattened so predictions and
            # targets always line up one-to-one) instead of boxing every
            # element into a Python list.
            batch_predictions.append(pred.cpu().numpy().reshape(-1))
            batch_actuals.append(rating.cpu().numpy().reshape(-1))

    if batch_predictions:
        predictions = np.concatenate(batch_predictions)
        actuals = np.concatenate(batch_actuals)
    else:
        predictions = np.empty(0, dtype=np.float32)
        actuals = np.empty(0, dtype=np.float32)

    # No samples: the metrics are undefined; report NaN explicitly rather
    # than through a NumPy "mean of empty slice" warning.
    if predictions.size == 0:
        return float('nan'), float('nan')

    errors = predictions - actuals
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))

    return rmse, mae

## Step 7: Training Loop

In [None]:
# Create model
# Combined GMF + MLP variant, sized from the re-indexed user/item counts and
# the hyperparameters above, then moved to the selected device.
model = NCFRating(
    num_users=num_users,
    num_items=num_items,
    factor_num=FACTOR_NUM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT_RATE,
    model_name='NeuMF-end',
    rating_min=RATING_MIN,
    rating_max=RATING_MAX
).to(device)

# Loss and optimizer
# MSE Loss for regression (rating prediction)
criterion = nn.MSELoss()
# Adam over every model parameter.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Count parameters
# Only parameters that receive gradients are counted.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Model: NCFRating (NeuMF-end)")
print(f"Trainable parameters: {total_params:,}")
print(f"Loss function: MSELoss (regression)")

In [None]:
# Training
# Runs up to EPOCHS passes over train_loader, evaluates after every pass,
# checkpoints the best model seen so far and stops early once the RMSE has
# not improved for EARLY_STOPPING_PATIENCE consecutive epochs.
# NOTE(review): the checkpoint choice and the early-stopping decision are both
# driven by RMSE on test_loader, so the reported "best" RMSE is selected on
# the same data it is measured on — consider a separate validation split.
best_rmse = float('inf')
patience_counter = 0
# Per-epoch curves, consumed by the plotting cell.
history = {'train_loss': [], 'test_rmse': [], 'test_mae': []}

print("="*60)
print("Training NCF for Rating Prediction")
print("="*60)

for epoch in range(EPOCHS):
    # evaluate() switches the model to eval mode, so training mode is
    # re-enabled at the start of every epoch.
    model.train()
    total_loss = 0
    start_time = time.time()
    
    for user, item, rating in train_loader:
        user, item, rating = user.to(device), item.to(device), rating.to(device)
        
        optimizer.zero_grad()
        prediction = model(user, item)
        loss = criterion(prediction, rating)
        loss.backward()
        optimizer.step()
        
        # criterion returns the batch mean; weighting it by the batch size
        # lets the division below give a per-sample average even when the
        # last batch is smaller.
        total_loss += loss.item() * len(rating)
    
    avg_loss = total_loss / len(train_dataset)
    
    # Evaluate
    test_rmse, test_mae = evaluate(model, test_loader, device)
    
    # Wall-clock time of the epoch, covering both training and evaluation.
    elapsed = time.time() - start_time
    
    # Store history
    history['train_loss'].append(avg_loss)
    history['test_rmse'].append(test_rmse)
    history['test_mae'].append(test_mae)
    
    # Early stopping check
    if test_rmse < best_rmse:
        best_rmse = test_rmse
        patience_counter = 0
        # Save best model
        # Only the state_dict is written; the file is overwritten on every
        # improvement.
        torch.save(model.state_dict(), os.path.join(MODEL_PATH, 'NCF-Rating.pth'))
        marker = " ✓ Best"
    else:
        patience_counter += 1
        marker = ""
    
    print(f"Epoch {epoch+1:02d}/{EPOCHS} | Loss: {avg_loss:.4f} | "
          f"RMSE: {test_rmse:.4f} | MAE: {test_mae:.4f} | "
          f"Time: {elapsed:.1f}s{marker}")
    
    if patience_counter >= EARLY_STOPPING_PATIENCE:
        print(f"\nEarly stopping at epoch {epoch+1}")
        break

print("\n" + "="*60)
print(f"Best Test RMSE: {best_rmse:.4f}")
print("="*60)

## Step 8: Visualize Training Progress

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One entry per panel: (axis, y-axis label, title, curves), where every curve
# is (history key, line format, legend label).
panels = (
    (axes[0], 'MSE Loss', 'Training Loss',
     (('train_loss', 'b-', 'Train Loss'),)),
    (axes[1], 'Error', 'Test Metrics',
     (('test_rmse', 'r-', 'RMSE'), ('test_mae', 'g-', 'MAE'))),
)

for ax, y_label, title, curves in panels:
    for key, line_format, label in curves:
        ax.plot(history[key], line_format, label=label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Write the figure next to the saved model, then display it.
plt.tight_layout()
plt.savefig(os.path.join(MODEL_PATH, 'ncf_rating_training.png'), dpi=150)
plt.show()

## Step 9: Example Predictions

In [None]:
# Load best model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written during a GPU run can still be loaded on a CPU-only host.
model.load_state_dict(
    torch.load(os.path.join(MODEL_PATH, 'NCF-Rating.pth'), map_location=device)
)
model.eval()

# Sample predictions
# Draw up to 10 test rows (fewer if the test split is smaller than that).
sample_df = test_df.sample(min(10, len(test_df)), random_state=42)

with torch.no_grad():
    users = torch.LongTensor(sample_df['user_idx'].values).to(device)
    items = torch.LongTensor(sample_df['item_idx'].values).to(device)
    preds = model(users, items).cpu().numpy()

print("Sample Predictions vs Actual Ratings")
print("="*50)
# Iterate column-wise instead of with iterrows(): iterrows() casts each row to
# a single common dtype, which would turn the ids into floats (and break the
# ':4d' format) as soon as any column of the frame is non-integer.
for user_id, item_id, rating, pred in zip(
        sample_df['user_id'], sample_df['item_id'], sample_df['rating'], preds):
    print(f"User {user_id:4d} | Item {item_id:4d} | "
          f"Actual: {rating:.0f} | Predicted: {pred:.2f}")

## Summary

### Key Changes from Original NCF

1. **Loss Function**: `BCEWithLogitsLoss` → `MSELoss`
2. **Output Activation**: Scaled sigmoid to [1, 5] range
3. **Data Handling**: Direct ratings instead of binary labels
4. **Evaluation Metrics**: RMSE/MAE instead of HR@K/NDCG@K
5. **No Negative Sampling**: All training data has actual ratings

### References

- He, X., et al. (2017). Neural Collaborative Filtering. WWW'17.
- Koren, Y., et al. (2009). Matrix Factorization Techniques for Recommender Systems. Computer.