# Centralized BPR-MF Baseline with Bayesian Optimization

This notebook implements a **Centralized BPR-MF baseline** for comparison with federated learning approaches.

## Key Features
- **Bayesian Optimization** using Optuna for hyperparameter tuning
- **BPR-MF** - Bayesian Personalized Ranking Matrix Factorization
- **Sampled Evaluation** - 1 positive + 99 negatives (NCF paper protocol)
- **Metrics**: HR@K, NDCG@K, MRR (same as federated experiments)

## Hyperparameters Optimized
| Parameter | Search Space | Type |
|-----------|-------------|------|
| embedding_dim | [32, 64, 128, 256] | Categorical |
| lr (learning rate) | [1e-4, 0.1] | Log-uniform |
| weight_decay | [1e-7, 1e-4] | Log-uniform |
| num_negatives | [1, 2, 4, 8] | Categorical |
| batch_size | [256, 512, 1024, 2048] | Categorical |

# Imports

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import os
from collections import defaultdict
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset as TorchDataset, DataLoader

# Bayesian Optimization
import optuna
from optuna.pruners import MedianPruner, SuccessiveHalvingPruner
from optuna.samplers import TPESampler
from optuna.visualization import (
    plot_optimization_history,
    plot_param_importances,
    plot_parallel_coordinate,
    plot_slice
)

print(f"PyTorch version: {torch.__version__}")
print(f"Optuna version: {optuna.__version__}")

PyTorch version: 2.10.0.dev20251122+cu128
Optuna version: 4.6.0


In [23]:
# Reproducibility: one seed shared by NumPy and PyTorch
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Choose the compute device once; later cells read the module-level `device`
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Seed every visible GPU as well
    torch.cuda.manual_seed_all(SEED)

print(f"Using device: {device}")
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU: NVIDIA GeForce RTX 5090


# Configuration

In [24]:
class Config:
    """Settings for the BPR-MF baseline and its Optuna hyperparameter search.

    Every setting is a class attribute. Creating an instance additionally
    makes sure the results and figures directories exist on disk.
    """

    # ---- Dataset ------------------------------------------------------------
    DATASET = 'ml-1m'
    RANDOM_STATE = 42

    # ---- Output locations ---------------------------------------------------
    RESULTS_DIR = 'results/centralized'
    FIGURES_DIR = 'figures'

    # ---- Optuna study -------------------------------------------------------
    OPTUNA_N_TRIALS = 50           # how many trials the study runs
    OPTUNA_TIMEOUT = None          # wall-clock limit in seconds; None disables it
    OPTUNA_N_STARTUP_TRIALS = 10   # random trials before TPE takes over
    OPTUNA_PRUNING_WARMUP = 10     # no pruning before this many epochs
    OPTUNA_PRUNING_INTERVAL = 5    # evaluate/report every N epochs

    OPTUNA_METRIC = 'ndcg@10'      # objective key (ndcg@10 or hr@10)
    OPTUNA_DIRECTION = 'maximize'  # maximize or minimize

    # ---- Search space -------------------------------------------------------
    # Lists are categorical choices; *_min / *_max pairs bound log-uniform ranges.
    SEARCH_SPACE = dict(
        embedding_dim=[32, 64, 128, 256],
        lr_min=1e-4,
        lr_max=0.1,
        weight_decay_min=1e-7,
        weight_decay_max=1e-4,
        num_negatives=[1, 2, 4, 8],
        batch_size=[256, 512, 1024, 2048],
    )

    # ---- Training length ----------------------------------------------------
    N_EPOCHS_TUNING = 50           # epochs per trial during the search
    N_EPOCHS_FINAL = 100           # epochs for the final run with the best params

    # ---- Evaluation (matching the federated setup) --------------------------
    EVAL_NUM_NEGATIVES = 99        # 1 positive + 99 negatives (NCF paper protocol)
    RANKING_K_VALUES = [5, 10, 20]
    EVAL_SAMPLE_USERS = 1000       # user subsample for fast evaluation while tuning

    # ---- Fallback BPR-MF parameters when no tuning is performed --------------
    DEFAULT_PARAMS = dict(
        embedding_dim=128,
        lr=0.01,
        weight_decay=1e-6,
        n_epochs=100,
        batch_size=1024,
        num_negatives=4,
    )

    def __init__(self):
        # Create the output directories up front so later saves cannot fail on a missing path
        for directory in (self.RESULTS_DIR, self.FIGURES_DIR):
            os.makedirs(directory, exist_ok=True)

# Instantiate once (this also creates the output directories) and echo the key settings
config = Config()
print(
    "Configuration loaded.\n"
    f"  - Optimization trials: {config.OPTUNA_N_TRIALS}\n"
    f"  - Objective metric: {config.OPTUNA_METRIC}\n"
    f"  - Epochs per trial: {config.N_EPOCHS_TUNING}"
)

Configuration loaded.
  - Optimization trials: 50
  - Objective metric: ndcg@10
  - Epochs per trial: 50


# Data Loading

In [25]:
def load_movielens_1m():
    """
    Read MovieLens-1M through `surprise` and return it as a DataFrame.

    The raw ratings are labelled user_id / item_id / rating / timestamp, and two
    extra columns (user_idx, item_idx) hold contiguous 0-based integer ids
    assigned in order of first appearance, as needed by embedding layers.

    Returns:
        df: DataFrame with user_id, item_id, rating, timestamp, user_idx, item_idx
        num_users: Number of unique users
        num_items: Number of unique items
    """
    print("="*60)
    print("LOADING MOVIELENS-1M DATASET")
    print("="*60)

    # surprise downloads the dataset on first use and reads its cached copy afterwards
    from surprise import Dataset
    ratings = pd.DataFrame(
        Dataset.load_builtin('ml-1m').raw_ratings,
        columns=['user_id', 'item_id', 'rating', 'timestamp'],
    )
    ratings['rating'] = ratings['rating'].astype(float)
    ratings['timestamp'] = ratings['timestamp'].astype(int)

    # Raw id -> dense index, numbered by first appearance
    user_ids = ratings['user_id'].unique()
    item_ids = ratings['item_id'].unique()
    user_lookup = dict(zip(user_ids, range(len(user_ids))))
    item_lookup = dict(zip(item_ids, range(len(item_ids))))
    ratings['user_idx'] = ratings['user_id'].map(user_lookup)
    ratings['item_idx'] = ratings['item_id'].map(item_lookup)

    num_users, num_items = len(user_ids), len(item_ids)
    num_ratings = len(ratings)
    sparsity_pct = (1 - num_ratings/(num_users*num_items))*100

    print(f"\u2713 Dataset loaded successfully")
    print(f"  - Users: {num_users:,}")
    print(f"  - Items: {num_items:,}")
    print(f"  - Ratings: {num_ratings:,}")
    print(f"  - Sparsity: {sparsity_pct:.2f}%")
    print()

    return ratings, num_users, num_items


def train_test_split_leave_one_out(df):
    """
    Leave-one-out split: each user's chronologically last interaction is held out.

    Args:
        df: DataFrame with at least user_idx, item_idx and timestamp columns.
            The input frame is not modified.

    Returns:
        train_df: Training interactions (every row whose (user_idx, item_idx)
            pair is not a held-out pair)
        test_df: Test interactions (one per user)
        train_user_items: defaultdict(set) mapping user_idx -> set of training item_idx
    """
    print("Splitting data (leave-one-out)...")

    # Sort by user, then time, so the last row of each group is the latest interaction
    df = df.sort_values(['user_idx', 'timestamp'])

    # Get last interaction per user for test
    test_df = df.groupby('user_idx').last().reset_index()

    # Drop the held-out (user, item) pairs with one vectorised membership test
    # rather than a Python-level row-wise apply over the whole frame.
    pair_cols = ['user_idx', 'item_idx']
    all_pairs = pd.MultiIndex.from_frame(df[pair_cols])
    held_out_pairs = pd.MultiIndex.from_frame(test_df[pair_cols])
    train_df = df[~all_pairs.isin(held_out_pairs)].copy()

    # Build user -> positive items mapping from the two columns directly
    # (iterrows would materialise a Series per row).
    train_user_items = defaultdict(set)
    for user, item in zip(train_df['user_idx'].tolist(), train_df['item_idx'].tolist()):
        train_user_items[user].add(item)

    print(f"\u2713 Train size: {len(train_df):,} ratings")
    print(f"\u2713 Test size:  {len(test_df):,} ratings")
    print()

    return train_df, test_df, train_user_items

# BPR-MF Model

Bayesian Personalized Ranking with Matrix Factorization.

**Architecture:**
- User embeddings: $\mathbf{p}_u \in \mathbb{R}^d$
- Item embeddings: $\mathbf{q}_i \in \mathbb{R}^d$
- Score: $\hat{r}_{ui} = \mathbf{p}_u^T \mathbf{q}_i + b_u + b_i + \mu$

**BPR Loss:**
$$\mathcal{L} = -\sum_{(u,i,j)} \log \sigma(\hat{r}_{ui} - \hat{r}_{uj})$$

In [26]:
class BPRMF(nn.Module):
    """
    Matrix-factorization scorer trained with Bayesian Personalized Ranking.

    A (user, item) pair is scored as the dot product of the two embeddings;
    with ``use_bias=True`` a global, a per-user and a per-item bias are added.
    This is the centralized version matching the federated implementation.
    """

    def __init__(self, num_users, num_items, embedding_dim=64, use_bias=True):
        super().__init__()

        self.num_users, self.num_items = num_users, num_items
        self.embedding_dim = embedding_dim
        self.use_bias = use_bias

        # Latent factors
        self.user_embeddings = nn.Embedding(num_users, embedding_dim)
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)

        # Optional bias terms (these attributes only exist when use_bias is True)
        if use_bias:
            self.user_bias = nn.Embedding(num_users, 1)
            self.item_bias = nn.Embedding(num_items, 1)
            self.global_bias = nn.Parameter(torch.zeros(1))

        self._init_weights()

    def _init_weights(self):
        """Xavier-uniform embeddings, N(0, 0.01) user/item biases, zero global bias."""
        for table in (self.user_embeddings, self.item_embeddings):
            nn.init.xavier_uniform_(table.weight)

        if not self.use_bias:
            return

        for table in (self.user_bias, self.item_bias):
            nn.init.normal_(table.weight, mean=0.0, std=0.01)
        nn.init.zeros_(self.global_bias)

    def _user_terms(self, user_ids):
        """Look up the user embeddings and, if enabled, the user biases (else None)."""
        user_emb = self.user_embeddings(user_ids)
        user_b = self.user_bias(user_ids).squeeze(-1) if self.use_bias else None
        return user_emb, user_b

    def _score_items(self, user_emb, user_b, item_ids):
        """Score ``item_ids`` against already looked-up user terms."""
        item_emb = self.item_embeddings(item_ids)
        scores = torch.sum(user_emb * item_emb, dim=1)

        if self.use_bias:
            item_b = self.item_bias(item_ids).squeeze(-1)
            scores = self.global_bias + user_b + item_b + scores

        return scores

    def forward(self, user_ids, pos_item_ids, neg_item_ids=None):
        """
        Score positive items and, optionally, negative items for the same users.

        Args:
            user_ids: (batch_size,)
            pos_item_ids: (batch_size,)
            neg_item_ids: (batch_size,) or None for inference

        Returns:
            (pos_scores, neg_scores) when neg_item_ids is given (input to the
            BPR loss); otherwise just pos_scores.
        """
        user_emb, user_b = self._user_terms(user_ids)
        pos_scores = self._score_items(user_emb, user_b, pos_item_ids)

        if neg_item_ids is None:
            return pos_scores

        neg_scores = self._score_items(user_emb, user_b, neg_item_ids)
        return pos_scores, neg_scores

    def predict(self, user_ids, item_ids):
        """Predict scores for user-item pairs."""
        user_emb, user_b = self._user_terms(user_ids)
        return self._score_items(user_emb, user_b, item_ids)

# BPR Training Dataset

In [27]:
class BPRDataset(TorchDataset):
    """
    Dataset of (user, positive item, sampled negative item) triples for BPR.

    Every observed interaction is exposed ``num_negatives`` times; each access
    draws a fresh negative item, uniformly at random, from the items the user
    has not interacted with in ``df``.

    Negatives are drawn with PyTorch's RNG rather than NumPy's global RNG.
    DataLoader gives every worker process its own PyTorch seed, whereas the
    NumPy state can be duplicated across workers, which would make all workers
    return the same "random" negatives.
    """

    def __init__(self, df, num_items, num_negatives=1):
        """
        Args:
            df: DataFrame with user_idx and item_idx columns (positive interactions)
            num_items: Size of the item id space; negatives are drawn from [0, num_items)
            num_negatives: How many triples to expose per positive interaction

        Raises:
            ValueError: if some user has interacted with every item, because no
                negative could ever be sampled for that user.
        """
        self.num_items = num_items
        self.num_negatives = num_negatives

        # Store positive interactions
        self.users = df['user_idx'].values
        self.pos_items = df['item_idx'].values

        # Build user -> positive items mapping for negative sampling
        self.user_positive_items = defaultdict(set)
        for user, item in zip(self.users, self.pos_items):
            self.user_positive_items[user].add(item)

        # Fail fast instead of spinning forever in the rejection loop below
        for user, items in self.user_positive_items.items():
            if len(items) >= num_items:
                raise ValueError(
                    f"User {user} has interacted with all {num_items} items; "
                    "no negative item can be sampled."
                )

    def __len__(self):
        return len(self.users) * self.num_negatives

    def _draw_item(self):
        """Draw one item id uniformly from [0, num_items) using torch's RNG."""
        return int(torch.randint(self.num_items, (1,)).item())

    def __getitem__(self, idx):
        # Consecutive indices share the same positive interaction
        pos_idx = idx // self.num_negatives
        user = self.users[pos_idx]
        pos_item = self.pos_items[pos_idx]

        # Rejection-sample until the item is not one of the user's positives
        positives = self.user_positive_items[user]
        neg_item = self._draw_item()
        while neg_item in positives:
            neg_item = self._draw_item()

        return (
            torch.tensor(user, dtype=torch.long),
            torch.tensor(pos_item, dtype=torch.long),
            torch.tensor(neg_item, dtype=torch.long)
        )

# Training Functions

In [28]:
def bpr_loss(pos_scores, neg_scores):
    """
    Bayesian Personalized Ranking loss: mean of -log(sigmoid(pos - neg)).

    Uses ``logsigmoid`` instead of ``log(sigmoid(x) + eps)``. With the epsilon
    form, a badly mis-ranked pair (large negative difference) makes the sigmoid
    underflow to 0, so the loss saturates and its gradient vanishes exactly
    where the model most needs correcting.

    Args:
        pos_scores: Scores of the positive items
        neg_scores: Scores of the paired negative items (same shape as pos_scores)

    Returns:
        Scalar tensor with the mean loss over all pairs.
    """
    return -nn.functional.logsigmoid(pos_scores - neg_scores).mean()


def train_epoch(model, train_loader, optimizer, device):
    """
    Run one silent optimisation pass over ``train_loader``.

    Each batch is a (user_ids, pos_item_ids, neg_item_ids) triple; the model is
    put in training mode and updated once per batch with the BPR loss.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for batch in train_loader:
        # Move the three id tensors to the training device
        user_ids, pos_item_ids, neg_item_ids = (ids.to(device) for ids in batch)

        optimizer.zero_grad()
        pos_scores, neg_scores = model(user_ids, pos_item_ids, neg_item_ids)
        loss = bpr_loss(pos_scores, neg_scores)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches


def train_epoch_verbose(model, train_loader, optimizer, device, epoch):
    """
    Run one optimisation pass over ``train_loader`` while showing a tqdm bar.

    Same update rule as the silent variant; the bar is labelled with ``epoch``
    and displays the latest batch loss.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    progress = tqdm(train_loader, desc=f"Epoch {epoch}")
    for batch in progress:
        # Move the three id tensors to the training device
        user_ids, pos_item_ids, neg_item_ids = (ids.to(device) for ids in batch)

        optimizer.zero_grad()
        pos_scores, neg_scores = model(user_ids, pos_item_ids, neg_item_ids)
        loss = bpr_loss(pos_scores, neg_scores)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1

        progress.set_postfix({'loss': f'{batch_loss:.4f}'})

    return running_loss / num_batches

# Evaluation Functions

Sampled evaluation following the NCF paper protocol:
- For each test interaction (user, positive_item)
- Sample 99 negative items that the user hasn't interacted with
- Rank all 100 items (1 positive + 99 negatives)
- Compute HR@K, NDCG@K, MRR

In [29]:
def compute_ranking_metrics(
    model, 
    test_df, 
    train_user_items, 
    num_items,
    num_negatives=99, 
    k_values=[5, 10, 20], 
    device='cpu',
    sample_users=None,
    verbose=True
):
    """
    Compute ranking metrics using the sampled evaluation protocol.

    For every test interaction the positive item is ranked against
    ``num_negatives`` distinct items sampled (with NumPy's global RNG) from the
    items that are neither the test item nor in the user's training positives.

    Ranking rule: the positive's 0-based rank is the number of negatives whose
    score is greater than OR EQUAL to the positive's score, and a non-finite
    (NaN/inf) positive score is given the last rank. Ranking by argsort would
    put the positive first on ties or all-NaN scores, so a constant or
    diverged model would be reported with perfect metrics.

    Args:
        model: Model exposing ``eval()`` and ``predict(user_ids, item_ids)``
        test_df: Test DataFrame with user_idx, item_idx
        train_user_items: Dict mapping user_idx -> set of positive item_idx
        num_items: Total number of items
        num_negatives: Number of negative samples (default: 99)
        k_values: List of K values for metrics
        device: Device on which the id tensors are created
        sample_users: If set, sample this many users for faster evaluation
        verbose: Whether to show progress bar

    Returns:
        Dictionary with 'hr@K' and 'ndcg@K' for every K, plus 'mrr'.

    Raises:
        ValueError: if a user has fewer than ``num_negatives`` candidate
            negatives (the sampling loop could never terminate).
    """
    model.eval()

    # Per-interaction metric values, averaged at the end
    hits = {k: [] for k in k_values}
    ndcgs = {k: [] for k in k_values}
    mrrs = []

    # Group test items by user (read the two columns directly; no iterrows)
    test_user_items = defaultdict(list)
    for user_idx, item_idx in zip(test_df['user_idx'].tolist(), test_df['item_idx'].tolist()):
        test_user_items[user_idx].append(item_idx)

    # Optionally sample users for faster evaluation
    user_list = list(test_user_items.keys())
    if sample_users is not None and sample_users < len(user_list):
        user_list = np.random.choice(user_list, sample_users, replace=False).tolist()

    iterator = tqdm(user_list, desc="Evaluating") if verbose else user_list

    with torch.no_grad():
        for user_idx in iterator:
            pos_items = test_user_items[user_idx]
            train_positives = train_user_items.get(user_idx, set())

            for pos_item in pos_items:
                all_positives = train_positives | {pos_item}

                if num_items - len(all_positives) < num_negatives:
                    raise ValueError(
                        f"User {user_idx} has only {num_items - len(all_positives)} "
                        f"candidate negative items; cannot sample {num_negatives}."
                    )

                # Rejection-sample distinct negatives; `excluded` gives O(1)
                # membership checks for both positives and already-drawn items.
                negatives = []
                excluded = set(all_positives)
                while len(negatives) < num_negatives:
                    neg = np.random.randint(num_items)
                    if neg not in excluded:
                        excluded.add(neg)
                        negatives.append(neg)

                # Candidate list: positive first, then the negatives
                candidates = [pos_item] + negatives

                user_tensor = torch.tensor([user_idx] * len(candidates), dtype=torch.long, device=device)
                item_tensor = torch.tensor(candidates, dtype=torch.long, device=device)

                scores = model.predict(user_tensor, item_tensor)
                scores = scores.cpu().numpy()

                # 0-based rank of the positive; ties and non-finite scores
                # count against it (see docstring).
                pos_score = scores[0]
                if np.isfinite(pos_score):
                    pos_rank = int(np.sum(scores[1:] >= pos_score))
                else:
                    pos_rank = len(candidates) - 1

                for k in k_values:
                    in_top_k = pos_rank < k
                    hits[k].append(1.0 if in_top_k else 0.0)
                    # Single relevant item, so the ideal DCG is 1
                    ndcgs[k].append(1.0 / np.log2(pos_rank + 2) if in_top_k else 0.0)

                # MRR
                mrrs.append(1.0 / (pos_rank + 1))

    # Aggregate metrics
    results = {}
    for k in k_values:
        results[f'hr@{k}'] = np.mean(hits[k])
        results[f'ndcg@{k}'] = np.mean(ndcgs[k])
    results['mrr'] = np.mean(mrrs)

    return results

# Bayesian Optimization with Optuna

We use the **Tree-structured Parzen Estimator (TPE)** sampler with **Median Pruning** for early stopping of unpromising trials.

**Key Features:**
- Automatic hyperparameter search
- Early stopping of bad trials (pruning)
- Visualization of optimization history
- Parameter importance analysis

In [30]:
class OptunaObjective:
    """
    Callable objective that Optuna invokes once per trial to tune BPR-MF.

    A trial samples hyperparameters, trains a fresh model for
    ``config.N_EPOCHS_TUNING`` epochs, periodically reports the objective
    metric so the pruner can stop unpromising trials, and finally returns the
    metric of the fully trained model.

    Note: every evaluation here is computed on ``test_df`` (a random
    subsample of ``config.EVAL_SAMPLE_USERS`` users, redrawn on each call).
    """

    def __init__(self, train_df, test_df, train_user_items, num_users, num_items, device):
        self.train_df = train_df
        self.test_df = test_df
        self.train_user_items = train_user_items
        self.num_users = num_users
        self.num_items = num_items
        self.device = device

    def _suggest_params(self, trial):
        """Sample one hyperparameter set from ``config.SEARCH_SPACE``."""
        space = config.SEARCH_SPACE
        # The dict literal is evaluated top to bottom, which fixes the suggest order
        return {
            'embedding_dim': trial.suggest_categorical('embedding_dim', space['embedding_dim']),
            'lr': trial.suggest_float('lr', space['lr_min'], space['lr_max'], log=True),
            'weight_decay': trial.suggest_float(
                'weight_decay', space['weight_decay_min'], space['weight_decay_max'], log=True
            ),
            'num_negatives': trial.suggest_categorical('num_negatives', space['num_negatives']),
            'batch_size': trial.suggest_categorical('batch_size', space['batch_size']),
        }

    def _objective_value(self, model):
        """Evaluate ``model`` on a user subsample and return the objective metric."""
        metrics = compute_ranking_metrics(
            model=model,
            test_df=self.test_df,
            train_user_items=self.train_user_items,
            num_items=self.num_items,
            num_negatives=config.EVAL_NUM_NEGATIVES,
            k_values=[10],  # Only compute @10 for speed
            device=self.device,
            sample_users=config.EVAL_SAMPLE_USERS,
            verbose=False
        )
        return metrics[config.OPTUNA_METRIC]

    def __call__(self, trial):
        params = self._suggest_params(trial)

        # ---- Data pipeline, model and optimizer for this trial ----
        train_loader = DataLoader(
            BPRDataset(self.train_df, self.num_items, num_negatives=params['num_negatives']),
            batch_size=params['batch_size'],
            shuffle=True,
            num_workers=4,
            pin_memory=True
        )

        model = BPRMF(
            num_users=self.num_users,
            num_items=self.num_items,
            embedding_dim=params['embedding_dim'],
            use_bias=True
        ).to(self.device)

        optimizer = optim.Adam(
            model.parameters(),
            lr=params['lr'],
            weight_decay=params['weight_decay']
        )

        # ---- Train, reporting intermediate values so the pruner can act ----
        for epoch in range(1, config.N_EPOCHS_TUNING + 1):
            train_epoch(model, train_loader, optimizer, self.device)

            past_warmup = epoch >= config.OPTUNA_PRUNING_WARMUP
            on_interval = epoch % config.OPTUNA_PRUNING_INTERVAL == 0
            if not (past_warmup and on_interval):
                continue

            trial.report(self._objective_value(model), epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

        # ---- Value of the fully trained model is the trial's result ----
        return self._objective_value(model)

In [31]:
def run_bayesian_optimization(train_df, test_df, train_user_items, num_users, num_items):
    """
    Search BPR-MF hyperparameters with an Optuna study (TPE sampler + median pruner).

    Prints the search space and settings, runs ``config.OPTUNA_N_TRIALS``
    trials of ``OptunaObjective`` on the module-level ``device``, then prints
    the best trial and how many trials completed or were pruned.

    Returns:
        study: Optuna study object with optimization results
        best_params: Dictionary of best hyperparameters
    """
    rule = "="*60
    space = config.SEARCH_SPACE

    print("\n" + rule)
    print("BAYESIAN OPTIMIZATION (Optuna)")
    print(rule)
    print(f"\nSearch Space:")
    print(f"  - embedding_dim: {space['embedding_dim']}")
    print(f"  - lr: [{space['lr_min']}, {space['lr_max']}] (log)")
    print(f"  - weight_decay: [{space['weight_decay_min']}, {space['weight_decay_max']}] (log)")
    print(f"  - num_negatives: {space['num_negatives']}")
    print(f"  - batch_size: {space['batch_size']}")
    print(f"\nOptimization Settings:")
    print(f"  - Number of trials: {config.OPTUNA_N_TRIALS}")
    print(f"  - Epochs per trial: {config.N_EPOCHS_TUNING}")
    print(f"  - Objective metric: {config.OPTUNA_METRIC}")
    print(f"  - Pruning warmup: {config.OPTUNA_PRUNING_WARMUP} epochs")
    print(f"  - Eval sample users: {config.EVAL_SAMPLE_USERS}")
    print()

    # Objective bound to the data and the module-level device
    objective = OptunaObjective(
        train_df=train_df,
        test_df=test_df,
        train_user_items=train_user_items,
        num_users=num_users,
        num_items=num_items,
        device=device
    )

    # Seeded TPE sampler plus a median pruner sharing the same startup-trial count
    study = optuna.create_study(
        direction=config.OPTUNA_DIRECTION,
        sampler=TPESampler(
            seed=config.RANDOM_STATE,
            n_startup_trials=config.OPTUNA_N_STARTUP_TRIALS
        ),
        pruner=MedianPruner(
            n_startup_trials=config.OPTUNA_N_STARTUP_TRIALS,
            n_warmup_steps=config.OPTUNA_PRUNING_WARMUP,
            interval_steps=config.OPTUNA_PRUNING_INTERVAL
        ),
        study_name='bpr_mf_optimization'
    )

    print("Starting optimization...\n")
    study.optimize(
        objective,
        n_trials=config.OPTUNA_N_TRIALS,
        timeout=config.OPTUNA_TIMEOUT,
        show_progress_bar=True
    )

    # ---- Report the winner ----
    best = study.best_trial
    print("\n" + rule)
    print("OPTIMIZATION RESULTS")
    print(rule)
    print(f"\nBest trial:")
    print(f"  Value ({config.OPTUNA_METRIC}): {best.value:.4f}")
    print(f"  Params:")
    for key, value in best.params.items():
        shown = f"{value:.6f}" if isinstance(value, float) else f"{value}"
        print(f"    - {key}: {shown}")

    # ---- Trial bookkeeping ----
    states = [t.state for t in study.trials]
    pruned_trials = states.count(optuna.trial.TrialState.PRUNED)
    complete_trials = states.count(optuna.trial.TrialState.COMPLETE)
    print(f"\nTrial Statistics:")
    print(f"  - Completed: {complete_trials}")
    print(f"  - Pruned: {pruned_trials}")
    print(f"  - Total: {len(study.trials)}")
    print()

    return study, best.params

# Visualization Functions

In [32]:
def plot_optimization_results(study, save_dir=None):
    """
    Show the four standard Optuna figures for ``study``.

    The optimization history is always drawn. Parameter importances, parallel
    coordinates and the slice plot are attempted in turn; a failure in one of
    them is printed and the remaining plots are still produced. When
    ``save_dir`` is given, it is created if needed and each figure is also
    written there as a PNG.
    """
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    def _render(plot_fn, title, filename):
        # Build, title, optionally save, then display a single figure
        fig = plot_fn(study)
        fig.update_layout(title=title)
        if save_dir:
            fig.write_image(os.path.join(save_dir, filename))
        fig.show()

    # 1. Optimization history: errors propagate to the caller
    _render(plot_optimization_history, 'Optimization History', 'optimization_history.png')

    # 2-4. Plots that are allowed to fail without aborting the rest
    best_effort_plots = (
        (plot_param_importances, 'Hyperparameter Importances', 'param_importances.png',
         "Could not plot parameter importances"),
        (plot_parallel_coordinate, 'Parallel Coordinate Plot', 'parallel_coordinate.png',
         "Could not plot parallel coordinate"),
        (plot_slice, 'Slice Plot', 'slice_plot.png',
         "Could not plot slice"),
    )
    for plot_fn, title, filename, failure_prefix in best_effort_plots:
        try:
            _render(plot_fn, title, filename)
        except Exception as e:
            print(f"{failure_prefix}: {e}")


def plot_ranking_metrics(metrics, title="BPR-MF Results", save_path=None):
    """
    Draw HR@K and NDCG@K as two side-by-side bar charts.

    Args:
        metrics: Dict holding 'hr@K' and 'ndcg@K' for every K in
            ``config.RANKING_K_VALUES``
        title: Figure-level title
        save_path: If given, the figure is also saved there (300 dpi)
    """
    k_values = config.RANKING_K_VALUES

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # (metric key prefix, tick label prefix, y-axis label, panel title, bar colour)
    panels = (
        ('hr', 'HR', 'Hit Rate', 'Hit Rate @ K', 'steelblue'),
        ('ndcg', 'NDCG', 'NDCG', 'NDCG @ K', 'coral'),
    )

    for ax, (key_prefix, tick_prefix, y_label, panel_title, colour) in zip(axes, panels):
        values = [metrics[f'{key_prefix}@{k}'] for k in k_values]
        tick_labels = [f'{tick_prefix}@{k}' for k in k_values]

        bars = ax.bar(tick_labels, values, color=colour, edgecolor='black')
        ax.set_ylabel(y_label, fontsize=12)
        ax.set_title(panel_title, fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')
        ax.set_ylim(0, 1)

        # Print the numeric value just above each bar
        for bar, v in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width()/2, v + 0.02, f'{v:.4f}', ha='center', fontsize=10)

    plt.suptitle(title, fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"\u2713 Figure saved to {save_path}")

    plt.show()

# Utility Functions

In [33]:
def save_results(results, filename='bpr_mf_results.json'):
    """
    Write ``results`` as indented JSON into ``config.RESULTS_DIR``.

    NumPy integers, floats and arrays found inside (possibly nested) dicts and
    lists are first converted to plain Python values so ``json`` can encode them.
    """
    def _to_plain(value):
        # Containers: recurse into their elements
        if isinstance(value, dict):
            return {key: _to_plain(item) for key, item in value.items()}
        if isinstance(value, list):
            return [_to_plain(item) for item in value]
        # NumPy scalars and arrays: convert to built-in equivalents
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.ndarray):
            return value.tolist()
        # Anything else is passed through untouched
        return value

    filepath = os.path.join(config.RESULTS_DIR, filename)

    with open(filepath, 'w') as f:
        json.dump(_to_plain(results), f, indent=4)

    print(f"\u2713 Results saved to {filepath}")


def print_comparison_table(centralized_metrics):
    """
    Print a comparison table between centralized and federated results.

    The centralized row is built from ``centralized_metrics``; the other rows
    are reference numbers hard-coded below, already expressed in percent.

    Args:
        centralized_metrics: Mapping read via the keys ``'hr@10'``,
            ``'ndcg@10'`` and ``'mrr'``. Values are multiplied by 100 for
            display. A missing key or a ``None`` value is shown as ``'N/A'``.
    """
    def to_percent(value):
        # Keep None as the "not reported" marker; scale everything else,
        # including a genuine 0.0.
        return None if value is None else value * 100

    def format_percent(value):
        # Compare against None explicitly: a truthiness test would also turn
        # a real score of 0 into 'N/A'.
        return 'N/A' if value is None else f"{value:.2f}%"

    results = {
        'Centralized BPR-MF (Tuned)': {
            'HR@10': to_percent(centralized_metrics.get('hr@10')),
            'NDCG@10': to_percent(centralized_metrics.get('ndcg@10')),
            'MRR': to_percent(centralized_metrics.get('mrr')),
            'Privacy': 'No'
        },
        'Federated BPR-MF (\u03b1)': {
            'HR@10': 59.73,
            'NDCG@10': 35.04,
            'MRR': 29.18,
            'Privacy': 'Yes'
        },
        'Federated Dual (Concat)': {
            'HR@10': 68.32,
            'NDCG@10': 42.70,
            'MRR': 36.38,
            'Privacy': 'Yes'
        },
        'PFedRec (SOTA)': {
            'HR@10': 70.0,
            'NDCG@10': 44.36,
            'MRR': None,
            'Privacy': 'Yes'
        }
    }

    print("\n" + "="*80)
    print("COMPARISON: CENTRALIZED vs FEDERATED")
    print("="*80)
    print(f"{'Method':<30} {'HR@10':>10} {'NDCG@10':>10} {'MRR':>10} {'Privacy':>10}")
    print("-"*80)

    for method, metrics in results.items():
        hr = format_percent(metrics['HR@10'])
        ndcg = format_percent(metrics['NDCG@10'])
        mrr = format_percent(metrics['MRR'])
        print(f"{method:<30} {hr:>10} {ndcg:>10} {mrr:>10} {metrics['Privacy']:>10}")

    print("="*80)

# Main Execution

## Step 1: Load Data

In [34]:
# Load the ratings frame together with the user and item counts
df, num_users, num_items = load_movielens_1m()

# Leave-one-out split: the logged output shows one held-out test rating per user.
# train_user_items is handed to the tuning and evaluation calls further down
# (presumably each user's training items, used to avoid sampling seen items as
# negatives -- TODO confirm against the helper).
train_df, test_df, train_user_items = train_test_split_leave_one_out(df)

LOADING MOVIELENS-1M DATASET
✓ Dataset loaded successfully
  - Users: 6,040
  - Items: 3,706
  - Ratings: 1,000,209
  - Sparsity: 95.53%

Splitting data (leave-one-out)...
✓ Train size: 994,169 ratings
✓ Test size:  6,040 ratings



## Step 2: Run Bayesian Optimization

In [None]:
# Run Bayesian Optimization
# Only the data is passed here; the search space and trial settings printed in
# the output are not arguments of this call.
# NOTE(review): test_df is given to the tuner. If the objective is computed on
# it, the final test metrics reported later are optimistically biased -- confirm
# and consider a separate validation split.
study, best_params = run_bayesian_optimization(
    train_df=train_df,
    test_df=test_df,
    train_user_items=train_user_items,
    num_users=num_users,
    num_items=num_items
)

[I 2026-01-14 12:22:25,528] A new study created in memory with name: bpr_mf_optimization



BAYESIAN OPTIMIZATION (Optuna)

Search Space:
  - embedding_dim: [32, 64, 128, 256]
  - lr: [0.0001, 0.1] (log)
  - weight_decay: [1e-07, 0.0001] (log)
  - num_negatives: [1, 2, 4, 8]
  - batch_size: [256, 512, 1024, 2048]

Optimization Settings:
  - Number of trials: 50
  - Epochs per trial: 50
  - Objective metric: ndcg@10
  - Pruning warmup: 10 epochs
  - Eval sample users: 1000

Starting optimization...



Best trial: 0. Best value: 0.41749:   2%|▏         | 1/50 [07:24<6:02:47, 444.24s/it]

[I 2026-01-14 12:29:49,763] Trial 0 finished with value: 0.4174897196745984 and parameters: {'embedding_dim': 64, 'lr': 0.00029380279387035364, 'weight_decay': 2.937538457632828e-07, 'num_negatives': 2, 'batch_size': 512}. Best is trial 0 with value: 0.4174897196745984.


Best trial: 0. Best value: 0.41749:   4%|▍         | 2/50 [11:09<4:12:25, 315.52s/it]

[I 2026-01-14 12:33:35,189] Trial 1 finished with value: 0.3793433957464953 and parameters: {'embedding_dim': 256, 'lr': 0.0019762189340280074, 'weight_decay': 7.476312062252299e-07, 'num_negatives': 1, 'batch_size': 512}. Best is trial 0 with value: 0.4174897196745984.


Best trial: 0. Best value: 0.41749:   6%|▌         | 3/50 [16:01<3:58:47, 304.83s/it]

[I 2026-01-14 12:38:27,293] Trial 2 finished with value: 0.2589272831566388 and parameters: {'embedding_dim': 128, 'lr': 0.00015673095467235422, 'weight_decay': 7.025166339242167e-05, 'num_negatives': 1, 'batch_size': 256}. Best is trial 0 with value: 0.4174897196745984.


Best trial: 0. Best value: 0.41749:   8%|▊         | 4/50 [34:15<7:52:22, 616.15s/it]

[I 2026-01-14 12:56:40,685] Trial 3 finished with value: 0.390211340050466 and parameters: {'embedding_dim': 64, 'lr': 0.0008612579192594886, 'weight_decay': 3.6324869566766083e-06, 'num_negatives': 4, 'batch_size': 256}. Best is trial 0 with value: 0.4174897196745984.


Best trial: 4. Best value: 0.419602:  10%|█         | 5/50 [36:52<5:38:07, 450.84s/it]

[I 2026-01-14 12:59:18,427] Trial 4 finished with value: 0.41960229243815944 and parameters: {'embedding_dim': 256, 'lr': 0.001465655388622534, 'weight_decay': 6.516990611177174e-07, 'num_negatives': 1, 'batch_size': 2048}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  12%|█▏        | 6/50 [41:34<4:48:22, 393.24s/it]

[I 2026-01-14 13:03:59,857] Trial 5 finished with value: 0.2718819231966579 and parameters: {'embedding_dim': 256, 'lr': 0.013199942261535026, 'weight_decay': 1.5382308040279007e-05, 'num_negatives': 1, 'batch_size': 256}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  14%|█▍        | 7/50 [59:43<7:24:49, 620.68s/it]

[I 2026-01-14 13:22:08,787] Trial 6 finished with value: 0.24296453050555814 and parameters: {'embedding_dim': 128, 'lr': 0.04588156549160976, 'weight_decay': 2.6100256506134772e-06, 'num_negatives': 4, 'batch_size': 256}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  16%|█▌        | 8/50 [1:02:34<5:34:19, 477.60s/it]

[I 2026-01-14 13:25:00,018] Trial 7 finished with value: 0.41250006465275335 and parameters: {'embedding_dim': 256, 'lr': 0.0008771380343280567, 'weight_decay': 3.355151022721482e-06, 'num_negatives': 1, 'batch_size': 1024}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  18%|█▊        | 9/50 [1:22:11<7:55:50, 696.36s/it]

[I 2026-01-14 13:44:37,413] Trial 8 finished with value: 0.33526629547049175 and parameters: {'embedding_dim': 32, 'lr': 0.025764174425233172, 'weight_decay': 3.628358380354914e-07, 'num_negatives': 8, 'batch_size': 2048}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  20%|██        | 10/50 [1:32:02<7:22:26, 663.66s/it]

[I 2026-01-14 13:54:27,839] Trial 9 finished with value: 0.4168029679018979 and parameters: {'embedding_dim': 64, 'lr': 0.001787446325623842, 'weight_decay': 4.6379219034580327e-07, 'num_negatives': 4, 'batch_size': 2048}. Best is trial 4 with value: 0.41960229243815944.


Best trial: 4. Best value: 0.419602:  22%|██▏       | 11/50 [1:34:00<5:22:46, 496.58s/it]

[I 2026-01-14 13:56:25,591] Trial 10 pruned. 


Best trial: 4. Best value: 0.419602:  24%|██▍       | 12/50 [1:35:28<3:55:55, 372.51s/it]

[I 2026-01-14 13:57:54,314] Trial 11 pruned. 


Best trial: 12. Best value: 0.43735:  26%|██▌       | 13/50 [1:42:48<4:02:17, 392.91s/it]

[I 2026-01-14 14:05:14,161] Trial 12 finished with value: 0.43735022617742414 and parameters: {'embedding_dim': 256, 'lr': 0.00037133853185137054, 'weight_decay': 1.0679463831061146e-06, 'num_negatives': 2, 'batch_size': 512}. Best is trial 12 with value: 0.43735022617742414.


Best trial: 12. Best value: 0.43735:  28%|██▊       | 14/50 [2:03:16<6:27:00, 645.00s/it]

[I 2026-01-14 14:25:41,682] Trial 13 finished with value: 0.4265915316871034 and parameters: {'embedding_dim': 256, 'lr': 0.0006183961600746829, 'weight_decay': 1.2836447245978256e-06, 'num_negatives': 8, 'batch_size': 1024}. Best is trial 12 with value: 0.43735022617742414.


Best trial: 12. Best value: 0.43735:  30%|███       | 15/50 [2:23:41<7:58:16, 819.91s/it]

[I 2026-01-14 14:46:06,932] Trial 14 finished with value: 0.435163696545538 and parameters: {'embedding_dim': 256, 'lr': 0.00048648249118773593, 'weight_decay': 1.3382450107499807e-06, 'num_negatives': 8, 'batch_size': 1024}. Best is trial 12 with value: 0.43735022617742414.


Best trial: 12. Best value: 0.43735:  32%|███▏      | 16/50 [2:27:51<6:07:30, 648.54s/it]

[I 2026-01-14 14:50:17,516] Trial 15 pruned. 


Best trial: 12. Best value: 0.43735:  34%|███▍      | 17/50 [2:28:52<4:19:31, 471.86s/it]

[I 2026-01-14 14:51:18,509] Trial 16 pruned. 


Best trial: 12. Best value: 0.43735:  36%|███▌      | 18/50 [2:34:39<3:51:29, 434.05s/it]

[I 2026-01-14 14:57:04,535] Trial 17 pruned. 


Best trial: 12. Best value: 0.43735:  38%|███▊      | 19/50 [2:36:08<2:50:44, 330.47s/it]

[I 2026-01-14 14:58:33,706] Trial 18 pruned. 


Best trial: 12. Best value: 0.43735:  40%|████      | 20/50 [2:40:04<2:31:07, 302.26s/it]

[I 2026-01-14 15:02:30,231] Trial 19 pruned. 


In [None]:
# Plot optimization results for the finished study.
# save_dir points at config.FIGURES_DIR (presumably where the figures are
# written -- verify against plot_optimization_results).
plot_optimization_results(study, save_dir=config.FIGURES_DIR)

## Step 3: Train Final Model with Best Parameters

In [None]:
def train_final_model(train_df, test_df, train_user_items, num_users, num_items, params):
    """
    Fit a BPR-MF model with the given hyperparameters and evaluate it.

    Trains for ``config.N_EPOCHS_FINAL`` epochs with Adam, then computes the
    ranking metrics on ``test_df`` and prints a summary.

    Args:
        train_df: Training interactions fed to ``BPRDataset``.
        test_df: Held-out interactions used for the final evaluation.
        train_user_items: Passed through to ``compute_ranking_metrics``.
        num_users: Number of users, used to size the model.
        num_items: Number of items, used by the dataset, model and evaluation.
        params: Mapping read via the keys ``'num_negatives'``,
            ``'batch_size'``, ``'embedding_dim'``, ``'lr'`` and
            ``'weight_decay'``.

    Returns:
        Tuple ``(model, metrics, losses, training_time)`` where ``losses``
        holds one value per epoch and ``training_time`` is in seconds.
    """
    banner = "=" * 60
    n_epochs = config.N_EPOCHS_FINAL

    # Announce the run and echo the hyperparameters in use
    print("\n" + banner)
    print("TRAINING FINAL MODEL")
    print(banner)
    print(f"\nBest Parameters:")
    for name, setting in params.items():
        # Floats are shown with fixed precision, everything else verbatim
        shown = f"{setting:.6f}" if isinstance(setting, float) else f"{setting}"
        print(f"  - {name}: {shown}")
    print(f"  - n_epochs: {n_epochs}")
    print()

    # Training data pipeline
    loader = DataLoader(
        BPRDataset(train_df, num_items, num_negatives=params['num_negatives']),
        batch_size=params['batch_size'],
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )

    # Model and optimizer
    model = BPRMF(
        num_users=num_users,
        num_items=num_items,
        embedding_dim=params['embedding_dim'],
        use_bias=True,
    ).to(device)
    optimizer = optim.Adam(
        model.parameters(),
        lr=params['lr'],
        weight_decay=params['weight_decay'],
    )

    # Epoch loop, timed end to end
    started_at = datetime.now()
    losses = []
    for epoch in range(1, n_epochs + 1):
        losses.append(train_epoch_verbose(model, loader, optimizer, device, epoch))

        # Report the loss every 20th epoch only
        if epoch % 20 == 0:
            print(f"Epoch {epoch}: BPR Loss = {losses[-1]:.4f}")

    training_time = (datetime.now() - started_at).total_seconds()
    print(f"\n\u2713 Training completed in {training_time:.2f} seconds")

    # Evaluation on the test split
    print("\n" + banner)
    print("FINAL EVALUATION")
    print(banner)

    metrics = compute_ranking_metrics(
        model=model,
        test_df=test_df,
        train_user_items=train_user_items,
        num_items=num_items,
        num_negatives=config.EVAL_NUM_NEGATIVES,
        k_values=config.RANKING_K_VALUES,
        device=device,
        sample_users=None,  # no user subsampling for the final numbers
        verbose=True,
    )

    # Summary block
    print(f"\n{banner}")
    print("BPR-MF RESULTS (Centralized - Bayesian Optimized)")
    print(f"{banner}")
    for k in config.RANKING_K_VALUES:
        hit_rate = metrics[f'hr@{k}']
        ndcg = metrics[f'ndcg@{k}']
        print(f"HR@{k}:   {hit_rate:.4f} ({hit_rate*100:.2f}%)")
        print(f"NDCG@{k}: {ndcg:.4f} ({ndcg*100:.2f}%)")
        print()
    mrr = metrics['mrr']
    print(f"MRR:     {mrr:.4f} ({mrr*100:.2f}%)")
    print(f"Training Time: {training_time:.2f}s")
    print(f"{banner}\n")

    return model, metrics, losses, training_time


# Train final model with the hyperparameters returned by the optimization step.
# Returns the fitted model, its test metrics, the per-epoch losses and the
# wall-clock training time in seconds.
final_model, final_metrics, losses, training_time = train_final_model(
    train_df=train_df,
    test_df=test_df,
    train_user_items=train_user_items,
    num_users=num_users,
    num_items=num_items,
    params=best_params
)

## Step 4: Save Results and Visualize

In [None]:
# Bundle run metadata, tuned hyperparameters and final metrics into one record
results = {
    'model_name': 'BPR-MF (Centralized - Bayesian Optimized)',
    'dataset': config.DATASET,
    'timestamp': datetime.now().isoformat(),
    # How the hyperparameters were found
    'optimization': dict(
        method='Bayesian Optimization (Optuna)',
        sampler='TPE',
        pruner='MedianPruner',
        n_trials=config.OPTUNA_N_TRIALS,
        objective_metric=config.OPTUNA_METRIC,
        best_trial_value=study.best_trial.value,
    ),
    'best_params': best_params,
    # Final training run
    'training': dict(
        n_epochs=config.N_EPOCHS_FINAL,
        final_loss=losses[-1],
        training_time=training_time,
    ),
    # Evaluation settings used for the reported metrics
    'eval_protocol': dict(
        num_negatives=config.EVAL_NUM_NEGATIVES,
        k_values=config.RANKING_K_VALUES,
    ),
    'metrics': final_metrics,
    # Dataset dimensions
    'data_info': dict(
        num_users=num_users,
        num_items=num_items,
        train_size=len(train_df),
        test_size=len(test_df),
    ),
}

# Persist the record as JSON
save_results(results, filename='bpr_mf_centralized_results.json')

# Bar charts of the final ranking metrics, also written to the figures folder
plot_ranking_metrics(
    final_metrics,
    save_path=os.path.join(config.FIGURES_DIR, 'bpr_mf_ranking_metrics.png'),
    title="BPR-MF (Centralized - Bayesian Optimized)",
)

## Step 5: Comparison with Federated Results

In [None]:
# Print comparison table: the centralized row comes from final_metrics,
# the federated rows are the reference numbers hard-coded in the helper.
print_comparison_table(final_metrics)

In [None]:
# Closing banner
print(f"\n{'=' * 60}\nALL EXPERIMENTS COMPLETED\n{'=' * 60}")

# Output locations
print(f"\nResults saved to: {config.RESULTS_DIR}")
print(f"Figures saved to: {config.FIGURES_DIR}")

# Expected artifacts, one bullet per file
print("\nFiles created:")
print("\n".join(
    f"  - {artifact}"
    for artifact in (
        "bpr_mf_centralized_results.json",
        "bpr_mf_ranking_metrics.png",
        "optimization_history.png",
        "param_importances.png",
        "parallel_coordinate.png",
        "slice_plot.png",
    )
))