# Personalized Ranking System with TorchRec

In [None]:
import torch
import torchrec
from typing import Dict, List, Tuple, NamedTuple
import numpy as np
from collections import defaultdict
from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
from utils.data_generators import TorchRecDataGenerator
from utils.debugging import TorchRecDebugger

## Enhanced Feature Representation

In [None]:
class UserProfile(NamedTuple):
    """User-side tensors for one batch of (user, item) ranking candidates.

    Every field is a ``torch.Tensor``; ``PersonalizedRankingFeatures.to``
    calls ``.to(device)`` on each field in declaration order, so the field
    order and the all-tensor property matter.
    """
    # Item ids the users interacted with before.
    # NOTE(review): PersonalizedDataGenerator emits this as ONE flat 1-D tensor
    # concatenating the histories of all users in the batch; the per-user
    # boundaries are not stored in this tuple.
    historical_items: torch.Tensor    # Previous interactions
    # Category id of each entry in ``historical_items`` (same length).
    historical_cats: torch.Tensor     # Categories of previous interactions
    # One value per user; the generator fills it from "price_sensitivity".
    avg_price_level: torch.Tensor     # Average price preference
    # One value per user.
    activity_level: torch.Tensor      # User activity score
    # The generator stacks one vector of length num_categories per user,
    # giving a 2-D tensor (unlike the other per-user fields).
    category_preferences: torch.Tensor # Category affinity scores
    # One value per user; the generator draws it as hour of day in [0, 24).
    time_of_day: torch.Tensor         # Current time feature

class PersonalizedRankingFeatures:
    """Bundle of every input tensor describing one batch of ranking candidates.

    Attributes:
        user_id: Ids of the users being ranked for.
        item_id: Ids of the candidate items.
        user_profile: ``UserProfile`` tuple with the user-side tensors.
        item_features: Named item-side tensors.
        interaction_features: Named user-item interaction tensors.
    """
    def __init__(
        self,
        user_id: torch.Tensor,
        item_id: torch.Tensor,
        user_profile: UserProfile,
        item_features: Dict[str, torch.Tensor],
        interaction_features: Dict[str, torch.Tensor]
    ):
        self.user_id, self.item_id = user_id, item_id
        self.user_profile = user_profile
        self.item_features, self.interaction_features = (
            item_features,
            interaction_features,
        )

    def to(self, device: torch.device) -> 'PersonalizedRankingFeatures':
        """Return a new feature bundle whose tensors all live on ``device``.

        The current object is left untouched; every tensor (including each
        field of the user profile and each dict value) is moved with ``.to``.
        """
        def relocate(named_tensors):
            # Move each tensor of a name -> tensor mapping, keeping the keys.
            return {
                name: tensor.to(device)
                for name, tensor in named_tensors.items()
            }

        profile_on_device = UserProfile(
            *(field.to(device) for field in self.user_profile)
        )

        return PersonalizedRankingFeatures(
            user_id=self.user_id.to(device),
            item_id=self.item_id.to(device),
            user_profile=profile_on_device,
            item_features=relocate(self.item_features),
            interaction_features=relocate(self.interaction_features),
        )

## User Interest Model

In [None]:
class UserInterestModule(torch.nn.Module):
    """Summarise a user's interaction history into a single interest vector.

    A multi-head attention layer lets the current context query the history
    embeddings; the attended result is then passed through a two-layer MLP
    that maps it back to ``embedding_dim``.
    """
    def __init__(
        self,
        embedding_dim: int,
        hidden_dim: int,
        num_heads: int = 4
    ):
        super().__init__()

        # batch_first=True: attention inputs are laid out (batch, seq, dim).
        self.attention = torch.nn.MultiheadAttention(
            embed_dim=embedding_dim,
            num_heads=num_heads,
            batch_first=True
        )

        projection_layers = [
            torch.nn.Linear(embedding_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, embedding_dim),
        ]
        self.interest_projection = torch.nn.Sequential(*projection_layers)

    def forward(
        self,
        history_embeddings: torch.Tensor,
        current_context: torch.Tensor
    ) -> torch.Tensor:
        """Attend over the history with the context as the (single) query.

        Args:
            history_embeddings: History embeddings, used as both keys and
                values of the attention layer.
            current_context: Context vector; a length-1 sequence axis is
                inserted at dim 1 before it is used as the query.

        Returns:
            The projected interest vector, with the length-1 sequence axis
            removed again.
        """
        query = current_context.unsqueeze(1)

        # The attention weights (second return value) are not needed.
        attended, _unused_weights = self.attention(
            query=query,
            key=history_embeddings,
            value=history_embeddings,
        )

        pooled = attended.squeeze(1)
        return self.interest_projection(pooled)

## Personalized Ranking Model

In [None]:
class PersonalizedRankingModel(torch.nn.Module):
    """Ranking model combining user interests, user profile and item features.

    Inputs read from ``PersonalizedRankingFeatures``:
        * ``user_id``, ``item_id`` and ``item_features["category"]``: one id
          per candidate, looked up in the three embedding tables.
        * ``user_profile.avg_price_level`` / ``activity_level`` /
          ``time_of_day``: one scalar per candidate.
        * ``user_profile.category_preferences``: ``(batch, num_categories)``.
        * ``user_profile.historical_items``: item ids of the users' histories
          (see ``_encode_user_interests`` for the accepted layouts).
        * ``item_features["price"]`` / ``["age"]`` / ``["popularity"]``: one
          scalar per candidate.

    The embedding collection is created on a real device (not ``meta``) so the
    module can be trained directly in a single process and moved with
    ``.to(device)``. The history lookup reads the item table weight directly,
    which assumes the collection is NOT wrapped/sharded by TorchRec's
    distributed model parallel.
    """

    # Scalar columns in the dense user-profile vector (price, activity, time).
    _NUM_PROFILE_SCALARS = 3
    # Scalar columns in the dense item vector (price, age, popularity).
    _NUM_ITEM_SCALARS = 3

    def __init__(
        self,
        num_users: int,
        num_items: int,
        num_categories: int,
        embedding_dim: int = 64,
        hidden_dim: int = 128
    ):
        super().__init__()

        # Embedding tables. No ``device=meta``: meta tensors carry no storage,
        # so they can neither be moved with ``.to()`` nor used in ``forward``
        # without a separate sharding/materialisation step.
        self.embedding_tables = torchrec.EmbeddingBagCollection(
            tables=[
                torchrec.EmbeddingBagConfig(
                    name="user_embeddings",
                    embedding_dim=embedding_dim,
                    num_embeddings=num_users,
                    feature_names=["user_id"],
                ),
                torchrec.EmbeddingBagConfig(
                    name="item_embeddings",
                    embedding_dim=embedding_dim,
                    num_embeddings=num_items,
                    feature_names=["item_id"],
                ),
                torchrec.EmbeddingBagConfig(
                    name="category_embeddings",
                    embedding_dim=embedding_dim,
                    num_embeddings=num_categories,
                    feature_names=["category_id"],
                )
            ],
        )

        # User interest modeling
        self.user_interest_model = UserInterestModule(
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim
        )

        # User profile encoding: three scalars plus the category-affinity
        # vector, concatenated along the feature axis.
        self.profile_encoder = torch.nn.Sequential(
            torch.nn.Linear(
                self._NUM_PROFILE_SCALARS + num_categories, hidden_dim
            ),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, embedding_dim)
        )

        # Item feature encoding: item embedding, category embedding and the
        # three dense item scalars.
        self.item_encoder = torch.nn.Sequential(
            torch.nn.Linear(
                2 * embedding_dim + self._NUM_ITEM_SCALARS, hidden_dim
            ),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, embedding_dim)
        )

        # Interaction modeling over [user interests, profile, item] encodings
        self.interaction_layers = torch.nn.Sequential(
            torch.nn.Linear(embedding_dim * 3, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_dim),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(hidden_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_dim),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(hidden_dim, 1)
        )

    def _lookup_pooled_embeddings(
        self,
        features: PersonalizedRankingFeatures
    ) -> Dict[str, torch.Tensor]:
        """Look up user, item and category embeddings for every candidate.

        Returns a dict keyed by FEATURE name (``"user_id"``, ``"item_id"``,
        ``"category_id"``), each value of shape ``(batch, embedding_dim)``.
        """
        # Values are laid out key-major: all user ids, then all item ids,
        # then all category ids. Every feature the collection was configured
        # with must be present in the KeyedJaggedTensor.
        ids = torch.cat([
            features.user_id.reshape(-1),
            features.item_id.reshape(-1),
            features.item_features["category"].reshape(-1),
        ]).long()

        # Exactly one id per (feature, sample); lengths must be integral.
        lengths = torch.ones(ids.numel(), dtype=torch.long, device=ids.device)

        sparse_features = KeyedJaggedTensor.from_lengths_sync(
            keys=["user_id", "item_id", "category_id"],
            values=ids,
            lengths=lengths,
        )
        return self.embedding_tables(sparse_features).to_dict()

    def _encode_user_interests(
        self,
        features: PersonalizedRankingFeatures,
        user_context: torch.Tensor
    ) -> torch.Tensor:
        """Run the interest module over each user's own history.

        Accepted layouts of ``user_profile.historical_items``:
            * ragged: a flat 1-D tensor of concatenated histories together
              with ``interaction_features["history_lengths"]`` holding one
              length per user;
            * rectangular: a ``(batch, L)`` tensor, or a flat tensor whose
              size is a multiple of the batch size (equal-length histories).

        Raises:
            ValueError: If the history cannot be split per user.
        """
        # Shared item table, read directly so that history items get
        # per-position (unpooled) embeddings.
        item_table = self.embedding_tables.embedding_bags[
            "item_embeddings"
        ].weight
        history_ids = features.user_profile.historical_items.long()
        batch_size = user_context.shape[0]
        lengths = features.interaction_features.get("history_lengths")

        if lengths is None:
            total = history_ids.numel()
            if batch_size == 0 or total == 0 or total % batch_size != 0:
                raise ValueError(
                    "historical_items cannot be split per user: provide a "
                    "(batch, L) tensor or add per-user 'history_lengths' to "
                    "interaction_features"
                )
            history = torch.nn.functional.embedding(
                history_ids.reshape(batch_size, -1), item_table
            )
            return self.user_interest_model(history, user_context)

        lengths = lengths.to(device=history_ids.device, dtype=torch.long)
        flat_ids = history_ids.reshape(-1)
        if lengths.numel() != batch_size or int(lengths.sum()) != flat_ids.numel():
            raise ValueError(
                "history_lengths must hold one length per user and sum to "
                "the number of historical items"
            )

        flat_embeddings = torch.nn.functional.embedding(flat_ids, item_table)
        # Start offset of every user's slice inside the flat history.
        starts = torch.cumsum(lengths, dim=0) - lengths

        # Users with an empty history keep an all-zero interest vector.
        interests = torch.zeros_like(user_context)

        # Batch together users whose histories have the same length so the
        # attention layer never sees padding.
        for length in torch.unique(lengths).tolist():
            if length == 0:
                continue
            rows = torch.nonzero(lengths == length, as_tuple=True)[0]
            positions = starts[rows].unsqueeze(1) + torch.arange(
                length, device=lengths.device
            )
            interests[rows] = self.user_interest_model(
                flat_embeddings[positions],
                user_context[rows],
            )
        return interests

    def forward(
        self,
        features: PersonalizedRankingFeatures
    ) -> Dict[str, torch.Tensor]:
        """Score every (user, item) candidate in the batch.

        Returns:
            Dict with ``"score"`` of shape ``(batch, 1)`` (raw logits) and the
            ``"user_encoding"`` / ``"item_encoding"`` tensors of shape
            ``(batch, embedding_dim)``.
        """
        # Get base embeddings (keyed by feature name)
        embeddings = self._lookup_pooled_embeddings(features)

        # Encode user profile: scalars become columns, the category affinity
        # matrix is appended as-is.
        profile = features.user_profile
        profile_features = torch.cat([
            torch.stack([
                profile.avg_price_level,
                profile.activity_level,
                profile.time_of_day,
            ], dim=1),
            profile.category_preferences,
        ], dim=1).float()

        profile_encoding = self.profile_encoder(profile_features)

        # Model user interests from the history, queried by the user embedding
        user_interests = self._encode_user_interests(
            features, embeddings["user_id"]
        )

        # Encode item features
        item_dense = torch.stack([
            features.item_features["price"],
            features.item_features["age"],
            features.item_features["popularity"],
        ], dim=1).float()

        item_features = torch.cat([
            embeddings["item_id"],
            embeddings["category_id"],
            item_dense,
        ], dim=1)

        item_encoding = self.item_encoder(item_features)

        # Combine all representations
        combined_features = torch.cat([
            user_interests,
            profile_encoding,
            item_encoding
        ], dim=1)

        # Generate ranking score
        ranking_score = self.interaction_layers(combined_features)

        return {
            "score": ranking_score,
            "user_encoding": user_interests,
            "item_encoding": item_encoding
        }

## Enhanced Ranking Loss

In [None]:
class PersonalizedRankingLoss:
    """Ranking loss made of a BCE term, a triplet term and an L2 penalty.

    Args:
        margin: Margin of the triplet (contrastive) term.
        lambda_reg: Weight of the encoding-norm regulariser.
    """
    def __init__(
        self,
        margin: float = 0.1,
        lambda_reg: float = 0.1
    ):
        self.margin = margin
        self.lambda_reg = lambda_reg

    def compute_loss(
        self,
        outputs: Dict[str, torch.Tensor],
        labels: torch.Tensor,
        user_history: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """Compute all loss components for one batch.

        Args:
            outputs: Model outputs with keys ``"score"`` (logits, one per
                sample), ``"user_encoding"`` and ``"item_encoding"`` (one row
                per sample).
            labels: Binary relevance labels, one per sample.
            user_history: Accepted for interface compatibility; not used by
                any loss term.

        Returns:
            Dict with ``"total_loss"``, ``"ranking_loss"``,
            ``"contrastive_loss"`` and ``"reg_loss"`` (all scalar tensors).
        """
        scores = outputs["score"]
        user_encoding = outputs["user_encoding"]
        item_encoding = outputs["item_encoding"]

        # Flatten explicitly instead of ``squeeze()``: a batch of one would
        # otherwise collapse to a 0-d tensor and fail the BCE shape check.
        logits = scores.reshape(-1)
        targets = labels.reshape(-1).float()

        # Basic ranking loss
        ranking_loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logits,
            targets
        )

        # Contrastive loss: pull each positive's user encoding towards its
        # item encoding and away from a negative item encoding.
        pos_mask = targets == 1
        neg_mask = targets == 0
        pos_encoding = item_encoding[pos_mask]
        neg_encoding = item_encoding[neg_mask]

        if len(pos_encoding) > 0 and len(neg_encoding) > 0:
            # The triplet loss needs one negative per anchor, but a batch
            # rarely has as many negatives as positives. Cycle through the
            # negatives so the i-th positive is paired with negative
            # ``i % num_negatives`` (identical to the plain pairing when the
            # counts already match).
            pairing = torch.arange(
                len(pos_encoding), device=neg_encoding.device
            ) % len(neg_encoding)
            contrastive_loss = torch.nn.functional.triplet_margin_loss(
                anchor=user_encoding[pos_mask],
                positive=pos_encoding,
                negative=neg_encoding[pairing],
                margin=self.margin
            )
        else:
            contrastive_loss = torch.tensor(0.0, device=scores.device)

        # Regularization on the norm of both encoding matrices
        reg_loss = self.lambda_reg * (
            user_encoding.norm(2) +
            item_encoding.norm(2)
        )

        total_loss = ranking_loss + contrastive_loss + reg_loss

        return {
            "total_loss": total_loss,
            "ranking_loss": ranking_loss,
            "contrastive_loss": contrastive_loss,
            "reg_loss": reg_loss
        }

## Enhanced Data Generation

In [None]:
class PersonalizedDataGenerator:
    """Synthetic data source for the personalized ranking example.

    On construction it draws random item metadata (category, price, age,
    popularity), one interaction history per user and one preference record
    per user. ``generate_batch`` then samples (user, item) pairs and derives
    features and Bernoulli labels from that fixed synthetic world.

    Args:
        num_users: Number of synthetic users (>= 1).
        num_items: Number of synthetic items (>= 1).
        num_categories: Number of item categories (>= 1).
        max_history_length: Largest history length a user can have
            (inclusive, >= 1). Histories never exceed ``num_items`` because
            items are drawn without replacement.

    Raises:
        ValueError: If any of the four sizes is smaller than 1.
    """

    # Preferred lower bound for a user's history length; lowered automatically
    # when ``max_history_length`` or ``num_items`` is smaller.
    _MIN_HISTORY_LENGTH = 5

    def __init__(
        self,
        num_users: int,
        num_items: int,
        num_categories: int,
        max_history_length: int = 20
    ):
        if min(num_users, num_items, num_categories, max_history_length) < 1:
            raise ValueError(
                "num_users, num_items, num_categories and "
                "max_history_length must all be >= 1"
            )

        self.num_users = num_users
        self.num_items = num_items
        self.num_categories = num_categories
        self.max_history_length = max_history_length

        # Generate item metadata
        self.item_categories = torch.randint(0, num_categories, (num_items,))
        self.item_prices = torch.rand(num_items) * 100
        self.item_ages = torch.rand(num_items) * 365
        self.item_popularity = torch.rand(num_items)

        # Generate user profiles
        self.user_histories = self._generate_user_histories()
        self.user_preferences = self._generate_user_preferences()

    def _generate_user_histories(self) -> Dict[int, List[int]]:
        """Draw one duplicate-free interaction history per user.

        Lengths are uniform in ``[shortest, longest]`` where ``longest`` is
        ``min(max_history_length, num_items)`` and ``shortest`` is the
        preferred minimum capped at ``longest``. The caps keep both the
        length draw and the without-replacement item draw valid for small
        configurations.
        """
        longest = min(self.max_history_length, self.num_items)
        shortest = min(self._MIN_HISTORY_LENGTH, longest)

        histories = {}
        for user_id in range(self.num_users):
            # ``randint`` excludes the upper bound, hence ``+ 1``.
            history_length = np.random.randint(shortest, longest + 1)
            histories[user_id] = np.random.choice(
                self.num_items,
                size=history_length,
                replace=False
            ).tolist()
        return histories

    def _generate_user_preferences(self) -> Dict[int, Dict[str, object]]:
        """Draw price sensitivity, category affinities and activity per user.

        ``"price_sensitivity"`` and ``"activity_level"`` are floats;
        ``"category_preferences"`` is a tensor of length ``num_categories``.
        """
        preferences = {}
        for user_id in range(self.num_users):
            preferences[user_id] = {
                "price_sensitivity": np.random.beta(2, 5),  # Most users prefer lower prices
                "category_preferences": torch.rand(self.num_categories),
                "activity_level": np.random.beta(2, 2)
            }
        return preferences

    def generate_batch(
        self,
        batch_size: int,
        pos_ratio: float = 0.2
    ) -> Tuple["PersonalizedRankingFeatures", torch.Tensor]:
        """Sample a batch of (user, item) candidates with features and labels.

        Args:
            batch_size: Number of candidates to sample.
            pos_ratio: Scale applied to the preference-based click
                probability before labels are drawn; the scaled probability
                is clamped to ``[0, 1]``.

        Returns:
            ``(features, labels)``. ``labels`` is a float tensor of zeros and
            ones with one entry per candidate. ``historical_items`` /
            ``historical_cats`` in the user profile are flat concatenations
            of the sampled users' histories; the per-user lengths needed to
            split them are provided as
            ``features.interaction_features["history_lengths"]`` (stored
            there because ``UserProfile`` has a fixed set of fields).
        """
        # Sample users and items
        user_ids = torch.randint(0, self.num_users, (batch_size,))
        item_ids = torch.randint(0, self.num_items, (batch_size,))

        # Generate time features
        time_of_day = torch.rand(batch_size) * 24  # Hour of day

        # Gather the per-user records once, then build tensors in bulk.
        sampled_users = user_ids.tolist()
        histories = [self.user_histories[user_id] for user_id in sampled_users]
        preferences = [
            self.user_preferences[user_id] for user_id in sampled_users
        ]

        history_lengths = torch.tensor(
            [len(history) for history in histories], dtype=torch.long
        )
        historical_items = torch.tensor(
            [item_id for history in histories for item_id in history],
            dtype=torch.long,
        )

        if preferences:
            category_preferences = torch.stack(
                [prefs["category_preferences"] for prefs in preferences]
            )
        else:
            # ``torch.stack`` rejects an empty list.
            category_preferences = torch.empty((0, self.num_categories))

        user_profile = UserProfile(
            historical_items=historical_items,
            historical_cats=self.item_categories[historical_items],
            avg_price_level=torch.tensor(
                [prefs["price_sensitivity"] for prefs in preferences],
                dtype=torch.float32,
            ),
            activity_level=torch.tensor(
                [prefs["activity_level"] for prefs in preferences],
                dtype=torch.float32,
            ),
            category_preferences=category_preferences,
            time_of_day=time_of_day
        )

        # Get item features
        item_features = {
            "price": self.item_prices[item_ids],
            "age": self.item_ages[item_ids],
            "popularity": self.item_popularity[item_ids],
            "category": self.item_categories[item_ids]
        }

        # Generate interaction features
        interaction_features = {
            # Distance between the normalised item price and the user's
            # price level.
            "price_match": torch.abs(
                self.item_prices[item_ids] / 100 -
                user_profile.avg_price_level
            ),
            # Each user's affinity for the candidate item's category.
            "category_match": category_preferences.gather(
                1, item_features["category"].unsqueeze(1)
            ).squeeze(1),
            # One history length per user, in batch order.
            "history_lengths": history_lengths,
        }

        # Generate labels based on user preferences
        label_probs = (
            (1 - interaction_features["price_match"]) * 0.4 +
            interaction_features["category_match"] * 0.4 +
            item_features["popularity"] * 0.2
        )
        # Clamp so that an unusual ``pos_ratio`` cannot push the value outside
        # the range ``torch.bernoulli`` accepts.
        labels = torch.bernoulli((label_probs * pos_ratio).clamp(0.0, 1.0))

        features = PersonalizedRankingFeatures(
            user_id=user_ids,
            item_id=item_ids,
            user_profile=user_profile,
            item_features=item_features,
            interaction_features=interaction_features
        )

        return features, labels

## Enhanced Training Infrastructure

In [None]:
class PersonalizedRankingTrainer:
    """Owns the optimizer, LR scheduler and single-step training logic.

    Args:
        model: Model to train; it is moved to ``device`` on construction.
        loss_fn: Object exposing ``compute_loss(outputs, labels,
            user_history)`` that returns a dict of scalar tensors including
            ``"total_loss"``.
        learning_rate: Initial AdamW learning rate.
        device: Device the model and every batch are moved to.

    Attributes:
        scheduler: ``ReduceLROnPlateau`` in ``min`` mode; the caller is
            expected to call ``scheduler.step(metric)`` (this class never
            steps it). The current learning rate can be read from
            ``optimizer.param_groups[0]["lr"]``.
    """
    def __init__(
        self,
        model: "PersonalizedRankingModel",
        loss_fn: "PersonalizedRankingLoss",
        learning_rate: float = 0.001,
        device: str = "cuda"
    ):
        self.model = model.to(device)
        self.loss_fn = loss_fn
        self.device = device

        # Optimizer with weight decay
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=learning_rate,
            weight_decay=0.01
        )

        # Learning rate scheduler. The ``verbose`` flag is deliberately not
        # passed: it is deprecated in recent PyTorch releases.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            mode='min',
            factor=0.5,
            patience=3
        )

    def train_step(
        self,
        features: "PersonalizedRankingFeatures",
        labels: torch.Tensor,
        user_history: torch.Tensor
    ) -> Dict[str, float]:
        """Run one optimisation step and return the loss components.

        Args:
            features: Batch features; moved to the trainer's device.
            labels: Batch labels; moved to the trainer's device.
            user_history: Forwarded unchanged (after the device move) to
                ``loss_fn.compute_loss``.

        Returns:
            The dict returned by ``compute_loss`` with every tensor converted
            to a Python float.
        """
        self.optimizer.zero_grad()

        # Move to device
        features = features.to(self.device)
        labels = labels.to(self.device)
        user_history = user_history.to(self.device)

        # Forward pass
        outputs = self.model(features)

        # Compute losses
        losses = self.loss_fn.compute_loss(outputs, labels, user_history)

        # Backward pass
        losses["total_loss"].backward()

        # Gradient clipping (global norm capped at 1.0)
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

        # Update weights
        self.optimizer.step()

        return {k: v.item() for k, v in losses.items()}

## Enhanced Evaluation Metrics

In [None]:
class PersonalizedRankingEvaluator:
    """Ranking and personalization metrics for one scored batch.

    The whole batch is treated as a single ranked list (sorted by score);
    metrics are not computed per user. Every metric is returned as a plain
    Python ``float`` so results can be aggregated with NumPy regardless of
    the device the tensors live on.
    """
    @staticmethod
    def compute_metrics(
        outputs: Dict[str, torch.Tensor],
        labels: torch.Tensor,
        features: "PersonalizedRankingFeatures"
    ) -> Dict[str, float]:
        """Compute all metrics for a batch.

        Args:
            outputs: Model outputs; only ``"score"`` is read.
            labels: Binary relevance labels, one per scored sample.
            features: Batch features; only ``item_features["category"]`` and
                ``item_features["price"]`` are read.

        Returns:
            Dict with ``ndcg@k`` and ``map@k`` for k in (5, 10, 20) plus
            ``category_diversity`` and ``price_range``.
        """
        scores = outputs["score"].detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()

        metrics = {}

        # Standard ranking metrics
        metrics.update(PersonalizedRankingEvaluator._compute_ranking_metrics(
            scores, labels
        ))

        # Personalization metrics
        metrics.update(PersonalizedRankingEvaluator._compute_personalization_metrics(
            outputs, features
        ))

        return metrics

    @staticmethod
    def _compute_ranking_metrics(
        scores: np.ndarray,
        labels: np.ndarray
    ) -> Dict[str, float]:
        """Sort labels by descending score and compute NDCG@k / MAP@k."""
        # Flatten explicitly: ``squeeze()`` would turn a single-sample batch
        # into a 0-d array.
        flat_scores = np.asarray(scores).reshape(-1)
        flat_labels = np.asarray(labels).reshape(-1)

        # Sort by scores
        sorted_labels = flat_labels[np.argsort(-flat_scores)]

        metrics = {}

        # NDCG@k
        for k in [5, 10, 20]:
            metrics[f"ndcg@{k}"] = PersonalizedRankingEvaluator._compute_ndcg(
                sorted_labels, k
            )

        # MAP@k
        for k in [5, 10, 20]:
            metrics[f"map@{k}"] = PersonalizedRankingEvaluator._compute_map(
                sorted_labels, k
            )

        return metrics

    @staticmethod
    def _compute_personalization_metrics(
        outputs: Dict[str, torch.Tensor],
        features: "PersonalizedRankingFeatures",
        top_k: int = 10
    ) -> Dict[str, float]:
        """Describe the variety of the ``top_k`` highest-scored items.

        ``category_diversity`` is the number of distinct categories divided
        by the number of items actually recommended (which is smaller than
        ``top_k`` for small batches). ``price_range`` is the max-min price
        spread of those items divided by 100. Both are 0.0 when nothing is
        recommended.
        """
        scores = outputs["score"].detach().reshape(-1)
        num_recommended = min(top_k, scores.numel())
        if num_recommended <= 0:
            return {"category_diversity": 0.0, "price_range": 0.0}

        # Rank once and reuse the indices for both metrics.
        top_indices = torch.argsort(scores, descending=True)[:num_recommended]

        categories = features.item_features["category"]
        prices = features.item_features["price"]
        # Index tensors must live on the same device as the indexed tensor.
        recommended_cats = categories[top_indices.to(categories.device)]
        recommended_prices = prices[top_indices.to(prices.device)]

        metrics = {}

        # Category diversity
        metrics["category_diversity"] = (
            len(set(recommended_cats.tolist())) / num_recommended
        )

        # Price range coverage (``.item()`` yields a Python float)
        metrics["price_range"] = (
            (recommended_prices.max() - recommended_prices.min()) / 100
        ).item()

        return metrics

    @staticmethod
    def _compute_ndcg(labels: np.ndarray, k: int) -> float:
        """Compute NDCG@k for labels already sorted by descending score."""
        k = min(k, len(labels))
        if k <= 0:
            return 0.0

        # Position discounts log2(rank + 1) for ranks 1..k.
        discounts = np.log2(np.arange(2, k + 2))

        dcg = np.sum(labels[:k] / discounts)

        ideal_labels = np.sort(labels)[::-1]
        idcg = np.sum(ideal_labels[:k] / discounts)

        return float(dcg / idcg) if idcg > 0 else 0.0

    @staticmethod
    def _compute_map(labels: np.ndarray, k: int) -> float:
        """Compute average precision at k for labels sorted by score.

        The sum of precision values at the relevant positions within the top
        ``k`` is divided by ``min(k, number of relevant items in the list)``,
        so a ranking that places every relevant item first scores 1.0.
        Returns 0.0 when the list is empty or contains no relevant item.
        """
        k = min(k, len(labels))
        if k <= 0:
            return 0.0

        total_relevant = int(np.count_nonzero(np.asarray(labels) > 0))
        if total_relevant == 0:
            return 0.0

        is_relevant = np.asarray(labels[:k]) > 0
        # precision@i for i = 1..k
        precision_at_rank = np.cumsum(is_relevant) / np.arange(1, k + 1)
        precision_sum = float(np.sum(precision_at_rank * is_relevant))

        return precision_sum / min(k, total_relevant)

## Complete Training Loop

In [None]:
def train_personalized_ranking_model(
    num_users: int = 10000,
    num_items: int = 1000,
    num_categories: int = 100,
    num_epochs: int = 10,
    batch_size: int = 128,
    batches_per_epoch: int = 100,
    eval_batches: int = 10,
    eval_batch_size: int = 256,
) -> "PersonalizedRankingModel":
    """Train and periodically evaluate the ranking model on synthetic data.

    The model and the data generator are built from the same size arguments
    so their id ranges always agree. Training uses CUDA when available and
    falls back to CPU otherwise. Progress is reported with ``print``.

    Args:
        num_users: Number of synthetic users / rows of the user table.
        num_items: Number of synthetic items / rows of the item table.
        num_categories: Number of item categories.
        num_epochs: Number of train + evaluate rounds.
        batch_size: Training batch size.
        batches_per_epoch: Training batches generated per epoch.
        eval_batches: Evaluation batches generated per epoch.
        eval_batch_size: Evaluation batch size.

    Returns:
        The trained model.
    """
    # Initialize components
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = PersonalizedRankingModel(
        num_users=num_users,
        num_items=num_items,
        num_categories=num_categories
    )

    loss_fn = PersonalizedRankingLoss()
    trainer = PersonalizedRankingTrainer(model, loss_fn, device=device)
    evaluator = PersonalizedRankingEvaluator()

    data_gen = PersonalizedDataGenerator(
        num_users=num_users,
        num_items=num_items,
        num_categories=num_categories
    )

    # Training loop
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}")

        # Training
        model.train()
        epoch_losses = defaultdict(list)

        for batch in range(batches_per_epoch):
            features, labels = data_gen.generate_batch(batch_size)
            losses = trainer.train_step(
                features,
                labels,
                features.user_profile.historical_items
            )

            for k, v in losses.items():
                epoch_losses[k].append(v)

            if batch % 10 == 0:
                print(f"Batch {batch}, Loss: {losses['total_loss']:.4f}")

        # Print epoch metrics
        print("\nTraining Metrics:")
        for k, v in epoch_losses.items():
            print(f"{k}: {np.mean(v):.4f}")

        # Evaluation
        model.eval()
        eval_metrics = defaultdict(list)

        with torch.no_grad():
            for _ in range(eval_batches):
                features, labels = data_gen.generate_batch(eval_batch_size)
                features = features.to(device)
                labels = labels.to(device)

                outputs = model(features)
                metrics = evaluator.compute_metrics(outputs, labels, features)

                for k, v in metrics.items():
                    # Coerce to a Python float so that ``np.mean`` below also
                    # works if a metric comes back as a (possibly CUDA)
                    # 0-d tensor.
                    eval_metrics[k].append(float(v))

        print("\nEvaluation Metrics:")
        for k, v in eval_metrics.items():
            print(f"{k}: {np.mean(v):.4f}")

        # Update learning rate from the mean training loss of this epoch;
        # skipped when no training batch ran (nothing to average).
        if epoch_losses["total_loss"]:
            trainer.scheduler.step(np.mean(epoch_losses["total_loss"]))

    return model

# Script entry point: run the full training loop only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    train_personalized_ranking_model()