In [2]:
# Simple BPR Reranking with Accuracy Metrics Only

import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split

def evaluate_bpr_accuracy(model, user_mapping, item_mapping, test_df, k=10):
    """
    Score a trained model's top-k recommendations against a held-out test set

    For every test user, the model's top-k items are compared with the items
    that user has in test_df, and NDCG, precision and recall are averaged
    over users. The averages are printed and returned.

    Parameters:
    - model: trained model exposing recommend(user_index, n=k)
    - user_mapping: mapping from original user IDs to matrix indices
    - item_mapping: mapping from original item IDs to matrix indices
    - test_df: test set DataFrame with 'user_id', 'item_id' and 'rating' columns
    - k: number of recommendations requested per user

    Returns:
    - metrics: dictionary with keys 'ndcg@k', 'precision@k', 'recall@k'
    """
    # Matrix index -> original item ID, to translate the model's output back
    index_to_item = {index: original_id for original_id, index in item_mapping.items()}

    # Collect, per user, the held-out items and their ratings.
    # Rows whose user or item is missing from the mappings are ignored.
    truth_items = defaultdict(list)
    truth_ratings = defaultdict(list)
    for _, record in test_df.iterrows():
        uid, iid, score = record['user_id'], record['item_id'], record['rating']
        if uid not in user_mapping or iid not in item_mapping:
            continue
        truth_items[uid].append(iid)
        truth_ratings[uid].append(score)

    ndcg_scores, precision_scores, recall_scores = [], [], []
    for uid, wanted in truth_items.items():
        if not wanted:
            continue

        ranked_idx = model.recommend(user_mapping[uid], n=k)
        ranked_ids = [index_to_item[idx] for idx in ranked_idx]

        ndcg_scores.append(calculate_ndcg(ranked_ids, wanted, truth_ratings[uid]))
        precision_scores.append(calculate_precision(ranked_ids, wanted))
        recall_scores.append(calculate_recall(ranked_ids, wanted))

    def average(values):
        # An empty list yields 0 rather than NaN
        return np.mean(values) if values else 0

    avg_ndcg = average(ndcg_scores)
    avg_precision = average(precision_scores)
    avg_recall = average(recall_scores)

    print(f"NDCG@{k}: {avg_ndcg:.4f}")
    print(f"Precision@{k}: {avg_precision:.4f}")
    print(f"Recall@{k}: {avg_recall:.4f}")
    print(f"Users evaluated: {len(ndcg_scores)}")

    return {
        f'ndcg@{k}': avg_ndcg,
        f'precision@{k}': avg_precision,
        f'recall@{k}': avg_recall
    }

def calculate_ndcg(recommended_items, relevant_items, relevant_scores, k=None):
    """
    Compute NDCG for one ranked recommendation list

    The gain of a hit is (2^rel - 1) and the discount at zero-based
    position i is log2(i + 2). The ideal ranking places the relevant items
    in descending order of their scores.

    Parameters:
    - recommended_items: ranked list of recommended item IDs
    - relevant_items: list of ground truth relevant item IDs
    - relevant_scores: relevance scores (ratings), aligned with relevant_items
    - k: cutoff; capped at the length of recommended_items
         (the whole list is used if None)

    Returns:
    - ndcg: DCG divided by ideal DCG, or 0 when the ideal DCG is zero
    """
    cutoff = len(recommended_items) if k is None else min(k, len(recommended_items))

    def discounted_gain(position, rel):
        # position is zero-based, hence the +2 inside the logarithm
        return (2 ** rel - 1) / np.log2(position + 2)

    # Item ID -> relevance score for quick lookup while walking the ranking
    gain_lookup = dict(zip(relevant_items, relevant_scores))

    dcg = sum(
        discounted_gain(position, gain_lookup[candidate])
        for position, candidate in enumerate(recommended_items[:cutoff])
        if candidate in gain_lookup
    )

    # Best achievable ordering: highest relevance first
    ideal_order = sorted(zip(relevant_items, relevant_scores),
                         key=lambda pair: pair[1], reverse=True)
    idcg = sum(
        discounted_gain(position, rel)
        for position, (_, rel) in enumerate(ideal_order[:cutoff])
    )

    if idcg == 0:
        return 0
    return dcg / idcg

def calculate_precision(recommended_items, relevant_items):
    """
    Calculate Precision@k

    Precision is the share of recommended items that appear in the ground
    truth; k is implicitly the length of recommended_items.

    Parameters:
    - recommended_items: sequence of recommended item IDs (list or array)
    - relevant_items: sequence of ground truth relevant item IDs

    Returns:
    - precision: hits / number of recommended items, or 0 when nothing
                 was recommended
    """
    # len() instead of truthiness so numpy arrays are accepted as well
    num_recommended = len(recommended_items)
    if num_recommended == 0:
        return 0

    # Set lookup avoids rescanning the relevant list for every recommendation;
    # fall back to the sequence itself if the IDs are not hashable.
    try:
        relevant_lookup = set(relevant_items)
    except TypeError:
        relevant_lookup = relevant_items

    num_relevant_recommended = sum(1 for item in recommended_items if item in relevant_lookup)

    return num_relevant_recommended / num_recommended

def calculate_recall(recommended_items, relevant_items):
    """
    Calculate Recall@k

    Recall is the number of recommended items found in the ground truth,
    divided by the number of ground truth items.

    Parameters:
    - recommended_items: sequence of recommended item IDs (list or array)
    - relevant_items: sequence of ground truth relevant item IDs

    Returns:
    - recall: hits / number of relevant items, or 0 when there are no
              relevant items
    """
    # len() instead of truthiness so numpy arrays are accepted as well
    num_relevant = len(relevant_items)
    if num_relevant == 0:
        return 0

    # Set lookup avoids rescanning the relevant list for every recommendation;
    # fall back to the sequence itself if the IDs are not hashable.
    try:
        relevant_lookup = set(relevant_items)
    except TypeError:
        relevant_lookup = relevant_items

    num_relevant_recommended = sum(1 for item in recommended_items if item in relevant_lookup)

    return num_relevant_recommended / num_relevant

class SimpleReranker:
    """
    Greedy re-ranker that blends the model score with diversity and novelty

    Items are picked one at a time from a shortlist of the model's best
    unseen items; each pick maximizes a weighted sum of the raw model score,
    the dissimilarity to the items picked so far, and the item's novelty.
    """
    def __init__(self, model, alpha=0.7):
        """
        Store the model and precompute item popularity

        Parameters:
        - model: trained BPR model (n_items, n_users, user_items,
                 user_factors and item_factors are read)
        - alpha: weight of the model score; the remaining (1 - alpha) is
                 split evenly between diversity and novelty
        """
        self.model = model
        self.alpha = alpha

        # Popularity = number of users that interacted with each item
        counts = np.zeros(model.n_items)
        for user_index in range(model.n_users):
            if user_index not in model.user_items:
                continue
            for item_index in model.user_items[user_index]:
                counts[item_index] += 1
        self.item_popularity = counts

        # Scale by the most popular item; stays all-zero without interactions
        peak = np.max(counts)
        self.norm_popularity = counts / peak if peak > 0 else np.zeros_like(counts)

    def _diversity(self, candidate, chosen):
        """
        Return 1 - mean cosine similarity between candidate and chosen items

        Pairs with a zero norm product are skipped; 0 is returned when
        nothing has been chosen yet or no pair could be compared.
        """
        if not chosen:
            return 0

        candidate_vec = self.model.item_factors[candidate]
        cosines = []
        for chosen_vec in self.model.item_factors[chosen]:
            denominator = np.linalg.norm(candidate_vec) * np.linalg.norm(chosen_vec)
            if denominator > 0:
                cosines.append(np.dot(candidate_vec, chosen_vec) / denominator)

        return 1 - np.mean(cosines) if cosines else 0

    def rerank(self, user_id, n=10):
        """
        Generate reranked recommendations

        Parameters:
        - user_id: user index in the model
        - n: number of recommendations to return

        Returns:
        - list of up to n item indices, in the order they were picked
        """
        # Raw dot-product score of every item for this user
        base_scores = np.dot(self.model.user_factors[user_id], self.model.item_factors.T)

        # Items the user already interacted with can never win a round
        if user_id in self.model.user_items:
            base_scores[list(self.model.user_items[user_id])] = -np.inf

        # Shortlist: the 3*n best-scoring items
        pool = np.argsort(base_scores)[::-1][:n*3]

        # Diversity and novelty each receive half of the non-accuracy weight
        side_weight = (1 - self.alpha) * 0.5

        picked = []
        while len(picked) < n:
            winner, winner_score = None, -np.inf

            for candidate in pool:
                if candidate in picked:
                    continue

                blended = (
                    self.alpha * base_scores[candidate]
                    + side_weight * self._diversity(candidate, picked)
                    + side_weight * (1 - self.norm_popularity[candidate])
                )

                # Strict comparison: on ties the earlier shortlist entry wins
                if blended > winner_score:
                    winner, winner_score = candidate, blended

            # No candidate beat -inf: the shortlist is exhausted
            if winner is None:
                break
            picked.append(winner)

        return picked

def simple_reranking_evaluation():
    """
    Compare accuracy of plain BPR recommendations with reranked ones

    Loads MovieLens 100K, holds out 20% of the ratings (stratified by user),
    trains a BPR model on the rest, evaluates NDCG/precision/recall at 10
    for the model and for SimpleReranker(alpha=0.7), and prints a
    comparison table. Nothing is returned.
    """
    print("Loading MovieLens 100K dataset...")
    ratings_df, _movie_df = load_movielens_100k()

    print("Splitting data for evaluation...")
    train_df, test_df = train_test_split(
        ratings_df,
        test_size=0.2,
        stratify=ratings_df['user_id'],
        random_state=42
    )

    print("Creating user-item matrix...")
    user_item_matrix, user_mapping, item_mapping = create_user_item_matrix(train_df)

    print("Training BPR model...")
    model = BPRRecommender(factors=50, learning_rate=0.01,
                           regularization=0.01, iterations=30)
    model.fit(user_item_matrix)

    print("\nEvaluating original BPR recommendations...")
    original_metrics = evaluate_bpr_accuracy(
        model=model,
        user_mapping=user_mapping,
        item_mapping=item_mapping,
        test_df=test_df,
        k=10
    )

    print("\nInitializing reranker...")
    reranker = SimpleReranker(model=model, alpha=0.7)

    print("\nEvaluating reranked recommendations...")
    # Matrix index -> original item ID, to translate reranker output back
    index_to_item = {index: original_id for original_id, index in item_mapping.items()}

    # Held-out items and ratings per user; rows with unknown users/items are skipped
    truth_items = defaultdict(list)
    truth_ratings = defaultdict(list)
    for _, record in test_df.iterrows():
        uid, iid, score = record['user_id'], record['item_id'], record['rating']
        if uid not in user_mapping or iid not in item_mapping:
            continue
        truth_items[uid].append(iid)
        truth_ratings[uid].append(score)

    ndcg_scores, precision_scores, recall_scores = [], [], []
    for uid, wanted in truth_items.items():
        if not wanted:
            continue

        picked_idx = reranker.rerank(user_mapping[uid], n=10)
        picked_ids = [index_to_item[idx] for idx in picked_idx]

        ndcg_scores.append(calculate_ndcg(picked_ids, wanted, truth_ratings[uid]))
        precision_scores.append(calculate_precision(picked_ids, wanted))
        recall_scores.append(calculate_recall(picked_ids, wanted))

    def average(values):
        # An empty list yields 0 rather than NaN
        return np.mean(values) if values else 0

    avg_ndcg = average(ndcg_scores)
    avg_precision = average(precision_scores)
    avg_recall = average(recall_scores)

    reranked_metrics = {
        'ndcg@10': avg_ndcg,
        'precision@10': avg_precision,
        'recall@10': avg_recall
    }

    print(f"NDCG@10: {avg_ndcg:.4f}")
    print(f"Precision@10: {avg_precision:.4f}")
    print(f"Recall@10: {avg_recall:.4f}")
    print(f"Users evaluated: {len(ndcg_scores)}")

    # Side-by-side table with the relative change of each metric
    print("\nMetrics Comparison:")
    print(f"{'Metric':<15} {'Original':<15} {'Reranked':<15} {'Change (%)':<15}")
    print("-" * 60)

    for metric in ('ndcg@10', 'precision@10', 'recall@10'):
        orig = original_metrics[metric]
        rerank = reranked_metrics[metric]
        if orig > 0:
            change = ((rerank - orig) / orig) * 100
        else:
            # Relative change is undefined for a zero baseline
            change = float('inf')
        print(f"{metric:<15} {orig:.4f}{' '*10} {rerank:.4f}{' '*10} {change:+.2f}%")

# Include the BPR implementation directly instead of importing
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import random
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity

class BPRRecommender:
    """
    Latent-factor recommender trained with Bayesian Personalized Ranking (BPR)

    Training samples (user, positive item, negative item) triplets and
    applies one gradient step per triplet to the user and item factors.
    """
    def __init__(self, factors=50, learning_rate=0.01, regularization=0.01, iterations=50, random_state=42):
        """
        Bayesian Personalized Ranking (BPR) recommender algorithm

        Parameters:
        - factors: dimensionality of latent factors
        - learning_rate: step size for gradient descent
        - regularization: regularization term to prevent overfitting
        - iterations: number of training iterations
        - random_state: random seed for reproducibility
        """
        self.factors = factors
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.random_state = random_state
        # Kept so that code relying on the global NumPy seed being set by
        # the constructor keeps working.
        np.random.seed(random_state)
        # Triplet sampling goes through the stdlib `random` API, which the
        # NumPy seed does not affect; a private seeded generator makes the
        # sampling reproducible without touching the global `random` state.
        self._rng = random.Random(random_state)
        # Sampling caches, filled by fit()
        self._sampling_users = []
        self._positive_items = {}

    def fit(self, user_item_matrix):
        """
        Train the BPR model on the user-item matrix

        Each iteration performs user_item_matrix.nnz sampled updates. Both
        the factor initialization and the triplet sampling are re-seeded
        from random_state, so repeated fits on the same matrix give the
        same factors.

        Parameters:
        - user_item_matrix: scipy sparse matrix with user-item interactions

        Returns:
        - self
        """
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape

        # Fresh generators per fit() call
        self._rng = random.Random(self.random_state)
        init_rng = np.random.RandomState(self.random_state)

        # Initialize latent factors
        self.user_factors = init_rng.normal(0, 0.1, (self.n_users, self.factors))
        self.item_factors = init_rng.normal(0, 0.1, (self.n_items, self.factors))

        # Create a dictionary of items each user has interacted with
        self.user_items = defaultdict(set)
        for user, item in zip(*self.user_item_matrix.nonzero()):
            self.user_items[user].add(item)

        self._prepare_sampling()

        # Without any user that has both a positive and a negative item
        # there is nothing to learn from, so no updates are performed.
        updates_per_iteration = user_item_matrix.nnz if self._sampling_users else 0

        # Training loop
        for iteration in range(self.iterations):
            for _ in range(updates_per_iteration):
                user, pos_item, neg_item = self._sample_triplet()
                self._update_factors(user, pos_item, neg_item)

            # Print progress
            if (iteration + 1) % 10 == 0:
                print(f"Completed iteration {iteration + 1}/{self.iterations}")

        return self

    def _prepare_sampling(self):
        """
        Cache the structures used by _sample_triplet

        Built once per fit() instead of rebuilding lists on every sample.
        Users that interacted with every item are left out of the sampling
        pool because no negative item exists for them.
        """
        # Sorted so the sampled positives do not depend on set iteration order
        self._positive_items = {
            user: sorted(items) for user, items in self.user_items.items() if items
        }
        self._sampling_users = [
            user for user, items in self.user_items.items()
            if 0 < len(items) < self.n_items
        ]

    def _sample_triplet(self):
        """
        Sample a (user, positive_item, negative_item) triplet for training

        Returns:
        - user: sampled user index
        - pos_item: a positive item (one the user has interacted with)
        - neg_item: a negative item (one the user has not interacted with)

        Raises:
        - IndexError: if no user has both a positive and a negative item
        """
        user = self._rng.choice(self._sampling_users)
        pos_item = self._rng.choice(self._positive_items[user])

        # Rejection sampling; terminates because every user in the pool has
        # at least one item they did not interact with.
        seen = self.user_items[user]
        neg_item = self._rng.randrange(self.n_items)
        while neg_item in seen:
            neg_item = self._rng.randrange(self.n_items)

        return user, pos_item, neg_item

    def _update_factors(self, user, pos_item, neg_item):
        """
        Update model parameters based on a triplet

        Parameters:
        - user: user index
        - pos_item: positive item index
        - neg_item: negative item index
        """
        # Calculate prediction for positive and negative items
        pos_pred = np.dot(self.user_factors[user], self.item_factors[pos_item])
        neg_pred = np.dot(self.user_factors[user], self.item_factors[neg_item])

        # sigmoid(neg - pos): close to 1 when the pair is ranked wrongly,
        # close to 0 when the positive item already scores much higher
        diff = neg_pred - pos_pred
        sigmoid = 1.0 / (1.0 + np.exp(-diff))

        # All gradients are computed from the current factors before any
        # of them is modified
        grad_user = sigmoid * (self.item_factors[neg_item] - self.item_factors[pos_item]) + self.regularization * self.user_factors[user]
        grad_pos_item = sigmoid * (-self.user_factors[user]) + self.regularization * self.item_factors[pos_item]
        grad_neg_item = sigmoid * self.user_factors[user] + self.regularization * self.item_factors[neg_item]

        # Update factors
        self.user_factors[user] -= self.learning_rate * grad_user
        self.item_factors[pos_item] -= self.learning_rate * grad_pos_item
        self.item_factors[neg_item] -= self.learning_rate * grad_neg_item

    def recommend(self, user_id, n=10, exclude_seen=True):
        """
        Generate item recommendations for a user

        Parameters:
        - user_id: user index
        - n: number of recommendations to generate
        - exclude_seen: whether to exclude items the user has already interacted with

        Returns:
        - array of recommended item indices, best first; holds n entries
          unless exclude_seen leaves fewer than n unseen items
        """
        # Calculate predicted scores for all items
        scores = np.dot(self.user_factors[user_id], self.item_factors.T)

        seen_mask = np.zeros(scores.shape[0], dtype=bool)
        if exclude_seen and user_id in self.user_items:
            seen_mask[list(self.user_items[user_id])] = True
            scores[seen_mask] = -np.inf

        # Get top n items by score
        top_items = np.argsort(scores)[::-1][:n]

        # When n exceeds the number of unseen items the tail of the ranking
        # consists of seen items; drop them instead of recommending them.
        return top_items[~seen_mask[top_items]]

    def get_similar_items(self, item_id, n=10):
        """
        Find items similar to a given item based on latent factors

        Parameters:
        - item_id: item index
        - n: number of similar items to retrieve

        Returns:
        - array of the n most similar item indices, most similar first
        """
        # Calculate cosine similarity between the item and all other items
        sim_scores = cosine_similarity([self.item_factors[item_id]], self.item_factors)[0]

        # Exclude the item itself
        sim_scores[item_id] = -np.inf

        # Get top n items by similarity
        similar_items = np.argsort(sim_scores)[::-1][:n]

        return similar_items


def load_movielens_100k(path="ml-100k"):
    """
    Read the MovieLens 100K ratings and movie files from a directory

    Parameters:
    - path: directory containing the 'u.data' and 'u.item' files

    Returns:
    - ratings_df: pandas DataFrame with columns 'user_id', 'item_id',
                  'rating', 'timestamp' (tab-separated 'u.data')
    - movie_df: pandas DataFrame with five descriptive columns followed by
                'genre_0' ... 'genre_18' (pipe-separated, latin-1 'u.item')
    """
    rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']

    movie_columns = ['item_id', 'title', 'release_date', 'video_release_date',
                     'IMDb_URL']
    movie_columns.extend(f'genre_{i}' for i in range(19))

    # Neither file has a header row, so column names are supplied explicitly
    ratings_df = pd.read_csv(f"{path}/u.data", sep='\t', names=rating_columns)
    movie_df = pd.read_csv(f"{path}/u.item", sep='|', encoding='latin-1',
                           names=movie_columns)

    return ratings_df, movie_df


def create_user_item_matrix(ratings_df):
    """
    Build a sparse user-item interaction matrix from a ratings table

    Users and items are numbered in order of first appearance. Every rating
    row contributes an entry of 1.0 at (user index, item index); the rating
    value itself is not used.

    Parameters:
    - ratings_df: pandas DataFrame with columns 'user_id' and 'item_id'

    Returns:
    - user_item_matrix: scipy CSR matrix of shape (number of users, number of items)
    - user_mapping: dict mapping original user IDs to matrix indices
    - item_mapping: dict mapping original item IDs to matrix indices
    """
    user_column = ratings_df['user_id']
    item_column = ratings_df['item_id']

    # Original ID -> consecutive index, in order of first appearance
    distinct_users = user_column.unique()
    distinct_items = item_column.unique()
    user_mapping = dict(zip(distinct_users, range(len(distinct_users))))
    item_mapping = dict(zip(distinct_items, range(len(distinct_items))))

    # Coordinates of every interaction in index space
    row_index = user_column.map(user_mapping)
    col_index = item_column.map(item_mapping)

    interactions = np.ones(len(ratings_df))
    matrix_shape = (len(user_mapping), len(item_mapping))
    user_item_matrix = csr_matrix((interactions, (row_index, col_index)),
                                  shape=matrix_shape)

    return user_item_matrix, user_mapping, item_mapping

# Run the accuracy comparison only when this code is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    simple_reranking_evaluation()

Loading MovieLens 100K dataset...
Splitting data for evaluation...
Creating user-item matrix...
Training BPR model...
Completed iteration 10/30
Completed iteration 20/30
Completed iteration 30/30

Evaluating original BPR recommendations...
NDCG@10: 0.2808
Precision@10: 0.3031
Recall@10: 0.1953
Users evaluated: 943

Initializing reranker...

Evaluating reranked recommendations...
NDCG@10: 0.2786
Precision@10: 0.3043
Recall@10: 0.1946
Users evaluated: 943

Metrics Comparison:
Metric          Original        Reranked        Change (%)     
------------------------------------------------------------
ndcg@10         0.2808           0.2786           -0.78%
precision@10    0.3031           0.3043           +0.42%
recall@10       0.1953           0.1946           -0.35%


In [3]:
# BPR Reranking with Diversity Metrics Only

import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import math
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix

def evaluate_diversity_metrics(model, recommendations, total_items, k=10):
    """
    Evaluate diversity metrics for a set of recommendations

    All metrics are computed over the pooled recommendations of all users
    and are both printed and returned.

    Parameters:
    - model: trained BPR model (n_items, n_users and user_items are read to
             derive item popularity from the training data)
    - recommendations: flat sequence of recommended item indices
    - total_items: total number of items in the catalog
    - k: number of recommendations per user (currently not used in any
         calculation)

    Returns:
    - metrics: dictionary with
        'item_coverage': distinct recommended items / total_items
        'gini_index': Gini index of the recommendation counts, computed over
                      the items that were recommended at least once
        'shannon_entropy': entropy of the recommendation distribution divided
                           by log2(min(total_items, number of recommendations))
        'tail_percentage': share of recommendations that fall in the 20%
                           least popular items
      Every metric is 0 when its denominator would be zero (empty
      recommendations or an empty catalog).
    """
    # Count recommendations for each item
    rec_counts = Counter(recommendations)
    # len() instead of truthiness so arrays are accepted as well
    recommendations_count = len(recommendations)

    # Calculate item popularity from the training data
    item_popularity = np.zeros(model.n_items)
    for user in range(model.n_users):
        if user in model.user_items:
            for item in model.user_items[user]:
                item_popularity[item] += 1

    # Sort items by popularity for tail calculation
    sorted_pop_indices = np.argsort(item_popularity)
    num_tail_items = int(len(sorted_pop_indices) * 0.2)  # 20% least popular items
    tail_items = set(sorted_pop_indices[:num_tail_items])

    # 1. Item Coverage (guarded against an empty catalog)
    recommended_items = len(rec_counts)
    item_coverage = recommended_items / total_items if total_items > 0 else 0

    # 2. Gini Index
    sorted_counts = sorted(rec_counts.values())
    n = len(sorted_counts)

    if n == 0:
        gini_index = 0
    else:
        # Rank-weighted sum of the ascending counts
        weighted_sum = sum(rank * count for rank, count in enumerate(sorted_counts, start=1))
        gini_index = (2 * weighted_sum) / (n * sum(sorted_counts)) - (n + 1) / n

    # 3. Shannon Entropy
    probabilities = [count / recommendations_count for count in rec_counts.values()]
    entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)

    # Normalize entropy; log2 is only taken of a positive number so that
    # empty input does not trigger a divide-by-zero warning
    entropy_support = min(total_items, recommendations_count)
    max_entropy = np.log2(entropy_support) if entropy_support > 0 else 0
    normalized_entropy = entropy / max_entropy if max_entropy > 0 else 0

    # 4. Tail Percentage
    tail_recommendations = sum(1 for item in recommendations if item in tail_items)
    tail_percentage = tail_recommendations / recommendations_count if recommendations_count > 0 else 0

    # Create results dictionary
    metrics = {
        'item_coverage': item_coverage,
        'gini_index': gini_index,
        'shannon_entropy': normalized_entropy,
        'tail_percentage': tail_percentage
    }

    print(f"Item Coverage: {item_coverage:.4f} ({recommended_items} out of {total_items} items)")
    print(f"Gini Index: {gini_index:.4f} (0 is perfect equality, 1 is perfect inequality)")
    print(f"Shannon Entropy: {normalized_entropy:.4f} (normalized, higher is more diverse)")
    print(f"Tail Percentage: {tail_percentage:.4f} ({tail_recommendations} out of {recommendations_count} recommendations)")

    return metrics

class SimpleReranker:
    """
    Re-ranks a model's top candidates by trading accuracy for diversity and novelty
    """
    def __init__(self, model, alpha=0.7):
        """
        Keep a reference to the model and derive normalized item popularity

        Parameters:
        - model: trained BPR model
        - alpha: weight for the model score (between 0 and 1);
                 diversity and novelty each get (1 - alpha) / 2
        """
        self.model = model
        self.alpha = alpha

        # Popularity of an item = number of users whose interaction set contains it
        self.item_popularity = np.zeros(model.n_items)
        users_with_history = (u for u in range(model.n_users) if u in model.user_items)
        for u in users_with_history:
            for interacted in model.user_items[u]:
                self.item_popularity[interacted] += 1

        # Divide by the largest count; without interactions everything stays 0
        highest = np.max(self.item_popularity)
        if not highest > 0:
            self.norm_popularity = np.zeros_like(self.item_popularity)
        else:
            self.norm_popularity = self.item_popularity / highest

    def rerank(self, user_id, n=10):
        """
        Generate reranked recommendations

        Starting from the 3*n items with the highest model score, items are
        chosen greedily; each round takes the candidate with the highest
        alpha * score + (1 - alpha)/2 * diversity + (1 - alpha)/2 * novelty.

        Parameters:
        - user_id: user index in the model
        - n: number of recommendations to return

        Returns:
        - reranked_items: list of at most n item indices in selection order
        """
        model = self.model

        # Model score (dot product, not rescaled) of every item for this user
        relevance = np.dot(model.user_factors[user_id], model.item_factors.T)

        # Already-seen items get -inf so they cannot be selected
        if user_id in model.user_items:
            already_seen = list(model.user_items[user_id])
            relevance[already_seen] = -np.inf

        # Candidates still available, best model score first
        shortlist = list(np.argsort(relevance)[::-1][:n*3])

        chosen = []
        while len(chosen) < n:
            top_value = -np.inf
            top_item = None

            for item in shortlist:
                # Diversity: 1 - mean cosine similarity to the chosen items
                # (0 while nothing is chosen or no pair has non-zero norms)
                spread = 0
                if chosen:
                    vec = model.item_factors[item]
                    cosines = []
                    for other in model.item_factors[chosen]:
                        scale = np.linalg.norm(vec) * np.linalg.norm(other)
                        if scale > 0:
                            cosines.append(np.dot(vec, other) / scale)
                    if cosines:
                        spread = 1 - np.mean(cosines)

                # Novelty: rarely consumed items score close to 1
                rarity = 1 - self.norm_popularity[item]

                total = (
                    self.alpha * relevance[item] +
                    (1 - self.alpha) * 0.5 * spread +
                    (1 - self.alpha) * 0.5 * rarity
                )

                # Strict comparison keeps the earlier candidate on ties
                if total > top_value:
                    top_value = total
                    top_item = item

            # Nothing scored above -inf: no usable candidates remain
            if top_item is None:
                break

            chosen.append(top_item)
            shortlist.remove(top_item)

        return chosen

def diversity_reranking_evaluation():
    """
    Compare diversity of plain BPR recommendations with reranked ones

    Loads MovieLens 100K, trains a BPR model on 80% of the ratings
    (stratified by user), pools the top-10 lists of every user with
    training interactions for both the model and SimpleReranker(alpha=0.7),
    and prints the diversity metrics of both pools side by side.
    Nothing is returned.
    """
    print("Loading MovieLens 100K dataset...")
    ratings_df, _movie_df = load_movielens_100k()

    print("Splitting data for evaluation...")
    train_df, _test_df = train_test_split(
        ratings_df,
        test_size=0.2,
        stratify=ratings_df['user_id'],
        random_state=42
    )

    print("Creating user-item matrix...")
    user_item_matrix, _user_mapping, _item_mapping = create_user_item_matrix(train_df)

    print("Training BPR model...")
    model = BPRRecommender(factors=50, learning_rate=0.01,
                           regularization=0.01, iterations=30)
    model.fit(user_item_matrix)

    def pooled_recommendations(recommend_for_user):
        # Concatenate the lists of all users that have training interactions
        pooled = []
        for user_idx in range(model.n_users):
            if user_idx in model.user_items:
                pooled.extend(recommend_for_user(user_idx))
        return pooled

    print("\nGenerating original BPR recommendations for all users...")
    all_original_recs = pooled_recommendations(lambda user_idx: model.recommend(user_idx, n=10))

    print("\nEvaluating diversity metrics for original BPR recommendations...")
    original_metrics = evaluate_diversity_metrics(
        model=model,
        recommendations=all_original_recs,
        total_items=model.n_items
    )

    print("\nInitializing reranker...")
    reranker = SimpleReranker(model=model, alpha=0.7)

    print("\nGenerating reranked recommendations for all users...")
    all_reranked_recs = pooled_recommendations(lambda user_idx: reranker.rerank(user_idx, n=10))

    print("\nEvaluating diversity metrics for reranked recommendations...")
    reranked_metrics = evaluate_diversity_metrics(
        model=model,
        recommendations=all_reranked_recs,
        total_items=model.n_items
    )

    # Side-by-side table with the relative change of each metric
    print("\nDiversity Metrics Comparison:")
    print(f"{'Metric':<20} {'Original':<15} {'Reranked':<15} {'Change (%)':<15}")
    print("-" * 65)

    for metric in ('item_coverage', 'gini_index', 'shannon_entropy', 'tail_percentage'):
        orig = original_metrics[metric]
        rerank = reranked_metrics[metric]
        if orig > 0:
            change = ((rerank - orig) / orig) * 100
        else:
            # Relative change is undefined for a zero baseline
            change = float('inf')
        print(f"{metric:<20} {orig:.4f}{' '*10} {rerank:.4f}{' '*10} {change:+.2f}%")

# BPR Implementation
class BPRRecommender:
    """
    Matrix-factorization recommender trained with Bayesian Personalized
    Ranking (BPR) using stochastic gradient descent on sampled
    (user, positive item, negative item) triplets.

    All randomness used by `fit` (factor initialisation and triplet sampling)
    comes from a per-instance `np.random.RandomState(random_state)`, so two
    models built with the same `random_state` and data train identically.
    """

    def __init__(self, factors=50, learning_rate=0.01, regularization=0.01, iterations=50, random_state=42):
        """
        Bayesian Personalized Ranking (BPR) recommender algorithm

        Parameters:
        - factors: dimensionality of latent factors
        - learning_rate: step size for gradient descent
        - regularization: regularization term to prevent overfitting
        - iterations: number of training iterations
        - random_state: random seed for reproducibility
        """
        self.factors = factors
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.iterations = iterations
        self.random_state = random_state
        # Kept for backward compatibility: existing callers may rely on the
        # global NumPy RNG being seeded when a model is constructed.
        np.random.seed(random_state)

    def fit(self, user_item_matrix):
        """
        Train the BPR model on the user-item matrix

        Each iteration draws `user_item_matrix.nnz` triplets and applies one
        SGD step per triplet. Users who have interacted with every item are
        never sampled because no negative item exists for them.

        Parameters:
        - user_item_matrix: scipy sparse matrix with user-item interactions

        Returns:
        - self
        """
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape

        # Dedicated RNG so training does not depend on (or disturb) whatever
        # else uses the global NumPy / stdlib random state.
        self._rng = np.random.RandomState(self.random_state)

        # Initialize latent factors
        self.user_factors = self._rng.normal(0, 0.1, (self.n_users, self.factors))
        self.item_factors = self._rng.normal(0, 0.1, (self.n_items, self.factors))

        # Create a dictionary of items each user has interacted with
        # (plain Python ints as keys/values).
        self.user_items = defaultdict(set)
        rows, cols = self.user_item_matrix.nonzero()
        for user, item in zip(rows.tolist(), cols.tolist()):
            self.user_items[user].add(item)

        self._build_sampling_tables()

        # Nothing to learn from if no user has both a positive and a negative item.
        samples_per_iteration = user_item_matrix.nnz if self._sampling_users else 0

        # Training loop
        for iteration in range(self.iterations):
            # Sample triplets for training
            for _ in range(samples_per_iteration):
                user, pos_item, neg_item = self._sample_triplet()
                self._update_factors(user, pos_item, neg_item)

            # Print progress
            if (iteration + 1) % 10 == 0:
                print(f"Completed iteration {iteration + 1}/{self.iterations}")

        return self

    def _build_sampling_tables(self):
        """
        Precompute the structures `_sample_triplet` indexes into, so that each
        draw is O(1) instead of rebuilding lists from `self.user_items`.
        """
        # Only users with at least one positive AND at least one possible negative.
        self._sampling_users = [
            user for user, items in self.user_items.items()
            if 0 < len(items) < self.n_items
        ]
        # Sorted so the sampled sequence does not depend on set iteration order.
        self._positive_lists = {
            user: sorted(self.user_items[user]) for user in self._sampling_users
        }

    def _sample_triplet(self):
        """
        Sample a (user, positive_item, negative_item) triplet for training

        The user is drawn uniformly from the eligible users, the positive item
        uniformly from that user's interactions, and the negative item by
        rejection sampling over all items.

        Returns:
        - user: sampled user index
        - pos_item: a positive item (one the user has interacted with)
        - neg_item: a negative item (one the user has not interacted with)

        Raises:
        - ValueError: if no user has both a positive and a negative item
        """
        if not self._sampling_users:
            raise ValueError("No user has both a positive and a negative item to sample")

        rng = self._rng

        # Sample a user who has rated at least one item
        user = self._sampling_users[rng.randint(len(self._sampling_users))]

        # Sample a positive item (one the user has interacted with)
        positives = self._positive_lists[user]
        pos_item = positives[rng.randint(len(positives))]

        # Sample a negative item (one the user has not interacted with).
        # Terminates because eligible users have at least one unseen item.
        seen = self.user_items[user]
        neg_item = rng.randint(self.n_items)
        while neg_item in seen:
            neg_item = rng.randint(self.n_items)

        return user, pos_item, neg_item

    def _update_factors(self, user, pos_item, neg_item):
        """
        Update model parameters based on a triplet

        Performs one gradient-descent step on the regularised BPR loss
        -log(sigmoid(pos_pred - neg_pred)) for the given triplet.

        Parameters:
        - user: user index
        - pos_item: positive item index
        - neg_item: negative item index
        """
        # Calculate prediction for positive and negative items
        pos_pred = np.dot(self.user_factors[user], self.item_factors[pos_item])
        neg_pred = np.dot(self.user_factors[user], self.item_factors[neg_item])

        # Calculate prediction difference
        diff = neg_pred - pos_pred

        # sigmoid(diff), written as exp(-log(1 + exp(-diff))) so that a large
        # negative diff cannot overflow np.exp.
        sigmoid = np.exp(-np.logaddexp(0.0, -diff))

        # Calculate gradients (all from the pre-update factor values)
        grad_user = sigmoid * (self.item_factors[neg_item] - self.item_factors[pos_item]) + self.regularization * self.user_factors[user]
        grad_pos_item = sigmoid * (-self.user_factors[user]) + self.regularization * self.item_factors[pos_item]
        grad_neg_item = sigmoid * self.user_factors[user] + self.regularization * self.item_factors[neg_item]

        # Update factors
        self.user_factors[user] -= self.learning_rate * grad_user
        self.item_factors[pos_item] -= self.learning_rate * grad_pos_item
        self.item_factors[neg_item] -= self.learning_rate * grad_neg_item

    def recommend(self, user_id, n=10, exclude_seen=True):
        """
        Generate item recommendations for a user

        Parameters:
        - user_id: user index
        - n: number of recommendations to generate
        - exclude_seen: whether to exclude items the user has already interacted with

        Returns:
        - numpy array of recommended item indices, best first. It has n
          entries, or fewer when exclude_seen leaves fewer than n candidates.
        """
        # Calculate predicted scores for all items
        scores = np.dot(self.user_factors[user_id], self.item_factors.T)

        # If requested, exclude items the user has already interacted with
        if exclude_seen and user_id in self.user_items:
            seen_items = list(self.user_items[user_id])
            scores[seen_items] = -np.inf
            # Never pad the result with masked (already seen) items.
            n = min(n, len(scores) - len(seen_items))

        # Get top n items by score
        top_items = np.argsort(scores)[::-1][:n]

        return top_items

    def get_similar_items(self, item_id, n=10):
        """
        Find items similar to a given item based on latent factors

        Parameters:
        - item_id: item index
        - n: number of similar items to retrieve

        Returns:
        - numpy array of up to n similar item indices, most similar first
        """
        # Calculate cosine similarity between the item and all other items
        sim_scores = cosine_similarity([self.item_factors[item_id]], self.item_factors)[0]

        # Exclude the item itself
        sim_scores[item_id] = -np.inf

        # Get top n items by similarity
        similar_items = np.argsort(sim_scores)[::-1][:n]

        return similar_items


def load_movielens_100k(path="ml-100k"):
    """
    Read the MovieLens 100K ratings and movie tables from disk.

    Parameters:
    - path: directory containing the `u.data` and `u.item` files

    Returns:
    - ratings_df: DataFrame read from the tab-separated `u.data`, with columns
      'user_id', 'item_id', 'rating', 'timestamp'
    - movie_df: DataFrame read from the pipe-separated, latin-1 encoded
      `u.item`, with five descriptive columns followed by 'genre_0'..'genre_18'
    """
    # Neither file carries a header row, so column names are supplied here.
    rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']
    movie_columns = ['item_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL']
    movie_columns.extend(f'genre_{i}' for i in range(19))

    ratings_df = pd.read_csv(f"{path}/u.data", sep='\t', names=rating_columns)
    movie_df = pd.read_csv(
        f"{path}/u.item",
        sep='|',
        encoding='latin-1',
        names=movie_columns,
    )

    return ratings_df, movie_df


def create_user_item_matrix(ratings_df):
    """
    Create a sparse binary user-item interaction matrix from ratings

    Matrix indices are assigned in order of first appearance of each ID in
    `ratings_df`. The rating value itself is ignored: an entry is 1 if the
    (user, item) pair occurs at least once, even when it occurs several times.

    Parameters:
    - ratings_df: pandas DataFrame with columns 'user_id', 'item_id', 'rating'

    Returns:
    - user_item_matrix: scipy sparse matrix with user-item interactions
    - user_mapping: dict mapping original user IDs to matrix indices
    - item_mapping: dict mapping original item IDs to matrix indices
    """
    # Create mappings from original IDs to matrix indices
    user_ids = ratings_df['user_id'].unique()
    item_ids = ratings_df['item_id'].unique()

    user_mapping = {user_id: i for i, user_id in enumerate(user_ids)}
    item_mapping = {item_id: i for i, item_id in enumerate(item_ids)}

    # Map original IDs to matrix indices
    rows = ratings_df['user_id'].map(user_mapping)
    cols = ratings_df['item_id'].map(item_mapping)

    # Create binary matrix (1 if interaction exists, 0 otherwise)
    data = np.ones(len(ratings_df))
    user_item_matrix = csr_matrix((data, (rows, cols)),
                                 shape=(len(user_mapping), len(item_mapping)))

    # Repeated (user, item) rows are summed by the sparse constructor, which
    # would yield counts > 1; collapse them so the matrix stays binary.
    user_item_matrix.sum_duplicates()
    user_item_matrix.data[:] = 1.0

    return user_item_matrix, user_mapping, item_mapping

# Required for cosine similarity calculation
def cosine_similarity(X, Y):
    """
    Compute cosine similarity between samples in X and Y

    Rows whose Euclidean norm is zero or not finite (all-zero rows, or rows
    containing NaN/inf) are treated as zero vectors, so every similarity
    involving them is 0. No division-by-zero warnings are emitted.

    Parameters:
    - X: array-like of shape (n_samples_X, n_features)
    - Y: array-like of shape (n_samples_Y, n_features)

    Returns:
    - similarities: ndarray of shape (n_samples_X, n_samples_Y)
    """
    def _unit_rows(values):
        # Scale each row to unit length; rows without a usable norm become zeros.
        matrix = np.asarray(values)
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        usable = np.isfinite(norms) & (norms > 0)
        # Divide only where the norm is usable; the rest keeps the zeros from `out`.
        zeros = np.zeros(matrix.shape, dtype=np.result_type(matrix, norms))
        return np.divide(matrix, norms, out=zeros, where=usable)

    # Calculate cosine similarity
    return np.dot(_unit_rows(X), _unit_rows(Y).T)

# Entry point: call diversity_reranking_evaluation() only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    diversity_reranking_evaluation()

Loading MovieLens 100K dataset...
Splitting data for evaluation...
Creating user-item matrix...
Training BPR model...
Completed iteration 10/30
Completed iteration 20/30
Completed iteration 30/30

Generating original BPR recommendations for all users...

Evaluating diversity metrics for original BPR recommendations...
Item Coverage: 0.1552 (257 out of 1656 items)
Gini Index: 0.6976 (0 is perfect equality, 1 is perfect inequality)
Shannon Entropy: 0.6279 (normalized, higher is more diverse)
Tail Percentage: 0.0000 (0 out of 9430 recommendations)

Initializing reranker...

Generating reranked recommendations for all users...

Evaluating diversity metrics for reranked recommendations...
Item Coverage: 0.1697 (281 out of 1656 items)
Gini Index: 0.7007 (0 is perfect equality, 1 is perfect inequality)
Shannon Entropy: 0.6386 (normalized, higher is more diverse)
Tail Percentage: 0.0000 (0 out of 9430 recommendations)

Diversity Metrics Comparison:
Metric               Original        Reranked