In [2]:
# NeuMF Recommender with Diversity Reranking for LastFM

import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import math
import random
import time
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

#################################
# DATASET AND HELPER FUNCTIONS (LASTFM)
#################################

def load_lastfm(path="lastfm/lastfm.inter"):
    """
    Load the LastFM dataset
    
    Parameters:
    - path: path to the lastfm.inter file
    
    Returns:
    - ratings_df: DataFrame with columns ['user_id', 'item_id', 'rating', 'timestamp']
    - dummy_df: Empty DataFrame with item information structure (for compatibility)
    """
    print("Loading LastFM dataset...")
    encodings_to_try = ['utf-8', 'latin-1', 'ISO-8859-1', 'cp1252']
    
    def generate_sample_data():
        print("Generating sample LastFM data for demonstration purposes...")
        np.random.seed(42)
        n_users = 100
        n_items = 50
        n_ratings = 1000
        user_ids = [f"user_{i}" for i in range(n_users)]
        item_ids = [f"artist_{i}" for i in range(n_items)]
        random_users = np.random.choice(user_ids, size=n_ratings)
        random_items = np.random.choice(item_ids, size=n_ratings)
        random_ratings = np.random.uniform(1, 5, size=n_ratings)
        constant_timestamp = np.full(n_ratings, 1111111111)
        sample_df = pd.DataFrame({
            'user_id': random_users,
            'item_id': random_items,
            'rating': random_ratings,
            'timestamp': constant_timestamp
        })
        dummy_df = pd.DataFrame(columns=['item_id', 'name', 'tags'])
        print(f"Generated sample data with {len(sample_df)} ratings from {sample_df['user_id'].nunique()} users on {sample_df['item_id'].nunique()} artists")
        return sample_df, dummy_df
    
    for encoding in encodings_to_try:
        try:
            print(f"Trying to load LastFM data with {encoding} encoding...")
            columns = ['user_id', 'artist_id', 'weight', 'tag_value']
            raw_df = pd.read_csv(path, sep='\t', names=columns, encoding=encoding)
            raw_df = raw_df.rename(columns={'artist_id': 'item_id', 'weight': 'rating'})
            raw_df['timestamp'] = 1111111111
            raw_df = raw_df.drop(columns=['tag_value'])
            dummy_df = pd.DataFrame(columns=['item_id', 'name', 'tags'])
            if len(raw_df) > 0:
                print(f"Successfully loaded LastFM dataset with {encoding} encoding")
                print(f"Loaded {len(raw_df)} interactions from {raw_df['user_id'].nunique()} users on {raw_df['item_id'].nunique()} artists")
                return raw_df, dummy_df
        except Exception as e:
            print(f"Error loading LastFM dataset with {encoding} encoding: {str(e)}")
            continue
    print("All attempts to load the LastFM dataset failed. Generating sample data instead.")
    return generate_sample_data()

def create_user_item_matrix(ratings_df):
    """
    Create a sparse user-item interaction matrix from ratings
    """
    user_ids = ratings_df['user_id'].unique()
    item_ids = ratings_df['item_id'].unique()
    user_mapping = {user_id: i for i, user_id in enumerate(user_ids)}
    item_mapping = {item_id: i for i, item_id in enumerate(item_ids)}
    rows = ratings_df['user_id'].map(user_mapping)
    cols = ratings_df['item_id'].map(item_mapping)
    data = np.ones(len(ratings_df))
    user_item_matrix = csr_matrix((data, (rows, cols)), 
                                  shape=(len(user_mapping), len(item_mapping)))
    return user_item_matrix, user_mapping, item_mapping

class NCFDataset(Dataset):
    """Dataset for NeuMF training"""
    def __init__(self, user_item_matrix, neg_samples=4):
        self.user_item_matrix = user_item_matrix
        self.users, self.items = user_item_matrix.nonzero()
        self.n_users = user_item_matrix.shape[0]
        self.n_items = user_item_matrix.shape[1]
        self.neg_samples = neg_samples
        self.user_item_set = set(zip(self.users, self.items))
        self.user_items = defaultdict(set)
        for u, i in zip(self.users, self.items):
            self.user_items[u].add(i)
    
    def __len__(self):
        return len(self.users) * (1 + self.neg_samples)
    
    def __getitem__(self, idx):
        if idx < len(self.users):
            user = self.users[idx]
            item = self.items[idx]
            label = 1.0
        else:
            pos_idx = idx % len(self.users)
            user = self.users[pos_idx]
            item = random.randint(0, self.n_items - 1)
            while item in self.user_items[user]:
                item = random.randint(0, self.n_items - 1)
            label = 0.0
        return user, item, label

#################################
# NEUMF RECOMMENDER IMPLEMENTATION
#################################

class GMF(nn.Module):
    """Generalized Matrix Factorization"""
    def __init__(self, n_users, n_items, latent_dim):
        super(GMF, self).__init__()
        self.user_embedding = nn.Embedding(n_users, latent_dim)
        self.item_embedding = nn.Embedding(n_items, latent_dim)
        self.output_layer = nn.Linear(latent_dim, 1)
        nn.init.normal_(self.user_embedding.weight, std=0.01)
        nn.init.normal_(self.item_embedding.weight, std=0.01)
        
    def forward(self, user_indices, item_indices):
        user_emb = self.user_embedding(user_indices)
        item_emb = self.item_embedding(item_indices)
        element_product = torch.mul(user_emb, item_emb)
        output = self.output_layer(element_product)
        return output.view(-1)

class MLP(nn.Module):
    """Multi-Layer Perceptron component"""
    def __init__(self, n_users, n_items, latent_dim, layers=[64, 32, 16, 8]):
        super(MLP, self).__init__()
        self.user_embedding = nn.Embedding(n_users, latent_dim)
        self.item_embedding = nn.Embedding(n_items, latent_dim)
        self.layers = nn.ModuleList()
        layer_dims = [2 * latent_dim] + layers
        for i in range(len(layer_dims) - 1):
            self.layers.append(nn.Linear(layer_dims[i], layer_dims[i+1]))
            self.layers.append(nn.ReLU())
        self.output_layer = nn.Linear(layer_dims[-1], 1)
        nn.init.normal_(self.user_embedding.weight, std=0.01)
        nn.init.normal_(self.item_embedding.weight, std=0.01)
        
    def forward(self, user_indices, item_indices):
        user_emb = self.user_embedding(user_indices)
        item_emb = self.item_embedding(item_indices)
        vector = torch.cat([user_emb, item_emb], dim=-1)
        for layer in self.layers:
            vector = layer(vector)
        output = self.output_layer(vector)
        return output.view(-1)

class NeuMF(nn.Module):
    """Neural Matrix Factorization combining GMF and MLP"""
    def __init__(self, n_users, n_items, latent_dim=32, mlp_layers=[64, 32, 16, 8]):
        super(NeuMF, self).__init__()
        self.gmf = GMF(n_users, n_items, latent_dim)
        self.mlp = MLP(n_users, n_items, latent_dim, mlp_layers)
        self.output_layer = nn.Linear(mlp_layers[-1] + latent_dim, 1)
        nn.init.normal_(self.output_layer.weight, std=0.01)
        
    def forward(self, user_indices, item_indices):
        # GMF path
        gmf_user = self.gmf.user_embedding(user_indices)
        gmf_item = self.gmf.item_embedding(item_indices)
        gmf_vector = torch.mul(gmf_user, gmf_item)
        # MLP path
        mlp_user = self.mlp.user_embedding(user_indices)
        mlp_item = self.mlp.item_embedding(item_indices)
        mlp_vector = torch.cat([mlp_user, mlp_item], dim=-1)
        for layer in self.mlp.layers:
            mlp_vector = layer(mlp_vector)
        # Concatenate both paths
        vector = torch.cat([gmf_vector, mlp_vector], dim=-1)
        output = self.output_layer(vector)
        return torch.sigmoid(output.view(-1))

class NeuMFRecommender:
    def __init__(self, latent_dim=32, mlp_layers=[64, 32, 16, 8], epochs=20, batch_size=256, 
                 lr=0.001, neg_samples=4, device=None, random_state=42):
        """
        NeuMF recommender using Neural Matrix Factorization
        
        Parameters:
        - latent_dim: size of latent factors
        - mlp_layers: list defining the MLP architecture
        - epochs: training epochs
        - batch_size: batch size
        - lr: learning rate
        - neg_samples: negative samples per positive instance
        - device: torch device (cpu or cuda)
        - random_state: seed for reproducibility
        """
        self.latent_dim = latent_dim
        self.mlp_layers = mlp_layers
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr
        self.neg_samples = neg_samples
        self.random_state = random_state
        
        random.seed(random_state)
        np.random.seed(random_state)
        torch.manual_seed(random_state)
        
        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = device
        
    def fit(self, user_item_matrix):
        """
        Train the NeuMF model on the provided user-item matrix.
        """
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape
        
        # Build user-item interactions dictionary
        self.user_items = defaultdict(set)
        for u, i in zip(*self.user_item_matrix.nonzero()):
            self.user_items[u].add(i)
        
        # Create training dataset and dataloader
        dataset = NCFDataset(user_item_matrix, neg_samples=self.neg_samples)
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        
        # Initialize NeuMF model
        self.model = NeuMF(self.n_users, self.n_items, self.latent_dim, self.mlp_layers).to(self.device)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        
        print(f"Training NeuMF model for {self.epochs} epochs...")
        self.model.train()
        for epoch in range(self.epochs):
            start_time = time.time()
            running_loss = 0.0
            for users, items, labels in dataloader:
                users = users.to(self.device)
                items = items.to(self.device)
                labels = labels.float().to(self.device)
                outputs = self.model(users, items)
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            elapsed_time = time.time() - start_time
            if (epoch + 1) % 5 == 0 or epoch == 0:
                print(f"Epoch {epoch+1}/{self.epochs}, Loss: {running_loss/len(dataloader):.4f}, Time: {elapsed_time:.2f}s")
        self.model.eval()
        
        # Precompute embeddings for reranker compatibility
        with torch.no_grad():
            self.user_gmf_embeddings = self.model.gmf.user_embedding.weight.data
            self.item_gmf_embeddings = self.model.gmf.item_embedding.weight.data
            self.user_mlp_embeddings = self.model.mlp.user_embedding.weight.data
            self.item_mlp_embeddings = self.model.mlp.item_embedding.weight.data
        # Combine embeddings as item factors
        self.item_factors = np.concatenate([
            self.item_gmf_embeddings.cpu().numpy(),
            self.item_mlp_embeddings.cpu().numpy()
        ], axis=1)
        
        return self
    
    def recommend(self, user_id, n=10, exclude_seen=True):
        """
        Generate recommendations for a user.
        """
        if user_id not in self.user_items:
            all_items = list(range(self.n_items))
            recommendations = random.sample(all_items, min(n, len(all_items)))
            return np.array(recommendations)
        
        with torch.no_grad():
            user_tensor = torch.LongTensor([user_id] * self.n_items).to(self.device)
            item_tensor = torch.LongTensor(list(range(self.n_items))).to(self.device)
            scores = self.model(user_tensor, item_tensor).cpu().numpy()
        
        if exclude_seen:
            for i in self.user_items[user_id]:
                scores[i] = -np.inf
        top_items = np.argsort(scores)[::-1][:n]
        return top_items

#################################
# RERANKER IMPLEMENTATION (for NeuMF)
#################################

class SimpleRerankerNeuMF:
    """
    Simple reranker for NeuMF that balances original scores with diversity.
    """
    def __init__(self, model, alpha=0.7):
        """
        Initialize reranker.
        Parameters:
        - model: trained NeuMFRecommender
        - alpha: weight for original scores (0 to 1)
        """
        self.model = model
        self.alpha = alpha
        self.item_popularity = np.zeros(model.n_items)
        for user in range(model.n_users):
            if user in model.user_items:
                for item in model.user_items[user]:
                    self.item_popularity[item] += 1
        max_pop = np.max(self.item_popularity)
        if max_pop > 0:
            self.norm_popularity = self.item_popularity / max_pop
        else:
            self.norm_popularity = np.zeros_like(self.item_popularity)
    
    def rerank(self, user_id, n=10):
        """
        Generate reranked recommendations using NeuMF scores.
        """
        # Use NeuMF's recommend method to get a candidate pool
        candidates = self.model.recommend(user_id, n=n*3, exclude_seen=True)
        # Get predicted scores from the NeuMF model
        with torch.no_grad():
            user_tensor = torch.LongTensor([user_id] * self.model.n_items).to(self.model.device)
            item_tensor = torch.LongTensor(list(range(self.model.n_items))).to(self.model.device)
            scores = self.model.model(user_tensor, item_tensor).cpu().numpy()
        selected = []
        while len(selected) < n and candidates.size > 0:
            best_score = -np.inf
            best_item = None
            for item in candidates:
                if item in selected:
                    continue
                score_orig = scores[item]
                diversity_score = 0
                if selected:
                    item_factors = self.model.item_factors[item]
                    selected_factors = self.model.item_factors[selected]
                    similarities = []
                    for sel in selected_factors:
                        dot_product = np.dot(item_factors, sel)
                        norm_product = np.linalg.norm(item_factors) * np.linalg.norm(sel)
                        sim = dot_product / norm_product if norm_product > 0 else 0
                        similarities.append(sim)
                    if similarities:
                        avg_sim = np.mean(similarities)
                        diversity_score = 1 - avg_sim
                novelty_score = 1 - self.norm_popularity[item]
                combined_score = (self.alpha * score_orig +
                                  (1 - self.alpha) * 0.5 * diversity_score +
                                  (1 - self.alpha) * 0.5 * novelty_score)
                if combined_score > best_score:
                    best_score = combined_score
                    best_item = item
            if best_item is None:
                break
            selected.append(best_item)
            candidates = candidates[candidates != best_item]
        return np.array(selected)

class MMRRerankerNeuMF:
    """
    MMR Reranker for NeuMF.
    """
    def __init__(self, model, lambda_param=0.7):
        self.model = model
        self.lambda_param = lambda_param
        
    def calculate_item_similarity(self, item1, item2):
        factors1 = self.model.item_factors[item1]
        factors2 = self.model.item_factors[item2]
        dot = np.dot(factors1, factors2)
        norm = np.linalg.norm(factors1) * np.linalg.norm(factors2)
        return dot / norm if norm > 0 else 0
    
    def rerank(self, user_id, n=10, candidate_size=100):
        candidates = self.model.recommend(user_id, n=candidate_size, exclude_seen=True)
        with torch.no_grad():
            user_tensor = torch.LongTensor([user_id] * self.model.n_items).to(self.model.device)
            item_tensor = torch.LongTensor(list(range(self.model.n_items))).to(self.model.device)
            relevance_scores = self.model.model(user_tensor, item_tensor).cpu().numpy()
        candidate_scores = relevance_scores[candidates]
        min_score = np.min(candidate_scores)
        max_score = np.max(candidate_scores)
        score_range = max_score - min_score
        normalized_scores = ((candidate_scores - min_score) / score_range) if score_range > 0 else np.zeros_like(candidate_scores)
        
        selected = []
        if candidates.size > 0:
            selected.append(candidates[np.argmax(normalized_scores)])
            remaining = set(candidates) - set(selected)
        else:
            remaining = set()
        while len(selected) < n and remaining:
            max_mmr = -np.inf
            max_item = None
            for item in remaining:
                idx = np.where(candidates == item)[0][0]
                relevance = normalized_scores[idx]
                max_sim = 0
                for sel in selected:
                    sim = self.calculate_item_similarity(item, sel)
                    max_sim = max(max_sim, sim)
                mmr_score = self.lambda_param * relevance - (1 - self.lambda_param) * max_sim
                if mmr_score > max_mmr:
                    max_mmr = mmr_score
                    max_item = item
            if max_item is None:
                break
            selected.append(max_item)
            remaining.remove(max_item)
        return np.array(selected)

#################################
# EVALUATION METRICS (same as before)
#################################

def calculate_ndcg(recommended_items, relevant_items, relevant_scores, k=None):
    if k is None:
        k = len(recommended_items)
    else:
        k = min(k, len(recommended_items))
    
    # Build relevance map (ensuring all scores are floats)
    relevance_map = {}
    for item_id, score in zip(relevant_items, relevant_scores):
        try:
            score_float = float(score)
        except (ValueError, TypeError):
            score_float = 0.0
        score_float = min(score_float, 10.0)
        relevance_map[item_id] = score_float
    
    # Compute DCG for recommended items
    dcg = 0.0
    for i, item_id in enumerate(recommended_items[:k]):
        if item_id in relevance_map:
            rel = float(relevance_map[item_id])
            dcg += (2 ** rel - 1) / np.log2(i + 2)
    
    # Compute IDCG (ideal DCG)
    sorted_relevant = []
    for item_id, score in zip(relevant_items, relevant_scores):
        try:
            score_float = float(score)
        except (ValueError, TypeError):
            score_float = 0.0
        score_float = min(score_float, 10.0)
        sorted_relevant.append((item_id, score_float))
    sorted_relevant.sort(key=lambda x: x[1], reverse=True)
    
    idcg = 0.0
    for i, (item_id, rel) in enumerate(sorted_relevant[:k]):
        idcg += (2 ** float(rel) - 1) / np.log2(i + 2)
    
    return dcg / idcg if idcg > 0 else 0

def calculate_precision(recommended_items, relevant_items):
    num_relevant = sum(1 for item in recommended_items if item in relevant_items)
    return num_relevant / len(recommended_items) if recommended_items else 0

def calculate_recall(recommended_items, relevant_items):
    num_relevant = sum(1 for item in recommended_items if item in relevant_items)
    return num_relevant / len(relevant_items) if relevant_items else 0

def calculate_diversity_metrics(recommendations, item_popularity, total_items, tail_items=None):
    rec_counts = Counter(recommendations)
    item_coverage = len(rec_counts) / total_items
    sorted_counts = sorted(rec_counts.values())
    n = len(sorted_counts)
    if n == 0:
        gini_index = 0
    else:
        cumulative_sum = sum((i + 1) * count for i, count in enumerate(sorted_counts))
        gini_index = (2 * cumulative_sum) / (n * sum(sorted_counts)) - (n + 1) / n
    rec_total = sum(rec_counts.values())
    probabilities = [count / rec_total for count in rec_counts.values()]
    entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)
    max_entropy = np.log2(min(total_items, rec_total))
    normalized_entropy = entropy / max_entropy if max_entropy > 0 else 0
    if tail_items is None:
        sorted_pop_indices = np.argsort(item_popularity)
        num_tail_items = int(len(sorted_pop_indices) * 0.2)
        tail_items = set(sorted_pop_indices[:num_tail_items])
    tail_recommendations = sum(1 for item in recommendations if item in tail_items)
    tail_percentage = tail_recommendations / len(recommendations) if recommendations else 0
    metrics = {
        'item_coverage': item_coverage,
        'gini_index': gini_index,
        'shannon_entropy': normalized_entropy,
        'tail_percentage': tail_percentage
    }
    return metrics, tail_items

#################################
# COMPREHENSIVE EVALUATION (NeuMF on LastFM)
#################################

def comprehensive_evaluation_multiple_rerankers(k=10, sample_size=None):
    """
    Evaluate NeuMF with diversity reranking on the LastFM dataset.
    """
    print("=" * 80)
    print(f"COMPREHENSIVE EVALUATION WITH MULTIPLE RERANKERS (k={k})")
    print("=" * 80)
    
    print("\nLoading LastFM dataset...")
    ratings_df, dummy_df = load_lastfm()
    
    print("Splitting data for evaluation...")
    value_counts = ratings_df['user_id'].value_counts()
    if value_counts.min() >= 2:
        print("Using stratified sampling...")
        train_df, test_df = train_test_split(
            ratings_df, test_size=0.2, stratify=ratings_df['user_id'], random_state=42
        )
    else:
        print("Using random sampling (some users have only 1 rating)...")
        train_df, test_df = train_test_split(
            ratings_df, test_size=0.2, random_state=42
        )
    
    print("Creating user-item matrix...")
    user_item_matrix, user_mapping, item_mapping = create_user_item_matrix(train_df)
    reverse_user_mapping = {v: k for k, v in user_mapping.items()}
    reverse_item_mapping = {v: k for k, v in item_mapping.items()}
    
    test_relevant_items = defaultdict(list)
    test_relevant_scores = defaultdict(list)
    for _, row in test_df.iterrows():
        uid, iid, rating = row['user_id'], row['item_id'], row['rating']
        if uid in user_mapping and iid in item_mapping:
            test_relevant_items[uid].append(iid)
            test_relevant_scores[uid].append(rating)
    
    print("\nTraining NeuMF model...")
    model = NeuMFRecommender(latent_dim=32, epochs=10, batch_size=256)
    model.fit(user_item_matrix)
    
    print("\nInitializing rerankers...")
    # Wir verwenden hier den SimpleRerankerNeuMF (du kannst auch den MMRRerankerNeuMF verwenden)
    simple_reranker = SimpleRerankerNeuMF(model=model, alpha=0.7)
    mmr_reranker = MMRRerankerNeuMF(model=model, lambda_param=0.7)
    
    rerankers = {
        "Original NeuMF": None,
        "Simple Reranker": simple_reranker,
        "MMR Reranker": mmr_reranker
    }
    
    all_results = {}
    if sample_size is not None and sample_size < len(test_relevant_items):
        eval_users = random.sample(list(test_relevant_items.keys()), sample_size)
    else:
        eval_users = list(test_relevant_items.keys())
    print(f"\nEvaluating {len(eval_users)} users...")
    
    for name, reranker in rerankers.items():
        print(f"\nEvaluating {name}...")
        ndcg_scores = []
        precision_scores = []
        recall_scores = []
        all_recs = []
        for uid in eval_users:
            if not test_relevant_items[uid]:
                continue
            user_idx = user_mapping[uid]
            if reranker is None:
                rec_idx = model.recommend(user_idx, n=k)
            else:
                rec_idx = reranker.rerank(user_idx, n=k)
            rec = [reverse_item_mapping[idx] for idx in rec_idx]
            all_recs.extend(rec_idx)
            ndcg_scores.append(calculate_ndcg(rec, test_relevant_items[uid], test_relevant_scores[uid]))
            precision_scores.append(calculate_precision(rec, test_relevant_items[uid]))
            recall_scores.append(calculate_recall(rec, test_relevant_items[uid]))
        accuracy_metrics = {
            f'ndcg@{k}': np.mean(ndcg_scores),
            f'precision@{k}': np.mean(precision_scores),
            f'recall@{k}': np.mean(recall_scores)
        }
        item_popularity = np.zeros(model.n_items)
        for user in range(model.n_users):
            if user in model.user_items:
                for item in model.user_items[user]:
                    item_popularity[item] += 1
        diversity_metrics, _ = calculate_diversity_metrics(
            recommendations=all_recs,
            item_popularity=item_popularity,
            total_items=model.n_items
        )
        all_results[name] = {
            'accuracy': accuracy_metrics,
            'diversity': diversity_metrics
        }
    
    print("\n" + "="*30 + " ACCURACY METRICS COMPARISON " + "="*30)
    print(f"{'Metric':<15}", end='')
    for name in rerankers.keys():
        print(f"{name:<20}", end='')
    print()
    print("-" * 80)
    for metric in [f'ndcg@{k}', f'precision@{k}', f'recall@{k}']:
        print(f"{metric:<15}", end='')
        baseline = all_results["Original NeuMF"]['accuracy'][metric]
        for name in rerankers.keys():
            value = all_results[name]['accuracy'][metric]
            change = ((value - baseline) / baseline * 100) if baseline > 0 else float('inf')
            if name == "Original NeuMF":
                print(f"{value:.4f}{' '*15}", end='')
            else:
                print(f"{value:.4f} ({change:+.1f}%){' '*5}", end='')
        print()
    
    print("\n" + "="*30 + " DIVERSITY METRICS COMPARISON " + "="*30)
    print(f"{'Metric':<15}", end='')
    for name in rerankers.keys():
        print(f"{name:<20}", end='')
    print()
    print("-" * 80)
    for metric in ['item_coverage', 'gini_index', 'shannon_entropy', 'tail_percentage']:
        print(f"{metric:<15}", end='')
        baseline = all_results["Original NeuMF"]['diversity'][metric]
        for name in rerankers.keys():
            value = all_results[name]['diversity'][metric]
            change = ((value - baseline) / baseline * 100) if baseline > 0 else float('inf')
            if name == "Original NeuMF":
                print(f"{value:.4f}{' '*15}", end='')
            else:
                print(f"{value:.4f} ({change:+.1f}%){' '*5}", end='')
        print()
    
    print("\n" + "="*30 + " METRIC INTERPRETATIONS " + "="*30)
    print("Accuracy Metrics:")
    print("- NDCG: Higher is better, measures ranking quality")
    print("- Precision: Higher is better, measures relevant item ratio")
    print("- Recall: Higher is better, measures coverage of relevant items")
    print("\nDiversity Metrics:")
    print("- Item Coverage: Higher means more catalog items are recommended")
    print("- Gini Index: Lower indicates more equality in recommendations")
    print("- Shannon Entropy: Higher means more diverse recommendations")
    print("- Tail Percentage: Higher means more niche items are recommended")
    
    return all_results

# Execute evaluation when run directly
if __name__ == "__main__":
    comprehensive_evaluation_multiple_rerankers(k=10)


COMPREHENSIVE EVALUATION WITH MULTIPLE RERANKERS (k=10)

Loading LastFM dataset...
Loading LastFM dataset...
Trying to load LastFM data with utf-8 encoding...
Successfully loaded LastFM dataset with utf-8 encoding
Loaded 92835 interactions from 1893 users on 17633 artists
Splitting data for evaluation...
Using random sampling (some users have only 1 rating)...
Creating user-item matrix...

Training NeuMF model...
Training NeuMF model for 10 epochs...
Epoch 1/10, Loss: 0.3847, Time: 3.95s
Epoch 5/10, Loss: 0.1355, Time: 3.86s
Epoch 10/10, Loss: 0.0497, Time: 3.05s

Initializing rerankers...

Evaluating 1870 users...

Evaluating Original NeuMF...

Evaluating Simple Reranker...

Evaluating MMR Reranker...

Metric         Original NeuMF      Simple Reranker     MMR Reranker        
--------------------------------------------------------------------------------
ndcg@10        0.1859               0.0545 (-70.7%)     0.1420 (-23.6%)     
precision@10   0.1388               0.0478 (-65.5%)  