In [3]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import math
import random
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

#################################
# DATASET AND HELPER FUNCTIONS (LASTFM)
#################################

def load_lastfm(path="lastfm/lastfm.inter"):
    """
    Load the LastFM dataset

    Parameters:
    - path: path to the lastfm.inter file

    Returns:
    - ratings_df: DataFrame with columns ['user_id', 'item_id', 'rating', 'timestamp']
    - dummy_df: Empty DataFrame with item information structure (for compatibility)
    """
    print("Loading LastFM dataset...")
    encodings_to_try = ['utf-8', 'latin-1', 'ISO-8859-1', 'cp1252']

    def generate_sample_data():
        print("Generating sample LastFM data for demonstration purposes...")
        np.random.seed(42)
        n_users = 100
        n_items = 50
        n_ratings = 1000
        user_ids = [f"user_{i}" for i in range(n_users)]
        item_ids = [f"artist_{i}" for i in range(n_items)]
        random_users = np.random.choice(user_ids, size=n_ratings)
        random_items = np.random.choice(item_ids, size=n_ratings)
        random_ratings = np.random.uniform(1, 5, size=n_ratings)
        constant_timestamp = np.full(n_ratings, 1111111111)
        sample_df = pd.DataFrame({
            'user_id': random_users,
            'item_id': random_items,
            'rating': random_ratings,
            'timestamp': constant_timestamp
        })
        dummy_df = pd.DataFrame(columns=['item_id', 'name', 'tags'])
        print(f"Generated sample data with {len(sample_df)} ratings from {sample_df['user_id'].nunique()} users on {sample_df['item_id'].nunique()} artists")
        return sample_df, dummy_df

    for encoding in encodings_to_try:
        try:
            print(f"Trying to load LastFM data with {encoding} encoding...")
            columns = ['user_id', 'artist_id', 'weight', 'tag_value']
            raw_df = pd.read_csv(path, sep='\t', names=columns, encoding=encoding)
            raw_df = raw_df.rename(columns={'artist_id': 'item_id', 'weight': 'rating'})
            raw_df['timestamp'] = 1111111111
            raw_df = raw_df.drop(columns=['tag_value'])
            dummy_df = pd.DataFrame(columns=['item_id', 'name', 'tags'])
            if len(raw_df) > 0:
                print(f"Successfully loaded LastFM dataset with {encoding} encoding")
                print(f"Loaded {len(raw_df)} interactions from {raw_df['user_id'].nunique()} users on {raw_df['item_id'].nunique()} artists")
                return raw_df, dummy_df
        except Exception as e:
            print(f"Error loading LastFM dataset with {encoding} encoding: {str(e)}")
            continue
    print("All attempts to load the LastFM dataset failed. Generating sample data instead.")
    return generate_sample_data()

def create_user_item_matrix(ratings_df):
    """
    Create a sparse user-item interaction matrix from ratings
    """
    user_ids = ratings_df['user_id'].unique()
    item_ids = ratings_df['item_id'].unique()
    user_mapping = {user_id: i for i, user_id in enumerate(user_ids)}
    item_mapping = {item_id: i for i, item_id in enumerate(item_ids)}
    rows = ratings_df['user_id'].map(user_mapping)
    cols = ratings_df['item_id'].map(item_mapping)
    data = np.ones(len(ratings_df))
    user_item_matrix = csr_matrix((data, (rows, cols)), shape=(len(user_mapping), len(item_mapping)))
    return user_item_matrix, user_mapping, item_mapping

#################################
# POPULARITY-BASED RECOMMENDER IMPLEMENTATION
#################################

class PopRecommender:
    def __init__(self, random_state=42):
        """
        Popularity-based recommender algorithm

        Parameters:
        - random_state: seed for reproducibility
        """
        self.random_state = random_state
        random.seed(random_state)
        np.random.seed(random_state)
        
    def fit(self, user_item_matrix):
        """
        Compute global item popularity from the training data

        Parameters:
        - user_item_matrix: scipy sparse matrix with user-item interactions

        Returns:
        - self
        """
        self.user_item_matrix = user_item_matrix
        self.n_users, self.n_items = user_item_matrix.shape
        
        # Build dictionary of items each user interacted with
        self.user_items = defaultdict(set)
        for user, item in zip(*self.user_item_matrix.nonzero()):
            self.user_items[user].add(item)
        
        # Calculate item popularity as sum of interactions per item
        self.item_popularity = np.array(user_item_matrix.sum(axis=0)).flatten()
        
        # Create item factors for reranking compatibility (32 dimensions)
        self.item_factors = np.zeros((self.n_items, 32))
        max_pop = np.max(self.item_popularity)
        if max_pop > 0:
            self.item_factors[:, 0] = self.item_popularity / max_pop
        
        # Fill remaining dimensions with random noise (influenced by popularity)
        for i in range(self.n_items):
            np.random.seed(self.random_state + i)
            self.item_factors[i, 1:] = np.random.normal(0, 0.1, 31) * (0.5 + 0.5 * self.item_factors[i, 0])
        
        print("Popularity-based recommender ready! Top 5 most popular items:",
              np.argsort(self.item_popularity)[::-1][:5])
        return self
    
    def recommend(self, user_id, n=10, exclude_seen=True):
        """
        Generate item recommendations based on global popularity

        Parameters:
        - user_id: user index
        - n: number of recommendations to generate
        - exclude_seen: whether to exclude items the user already interacted with

        Returns:
        - array of n recommended item indices
        """
        recommended_items = np.argsort(self.item_popularity)[::-1]
        if exclude_seen and user_id in self.user_items:
            seen = set(self.user_items[user_id])
            recommended_items = np.array([item for item in recommended_items if item not in seen])
        return recommended_items[:n]

#################################
# RERANKER IMPLEMENTATION (using the same classes as before)
#################################

class SimpleReranker:
    """
    Simple reranker that balances original scores with diversity.
    """
    def __init__(self, model, alpha=0.7):
        """
        Initialize reranker

        Parameters:
        - model: trained recommender model (PopRecommender)
        - alpha: weight for original scores (0 to 1)
        """
        self.model = model
        self.alpha = alpha
        self.item_popularity = np.zeros(model.n_items)
        for user in range(model.n_users):
            if user in model.user_items:
                for item in model.user_items[user]:
                    self.item_popularity[item] += 1
        max_pop = np.max(self.item_popularity)
        if max_pop > 0:
            self.norm_popularity = self.item_popularity / max_pop
        else:
            self.norm_popularity = np.zeros_like(self.item_popularity)
    
    def rerank(self, user_id, n=10):
        """
        Generate reranked recommendations.
        """
        candidates = self.model.recommend(user_id, n=n*3, exclude_seen=True)
        scores = self.model.item_popularity / np.max(self.model.item_popularity)
        selected = []
        while len(selected) < n and candidates.size > 0:
            best_score = -np.inf
            best_item = None
            for item in candidates:
                if item in selected:
                    continue
                score_orig = scores[item]
                diversity_score = 0
                if selected:
                    item_factors = self.model.item_factors[item]
                    selected_factors = self.model.item_factors[selected]
                    similarities = []
                    for sel in selected_factors:
                        dot = np.dot(item_factors, sel)
                        norm = np.linalg.norm(item_factors) * np.linalg.norm(sel)
                        sim = dot / norm if norm > 0 else 0
                        similarities.append(sim)
                    if similarities:
                        diversity_score = 1 - np.mean(similarities)
                novelty_score = 1 - self.norm_popularity[item]
                combined = self.alpha * score_orig + (1 - self.alpha) * 0.5 * diversity_score + (1 - self.alpha) * 0.5 * novelty_score
                if combined > best_score:
                    best_score = combined
                    best_item = item
            if best_item is None:
                break
            selected.append(best_item)
            candidates = candidates[candidates != best_item]
        return np.array(selected)

class MMRReranker:
    """
    Maximum Marginal Relevance (MMR) Reranker.

    Balances relevance and diversity by selecting items that maximize:
    MMR = λ * rel(i) - (1-λ) * max(sim(i,j)) for j in selected items.
    """
    def __init__(self, model, lambda_param=0.7):
        """
        Initialize MMR reranker

        Parameters:
        - model: trained recommender model (PopRecommender)
        - lambda_param: trade-off parameter (0-1)
        """
        self.model = model
        self.lambda_param = lambda_param
        
    def calculate_item_similarity(self, item1, item2):
        factors1 = self.model.item_factors[item1]
        factors2 = self.model.item_factors[item2]
        dot = np.dot(factors1, factors2)
        norm = np.linalg.norm(factors1) * np.linalg.norm(factors2)
        return dot / norm if norm > 0 else 0
    
    def rerank(self, user_id, n=10, candidate_size=100):
        candidates = self.model.recommend(user_id, n=candidate_size, exclude_seen=True)
        scores = self.model.item_popularity / np.max(self.model.item_popularity)
        selected = []
        if candidates.size > 0:
            selected.append(candidates[np.argmax(scores[candidates])])
            remaining = set(candidates) - set(selected)
        else:
            remaining = set()
        while len(selected) < n and remaining:
            max_mmr = -np.inf
            max_item = None
            for item in remaining:
                idx = np.where(candidates == item)[0][0]
                relevance = scores[item]
                max_sim = 0
                for sel in selected:
                    sim = self.calculate_item_similarity(item, sel)
                    max_sim = max(max_sim, sim)
                mmr_score = self.lambda_param * relevance - (1 - self.lambda_param) * max_sim
                if mmr_score > max_mmr:
                    max_mmr = mmr_score
                    max_item = item
            if max_item is None:
                break
            selected.append(max_item)
            remaining.remove(max_item)
        return np.array(selected)

#################################
# EVALUATION METRICS (same as before)
#################################

def calculate_ndcg(recommended_items, relevant_items, relevant_scores, k=None):
    if k is None:
        k = len(recommended_items)
    else:
        k = min(k, len(recommended_items))
    
    # Relevanz-Map mit Begrenzung der Ratings
    relevance_map = {}
    for item_id, score in zip(relevant_items, relevant_scores):
        try:
            score_float = float(score)
        except (ValueError, TypeError):
            score_float = 0.0
        score_float = min(score_float, 10.0)
        relevance_map[item_id] = score_float
    
    # DCG für die empfohlenen Items
    dcg = 0.0
    for i, item_id in enumerate(recommended_items[:k]):
        if item_id in relevance_map:
            rel = float(relevance_map[item_id])
            dcg += (2 ** rel - 1) / np.log2(i + 2)
    
    # Ideal DCG: Wir sortieren die relevanten Items nach dem begrenzten Rating
    sorted_relevant = sorted(
        zip(relevant_items, relevant_scores),
        key=lambda x: min(float(x[1]), 10.0),
        reverse=True
    )
    idcg = 0.0
    for i, (item_id, rel) in enumerate(sorted_relevant[:k]):
        rel = min(float(rel), 10.0)
        idcg += (2 ** rel - 1) / np.log2(i + 2)
    
    return dcg / idcg if idcg > 0 else 0

def calculate_precision(recommended_items, relevant_items):
    num_relevant = sum(1 for item in recommended_items if item in relevant_items)
    return num_relevant / len(recommended_items) if recommended_items else 0

def calculate_recall(recommended_items, relevant_items):
    num_relevant = sum(1 for item in recommended_items if item in relevant_items)
    return num_relevant / len(relevant_items) if relevant_items else 0

def calculate_diversity_metrics(recommendations, item_popularity, total_items, tail_items=None):
    rec_counts = Counter(recommendations)
    item_coverage = len(rec_counts) / total_items
    sorted_counts = sorted(rec_counts.values())
    n = len(sorted_counts)
    if n == 0:
        gini_index = 0
    else:
        cumulative_sum = sum((i + 1) * count for i, count in enumerate(sorted_counts))
        gini_index = (2 * cumulative_sum) / (n * sum(sorted_counts)) - (n + 1) / n
    rec_total = sum(rec_counts.values())
    probabilities = [count / rec_total for count in rec_counts.values()]
    entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)
    max_entropy = np.log2(min(total_items, rec_total))
    normalized_entropy = entropy / max_entropy if max_entropy > 0 else 0
    if tail_items is None:
        sorted_pop_indices = np.argsort(item_popularity)
        num_tail_items = int(len(sorted_pop_indices) * 0.2)
        tail_items = set(sorted_pop_indices[:num_tail_items])
    tail_recommendations = sum(1 for item in recommendations if item in tail_items)
    tail_percentage = tail_recommendations / len(recommendations) if recommendations else 0
    metrics = {
        'item_coverage': item_coverage,
        'gini_index': gini_index,
        'shannon_entropy': normalized_entropy,
        'tail_percentage': tail_percentage
    }
    return metrics, tail_items

#################################
# COMPREHENSIVE EVALUATION (Popularity-based on LastFM)
#################################

def comprehensive_evaluation_multiple_rerankers(k=10, sample_size=None):
    """
    Run a comprehensive evaluation for the Popularity-based recommender on LastFM.
    """
    print("=" * 80)
    print(f"COMPREHENSIVE EVALUATION WITH MULTIPLE RERANKERS (k={k})")
    print("=" * 80)
    
    print("\nLoading LastFM dataset...")
    ratings_df, dummy_df = load_lastfm()
    
    print("Splitting data for evaluation...")
    value_counts = ratings_df['user_id'].value_counts()
    if value_counts.min() >= 2:
        print("Using stratified sampling...")
        train_df, test_df = train_test_split(ratings_df, test_size=0.2, stratify=ratings_df['user_id'], random_state=42)
    else:
        print("Using random sampling (some users have only 1 rating)...")
        train_df, test_df = train_test_split(ratings_df, test_size=0.2, random_state=42)
    
    print("Creating user-item matrix...")
    user_item_matrix, user_mapping, item_mapping = create_user_item_matrix(train_df)
    reverse_user_mapping = {v: k for k, v in user_mapping.items()}
    reverse_item_mapping = {v: k for k, v in item_mapping.items()}
    
    test_relevant_items = defaultdict(list)
    test_relevant_scores = defaultdict(list)
    for _, row in test_df.iterrows():
        uid, iid, rating = row['user_id'], row['item_id'], row['rating']
        if uid in user_mapping and iid in item_mapping:
            test_relevant_items[uid].append(iid)
            test_relevant_scores[uid].append(rating)
    
    print("\nTraining Popularity-based recommender...")
    model = PopRecommender()
    model.fit(user_item_matrix)
    
    print("\nInitializing rerankers...")
    simple_reranker = SimpleReranker(model=model, alpha=0.7)
    mmr_reranker = MMRReranker(model=model, lambda_param=0.7)
    
    rerankers = {
        "Original Pop": None,
        "Simple Reranker": simple_reranker,
        "MMR Reranker": mmr_reranker
    }
    
    all_results = {}
    if sample_size is not None and sample_size < len(test_relevant_items):
        eval_users = random.sample(list(test_relevant_items.keys()), sample_size)
    else:
        eval_users = list(test_relevant_items.keys())
    print(f"\nEvaluating {len(eval_users)} users...")
    
    for name, reranker in rerankers.items():
        print(f"\nEvaluating {name}...")
        ndcg_scores = []
        precision_scores = []
        recall_scores = []
        all_recs = []
        for uid in eval_users:
            if not test_relevant_items[uid]:
                continue
            user_idx = user_mapping[uid]
            if reranker is None:
                rec_idx = model.recommend(user_idx, n=k)
            else:
                rec_idx = reranker.rerank(user_idx, n=k)
            rec = [reverse_item_mapping[idx] for idx in rec_idx]
            all_recs.extend(rec_idx)
            ndcg_scores.append(calculate_ndcg(rec, test_relevant_items[uid], test_relevant_scores[uid]))
            precision_scores.append(calculate_precision(rec, test_relevant_items[uid]))
            recall_scores.append(calculate_recall(rec, test_relevant_items[uid]))
        accuracy_metrics = {
            f'ndcg@{k}': np.mean(ndcg_scores),
            f'precision@{k}': np.mean(precision_scores),
            f'recall@{k}': np.mean(recall_scores)
        }
        item_popularity = np.zeros(model.n_items)
        for user in range(model.n_users):
            if user in model.user_items:
                for item in model.user_items[user]:
                    item_popularity[item] += 1
        diversity_metrics, _ = calculate_diversity_metrics(recommendations=all_recs,
                                                            item_popularity=item_popularity,
                                                            total_items=model.n_items)
        all_results[name] = {
            'accuracy': accuracy_metrics,
            'diversity': diversity_metrics
        }
    
    print("\n" + "="*30 + " ACCURACY METRICS COMPARISON " + "="*30)
    print(f"{'Metric':<15}", end='')
    for name in rerankers.keys():
        print(f"{name:<20}", end='')
    print()
    print("-" * 80)
    for metric in [f'ndcg@{k}', f'precision@{k}', f'recall@{k}']:
        print(f"{metric:<15}", end='')
        baseline = all_results["Original Pop"]['accuracy'][metric]
        for name in rerankers.keys():
            value = all_results[name]['accuracy'][metric]
            change = ((value - baseline) / baseline * 100) if baseline > 0 else float('inf')
            if name == "Original Pop":
                print(f"{value:.4f}{' '*15}", end='')
            else:
                print(f"{value:.4f} ({change:+.1f}%){' '*5}", end='')
        print()
    
    print("\n" + "="*30 + " DIVERSITY METRICS COMPARISON " + "="*30)
    print(f"{'Metric':<15}", end='')
    for name in rerankers.keys():
        print(f"{name:<20}", end='')
    print()
    print("-" * 80)
    for metric in ['item_coverage', 'gini_index', 'shannon_entropy', 'tail_percentage']:
        print(f"{metric:<15}", end='')
        baseline = all_results["Original Pop"]['diversity'][metric]
        for name in rerankers.keys():
            value = all_results[name]['diversity'][metric]
            change = ((value - baseline) / baseline * 100) if baseline > 0 else float('inf')
            if name == "Original Pop":
                print(f"{value:.4f}{' '*15}", end='')
            else:
                print(f"{value:.4f} ({change:+.1f}%){' '*5}", end='')
        print()
    
    print("\n" + "="*30 + " METRIC INTERPRETATIONS " + "="*30)
    print("Accuracy Metrics:")
    print("- NDCG: Higher is better, measures ranking quality")
    print("- Precision: Higher is better, measures relevant item ratio in recommendations")
    print("- Recall: Higher is better, measures coverage of all relevant items")
    print("\nDiversity Metrics:")
    print("- Item Coverage: Higher means more catalog items are recommended")
    print("- Gini Index: Lower means more equality in item recommendations")
    print("- Shannon Entropy: Higher means more diverse recommendations")
    print("- Tail Percentage: Higher means more niche items are recommended")
    
    return all_results

# Execute evaluation when run directly
if __name__ == "__main__":
    comprehensive_evaluation_multiple_rerankers(k=10)

COMPREHENSIVE EVALUATION WITH MULTIPLE RERANKERS (k=10)

Loading LastFM dataset...
Loading LastFM dataset...
Trying to load LastFM data with utf-8 encoding...
Successfully loaded LastFM dataset with utf-8 encoding
Loaded 92835 interactions from 1893 users on 17633 artists
Splitting data for evaluation...
Using random sampling (some users have only 1 rating)...
Creating user-item matrix...

Training Popularity-based recommender...
Popularity-based recommender ready! Top 5 most popular items: [ 93 377 112 109 154]

Initializing rerankers...

Evaluating 1870 users...

Evaluating Original Pop...

Evaluating Simple Reranker...

Evaluating MMR Reranker...

Metric         Original Pop        Simple Reranker     MMR Reranker        
--------------------------------------------------------------------------------
ndcg@10        0.0834               0.0847 (+1.6%)     0.0852 (+2.1%)     
precision@10   0.0662               0.0675 (+2.0%)     0.0681 (+2.9%)     
recall@10      0.0724             