# In [15]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
# Domain-Dependent Hybrid Recommender System
# @author Velizar Petrov

import pandas as pd
import numpy as np
import pickle
import os
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from scipy.spatial.distance import cosine
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from transfer_learning import (
    BidirectionalKnowledgeTransfer,
    CrossDomainRecommender,
    create_realistic_cross_domain_overlap,
    evaluate_proper_transfer_fixed,
    run_complete_transfer_analysis,
    display_transfer_results)

# Startup banner: printed once, whenever this cell/module is executed.
print("Domain-Dependent Hybrid Recommender System")

# CORE MODEL CLASSES 

class BiasedSVDRecommender:
    """Rating predictor combining truncated-SVD factors with user/item biases.

    ``fit`` centres the ratings on their global mean, factorises the sparse
    user-item matrix with ``TruncatedSVD`` and then sets each user's and each
    item's bias to the mean residual the factor model leaves on their ratings.
    ``predict`` adds global mean, both biases and the factor dot product, and
    clamps the result to the 1.0-5.0 range.
    """

    # Ratings processed per vectorised batch while computing residuals; keeps
    # the temporary (batch x n_components) factor gathers small.
    _RESIDUAL_BATCH = 50000

    def __init__(self, n_components=100):
        """Create an unfitted model.

        Args:
            n_components: number of latent factors passed to ``TruncatedSVD``.
        """
        self.n_components = n_components
        self.user_factors = None
        self.item_factors = None
        self.user_biases = None
        self.item_biases = None
        self.global_mean = 0
        self.user_mapper = {}
        self.item_mapper = {}

    def fit(self, ratings_df):
        """Fit factors and biases.

        Args:
            ratings_df: DataFrame with ``userId``, ``movieId`` and ``rating``
                columns.
        """
        print(f"Training SVD...")
        self.global_mean = ratings_df['rating'].mean()

        # Create mappings from user/item IDs to matrix indices
        unique_users = ratings_df['userId'].unique()
        unique_items = ratings_df['movieId'].unique()
        self.user_mapper = {user: idx for idx, user in enumerate(unique_users)}
        self.item_mapper = {item: idx for idx, item in enumerate(unique_items)}

        n_users, n_items = len(unique_users), len(unique_items)
        print(f"Users: {n_users:,}, Items: {n_items:,}")

        # Build sparse user-item matrix, centered around global mean
        user_indices = ratings_df['userId'].map(self.user_mapper).values
        item_indices = ratings_df['movieId'].map(self.item_mapper).values
        ratings = ratings_df['rating'].values

        rating_matrix = csr_matrix(
            (ratings - self.global_mean, (user_indices, item_indices)),
            shape=(n_users, n_items)
        )

        # Decompose into user and item latent factors
        svd = TruncatedSVD(n_components=self.n_components, random_state=42)
        self.user_factors = svd.fit_transform(rating_matrix)
        self.item_factors = svd.components_.T

        # Bias = mean residual per user / per item (vectorised, no per-rating loop)
        residuals = self._residuals(user_indices, item_indices, ratings)
        self.user_biases = self._mean_by_index(user_indices, residuals, n_users)
        self.item_biases = self._mean_by_index(item_indices, residuals, n_items)

        print(f"SVD training complete")

    def _residuals(self, user_indices, item_indices, ratings):
        """Return rating - global_mean - <user_factor, item_factor> for every rating."""
        residuals = np.empty(len(ratings), dtype=float)
        for start in range(0, len(ratings), self._RESIDUAL_BATCH):
            rows = slice(start, start + self._RESIDUAL_BATCH)
            # Row-wise dot product between the gathered user and item factors
            interaction = np.einsum(
                'ij,ij->i',
                self.user_factors[user_indices[rows]],
                self.item_factors[item_indices[rows]],
            )
            residuals[rows] = ratings[rows] - self.global_mean - interaction
        return residuals

    @staticmethod
    def _mean_by_index(indices, values, size):
        """Average ``values`` grouped by integer ``indices``; empty groups stay 0."""
        totals = np.bincount(indices, weights=values, minlength=size)
        counts = np.bincount(indices, minlength=size)
        means = np.zeros(size)
        np.divide(totals, counts, out=means, where=counts > 0)
        return means

    def predict(self, user_id, item_id):
        """Predict a rating for ``(user_id, item_id)``.

        Returns the (unclamped) global mean when the user or the item was not
        seen during ``fit``; otherwise the biased SVD estimate clamped to
        [1.0, 5.0].
        """
        # Cold start: return global mean if user or item not seen during training
        if user_id not in self.user_mapper or item_id not in self.item_mapper:
            return self.global_mean

        user_idx = self.user_mapper[user_id]
        item_idx = self.item_mapper[item_id]

        # Prediction = global mean + user bias + item bias + latent factor interaction
        prediction = (self.global_mean +
                     self.user_biases[user_idx] +
                     self.item_biases[item_idx] +
                     np.dot(self.user_factors[user_idx], self.item_factors[item_idx]))

        return max(1.0, min(5.0, prediction))


class ItemBasedCF:
    """Item-based collaborative filtering using cosine similarity.

    ``fit`` keeps the ``max_items`` most-rated items, builds a sparse
    user-item matrix from them and computes a dense item-item cosine
    similarity matrix. ``predict`` returns the similarity-weighted average of
    the user's ratings on the ``k`` most similar items, falling back to the
    item's mean rating (or the global mean) when no estimate is possible.
    """

    # Class-level defaults double as fallbacks for instances created (e.g.
    # unpickled) before these settings became constructor parameters.
    max_items = 3000
    min_similarity = 0.05

    def __init__(self, k=30, max_items=3000, min_similarity=0.05):
        """Create an unfitted model.

        Args:
            k: number of similar items used for a prediction.
            max_items: number of most-rated items kept when fitting (limits
                the size of the dense similarity matrix).
            min_similarity: neighbours must have a similarity strictly above
                this value to contribute to a prediction.
        """
        self.k = k  # Number of similar items to use for prediction
        self.max_items = max_items
        self.min_similarity = min_similarity
        self.item_similarity = None
        self.ratings_matrix = None
        self.user_mapper = {}
        self.item_mapper = {}
        self.global_mean = 0
        self.item_means = {}

    def fit(self, ratings_df):
        """Fit on a DataFrame with ``userId``, ``movieId`` and ``rating`` columns."""
        print("Training ItemCF...")

        self.global_mean = ratings_df['rating'].mean()
        # Item means are taken over ALL ratings, not only the retained items
        self.item_means = ratings_df.groupby('movieId')['rating'].mean().to_dict()

        # Limit to the most-rated items for computational efficiency
        item_counts = ratings_df['movieId'].value_counts()
        top_items = item_counts.head(self.max_items).index
        filtered_ratings = ratings_df[ratings_df['movieId'].isin(top_items)]

        unique_users = filtered_ratings['userId'].unique()
        unique_items = filtered_ratings['movieId'].unique()
        self.user_mapper = {user: idx for idx, user in enumerate(unique_users)}
        self.item_mapper = {item: idx for idx, item in enumerate(unique_items)}

        user_indices = filtered_ratings['userId'].map(self.user_mapper)
        item_indices = filtered_ratings['movieId'].map(self.item_mapper)

        # Build user-item matrix and compute item-item similarity
        self.ratings_matrix = csr_matrix(
            (filtered_ratings['rating'], (user_indices, item_indices)),
            shape=(len(unique_users), len(unique_items))
        )

        self.item_similarity = cosine_similarity(self.ratings_matrix.T)
        print("ItemCF training complete")

    def predict(self, user_id, item_id):
        """Predict a rating for ``(user_id, item_id)``.

        Returns the item's mean rating (global mean if the item has none) when
        the user or item is unknown or the user has no sufficiently similar
        rated items; otherwise the weighted average clamped to [1.0, 5.0].
        """
        item_mean = self.item_means.get(item_id, self.global_mean)

        # Cold start fallback
        if user_id not in self.user_mapper or item_id not in self.item_mapper:
            return item_mean

        user_idx = self.user_mapper[user_id]
        item_idx = self.item_mapper[item_id]

        # A stored value > 0 marks an item this user has rated
        user_ratings = self.ratings_matrix[user_idx].toarray().flatten()
        similarities = self.item_similarity[item_idx]
        rated_items = np.flatnonzero(user_ratings > 0)

        # Keep sufficiently similar rated items, most similar first, top k only
        neighbours = sorted(
            ((similarities[item], user_ratings[item])
             for item in rated_items
             if similarities[item] > self.min_similarity),
            reverse=True,
        )[:self.k]

        if not neighbours:
            return item_mean

        # Weighted average of similar items' ratings
        numerator = sum(sim * rating for sim, rating in neighbours)
        denominator = sum(abs(sim) for sim, _ in neighbours)

        if denominator > 0:
            return max(1.0, min(5.0, numerator / denominator))

        return item_mean


class ContentBasedRecommender:
    """Content-based filtering using TF-IDF representations of item metadata.

    ``fit`` vectorises each item's ``content_features`` text and builds one
    profile per user as a weighted average of the TF-IDF vectors of the items
    that user rated at or above their own mean. ``predict`` adds a fixed
    offset to the user's mean rating depending on the cosine similarity
    between the user profile and the item vector.
    """

    def __init__(self, max_features=5000):
        """Create an unfitted model.

        Args:
            max_features: vocabulary size limit passed to ``TfidfVectorizer``.
        """
        self.max_features = max_features
        self.vectorizer = TfidfVectorizer(max_features=max_features, stop_words='english')
        self.item_profiles = None
        self.user_profiles = {}
        self.item_mapper = {}
        self.global_mean = 0
        self.user_means = {}

    def fit(self, items_df, ratings_df):
        """Fit item vectors and user profiles.

        Args:
            items_df: DataFrame with ``movieId`` and ``content_features``
                columns.
            ratings_df: DataFrame with ``userId``, ``movieId`` and ``rating``
                columns.
        """
        print("Training Content-based model...")

        self.global_mean = ratings_df['rating'].mean()
        self.user_means = ratings_df.groupby('userId')['rating'].mean().to_dict()

        # Create TF-IDF vectors from item content
        content = items_df['content_features'].fillna('').astype(str)
        self.item_profiles = self.vectorizer.fit_transform(content)
        self.item_mapper = {item: idx for idx, item in enumerate(items_df['movieId'])}

        # Group (item row, rating) pairs per user, skipping items without content rows.
        # Iterating the columns directly avoids the per-row Series that iterrows() builds.
        user_item_ratings = {}
        for user_id, item_id, rating in zip(ratings_df['userId'],
                                            ratings_df['movieId'],
                                            ratings_df['rating']):
            item_idx = self.item_mapper.get(item_id)
            if item_idx is not None:
                user_item_ratings.setdefault(user_id, []).append((item_idx, rating))

        # Start from a clean slate so a refit never keeps profiles from older data
        self.user_profiles = {}

        # Only use at-or-above-average ratings to build preferences
        for user_id, item_ratings in user_item_ratings.items():
            if len(item_ratings) < 3:
                continue

            item_rows = np.array([item_idx for item_idx, _ in item_ratings])
            values = np.array([rating for _, rating in item_ratings], dtype=float)
            user_mean = values.mean()

            liked = values >= user_mean
            # Weight grows with how far above the user's mean the rating is (minimum 1)
            weights = values[liked] - user_mean + 1
            total_weight = weights.sum()

            if total_weight > 0:
                # One sparse product replaces densifying every rated item's vector
                weighted_profile = self.item_profiles[item_rows[liked]].T.dot(weights)
                self.user_profiles[user_id] = np.asarray(weighted_profile).ravel() / total_weight

        print(f"Created {len(self.user_profiles)} user profiles")

    @staticmethod
    def _profile_similarity(user_profile, item_profile):
        """Cosine similarity of two dense vectors, clamped to [0, 1].

        A zero-length (or otherwise non-finite) vector carries no content
        signal and yields 0.0. Vectors that cannot be combined at all fall
        back to the neutral value 0.5.
        """
        try:
            norm_product = np.linalg.norm(user_profile) * np.linalg.norm(item_profile)
            if not norm_product > 0:  # zero vector or NaN norm
                return 0.0
            similarity = float(np.dot(user_profile, item_profile) / norm_product)
        except (TypeError, ValueError):
            return 0.5
        if not np.isfinite(similarity):
            return 0.0
        return max(0.0, min(1.0, similarity))

    def predict(self, user_id, item_id):
        """Predict a rating for ``(user_id, item_id)``.

        Returns the user's mean rating (global mean for unknown users) when no
        user profile or item vector exists; otherwise that mean plus 1.0 for
        similarity > 0.6, plus 0.5 for similarity > 0.4, else unchanged,
        clamped to [1.0, 5.0].
        """
        user_mean = self.user_means.get(user_id, self.global_mean)

        # Cold start fallback
        if user_id not in self.user_profiles or item_id not in self.item_mapper:
            return user_mean

        user_profile = self.user_profiles[user_id]
        item_idx = self.item_mapper[item_id]
        item_profile = self.item_profiles[item_idx].toarray().flatten()

        similarity = self._profile_similarity(user_profile, item_profile)

        # Simple threshold-based rating offset
        if similarity > 0.6:
            rating_offset = 1.0
        elif similarity > 0.4:
            rating_offset = 0.5
        else:
            rating_offset = 0.0

        return max(1.0, min(5.0, user_mean + rating_offset))


class HybridRecommender:
    """Weighted-average hybrid over several rating models.

    Each wrapped model must expose ``predict(user_id, item_id)``. Models that
    raise an ordinary exception or return NaN/inf are left out of the average
    for that prediction.
    """

    def __init__(self, models, weights=None):
        """Create the hybrid.

        Args:
            models: list of ``(name, model)`` tuples.
            weights: optional sequence with one weight per model. ``None`` or
                an empty sequence means equal weights. Models without a
                matching weight (sequence too short) are ignored.
        """
        self.models = models  # List of (name, model) tuples
        # Explicit None/length test: truthiness of a NumPy array is ambiguous
        if weights is not None and len(weights) > 0:
            self.weights = list(weights)
        else:
            self.weights = [1.0] * len(models)

    def predict(self, user_id, item_id):
        """Return the weighted average prediction clamped to [1.0, 5.0].

        Returns 3.0 when no model produced a usable prediction, and the plain
        mean when the usable weights do not sum to a positive value.
        """
        predictions = []
        weights = []

        # Collect predictions from all available models
        for (name, model), weight in zip(self.models, self.weights):
            try:
                pred = model.predict(user_id, item_id)
                if not (np.isnan(pred) or np.isinf(pred)):
                    predictions.append(pred)
                    weights.append(weight)
            except Exception:
                # Best effort: skip a failing model, but let KeyboardInterrupt
                # and SystemExit propagate.
                continue

        if not predictions:
            return 3.0

        # Weighted average
        if sum(weights) > 0:
            return max(1.0, min(5.0, np.average(predictions, weights=weights)))

        return max(1.0, min(5.0, np.mean(predictions)))


# DATA LOADING 

def load_dataset(dataset_type='movies', max_items=3000, max_ratings=300000):
    """Load the books dataset when ``dataset_type == 'books'``, else the movies dataset.

    ``max_items`` and ``max_ratings`` are forwarded unchanged to the selected
    loader, whose return value is passed straight through.
    """
    print(f"Loading {dataset_type} dataset...")

    # Any value other than 'books' selects the movies loader
    loader = load_books_data if dataset_type == 'books' else load_movies_data
    return loader(max_items, max_ratings)


def load_movies_data(max_movies, max_ratings):
    """Load the movies dataset and build one content string per movie.

    Reads ``movies_clean.csv``, ``content_features.csv`` and
    ``ratings_clean.csv`` from ``Datasets/The Movies Dataset``.

    Args:
        max_movies: keep only this many most-rated movies.
        max_ratings: if more ratings remain, sample this many (seed 42).

    Returns:
        dict with ``'ratings'`` and ``'movies'`` DataFrames; the movie id
        column is named ``movieId`` in both.
    """
    data_dir = "Datasets/The Movies Dataset"

    def read_table(filename):
        return pd.read_csv(os.path.join(data_dir, filename))

    movies = read_table("movies_clean.csv")
    movies = movies.merge(read_table("content_features.csv"), on='id', how='left')

    text_fields = ['genres', 'overview', 'tagline', 'content_features']

    def combine_content(row):
        # Join every non-missing text field into a single space-separated string
        return ' '.join(
            str(row[field]) for field in text_fields if pd.notna(row.get(field))
        )

    movies['content_features'] = movies.apply(combine_content, axis=1)

    ratings = read_table("ratings_clean.csv")

    # Restrict both tables to the most-rated movies to limit dataset size
    top_movie_ids = ratings['movieId'].value_counts().head(max_movies).index
    movies = movies[movies['id'].isin(top_movie_ids)]
    ratings = ratings[ratings['movieId'].isin(top_movie_ids)]

    if len(ratings) > max_ratings:
        ratings = ratings.sample(n=max_ratings, random_state=42)

    movies = movies.rename(columns={'id': 'movieId'})

    # Drop users and items with fewer than 3 ratings (both counts are taken
    # before either filter is applied)
    ratings_per_user = ratings['userId'].value_counts()
    ratings_per_item = ratings['movieId'].value_counts()
    kept_users = ratings_per_user[ratings_per_user >= 3].index
    kept_items = ratings_per_item[ratings_per_item >= 3].index

    is_dense = ratings['userId'].isin(kept_users) & ratings['movieId'].isin(kept_items)
    ratings = ratings[is_dense]

    # Keep only movies that still have at least one rating
    movies = movies[movies['movieId'].isin(ratings['movieId'].unique())]

    print(f"Final: {len(ratings):,} ratings, {len(movies):,} movies, {len(ratings['userId'].unique()):,} users")
    return {'ratings': ratings, 'movies': movies}


def load_books_data(max_books, max_ratings):
    """Load the books dataset using the same column names as the movies loader.

    Reads ``goodbooks_10k_rating_and_description.csv`` and ``ratings.csv``
    from ``Datasets/goodbooks_10k_rating_and_description``.

    Args:
        max_books: keep only this many most-rated books (counted after any
            sampling of the ratings).
        max_ratings: if there are more ratings, sample this many (seed 42).

    Returns:
        dict with ``'ratings'`` and ``'movies'`` DataFrames; the books table
        is returned under the ``'movies'`` key and its id column is named
        ``movieId``.
    """
    data_dir = "Datasets/goodbooks_10k_rating_and_description"

    books = pd.read_csv(os.path.join(data_dir, "goodbooks_10k_rating_and_description.csv"))

    metadata_fields = ['genres', 'book_desc', 'title', 'book_authors', 'tags']

    def combine_book_content(row):
        # Join the metadata fields that exist and are not missing
        return ' '.join(
            str(row[field])
            for field in metadata_fields
            if field in row.index and pd.notna(row.get(field))
        )

    books['content_features'] = books.apply(combine_book_content, axis=1)

    ratings = pd.read_csv(os.path.join(data_dir, "ratings.csv"))

    # Standardize column names to match the movies dataset; rename() ignores
    # names that are not present
    ratings = ratings.rename(columns={'user_id': 'userId', 'book_id': 'movieId'})

    if len(ratings) > max_ratings:
        ratings = ratings.sample(n=max_ratings, random_state=42)

    top_book_ids = ratings['movieId'].value_counts().head(max_books).index

    # Accept the first alternative spelling of the book id column, if needed
    if 'book_id' not in books.columns:
        alias = next(
            (col for col in ['id', 'bookId', 'book_ID'] if col in books.columns),
            None,
        )
        if alias is not None:
            books = books.rename(columns={alias: 'book_id'})

    books = books[books['book_id'].isin(top_book_ids)]
    ratings = ratings[ratings['movieId'].isin(top_book_ids)]
    books = books.rename(columns={'book_id': 'movieId'})

    # Drop users and items with fewer than 3 ratings (both counts are taken
    # before either filter is applied)
    ratings_per_user = ratings['userId'].value_counts()
    ratings_per_item = ratings['movieId'].value_counts()
    kept_users = ratings_per_user[ratings_per_user >= 3].index
    kept_items = ratings_per_item[ratings_per_item >= 3].index

    is_dense = ratings['userId'].isin(kept_users) & ratings['movieId'].isin(kept_items)
    ratings = ratings[is_dense]

    # Keep only books that still have at least one rating
    books = books[books['movieId'].isin(ratings['movieId'].unique())]

    print(f"Final: {len(ratings):,} ratings, {len(books):,} books, {len(ratings['userId'].unique()):,} users")
    return {'ratings': ratings, 'movies': books}


class BeyondAccuracyMetrics:
    """Novelty, diversity and serendipity scores for recommendation lists.

    Every ``recommended_items`` argument is an iterable of
    ``(item_id, predicted_rating)`` pairs.
    """

    def __init__(self, items_df, ratings_df):
        """Precompute popularity counts and a content similarity matrix.

        Args:
            items_df: DataFrame with ``movieId`` and ``content_features``.
            ratings_df: DataFrame with a ``movieId`` column; one row per rating.
        """
        self.items_df = items_df
        self.item_popularity = ratings_df['movieId'].value_counts().to_dict()
        self.total_ratings = len(ratings_df)

        # TF-IDF over item text -> dense item-item cosine similarity matrix
        text = items_df['content_features'].fillna('').astype(str)
        tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
        self.content_similarity = cosine_similarity(tfidf.fit_transform(text))
        # Row/column position of each item in the similarity matrix
        self.item_to_idx = dict(zip(items_df['movieId'], range(len(items_df))))

    def novelty(self, recommended_items):
        """Mean of -log(item rating share); items with no ratings count as 1 rating.

        Returns 0.0 for an empty list.
        """
        scores = [
            -np.log(max(self.item_popularity.get(item_id, 1) / self.total_ratings, 1e-10))
            for item_id, _ in recommended_items
        ]
        if not scores:
            return 0.0
        return np.mean(scores)

    def diversity(self, recommended_items):
        """1 minus the mean pairwise content similarity of the known items.

        Returns 0.0 when fewer than two recommended items are known.
        """
        positions = [
            self.item_to_idx[item_id]
            for item_id, _ in recommended_items
            if item_id in self.item_to_idx
        ]
        if len(positions) < 2:
            return 0.0

        # Every unordered pair once, first item before second
        pair_similarities = [
            self.content_similarity[first, second]
            for offset, first in enumerate(positions)
            for second in positions[offset + 1:]
        ]
        if not pair_similarities:
            return 0.0
        return 1.0 - np.mean(pair_similarities)

    def serendipity(self, user_id, recommended_items, user_history):
        """Mean of unexpectedness x relevance over the known recommended items.

        Unexpectedness is 1 minus the highest content similarity to any known
        item in ``user_history``; relevance is the predicted rating rescaled
        from 1-5 to 0-1 (floored at 0). ``user_id`` is not used. Returns 0.0
        when no recommended item is known.
        """
        scores = []

        for item_id, predicted_rating in recommended_items:
            item_idx = self.item_to_idx.get(item_id)
            if item_idx is None:
                continue

            # Highest similarity between this item and anything already seen
            closest = 0.0
            for seen_item in user_history:
                seen_idx = self.item_to_idx.get(seen_item)
                if seen_idx is not None:
                    closest = max(closest, self.content_similarity[item_idx, seen_idx])

            relevance = max(0, (predicted_rating - 1.0) / 4.0)
            scores.append((1.0 - closest) * relevance)

        if not scores:
            return 0.0
        return np.mean(scores)


# MODEL TRAINING 

def train_models(data_dict, dataset_type='movies'):
    """Train the three base recommenders plus two hybrids for one dataset.

    Args:
        data_dict: dict with a ``'ratings'`` DataFrame and an items DataFrame
            under ``'movies'``.
        dataset_type: suffix appended to every key of the returned dict.

    Returns:
        dict mapping ``'<model>_<dataset_type>'`` to a trained model. A base
        model whose training raises is reported and left out, and a hybrid is
        only added when both of its base models are present.
    """
    print(f"Training models for {dataset_type}...")

    ratings_df = data_dict['ratings']
    items_df = data_dict['movies']

    # 80/20 split; only the training part is used in this function
    train_ratings, _ = train_test_split(ratings_df, test_size=0.2, random_state=42)

    svd_key = f'svd_{dataset_type}'
    itemcf_key = f'itemcf_{dataset_type}'
    content_key = f'content_{dataset_type}'

    models = {}

    # SVD
    try:
        svd_model = BiasedSVDRecommender(n_components=200)
        svd_model.fit(train_ratings)
    except Exception as e:
        print(f"SVD failed: {e}")
    else:
        models[svd_key] = svd_model

    # ItemCF
    try:
        itemcf_model = ItemBasedCF(k=50)
        itemcf_model.fit(train_ratings)
    except Exception as e:
        print(f"ItemCF failed: {e}")
    else:
        models[itemcf_key] = itemcf_model

    # Content
    try:
        content_model = ContentBasedRecommender()
        content_model.fit(items_df, train_ratings)
    except Exception as e:
        print(f"Content failed: {e}")
    else:
        models[content_key] = content_model

    itemcf_ready = itemcf_key in models

    # ItemCF+Content hybrid: 30% ItemCF, 70% content
    if itemcf_ready and content_key in models:
        models[f'itemcf_content_{dataset_type}'] = HybridRecommender(
            [('itemcf', models[itemcf_key]), ('content', models[content_key])],
            [0.3, 0.7],
        )

    # SVD+ItemCF hybrid: 30% SVD, 70% ItemCF
    if svd_key in models and itemcf_ready:
        models[f'svd_itemcf_{dataset_type}'] = HybridRecommender(
            [('svd', models[svd_key]), ('itemcf', models[itemcf_key])],
            [0.3, 0.7],
        )

    print(f"Training complete: {len(models)} models")
    return models

# THESIS PLOTS 

def create_thesis_plots(movies_results, books_results):
    """Draw, save and show the 2x2 movies-vs-books comparison figure.

    Panels: Precision@10 per model, RMSE per model, per-model Precision@10
    difference (books minus movies) and best Precision@10 per model category.

    Args:
        movies_results: dict mapping ``'<model>_movies'`` to a metrics dict
            with ``'precision@10'`` and ``'rmse'`` entries.
        books_results: same structure with ``'<model>_books'`` keys.

    Missing models or metrics are plotted as 0. The figure is written to
    ``thesis_results.png``.
    """
    models = ['SVD', 'ItemCF', 'Content', 'ItemCF+\nContent', 'SVD+\nItemCF']
    movie_keys = ['svd_movies', 'itemcf_movies', 'content_movies',
                  'itemcf_content_movies', 'svd_itemcf_movies']
    book_keys = ['svd_books', 'itemcf_books', 'content_books',
                 'itemcf_content_books', 'svd_itemcf_books']
    width = 0.35

    def metric_values(results, keys, metric):
        # One value per model, in the order of ``models``
        return [results.get(key, {}).get(metric, 0) for key in keys]

    def paired_bars(ax, positions, movie_values, book_values):
        # Movies bar left of each tick, books bar right of it
        movie_bars = ax.bar(positions - width/2, movie_values, width,
                            label='Movies', color='steelblue', alpha=0.8)
        book_bars = ax.bar(positions + width/2, book_values, width,
                           label='Books', color='coral', alpha=0.8)
        return movie_bars, book_bars

    def finish_axis(ax, ylabel, title, positions, tick_labels):
        ax.set_ylabel(ylabel, fontsize=11)
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels, fontsize=9)
        ax.legend(fontsize=10)
        ax.grid(axis='y', alpha=0.3)

    def annotate_percent(ax, bars):
        # Write each bar's height as a percentage just above it
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                    f'{height:.1%}', ha='center', va='bottom', fontsize=8)

    def best_per_category(p10):
        # Best single model, best hybrid, best collaborative-filtering model
        return [max(p10[:3]), max(p10[3:5]), max(p10[:2])]

    plt.style.use('seaborn-v0_8-darkgrid')
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('Cross-Domain Recommendation Performance Analysis', fontsize=16, fontweight='bold')
    (ax1, ax2), (ax3, ax4) = axes

    x = np.arange(len(models))

    # Top-left: Precision@10 per model
    movies_p10 = metric_values(movies_results, movie_keys, 'precision@10')
    books_p10 = metric_values(books_results, book_keys, 'precision@10')
    movie_bars, book_bars = paired_bars(ax1, x, movies_p10, books_p10)
    finish_axis(ax1, 'Precision@10', 'Model Performance by Domain', x, models)
    annotate_percent(ax1, movie_bars)
    annotate_percent(ax1, book_bars)

    # Top-right: RMSE per model (bars are not annotated)
    movies_rmse = metric_values(movies_results, movie_keys, 'rmse')
    books_rmse = metric_values(books_results, book_keys, 'rmse')
    paired_bars(ax2, x, movies_rmse, books_rmse)
    finish_axis(ax2, 'RMSE (lower is better)', 'Rating Prediction Accuracy', x, models)

    # Bottom-left: books advantage (positive = books better, negative = movies better)
    advantages = [book - movie for movie, book in zip(movies_p10, books_p10)]
    colors = ['green' if adv > 0 else 'red' for adv in advantages]

    ax3.barh(models, advantages, color=colors, alpha=0.7)
    ax3.set_xlabel('Books Advantage (Books P@10 - Movies P@10)', fontsize=11)
    ax3.set_title('Domain-Specific Performance Differences', fontsize=12, fontweight='bold')
    ax3.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
    ax3.grid(axis='x', alpha=0.3)

    for row, value in enumerate(advantages):
        # Place the label just beyond the bar end, on the side it points to
        non_negative = value >= 0
        ax3.text(value + (0.005 if non_negative else -0.005), row,
                 f'{value:+.1%}', ha='left' if non_negative else 'right',
                 va='center', fontsize=9)

    # Bottom-right: best Precision@10 per model category
    categories = ['Individual\n(Best)', 'Hybrid\n(Best)', 'CF\n(Best)']
    movies_cat = best_per_category(movies_p10)
    books_cat = best_per_category(books_p10)
    x_cat = np.arange(len(categories))

    movie_cat_bars, book_cat_bars = paired_bars(ax4, x_cat, movies_cat, books_cat)
    finish_axis(ax4, 'Best Precision@10', 'Best Performance by Model Category', x_cat, categories)
    annotate_percent(ax4, movie_cat_bars)
    annotate_percent(ax4, book_cat_bars)

    plt.tight_layout()
    plt.savefig('thesis_results.png', dpi=300, bbox_inches='tight')
    print("\nPlot saved as 'thesis_results.png'")
    plt.show()


def create_simple_comparison_plot(movies_results, books_results):
    """Draw, save and show a single grouped-bar chart of Precision@10 per model.

    Args:
        movies_results: dict mapping ``'<model>_movies'`` to a metrics dict
            with a ``'precision@10'`` entry.
        books_results: same structure with ``'<model>_books'`` keys.

    Missing models or metrics are plotted as 0. The figure is written to
    ``thesis_simple_comparison.png``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Model order chosen for presentation (ordered by performance)
    models = ['Content', 'ItemCF+Content', 'ItemCF', 'SVD+ItemCF', 'SVD']
    movie_keys = ['content_movies', 'itemcf_content_movies', 'itemcf_movies',
                  'svd_itemcf_movies', 'svd_movies']
    book_keys = ['content_books', 'itemcf_content_books', 'itemcf_books',
                 'svd_itemcf_books', 'svd_books']

    movies_p10 = [movies_results.get(key, {}).get('precision@10', 0) for key in movie_keys]
    books_p10 = [books_results.get(key, {}).get('precision@10', 0) for key in book_keys]

    x = np.arange(len(models))
    width = 0.35

    # Movies bar left of each tick, books bar right of it
    ax.bar(x - width/2, movies_p10, width, label='Movies Domain', color='#2E86AB', alpha=0.85)
    ax.bar(x + width/2, books_p10, width, label='Books Domain', color='#A23B72', alpha=0.85)

    ax.set_ylabel('Precision@10', fontsize=13, fontweight='bold')
    ax.set_xlabel('Recommendation Model', fontsize=13, fontweight='bold')
    ax.set_title('Cross-Domain Recommendation Performance', fontsize=15, fontweight='bold', pad=20)
    ax.set_xticks(x)
    ax.set_xticklabels(models, fontsize=11)
    ax.legend(fontsize=12, loc='upper right')
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    def as_percent(value, _position):
        # Show y-axis ticks as whole percentages
        return f'{value:.0%}'

    ax.yaxis.set_major_formatter(plt.FuncFormatter(as_percent))

    plt.tight_layout()
    plt.savefig('thesis_simple_comparison.png', dpi=300, bbox_inches='tight')
    print("\nSimple plot saved as 'thesis_simple_comparison.png'")
    plt.show()

# EVALUATION 

def precision_at_k(y_true, y_pred, k=10, threshold=4.0):
    """Precision@k: share of the k highest-scored items that are relevant.

    Args:
        y_true: actual ratings (any sequence, including a pandas Series with
            an arbitrary index); aligned by position with ``y_pred``.
        y_pred: predicted scores used for ranking.
        k: cut-off. The count is always divided by ``k``, even when fewer
            than ``k`` items are available.
        threshold: an item is relevant when its actual rating is >= this.

    Returns:
        A float in [0, 1]; 0.0 when ``k`` is not positive or input is empty.
    """
    if k <= 0:
        return 0.0

    # Work on plain arrays so lookups are positional, never label-based
    actual = np.asarray(y_true)
    top_k_indices = np.argsort(np.asarray(y_pred))[::-1][:k]
    relevant_in_topk = int(np.count_nonzero(actual[top_k_indices] >= threshold))
    return relevant_in_topk / k


def recall_at_k(y_true, y_pred, k=10, threshold=4.0):
    """Recall@k: share of all relevant items that appear in the top k.

    Args:
        y_true: actual ratings (any sequence, including a pandas Series with
            an arbitrary index); aligned by position with ``y_pred``.
        y_pred: predicted scores used for ranking.
        k: cut-off.
        threshold: an item is relevant when its actual rating is >= this.

    Returns:
        A float in [0, 1]; 0.0 when there are no relevant items or ``k`` is
        not positive.
    """
    # Work on plain arrays so lookups are positional, never label-based
    is_relevant = np.asarray(y_true) >= threshold
    relevant_items = int(np.count_nonzero(is_relevant))
    if relevant_items == 0 or k <= 0:
        return 0.0

    top_k_indices = np.argsort(np.asarray(y_pred))[::-1][:k]
    relevant_in_topk = int(np.count_nonzero(is_relevant[top_k_indices]))
    return relevant_in_topk / relevant_items


def _rating_rmse(model, user_id, user_test):
    # RMSE of one model over one user's held-out ratings.
    # Pairs whose prediction raises or is NaN/inf are skipped; returns None
    # when fewer than 3 usable predictions remain.
    predictions = []
    actuals = []
    for item_id, rating in zip(user_test['movieId'], user_test['rating']):
        try:
            pred = model.predict(user_id, item_id)
            usable = not (np.isnan(pred) or np.isinf(pred))
        except Exception:
            # Best effort: a pair the model cannot score is simply left out
            continue
        if usable:
            predictions.append(pred)
            actuals.append(rating)

    if len(predictions) < 3:
        return None
    errors = np.array(actuals) - np.array(predictions)
    return np.sqrt(np.mean(errors ** 2))


def _ranking_precision_recall(model, user_id, user_test, candidates):
    # Precision@10 / recall@10 for one user on a list made of up to 50 sampled
    # negatives plus the user's held-out test items.
    # Returns (precision, recall), or None when fewer than 10 items were ranked.

    # First held-out rating per item (mirrors taking the first matching row)
    held_out_rating = {}
    for item_id, rating in zip(user_test['movieId'], user_test['rating']):
        held_out_rating.setdefault(item_id, rating)

    # Negatives must be truly unrated: `candidates` only excludes training
    # items, so held-out test items are removed here. Otherwise a test item
    # could be drawn as a "negative" and appear twice in the ranked list.
    negative_pool = [item for item in candidates if item not in held_out_rating]
    n_negatives = min(50, len(negative_pool))
    if n_negatives:
        negatives = list(np.random.choice(negative_pool, size=n_negatives, replace=False))
    else:
        negatives = []

    eval_predictions = []
    eval_actuals = []
    for item_id in negatives + user_test['movieId'].tolist():
        try:
            pred = model.predict(user_id, item_id)
            if np.isnan(pred) or np.isinf(pred):
                pred = 3.0
        except Exception:
            # Unscorable item: neutral score, treated as not relevant
            pred, actual = 3.0, 2.5
        else:
            # Assume unrated items are not relevant
            actual = held_out_rating.get(item_id, 2.5)
        eval_predictions.append(pred)
        eval_actuals.append(actual)

    if len(eval_predictions) < 10:
        return None
    y_true = np.array(eval_actuals)
    y_pred = np.array(eval_predictions)
    return precision_at_k(y_true, y_pred, 10), recall_at_k(y_true, y_pred, 10)


def _top_recommendations(model, user_id, candidates):
    # Top-10 (item_id, score) pairs among the first 100 candidates.
    # Items that cannot be scored, or whose score is NaN/inf, are left out so
    # that the sort order is well defined.
    scored = []
    for item_id in candidates[:100]:
        try:
            score = model.predict(user_id, item_id)
            if not np.isfinite(score):
                continue
        except Exception:
            continue
        scored.append((item_id, score))
    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:10]


def evaluate_models(models, train_data, test_data, items_df, sample_size=200):
    # Evaluate all models with accuracy and beyond-accuracy metrics.
    #
    # models      -- dict: model name -> object exposing predict(user_id, item_id)
    # train_data  -- ratings frame ('userId', 'movieId' are read) used to find
    #                what each user has already seen
    # test_data   -- held-out ratings frame ('userId', 'movieId', 'rating')
    # items_df    -- item catalogue with a 'movieId' column
    # sample_size -- maximum number of users (with >= 5 test ratings) to evaluate
    #
    # Returns dict: model name -> {'rmse', 'precision@10', 'recall@10',
    # 'novelty', 'diversity', 'serendipity', 'users_tested'}. 'rmse' is inf and
    # the other averages are 0.0 when no user produced a score for that metric;
    # 'users_tested' counts users that contributed an RMSE value.
    print(f"Evaluating {len(models)} models with {sample_size} users...")

    beyond_metrics = BeyondAccuracyMetrics(items_df, train_data)

    # Only use users with sufficient test ratings
    user_test_counts = test_data['userId'].value_counts()
    valid_users = user_test_counts[user_test_counts >= 5].index

    if len(valid_users) > sample_size:
        sample_users = np.random.choice(valid_users, size=sample_size, replace=False)
    else:
        sample_users = valid_users

    # Per-user inputs do not depend on the model, so build them once instead
    # of once per (model, user) pair.
    all_items = items_df['movieId'].unique()
    user_contexts = []
    for user_id in sample_users:
        user_test = test_data[test_data['userId'] == user_id]
        user_seen = set(train_data[train_data['userId'] == user_id]['movieId'])
        candidates = [item for item in all_items if item not in user_seen]
        user_contexts.append((user_id, user_test, user_seen, candidates))

    results = {}
    for model_name, model in models.items():
        print(f"Testing {model_name}...")

        rmse_scores = []
        precision_scores = []
        recall_scores = []
        novelty_scores = []
        diversity_scores = []
        serendipity_scores = []

        for user_id, user_test, user_seen, candidates in user_contexts:
            # Rating prediction evaluation
            rmse = _rating_rmse(model, user_id, user_test)
            if rmse is not None:
                rmse_scores.append(rmse)

            # Too few unseen items to build a meaningful ranking task
            if len(candidates) < 30:
                continue

            # Ranking evaluation: mix test items with random negatives
            ranking = _ranking_precision_recall(model, user_id, user_test, candidates)
            if ranking is not None:
                precision_scores.append(ranking[0])
                recall_scores.append(ranking[1])

            # Beyond-accuracy metrics on the model's top-10 recommendations
            recommendations = _top_recommendations(model, user_id, candidates)
            if len(recommendations) < 5:
                continue
            try:
                novelty = beyond_metrics.novelty(recommendations)
                diversity = beyond_metrics.diversity(recommendations)
                serendipity = beyond_metrics.serendipity(user_id, recommendations, list(user_seen))
            except Exception:
                # Best effort: skip this user for all three metrics so the
                # three score lists stay aligned
                continue
            novelty_scores.append(novelty)
            diversity_scores.append(diversity)
            serendipity_scores.append(serendipity)

        results[model_name] = {
            'rmse': np.mean(rmse_scores) if rmse_scores else float('inf'),
            'precision@10': np.mean(precision_scores) if precision_scores else 0.0,
            'recall@10': np.mean(recall_scores) if recall_scores else 0.0,
            'novelty': np.mean(novelty_scores) if novelty_scores else 0.0,
            'diversity': np.mean(diversity_scores) if diversity_scores else 0.0,
            'serendipity': np.mean(serendipity_scores) if serendipity_scores else 0.0,
            'users_tested': len(rmse_scores)
        }

    return results


def display_results(results, title="Results"):
    # Print a fixed-width table of evaluation metrics, one row per model,
    # ordered by precision@10 (best first).
    #
    # results -- dict: model name -> metrics dict with keys 'rmse',
    #            'precision@10', 'recall@10', 'novelty', 'diversity',
    #            'serendipity'
    # title   -- heading printed above the table
    print(f"\n{title}")
    print("=" * 100)
    # Header and rows share the same column widths; 'Diversity' and
    # 'Serendipity' need 9 and 11 characters so the separators line up.
    print(f"{'Model':<25} | {'RMSE':<8} | {'Prec@10':<8} | {'Rec@10':<8} | "
          f"{'Novelty':<7} | {'Diversity':<9} | {'Serendipity':<11}")
    print("-" * 100)

    # Sort by precision@10 for easier comparison
    sorted_models = sorted(results.items(), key=lambda x: x[1]['precision@10'], reverse=True)

    for model_name, metrics in sorted_models:
        rmse = metrics['rmse']
        # An infinite RMSE marks a model with no RMSE samples; show it as N/A
        rmse_str = f"{rmse:.3f}" if rmse != float('inf') else "N/A"

        print(f"{model_name:<25} | {rmse_str:<8} | {metrics['precision@10']:<8.3f} | "
              f"{metrics['recall@10']:<8.3f} | {metrics['novelty']:<7.2f} | "
              f"{metrics['diversity']:<9.3f} | {metrics['serendipity']:<11.3f}")


# TRANSFER LEARNING 

def run_transfer_learning():
    # Transfer-learning experiment across the movies and books domains:
    # load both datasets, build the simulated user overlap, train per-domain
    # models, score them natively on the overlapping users, then score each
    # domain's models on the other domain. Returns the four result sets.
    print("Transfer Learning Analysis")
    print("=" * 40)

    # Both domains, each capped to its own size limits
    movies_data = load_dataset('movies', max_items=2000, max_ratings=200000)
    books_data = load_dataset('books', max_items=1500, max_ratings=150000)

    # Simulated cross-domain user overlap
    movies_transfer, books_transfer = create_realistic_cross_domain_overlap(
        movies_data, books_data, min_overlap_users=100
    )

    print("\nTraining models...")
    movies_models = train_models(movies_transfer, 'movies')
    books_models = train_models(books_transfer, 'books')

    print("\nEvaluating native performance on cross-domain users...")

    def split_overlap_ratings(domain):
        # Keep only ratings from cross-domain users, then do a fixed-seed 80/20 split
        ratings = domain['ratings']
        overlap_only = ratings[ratings['userId'].isin(domain['cross_domain_users'])]
        return train_test_split(overlap_only, test_size=0.2, random_state=42)

    movies_train, movies_test = split_overlap_ratings(movies_transfer)
    books_train, books_test = split_overlap_ratings(books_transfer)

    # The item catalogue is read from the 'movies' key in both domains
    movies_native = evaluate_models(movies_models, movies_train, movies_test, movies_transfer['movies'], 60)
    books_native = evaluate_models(books_models, books_train, books_test, books_transfer['movies'], 60)

    # Transfer: source-domain models applied to the target domain
    print("\nEvaluating transfer learning...")
    movies_to_books = evaluate_proper_transfer_fixed(
        movies_models, movies_transfer, books_transfer, 'movies_to_books', precision_at_k
    )
    books_to_movies = evaluate_proper_transfer_fixed(
        books_models, books_transfer, movies_transfer, 'books_to_movies', precision_at_k
    )

    display_transfer_results(movies_native, books_native, movies_to_books, books_to_movies)

    outcome = {}
    outcome['movies_native'] = movies_native
    outcome['books_native'] = books_native
    outcome['movies_to_books'] = movies_to_books
    outcome['books_to_movies'] = books_to_movies
    return outcome


# MAIN PIPELINE 

def run_complete_pipeline(dataset_type='movies'):
    # End-to-end run for a single domain: load the dataset, train every model,
    # print one sample prediction per model, evaluate on a held-out split and
    # print the results table. Returns (models, results).
    print(f"Running pipeline for {dataset_type}...")

    data_dict = load_dataset(dataset_type, max_items=3000, max_ratings=300000)
    models = train_models(data_dict, dataset_type)

    # Save trained models for later use (commented out for space preservation)
    # os.makedirs("Models", exist_ok=True)
    # with open(f"Models/models_{dataset_type}.pkl", 'wb') as f:
        # pickle.dump(models, f)

    # Fixed-seed 80/20 split used for evaluation.
    # NOTE(review): the models above were trained from data_dict before this
    # split is made -- confirm train_models holds out the same test rows.
    train_data, test_data = train_test_split(data_dict['ratings'], test_size=0.2, random_state=42)

    # Sanity check: one prediction per model for the first test rating
    print("\nModel Prediction Sanity Check:")
    probe_user = test_data['userId'].iloc[0]
    probe_item = test_data['movieId'].iloc[0]

    for name, model in models.items():
        try:
            line = f"{name}: {model.predict(probe_user, probe_item):.3f}"
        except Exception as e:
            line = f"{name}: ERROR - {e}"
        print(line)

    # The item catalogue lives under the 'movies' key for every dataset type
    results = evaluate_models(models, train_data, test_data, data_dict['movies'])

    display_results(results, f"{dataset_type.title()} Results")

    return models, results


def run_cross_domain_comparison():
    # Run the full pipeline on movies and on books, report the model with the
    # highest precision@10 in each domain, and produce the comparison plots.
    # Returns {'movies': <movies results>, 'books': <books results>}.
    print("Cross-Domain Comparison")
    print("=" * 40)

    # Only the metrics are needed here; the trained models are discarded
    _, movies_results = run_complete_pipeline('movies')
    _, books_results = run_complete_pipeline('books')

    def top_by_precision(results):
        # (model name, metrics) of the entry with the highest precision@10
        return max(results.items(), key=lambda entry: entry[1]['precision@10'])

    best_movies_name, best_movies_metrics = top_by_precision(movies_results)
    best_books_name, best_books_metrics = top_by_precision(books_results)

    print("\nBEST MODELS:")
    print(f"Movies: {best_movies_name} (P@10: {best_movies_metrics['precision@10']:.3f})")
    print(f"Books:  {best_books_name} (P@10: {best_books_metrics['precision@10']:.3f})")

    # Figures for the paper
    print("\nGenerating plots...")
    create_thesis_plots(movies_results, books_results)
    create_simple_comparison_plot(movies_results, books_results)

    return dict(movies=movies_results, books=books_results)


# SIMPLE MENU 

def run_menu():
    # Show the experiment menu, read one choice from stdin and run it.
    # Returns whatever the chosen experiment returns; None for exit or for an
    # unrecognised choice.
    banner = (
        "\nDomain-Dependent Hybrid Recommender",
        "=" * 50,
        "1. Movies pipeline",
        "2. Books pipeline",
        "3. Cross-domain comparison",
        "4. Transfer learning",
        "0. Exit",
    )
    for line in banner:
        print(line)

    choice = input("\nChoice: ").strip()

    # Lambdas keep each experiment lazy: nothing runs until it is selected
    experiments = {
        '1': lambda: run_complete_pipeline('movies'),
        '2': lambda: run_complete_pipeline('books'),
        '3': lambda: run_cross_domain_comparison(),
        '4': lambda: run_transfer_learning(),
    }
    if choice in experiments:
        return experiments[choice]()

    print("Done!" if choice == '0' else "Invalid choice")
    return None


if __name__ == "__main__":
    # Entry point when executed directly: print the ready banner, then open
    # the interactive menu once (run_menu blocks on input()).
    print("\nReady! Call run_menu() to start")
    run_menu()    

Domain-Dependent Hybrid Recommender System

Ready! Call run_menu() to start

Domain-Dependent Hybrid Recommender
1. Movies pipeline
2. Books pipeline
3. Cross-domain comparison
4. Transfer learning
0. Exit
Transfer Learning Analysis
Loading movies dataset...
Final: 108,005 ratings, 1,996 movies, 22,644 users
Loading books dataset...
Final: 52,045 ratings, 1,491 books, 13,915 users
Creating realistic cross-domain overlap...
  Threshold 3: Movies=22644, Books=13915
Target overlap: 6957 users
Created 6957 cross-domain users

Training models...
Training models for movies...
Training SVD...
Users: 22,571, Items: 1,994
SVD training complete
Training ItemCF...
ItemCF training complete
Training Content-based model...
Created 17199 user profiles
Training complete: 5 models
Training models for books...
Training SVD...
Users: 13,852, Items: 1,500
SVD training complete
Training ItemCF...
ItemCF training complete
Training Content-based model...
Created 9202 user profiles
Training complete: 5 models

In [None]:
run_menu()


Domain-Dependent Hybrid Recommender
1. Movies pipeline
2. Books pipeline
3. Cross-domain comparison
4. Transfer learning
0. Exit
Running pipeline for books...
Loading books dataset...
Final: 210,863 ratings, 2,980 books, 40,079 users
Training models for books...
Training SVD...
Users: 39,993, Items: 3,000
SVD training complete
Training ItemCF...
ItemCF training complete
Training Content-based model...
Created 33496 user profiles
Training complete: 5 models

Model Prediction Sanity Check:
svd_books: 4.119
itemcf_books: 4.000
content_books: 4.000
itemcf_content_books: 4.000
svd_itemcf_books: 4.036
Evaluating 5 models with 200 users...
Testing svd_books...
Testing itemcf_books...
Testing content_books...
Testing itemcf_content_books...
Testing svd_itemcf_books...

Books Results
Model                     | RMSE     | P@10     | R@10     | Novel   | Divers  | Serend 
----------------------------------------------------------------------------------------------------
content_books          

({'svd_books': <__main__.ImprovedSVDRecommender at 0x19878aad000>,
  'itemcf_books': <__main__.ItemBasedCF at 0x19878aacf70>,
  'content_books': <__main__.ContentBasedRecommender at 0x19878aada80>,
  'itemcf_content_books': <__main__.HybridRecommender at 0x19878aaed70>,
  'svd_itemcf_books': <__main__.HybridRecommender at 0x19878aafac0>},
 {'svd_books': {'rmse': np.float64(0.9299164993938026),
   'precision@10': np.float64(0.058024691358024696),
   'recall@10': np.float64(0.15462962962962964),
   'novelty': np.float64(6.547748831576749),
   'diversity': np.float64(0.7515024786421108),
   'serendipity': np.float64(0.47863696897348773),
   'users_tested': 162},
  'itemcf_books': {'rmse': np.float64(0.9186402612359683),
   'precision@10': np.float64(0.09506172839506172),
   'recall@10': np.float64(0.2598765432098765),
   'novelty': np.float64(5.914783038518696),
   'diversity': np.float64(0.5579211387065663),
   'serendipity': np.float64(0.5696180151738128),
   'users_tested': 162},
  'co