In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/-spotify-tracks-dataset/dataset.csv
/kaggle/input/spotify-challenge/md5sums
/kaggle/input/spotify-challenge/README.md
/kaggle/input/spotify-challenge/license.txt
/kaggle/input/spotify-challenge/stats.txt
/kaggle/input/spotify-challenge/src/check.py
/kaggle/input/spotify-challenge/src/descriptions.py
/kaggle/input/spotify-challenge/src/stats.py
/kaggle/input/spotify-challenge/src/show.py
/kaggle/input/spotify-challenge/src/deeper_stats.py
/kaggle/input/spotify-challenge/src/print.py
/kaggle/input/spotify-challenge/data/mpd.slice.35000-35999.json
/kaggle/input/spotify-challenge/data/mpd.slice.98000-98999.json
/kaggle/input/spotify-challenge/data/mpd.slice.405000-405999.json
/kaggle/input/spotify-challenge/data/mpd.slice.601000-601999.json
/kaggle/input/spotify-challenge/data/mpd.slice.567000-567999.json
/kaggle/input/spotify-challenge/data/mpd.slice.421000-421999.json
/kaggle/input/spotify-challenge/data/mpd.slice.983000-983999.json
/kaggle/input/spotify-challenge/data/mpd.

In [3]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')

        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main(): # <--- This is the definition you need to see
    """Main function to demonstrate usage."""
    # Define constants
    MPD_DATA_DIR = '/kaggle/input/spotify-million-playlist-dataset' # Adjust if your data path is different
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    ITEM_FEATURES_PATH = '/kaggle/input/spotify-dataset-196k-tracks/dataset_with_features.csv' # Adjust if your features path is different

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load interaction data. Exiting.")
        return

    # Load item features (aligned later)
    recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix to build mappings BEFORE splitting
    # This ensures consistent mapping for all users/items in the loaded data
    print("\n--- Creating Interaction Matrix and Mappings ---")
    recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
    print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
    print(f"   Number of users: {len(recommender.user_id_map)}")
    print(f"   Number of items: {len(recommender.item_id_map)}")

    # Calculate item popularity for negative sampling and diversity metric
    recommender._calculate_item_popularity()
    print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")


    # Align features AFTER mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
         print("\n--- Aligning Item Features with Mappings ---")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}



    # --- Split Data ---
    # Splitting the original interactions into training and validation for model training
    # NOTE: A standard recommender evaluation splits by user (some users in train, some in test)
    # or by time (interactions before a date in train, after in test).
    # For simplicity and demonstration with BPR, we'll split interactions randomly here,
    # which is less realistic for evaluation but suitable for training demonstration.
    # The user study data serves as a more realistic 'new' test set.
    print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

    # Use mapped indices for splitting to ensure consistency
    interactions_df_mapped = interactions_df.copy()
    interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
    interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

    # Filter out any interactions that failed to map (should be none if using mapped items)
    interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

    # Perform the split
    train_df_mapped, val_df_mapped = train_test_split(
        interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
        test_size=1 - TRAIN_VAL_SPLIT_RATIO,
        random_state=42,
        shuffle=True # Shuffle before splitting
    )

    print(f"   Training interactions: {len(train_df_mapped)}")
    print(f"   Validation interactions: {len(val_df_mapped)}")


    # --- Train Hybrid NCF Model ---
    recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                       epochs=HYBRID_NCF_EPOCHS,
                                       batch_size=HYBRID_NCF_BATCH_SIZE,
                                       early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

    if recommender.hybrid_ncf_model is None:
         print("\nModel training failed or skipped. Cannot proceed with evaluation.")
         return

    # --- Generate and/or Evaluate Model on User Study Data ---
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
         print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
         try:
             user_study_df = pd.read_csv(USER_STUDY_DATA_PATH) # Corrected variable name here
             print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
             # Filter user study data to only include items that the model knows about
             original_items_in_model = set(recommender.item_id_map.keys())
             user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
             print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

         except Exception as e:
             print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
             user_study_df = pd.DataFrame() # Reset if loading fails


    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
         print("Generating synthetic user study data...")
         user_study_df = generate_synthetic_user_study_data(
             recommender,
             num_users=NUM_SYNTHETIC_USERS,
             interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
             output_filepath=USER_STUDY_DATA_PATH
         )
         # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
         if not user_study_df.empty:
              original_items_in_model = set(recommender.item_id_map.keys())
              user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
              print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    if not user_study_df.empty:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    else: # Corrected indentation for this else block
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 10:19:47.371193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747304387.554023      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747304387.609757      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---
No MPD slice files found in /kaggle/input/spotify-million-playlist-dataset or its numbered subdirectories with expected naming.
Please verify the 'input_dir' path and file structure.
Failed to load interaction data. Exiting.

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).
3.  **Passive Listening Logging:** The application ran in the background and passively logged every song li

In [2]:
!pip install cornac

Collecting cornac
  Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy>2.0.0 (from cornac)
  Downloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting powerlaw (from cornac)
  Downloading powerlaw-1.5-py3-none-any.whl.metadata (9.3 kB)
Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl (31.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m97.9 MB/s[0m eta [36m0:00:00[0

In [4]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Check if the input directory exists
        if not os.path.exists(input_dir):
            print(f"Error: MPD data directory not found at {input_dir}. Please update MPD_DATA_DIR.")
            return pd.DataFrame()

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        # Check if the features file exists
        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Please update ITEM_FEATURES_PATH. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')

        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity

    def _align_item_features_with_mapping(self):
        """Aligns internal item feature arrays with the item_id_map."""
        if self.item_features_df is None or not self.item_id_map:
             print("   Cannot align features: Item features not loaded or item map not created.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create empty arrays/dicts with the size of the item map
        self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_mapped_items, 0), dtype=np.float32)
        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use the vocabulary size + 1 for a potential unknown/padding value if needed.
             # For factorized data, codes are 0 to vocab_size - 1. Using 0 as default fill.
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Map original item URIs in the features DataFrame to their internal IDs
        self.item_features_df['item_id_int'] = self.item_features_df['track_uri'].map(self.item_id_map)

        # Filter out features for items that are not in the interaction data mappings
        features_df_aligned = self.item_features_df.dropna(subset=['item_id_int']).copy()
        features_df_aligned['item_id_int'] = features_df_aligned['item_id_int'].astype(int)

        if features_df_aligned.empty:
             print("   No item features found for items present in the interaction data mappings.")
             # Keep initialized empty arrays/dicts
             return

        # Populate the internal feature arrays/dicts using the mapped internal IDs
        aligned_internal_ids = features_df_aligned['item_id_int'].values

        if self.num_numerical_features > 0:
             # Ensure column order matches self.numerical_feature_columns
             numerical_data = features_df_aligned[self.numerical_feature_columns].values
             self.item_internal_numerical_features[aligned_internal_ids] = numerical_data

        for col in self.categorical_feature_columns:
             # Ensure column order matches self.categorical_feature_columns
             categorical_data = features_df_aligned[col].values
             self.item_internal_categorical_features[col][aligned_internal_ids] = categorical_data

        print(f"   Successfully aligned features for {len(aligned_internal_ids)} items.")


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")

# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # IMPORTANT: Update these paths to where you have saved the datasets on your system.
    MPD_DATA_DIR = '/kaggle/input/spotify-million-playlist-dataset'  # <--- UPDATE THIS PATH
    NUM_MPD_FILES = 10  # Number of slice files to load from MPD (each is 1000 playlists)
    ITEM_FEATURES_PATH = '/kaggle/input/spotify-dataset-196k-tracks/dataset_with_features.csv'  # <--- UPDATE THIS PATH

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature

    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8  # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64  # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15  # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256  # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3  # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4  # Reduced negative samples ratio for faster training

    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True  # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50)  # Range of interactions per synthetic user

    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20  # Number of recommendations to generate
    EVALUATION_N = 10  # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)

    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001,  # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    # Check if the MPD data directory exists before attempting to load
    if not os.path.exists(MPD_DATA_DIR):
        print(f"Error: MPD data directory not found at {MPD_DATA_DIR}. Please update MPD_DATA_DIR.")
        interactions_df = pd.DataFrame() # Return empty DataFrame
    else:
        interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)


    if interactions_df.empty:
        print("Failed to load interaction data. Exiting.")
        # Still describe the methodology even if data loading failed
        describe_user_study_methodology(USER_STUDY_DATA_PATH)
        return

    # Load item features (aligned later)
    # Check if the item features file exists before attempting to load
    if not os.path.exists(ITEM_FEATURES_PATH):
        print(f"Error: Item features file not found at {ITEM_FEATURES_PATH}. Please update ITEM_FEATURES_PATH. Skipping feature loading.")
        recommender.item_features_df = None # Ensure features are marked as not loaded
    else:
        recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix to build mappings BEFORE splitting
    # This ensures consistent mapping for all users/items in the loaded data
    print("\n--- Creating Interaction Matrix and Mappings ---")
    recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
    print(f" Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
    print(f" Number of users: {len(recommender.user_id_map)}")
    print(f" Number of items: {len(recommender.item_id_map)}")

    # Calculate item popularity for negative sampling and diversity metric
    recommender._calculate_item_popularity()
    print(f" Calculated popularity for {len(recommender.item_popularity)} items.")

    # Align features AFTER mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
        print("\n--- Aligning Item Features with Mappings ---")
        recommender._align_item_features_with_mapping()
        print(f" Features aligned. Total features: {recommender.num_features}")
    else:
        print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
        recommender.num_numerical_features = 0
        recommender.num_categorical_features = 0
        recommender.num_features = 0
        recommender.item_internal_numerical_features = None
        recommender.item_internal_categorical_features = {}


    # --- Split Data ---
    # Splitting the original interactions into training and validation for model training
    # NOTE: A standard recommender evaluation splits by user (some users in train, some in test)
    # or by time (interactions before a date in train, after in test).
    # For simplicity and demonstration with BPR, we'll split interactions randomly here,
    # which is less realistic for evaluation but suitable for training demonstration.
    # The user study data serves as a more realistic 'new' test set.
    print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")
    # Use mapped indices for splitting to ensure consistency
    interactions_df_mapped = interactions_df.copy()
    interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
    interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

    # Filter out any interactions that failed to map (should be none if using mapped items)
    interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

    # Perform the split
    train_df_mapped, val_df_mapped = train_test_split(
        interactions_df_mapped[['user', 'item']],  # Use original IDs for the split results
        test_size=1 - TRAIN_VAL_SPLIT_RATIO,
        random_state=42,
        shuffle=True  # Shuffle before splitting
    )
    print(f" Training interactions: {len(train_df_mapped)}")
    print(f" Validation interactions: {len(val_df_mapped)}")

    # --- Train Hybrid NCF Model ---
    recommender.train_hybrid_ncf_model(
        train_df_mapped,
        val_df_mapped,
        epochs=HYBRID_NCF_EPOCHS,
        batch_size=HYBRID_NCF_BATCH_SIZE,
        early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE
    )

    if recommender.hybrid_ncf_model is None:
        print("\nModel training failed or skipped. Cannot proceed with evaluation.")
        # Still describe the methodology even if training failed
        describe_user_study_methodology(USER_STUDY_DATA_PATH)
        return

    # --- Generate and/or Evaluate Model on User Study Data ---
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")
    user_study_df = pd.DataFrame()  # Initialize empty DataFrame

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
        print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
        try:
            user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
            print(f" Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
            # Filter user study data to only include items that the model knows about
            original_items_in_model = set(recommender.item_id_map.keys())
            user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
            print(f" Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")
        except Exception as e:
            print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
            user_study_df = pd.DataFrame()  # Reset if loading fails

    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
        print("Generating synthetic user study data...")
        user_study_df = generate_synthetic_user_study_data(
            recommender,
            num_users=NUM_SYNTHETIC_USERS,
            interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
            output_filepath=USER_STUDY_DATA_PATH
        )
        # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
        if not user_study_df.empty:
            original_items_in_model = set(recommender.item_id_map.keys())
            user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
            print(f" Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    if not user_study_df.empty:
        print("\n--- Evaluating Model on User Study Data ---")
        # Ensure the user study dataframe contains only users and items that are in the model's mappings
        user_study_df_eval = user_study_df[
            user_study_df['user'].isin(recommender.user_id_map) &
            user_study_df['item'].isin(recommender.item_id_map)
        ].copy()
        if user_study_df_eval.empty:
             print("No valid user study interactions for evaluation after filtering by model mappings.")
        else:
             user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df_eval, n=EVALUATION_N)['Hybrid NCF']

        print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
        if 'Hybrid NCF' in user_study_test_results:
             # Pretty print the results
             for method, metrics in user_study_test_results['Hybrid NCF'].items():
                 print(f" Method: {method.replace('_', ' ').title()}")
                 for metric, value in metrics.items():
                     print(f"   {metric}: {value:.4f}")
        else:
            print("No evaluation results available.")

    else:
        print("\nNo user study data available for evaluation.")


# Call the main function when the script is executed
if __name__ == "__main__":
    main()
    # After running main, call the function to describe the methodology
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---
Error: MPD data directory not found at /kaggle/input/spotify-million-playlist-dataset. Please update MPD_DATA_DIR.
Failed to load interaction data. Exiting.

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).
3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listeni

In [5]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')

        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main(): # <--- This is the definition you need to see
    """Main function to demonstrate usage."""
    # Define constants
    MPD_DATA_DIR = '/kaggle/input/spotify-million-playlist-dataset' # Adjust if your data path is different
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    ITEM_FEATURES_PATH = '/kaggle/input/spotify-dataset-196k-tracks/dataset_with_features.csv' # Adjust if your features path is different

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load interaction data. Exiting.")
        return

    # Load item features (aligned later)
    recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix to build mappings BEFORE splitting
    # This ensures consistent mapping for all users/items in the loaded data
    print("\n--- Creating Interaction Matrix and Mappings ---")
    recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
    print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
    print(f"   Number of users: {len(recommender.user_id_map)}")
    print(f"   Number of items: {len(recommender.item_id_map)}")

    # Calculate item popularity for negative sampling and diversity metric
    recommender._calculate_item_popularity()
    print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")


    # Align features AFTER mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
         print("\n--- Aligning Item Features with Mappings ---")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}



    # --- Split Data ---
    # Splitting the original interactions into training and validation for model training
    # NOTE: A standard recommender evaluation splits by user (some users in train, some in test)
    # or by time (interactions before a date in train, after in test).
    # For simplicity and demonstration with BPR, we'll split interactions randomly here,
    # which is less realistic for evaluation but suitable for training demonstration.
    # The user study data serves as a more realistic 'new' test set.
    print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

    # Use mapped indices for splitting to ensure consistency
    interactions_df_mapped = interactions_df.copy()
    interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
    interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

    # Filter out any interactions that failed to map (should be none if using mapped items)
    interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

    # Perform the split
    train_df_mapped, val_df_mapped = train_test_split(
        interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
        test_size=1 - TRAIN_VAL_SPLIT_RATIO,
        random_state=42,
        shuffle=True # Shuffle before splitting
    )

    print(f"   Training interactions: {len(train_df_mapped)}")
    print(f"   Validation interactions: {len(val_df_mapped)}")


    # --- Train Hybrid NCF Model ---
    recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                       epochs=HYBRID_NCF_EPOCHS,
                                       batch_size=HYBRID_NCF_BATCH_SIZE,
                                       early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

    if recommender.hybrid_ncf_model is None:
         print("\nModel training failed or skipped. Cannot proceed with evaluation.")
         return

    # --- Generate and/or Evaluate Model on User Study Data ---
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
         print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
         try:
             user_study_df = pd.read_csv(USER_STUDY_DATA_PATH) # Corrected variable name here
             print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
             # Filter user study data to only include items that the model knows about
             original_items_in_model = set(recommender.item_id_map.keys())
             user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
             print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

         except Exception as e:
             print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
             user_study_df = pd.DataFrame() # Reset if loading fails


    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
         print("Generating synthetic user study data...")
         user_study_df = generate_synthetic_user_study_data(
             recommender,
             num_users=NUM_SYNTHETIC_USERS,
             interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
             output_filepath=USER_STUDY_DATA_PATH
         )
         # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
         if not user_study_df.empty:
              original_items_in_model = set(recommender.item_id_map.keys())
              user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
              print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    if not user_study_df.empty:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    else: # Corrected indentation for this else block
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---
No MPD slice files found in /kaggle/input/spotify-million-playlist-dataset or its numbered subdirectories with expected naming.
Please verify the 'input_dir' path and file structure.
Failed to load interaction data. Exiting.

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).
3.  **Passive Listening Logging:** The application ran in the background and passively logged every song li

In [6]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    MPD_DATA_DIR = '/kaggle/input/spotify-million-playlist-dataset' # Adjust if your data path is different
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    ITEM_FEATURES_PATH = '/kaggle/input/spotify-dataset-196k-tracks/dataset_with_features.csv' # Adjust if your features path is different

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---
No MPD slice files found in /kaggle/input/spotify-million-playlist-dataset or its numbered subdirectories with expected naming.
Please verify the 'input_dir' path and file structure.
Failed to load main interaction data from MPD. Skipping model training.
   No item features loaded either. Cannot create any mappings or generate synthetic data.

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone

In [7]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 10000 users and 170691 items.


AttributeError: 'SpotifyRecommenderSystem' object has no attribute '_align_item_features_with_mapping'

In [8]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            #this is as oefaeverfejk
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        all_mapped_items_internal = np.array(list(self.item_id_map.keys()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 10/10 [00:02<00:00,  4.03it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 10000 users and 170691 items.
   Aligning features for 170691 mapped items...
   Successfully aligned features for 4140 out of 170691 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix cr

I0000 00:00:1747305822.583879      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...


ValueError: invalid literal for int() with base 10: 'spotify:track:23khhseCLQqVMCIT1WMAns'

In [None]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found. Cannot create track_uri. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_numerical_features = None
                 self.item_internal_categorical_features = {}
                 self.num_numerical_features = 0
                 self.num_categorical_features = 0
                 self.num_features = 0
                 return

            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(item_popularity_scores) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    NUM_MPD_FILES = 10 # Number of slice files to load from MPD (each is 1000 playlists)
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    HYBRID_NCF_EMBEDDING_SIZE = 64 # Embedding size for NCF model
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    HYBRID_NCF_BATCH_SIZE = 256 # Reduced batch size
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    BPR_NEG_SAMPLES_RATIO = 4 # Reduced negative samples ratio for faster training


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 11:21:16.459320: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747308076.656074      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747308076.712404      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 10000 users and 170691 items.
   Aligning features for 170691 mapped items...
   Successfully aligned features for 4140 out of 170691 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix cr

I0000 00:00:1747308100.501388      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...
   Generating BPR samples (4 negatives per positive) from 170691 candidate items (popularity-biased)...


                                                                             

In [1]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    all_item_internal_ids = np.array(list(recommender_system.id_item_map.keys()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 32 # Reduced from 64
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 128 # Reduced from 256
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 2 # Reduced from 4


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 11:38:39.668818: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747309119.864052     107 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747309119.923337     107 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:01<00:00,  2.39it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 3000 users and 74917 items.
   Aligning features for 74917 mapped items...
   Successfully aligned features for 2795 out of 74917 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix create

I0000 00:00:1747309131.582322     107 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...


NameError: name 'all_item_internal_ids' is not defined

In [2]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical feature columns
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 32 # Reduced from 64
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 128 # Reduced from 256
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 2 # Reduced from 4


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:00<00:00,  3.83it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 3000 users and 74917 items.
   Aligning features for 74917 mapped items...
   Successfully aligned features for 2795 out of 74917 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix create

                                                                            


Preparing validation data for BPR...
   Generating BPR samples (2 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            

   Prepared 27897118 BPR training samples.
   Prepared 1860842 BPR validation samples.


InvalidArgumentError: {{function_node __wrapped__ShuffleDatasetV3_device_/job:localhost/replica:0/task:0/device:CPU:0}} buffer_size must be greater than zero or UNKNOWN_CARDINALITY [Op:ShuffleDatasetV3] name: 

In [None]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        SHUFFLE_BUFFER_SIZE = 50000 # Adjusted buffer size

        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 32 # Reduced from 64
    HYBRID_NCF_EPOCHS = 15 # Reduced epochs for faster testing
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 128 # Reduced from 256
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Reduced patience
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 2 # Reduced from 4


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 12:54:29.776317: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747313669.960499      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747313670.017225      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:01<00:00,  2.30it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 3000 users and 74917 items.
   Aligning features for 74917 mapped items...
   Successfully aligned features for 2795 out of 74917 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix create

I0000 00:00:1747313687.281978      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...
   Generating BPR samples (2 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            


Preparing validation data for BPR...
   Generating BPR samples (2 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            

   Prepared 27897118 BPR training samples.
   Prepared 1860842 BPR validation samples.
   Configured Early Stopping with patience=3.
   Fitting Hybrid NCF model with BPR loss for up to 15 epochs with Early Stopping (Batch Size: 128)...
Epoch 1/15


I0000 00:00:1747313729.560675      88 service.cc:148] XLA service 0x7c121c011fc0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747313729.561431      88 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1747313730.161724      88 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m    45/217947[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:36[0m 3ms/step - loss: 1.2107

I0000 00:00:1747313733.838287      88 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m872s[0m 4ms/step - loss: 0.0828 - val_loss: 1.1555
Epoch 2/15
[1m 62719/217947[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m9:48[0m 4ms/step - loss: 0.0783

In [1]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping)

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             self._align_item_features_with_mapping()


        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        # Reduced buffer size further to conserve memory
        SHUFFLE_BUFFER_SIZE = 20000 # Adjusted buffer size


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 16 # Further reduced from 32
    HYBRID_NCF_EPOCHS = 10 # Reduced epochs
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 64 # Further reduced from 128
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Kept patience the same
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 1 # Further reduced from 2


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 13:21:51.198052: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747315311.405222      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747315311.470657      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:02<00:00,  1.50it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 3000 users and 74917 items.
   Aligning features for 74917 mapped items...
   Successfully aligned features for 2795 out of 74917 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix create

I0000 00:00:1747315331.024784      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...
   Generating BPR samples (1 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            


Preparing validation data for BPR...
   Generating BPR samples (1 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            

   Prepared 13948559 BPR training samples.
   Prepared 930421 BPR validation samples.
   Configured Early Stopping with patience=3.
   Fitting Hybrid NCF model with BPR loss for up to 10 epochs with Early Stopping (Batch Size: 64)...
Epoch 1/10


I0000 00:00:1747315364.795681      87 service.cc:148] XLA service 0x7884e0010fa0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747315364.796508      87 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1747315365.422792      87 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m    44/217947[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:15[0m 4ms/step - loss: 1.1112

I0000 00:00:1747315369.955585      87 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m818s[0m 4ms/step - loss: 0.0726 - val_loss: 1.0785
Epoch 2/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m806s[0m 4ms/step - loss: 0.0660 - val_loss: 1.0996
Epoch 3/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m844s[0m 4ms/step - loss: 0.0661 - val_loss: 1.0939
Epoch 4/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m813s[0m 4ms/step - loss: 0.0662 - val_loss: 1.0875

Hybrid NCF model (BPR) training complete (possibly stopped early).

--- Handling User Study Data (/kaggle/working/user_study_interactions.csv) ---
Generating synthetic user study data...

--- Generating Synthetic User Study Data for 25 users ---
   Sampling items from a pool of 74917 items based on popularity.


                                                            

   Generated 795 synthetic interactions and saved to /kaggle/working/user_study_interactions.csv
   Filtered generated data to 795 interactions (25 users) with items known by the model.

--- Evaluating Model on User Study Data ---

--- Starting Evaluation (n=10) ---
No valid test interactions found for users/items seen during training mappings. Cannot evaluate.

--- User Study Evaluation Results (Hybrid NCF) ---

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).
3.  **Passive Listening Logging:** The a



In [1]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        # Mappings should be finalized BEFORE calling train_hybrid_ncf_model
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             print("Error: Mappings or interaction matrix not initialized before training. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             print("\n--- Aligning Item Features with Mappings (during training setup) ---")
             self._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {self.num_features}")
        elif self.num_features > 0:
             print("\n--- Item Features already aligned. ---")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}



        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        # Reduced buffer size further to conserve memory
        SHUFFLE_BUFFER_SIZE = 20000 # Adjusted buffer size


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        # Filter test data to include only users and items present in the *final* mappings
        test_df_filtered = test_df[
            test_df['user'].isin(self.user_id_map) &
            test_df['item'].isin(self.item_id_map)
        ].copy()

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 16 # Further reduced from 32
    HYBRID_NCF_EPOCHS = 10 # Reduced epochs
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 64 # Further reduced from 128
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Kept patience the same
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 1 # Further reduced from 2


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             return # Exit if neither main data nor features are available


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # Create interaction matrix and mappings if main data was loaded
    if not interactions_df.empty:
        print("\n--- Creating Interaction Matrix and Mappings from MPD Data ---")
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Number of users: {len(recommender.user_id_map)}")
        print(f"   Number of items: {len(recommender.item_id_map)}")

        # Calculate item popularity for negative sampling and diversity metric
        recommender._calculate_item_popularity()
        print(f"   Calculated popularity for {len(recommender.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("\n--- Aligning Item Features with Mappings ---")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}


        # --- Split Data (only if main data was loaded) ---
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # Still proceed to user study data handling if mappings were created from features
             if not recommender.item_id_map:
                 return # Exit if no mappings exist at all

    # --- Generate and/or Evaluate Model on User Study Data ---
    # This block runs regardless of whether main training data was loaded,
    # as long as item mappings and popularity exist (either from MPD or dummy from features)
    user_study_test_results = {}
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    user_study_df = pd.DataFrame() # Initialize empty DataFrame

    # Check if item mappings and popularity are available before proceeding
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
    else:
         # Check if user study data already exists
         if os.path.exists(USER_STUDY_DATA_PATH):
              print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
              try:
                  user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
                  print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")
                  # Filter user study data to only include items that the model knows about
                  original_items_in_model = set(recommender.item_id_map.keys())
                  user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                  print(f"   Filtered to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")

              except Exception as e:
                  print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
                  user_study_df = pd.DataFrame() # Reset if loading fails


         # If file doesn't exist or was empty/failed to load, and we are configured to generate
         if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
              print("Generating synthetic user study data...")
              user_study_df = generate_synthetic_user_study_data(
                  recommender,
                  num_users=NUM_SYNTHETIC_USERS,
                  interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
                  output_filepath=USER_STUDY_DATA_PATH
              )
              # Filter newly generated data to include only items known by the model (redundant if sampling from known items, but safe)
              if not user_study_df.empty:
                   original_items_in_model = set(recommender.item_id_map.keys())
                   user_study_df = user_study_df[user_study_df['item'].isin(original_items_in_model)].copy()
                   print(f"   Filtered generated data to {len(user_study_df)} interactions ({user_study_df['user'].nunique()} users) with items known by the model.")


    # Evaluate if user study data is available AND the model was trained
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")
         user_study_test_results['Hybrid NCF'] = recommender.evaluate(user_study_df, n=EVALUATION_N)['Hybrid NCF']
         print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
         # Pretty print the results
         for method, metrics in user_study_test_results['Hybrid NCF'].items():
              print(f"  Method: {method.replace('_', ' ').title()}")
              for metric, value in metrics.items():
                  print(f"    {metric}: {value:.4f}")
    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


2025-05-15 16:44:31.992826: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747327472.172564      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747327472.225760      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:02<00:00,  1.39it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Interaction Matrix and Mappings from MPD Data ---
   Mapped 3000 users and 74917 items.
   Aligning features for 74917 mapped items...
   Successfully aligned features for 2795 out of 74917 mapped items.
   These items will have default (zero/placeholder) features.
   Interaction matrix create

I0000 00:00:1747327490.180728      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


   Hybrid NCF Prediction Model built.
   Hybrid NCF model architecture built and compiled with BPR loss and regularization.
   Created separate model for extracting NCF item embeddings from MLP path.

Preparing training data for BPR...
   Generating BPR samples (1 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            


Preparing validation data for BPR...
   Generating BPR samples (1 negatives per positive) from 74917 candidate items (popularity-biased)...


                                                                            

   Prepared 13948559 BPR training samples.
   Prepared 930421 BPR validation samples.
   Configured Early Stopping with patience=3.
   Fitting Hybrid NCF model with BPR loss for up to 10 epochs with Early Stopping (Batch Size: 64)...
Epoch 1/10


I0000 00:00:1747327519.191871      88 service.cc:148] XLA service 0x78a29e1b9580 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747327519.192637      88 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1747327519.731425      88 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m    51/217947[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11:07[0m 3ms/step - loss: 1.1114

I0000 00:00:1747327523.308050      88 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m730s[0m 3ms/step - loss: 0.0729 - val_loss: 1.0827
Epoch 2/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m709s[0m 3ms/step - loss: 0.0657 - val_loss: 1.0697
Epoch 3/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m743s[0m 3ms/step - loss: 0.0656 - val_loss: 1.0717
Epoch 4/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m713s[0m 3ms/step - loss: 0.0658 - val_loss: 1.0703
Epoch 5/10
[1m217947/217947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m709s[0m 3ms/step - loss: 0.0659 - val_loss: 1.0731

Hybrid NCF model (BPR) training complete (possibly stopped early).

--- Handling User Study Data (/kaggle/working/user_study_interactions.csv) ---
Generating synthetic user study data...

--- Generating Synthetic User Study Data for 25 users ---
   Sampling items from a pool of 74917 items based on popularity.


                                                            

   Generated 653 synthetic interactions and saved to /kaggle/working/user_study_interactions.csv
   Filtered generated data to 653 interactions (25 users) with items known by the model.

--- Evaluating Model on User Study Data ---

--- Starting Evaluation (n=10) ---
No valid test interactions found for users/items seen during training mappings. Cannot evaluate.

--- User Study Evaluation Results (Hybrid NCF) ---

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.
Methodology Details:
1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.
2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).
3.  **Passive Listening Logging:** The a



In [2]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_layers
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        # Mappings should be finalized BEFORE calling train_hybrid_ncf_model
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             print("Error: Mappings or interaction matrix not initialized before training. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             print("\n--- Aligning Item Features with Mappings (during training setup) ---")
             self._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {self.num_features}")
        elif self.num_features > 0:
             print("\n--- Item Features already aligned. ---")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}



        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        # Reduced buffer size further to conserve memory
        SHUFFLE_BUFFER_SIZE = 20000 # Adjusted buffer size


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        # The filtering based on the final mappings is now handled in the main function
        # before calling this evaluate method.
        test_df_filtered = test_df.copy()


        if test_df_filtered.empty:
            print("No valid test interactions found for evaluation after filtering in main. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 16 # Further reduced from 32
    HYBRID_NCF_EPOCHS = 10 # Reduced epochs
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 64 # Further reduced from 128
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Kept patience the same
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 1 # Further reduced from 2


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)
    if interactions_df.empty:
        print("Failed to load main interaction data from MPD. Skipping model training.")
        # If main data loading fails, we still need mappings and popularity for synthetic data generation/evaluation
        # Create dummy mappings and popularity if no data was loaded, but only if features were loaded
        if recommender.item_features_df is not None and not recommender.item_features_df.empty:
             print("   Creating dummy mappings and popularity based on item features for synthetic data.")
             unique_items_from_features = recommender.item_features_df['track_uri'].unique()
             recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
             recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
             recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
             print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
             # Align features now that item map exists
             recommender._align_item_features_with_mapping()
        else:
             print("   No item features loaded either. Cannot create any mappings or generate synthetic data.")
             # Clear any potentially half-created mappings/features
             recommender.user_id_map = {}
             recommender.item_id_map = {}
             recommender.id_user_map = {}
             recommender.id_item_map = {}
             recommender.interaction_matrix = None
             recommender.item_popularity = {}
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
             # Even if main data loading failed, we still need to attempt synthetic data generation/loading
             # before deciding if we can proceed with any form of evaluation.
             # The return statement below is removed to allow synthetic data handling.


    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # --- Handle User Study Data (Load or Generate) ---
    # This block runs before creating the final mappings to include synthetic users
    user_study_df = pd.DataFrame() # Initialize empty DataFrame
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    # Check if item mappings and popularity are available before proceeding with synthetic data
    # Item mappings are needed for generating synthetic data (to sample items)
    if not recommender.id_item_map or not recommender.item_popularity:
         print("Item mappings or popularity not available. Cannot generate or evaluate user study data.")
         # If item mappings are not available, we cannot generate synthetic data or evaluate.
         # In this case, we should exit the main function.
         print("Exiting main function due to missing item mappings.")
         return # Exit if item mappings are not available

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
         print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
         try:
             user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
             print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")

         except Exception as e:
             print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
             user_study_df = pd.DataFrame() # Reset if loading fails


    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
         print("Generating synthetic user study data...")
         user_study_df = generate_synthetic_user_study_data(
             recommender,
             num_users=NUM_SYNTHETIC_USERS,
             interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
             output_filepath=USER_STUDY_DATA_PATH
         )


    # --- Create Final Mappings (Including synthetic users if generated/loaded) ---
    print("\n--- Creating Final Mappings (including synthetic users) ---")
    # Combine users from training data (if loaded) and user study data
    all_users = pd.concat([interactions_df['user'], user_study_df['user']]).unique() if not interactions_df.empty else user_study_df['user'].unique()
    # Combine items from training data (if loaded) and user study data
    all_items = interactions_df['item'].unique() if not interactions_df.empty else user_study_df['item'].unique()

    # Ensure items from user study data are also included in item mapping if they weren't in training data
    if not user_study_df.empty:
        all_items = pd.concat([pd.Series(all_items), user_study_df['item']]).unique()

    # Create the final user mapping
    recommender.user_id_map = {user: i for i, user in enumerate(all_users)}
    recommender.id_user_map = {i: user for user, i in recommender.user_id_map.items()}

    # Rebuild item_id_map to ensure it includes all items from both datasets that have features
    # Or just all items encountered if features are not used
    if recommender.item_features_df is not None and not recommender.item_features_df.empty:
         # Only include items in the map that are in the feature file
         items_with_features = set(recommender.item_features_df['track_uri'].unique())
         all_items_with_features = [item for item in all_items if item in items_with_features]
         recommender.item_id_map = {item: i for i, item in enumerate(all_items_with_features)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (filtered by features).")
    else:
         # If no features are used, include all items encountered
         recommender.item_id_map = {item: i for i, item in enumerate(all_items)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (no feature filtering).")


    print(f"   Final number of users: {len(recommender.user_id_map)}")
    print(f"   Final number of items: {len(recommender.item_id_map)}")


    # Recreate interaction matrix with the finalized mappings (only if main data was loaded)
    if not interactions_df.empty:
         print("\n--- Recreating Interaction Matrix with Final Mappings ---")
         recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
         print(f"   Interaction matrix recreated with shape: {recommender.interaction_matrix.shape}")

         # Recalculate popularity based on the new interaction matrix
         recommender._calculate_item_popularity()
         print(f"   Recalculated popularity for {len(recommender.item_popularity)} items.")
    else:
         # If no main data was loaded, the interaction matrix remains None
         print("\n--- Skipping Interaction Matrix Creation (No main MPD data loaded) ---")
         recommender.interaction_matrix = None
         # If no interaction data, set popularity uniformly for all mapped items
         recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))}


    # Align features AFTER final mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
         print("\n--- Aligning Item Features with Final Mappings ---")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}


    # --- Split Data (only if main data was loaded) ---
    # Use the original train/val split logic, but it will now use the finalized mappings internally
    if not interactions_df.empty:
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # If training failed, we cannot evaluate the model.
             print("Exiting main function due to model training failure.")
             return # Exit if model training failed
    else:
         print("\n--- Skipping Model Training (No main MPD data loaded) ---")
         recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped


    # --- Evaluate Model on User Study Data ---
    # This block runs if user study data is available AND the model was trained successfully
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")

         # Filter user study data *again* using the finalized mappings
         # This is crucial because the mappings now include the synthetic users.
         user_study_df_filtered_for_eval = user_study_df[
             user_study_df['user'].isin(recommender.user_id_map) &
             user_study_df['item'].isin(recommender.item_id_map)
         ].copy()
         print(f"   Filtered user study data for evaluation: {len(user_study_df_filtered_for_eval)} interactions ({user_study_df_filtered_for_eval['user'].nunique()} users).")


         if user_study_df_filtered_for_eval.empty:
             print("No valid test interactions found for evaluation after filtering with final mappings. Cannot evaluate.")
         else:
             user_study_test_results = recommender.evaluate(user_study_df_filtered_for_eval, n=EVALUATION_N)
             print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
             # Pretty print the results
             if 'Hybrid NCF' in user_study_test_results:
                 for method, metrics in user_study_test_results['Hybrid NCF'].items():
                      print(f"  Method: {method.replace('_', ' ').title()}")
                      for metric, value in metrics.items():
                          print(f"    {metric}: {value:.4f}")
             else:
                 print("Evaluation did not produce results for Hybrid NCF.")


    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:00<00:00,  4.08it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Handling User Study Data (/kaggle/working/user_study_interactions.csv) ---
Item mappings or popularity not available. Cannot generate or evaluate user study data.
Exiting main function due to missing item mappings.

--- Plausible User Study Data Collection Methodology ---
To conduct a user study and co

In [3]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        # Mappings should be finalized BEFORE calling train_hybrid_ncf_model
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             print("Error: Mappings or interaction matrix not initialized before training. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             print("\n--- Aligning Item Features with Mappings (during training setup) ---")
             self._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {self.num_features}")
        elif self.num_features > 0:
             print("\n--- Item Features already aligned. ---")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}



        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        # Reduced buffer size further to conserve memory
        SHUFFLE_BUFFER_SIZE = 20000 # Adjusted buffer size


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        # The filtering based on the final mappings is now handled in the main function
        # before calling this evaluate method.
        test_df_filtered = test_df.copy()


        if test_df_filtered.empty:
            print("No valid test interactions found for evaluation after filtering in main. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    print("-  The purpose of the data collection and how the data would be used was clearly explained.")
    print("-  Data was handled in accordance with data privacy principles, ensuring participant anonymity.")
    print("\nLimitations of the Study:")
    print("-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected.")
    print("-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results.")
    print("-  The specific listening behavior captured may be influenced by external factors during that particular week.")
    print("\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario.")


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 16 # Further reduced from 32
    HYBRID_NCF_EPOCHS = 10 # Reduced epochs
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 64 # Further reduced from 128
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Kept patience the same
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 1 # Further reduced from 2


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)

    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # --- Create Initial Mappings and Popularity (from MPD or Features) ---
    # This block runs BEFORE handling user study data to ensure mappings/popularity exist
    print("\n--- Creating Initial Mappings and Popularity ---")
    if not interactions_df.empty:
        # Create mappings and popularity from MPD data
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Initial Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Initial Number of users: {len(recommender.user_id_map)}")
        print(f"   Initial Number of items: {len(recommender.item_id_map)}")
        recommender._calculate_item_popularity()
        print(f"   Initial Popularity calculated for {len(recommender.item_popularity)} items.")
        # Align features with these initial mappings
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("   Aligning Item Features with Initial Mappings...")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("   Skipping Feature Alignment (No features specified or loaded).")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}

    elif recommender.item_features_df is not None and not recommender.item_features_df.empty:
         # If no MPD data, create dummy mappings and popularity from item features
         print("   No MPD data loaded. Creating dummy mappings and popularity based on item features.")
         unique_items_from_features = recommender.item_features_df['track_uri'].unique()
         recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         recommender.user_id_map = {} # No users from interaction data
         recommender.id_user_map = {}
         recommender.interaction_matrix = None # No interaction matrix
         recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
         print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
         print(f"   Dummy user map created with {len(recommender.user_id_map)} users.")
         # Align features now that item map exists
         print("   Aligning Item Features with Dummy Mappings...")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("Failed to load main interaction data and no item features loaded. Cannot create mappings or proceed.")
         # Clear any potentially half-created mappings/features
         recommender.user_id_map = {}
         recommender.item_id_map = {}
         recommender.id_user_map = {}
         recommender.id_item_map = {}
         recommender.interaction_matrix = None
         recommender.item_popularity = {}
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
         print("Exiting main function due to missing data for mappings.")
         return # Exit if no data is available for mappings

    # Now that initial mappings and popularity are created (if possible), proceed.

    # --- Handle User Study Data (Load or Generate) ---
    user_study_df = pd.DataFrame() # Initialize empty DataFrame
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
         print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
         try:
             user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
             print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")

         except Exception as e:
             print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
             user_study_df = pd.DataFrame() # Reset if loading fails


    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
         print("Generating synthetic user study data...")
         # We can now generate synthetic data because item_id_map and item_popularity exist
         user_study_df = generate_synthetic_user_study_data(
             recommender,
             num_users=NUM_SYNTHETIC_USERS,
             interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
             output_filepath=USER_STUDY_DATA_PATH
         )


    # --- Create Final Mappings (Including synthetic users if generated/loaded) ---
    print("\n--- Creating Final Mappings (including synthetic users) ---")
    # Combine users from training data (if loaded) and user study data
    all_users = pd.concat([interactions_df['user'], user_study_df['user']]).unique() if not interactions_df.empty else user_study_df['user'].unique()
    # Combine items from training data (if loaded) and user study data
    all_items = interactions_df['item'].unique() if not interactions_df.empty else user_study_df['item'].unique()

    # Ensure items from user study data are also included in item mapping if they weren't in training data
    if not user_study_df.empty:
        all_items = pd.concat([pd.Series(all_items), user_study_df['item']]).unique()

    # Create the final user mapping
    recommender.user_id_map = {user: i for i, user in enumerate(all_users)}
    recommender.id_user_map = {i: user for user, i in recommender.user_id_map.items()}

    # Rebuild item_id_map to ensure it includes all items from both datasets that have features
    # Or just all items encountered if features are not used
    if recommender.item_features_df is not None and not recommender.item_features_df.empty:
         # Only include items in the map that are in the feature file
         items_with_features = set(recommender.item_features_df['track_uri'].unique())
         all_items_with_features = [item for item in all_items if item in items_with_features]
         recommender.item_id_map = {item: i for i, item in enumerate(all_items_with_features)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (filtered by features).")
    else:
         # If no features are used, include all items encountered
         recommender.item_id_map = {item: i for i, item in enumerate(all_items)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (no feature filtering).")


    print(f"   Final number of users: {len(recommender.user_id_map)}")
    print(f"   Final number of items: {len(recommender.item_id_map)}")


    # Recreate interaction matrix with the finalized mappings (only if main data was loaded)
    if not interactions_df.empty:
         print("\n--- Recreating Interaction Matrix with Final Mappings ---")
         recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
         print(f"   Interaction matrix recreated with shape: {recommender.interaction_matrix.shape}")

         # Recalculate popularity based on the new interaction matrix
         recommender._calculate_item_popularity()
         print(f"   Recalculated popularity for {len(recommender.item_popularity)} items.")
    else:
         # If no main data was loaded, the interaction matrix remains None
         print("\n--- Skipping Interaction Matrix Creation (No main MPD data loaded) ---")
         recommender.interaction_matrix = None
         # If no interaction data, set popularity uniformly for all mapped items
         recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))}


    # Align features AFTER final mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
         print("\n--- Aligning Item Features with Final Mappings ---")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}


    # --- Split Data (only if main data was loaded) ---
    # Use the original train/val split logic, but it will now use the finalized mappings internally
    train_df_mapped = pd.DataFrame()
    val_df_mapped = pd.DataFrame()
    if not interactions_df.empty:
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # If training failed, we cannot evaluate the model.
             print("Exiting main function due to model training failure.")
             return # Exit if model training failed
    else:
         print("\n--- Skipping Model Training (No main MPD data loaded) ---")
         recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped


    # --- Evaluate Model on User Study Data ---
    # This block runs if user study data is available AND the model was trained successfully
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")

         # Filter user study data *again* using the finalized mappings
         # This is crucial because the mappings now include the synthetic users.
         user_study_df_filtered_for_eval = user_study_df[
             user_study_df['user'].isin(recommender.user_id_map) &
             user_study_df['item'].isin(recommender.item_id_map)
         ].copy()
         print(f"   Filtered user study data for evaluation: {len(user_study_df_filtered_for_eval)} interactions ({user_study_df_filtered_for_eval['user'].nunique()} users).")


         if user_study_df_filtered_for_eval.empty:
             print("No valid test interactions found for evaluation after filtering with final mappings. Cannot evaluate.")
         else:
             user_study_test_results = recommender.evaluate(user_study_df_filtered_for_eval, n=EVALUATION_N)
             print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
             # Pretty print the results
             if 'Hybrid NCF' in user_study_test_results:
                 for method, metrics in user_study_test_results['Hybrid NCF'].items():
                      print(f"  Method: {method.replace('_', ' ').title()}")
                      for metric, value in metrics.items():
                          print(f"    {metric}: {value:.4f}")
             else:
                 print("Evaluation did not produce results for Hybrid NCF.")


    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')


Configured memory growth for 1 GPU(s)
--- Initializing Spotify Recommender System ---

--- Loading MPD Interaction Data ---


Loading MPD slices: 100%|██████████| 3/3 [00:00<00:00,  4.08it/s]



--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
Categorical feature 'mode' mapped to 2 integer codes.
Categorical feature 'key' mapped to 12 integer codes.
Categorical feature 'time_signature' mapped to 5 integer codes.
Loaded and processed 10 numerical and 3 categorical features (Total: 13). Items processed: 89741.
Item ID map not yet created. Will align features after interaction matrix creation.

--- Creating Initial Mappings and Popularity ---
   Mapped 3000 users and 74917 items.


AttributeError: 'SpotifyRecommenderSystem' object has no attribute '_align_item_features_with_mapping'

In [None]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf

# Make sure you have run '!pip install cornac' in a separate cell first!
# If you are in a new notebook, you likely need to run: !pip install cornac
# from cornac.models import NMF # Not used in this Hybrid NCF implementation
# from cornac.data import Dataset # Not used directly for tf.keras training
# from cornac.eval_methods import RatioSplit # Not used directly for tf.keras training
# from cornac.metrics import Recall, NDCG # Not used directly, implemented manually


# Imports specifically for Feature Importance demonstration (uncommented for analysis)
# Ensure these are available in your environment. If not, you might need
# !pip install scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn # Renamed to avoid conflict
from sklearn.utils import resample # For balancing data
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score


# Configure TensorFlow logging (optional, but can help if you want less verbose output)
tf.get_logger().setLevel('ERROR') # Set TensorFlow logger level to ERROR or WARN

# Configure GPU Memory Growth
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
else:
    print("No GPU detected by TensorFlow.")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training (using BPR), and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   numerical_feature_columns: Optional[List[str]] = None, # List of numerical feature columns
                   categorical_feature_columns: Optional[List[str]] = None, # List of categorical column names from item features
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 # Negative samples per positive interaction during training
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            numerical_feature_columns: List of numerical column names from item features.
            categorical_feature_columns: List of categorical column names from item features.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training (for BPR triplets).
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric and negative sampling

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.numerical_feature_columns = numerical_feature_columns if numerical_feature_columns is not None else []
        self.categorical_feature_columns = categorical_feature_columns if categorical_feature_columns is not None else []
        self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns # All feature columns used
        self.feature_scaler: Optional[StandardScaler] = None
        self.categorical_vocab_sizes: Dict[str, int] = {} # Store vocabulary size for each categorical feature

        self.num_numerical_features = len(self.numerical_feature_columns)
        self.num_categorical_features = len(self.categorical_feature_columns)
        self.num_features = self.num_numerical_features + self.num_categorical_features # Total features

        # Aligned internal feature arrays
        self.item_internal_numerical_features: Optional[np.ndarray] = None # Scaled numerical features
        self.item_internal_categorical_features: Dict[str, Optional[np.ndarray]] = {} # Categorical features as integer IDs


        # Models
        # The main model for prediction (outputs raw score)
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None
        # A separate model used specifically for BPR training (outputs score difference)
        self.bpr_training_model: Optional[tf.keras.models.Model] = None
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the MLP path
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        full_path = features_filepath

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")

            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 # Check for 'uri' column as an alternative if 'track_id' is missing
                 if 'uri' in features_df.columns:
                      print("Using 'uri' column for track identification.")
                      features_df = features_df.rename(columns={'uri': 'track_uri'})
                      features_df = features_df.drop_duplicates(subset=['track_uri']).reset_index(drop=True)
                      print(f"After dropping duplicates by track_uri: {len(features_df)} items.")
                 else:
                      print("Error: Neither 'track_id' nor 'uri' column found. Cannot create track_uri. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      self.num_numerical_features = 0
                      self.num_categorical_features = 0
                      self.num_features = 0
                      return


            # Verify specified feature columns exist in the dataframe
            all_specified_features = self.numerical_feature_columns + self.categorical_feature_columns
            if not all(col in features_df.columns for col in all_specified_features):
                 missing_cols = [col for col in all_specified_features if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.numerical_feature_columns = [col for col in self.numerical_feature_columns if col in features_df.columns]
                 self.categorical_feature_columns = [col for col in self.categorical_feature_columns if col in features_df.columns]
                 self.feature_columns = self.numerical_feature_columns + self.categorical_feature_columns
                 self.num_numerical_features = len(self.numerical_feature_columns)
                 self.num_categorical_features = len(self.categorical_feature_columns)
                 self.num_features = self.num_numerical_features + self.num_categorical_features
                 if not self.feature_columns:
                      print("No valid feature columns remaining. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_numerical_features = None
                      self.item_internal_categorical_features = {}
                      return

            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle missing values (simple fillna for now)
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling numerical with 0 and categorical with mode/placeholder.")
                 for col in self.numerical_feature_columns:
                     if col in self.item_features_df.columns:
                          self.item_features_df[col] = self.item_features_df[col].fillna(0)
                 for col in self.categorical_feature_columns:
                     if col in self.item_features_df.columns:
                          mode_val = self.item_features_df[col].mode()
                          if not mode_val.empty:
                               # Fill NaN with the mode, but also handle potential NaNs after mode calculation if all were NaN
                               self.item_features_df[col] = self.item_features_df[col].fillna(mode_val[0])
                          # Fill any remaining NaNs (if mode was empty or column was all NaN) with a distinct placeholder
                          # Using a string placeholder here before factorizing
                          self.item_features_df[col] = self.item_features_df[col].fillna('__MISSING__')


            # Scale numerical features
            if self.numerical_feature_columns:
                 print(f"Scaling numerical features: {self.numerical_feature_columns}")
                 self.feature_scaler = StandardScaler()
                 # Ensure only numeric columns are scaled
                 numeric_cols_to_scale = [col for col in self.numerical_feature_columns if col in self.item_features_df.columns and pd.api.types.is_numeric_dtype(self.item_features_df[col])]
                 if numeric_cols_to_scale:
                      self.item_features_df[numeric_cols_to_scale] = self.feature_scaler.fit_transform(self.item_features_df[numeric_cols_to_scale])
                 else:
                      print("No numerical columns found among specified numerical features for scaling.")
                      self.numerical_feature_columns = [] # Clear numerical features if none were numeric/present


            # Process categorical features: create mappings to integers
            self.categorical_vocab_sizes = {}
            processed_cat_cols = []
            for col in self.categorical_feature_columns:
                 if col in self.item_features_df.columns:
                      # Ensure categorical columns are treated as strings before factorizing
                      self.item_features_df[col] = self.item_features_df[col].astype(str)
                      # Factorize returns integer codes and the unique values (the vocabulary)
                      codes, uniques = pd.factorize(self.item_features_df[col], sort=True) # Sort to ensure consistent mapping
                      self.item_features_df[col] = codes # Replace values with integer codes
                      # The number of unique values is the vocabulary size. Add 1 for potential padding/unknown if needed later.
                      # For now, vocab size is just the number of unique values.
                      self.categorical_vocab_sizes[col] = len(uniques)
                      processed_cat_cols.append(col)
                      print(f"Categorical feature '{col}' mapped to {self.categorical_vocab_sizes[col]} integer codes.")
                 else:
                      print(f"Warning: Categorical feature '{col}' not found in dataframe. Skipping.")

            # Update categorical feature columns to only include those successfully processed
            self.categorical_feature_columns = processed_cat_cols
            self.num_categorical_features = len(self.categorical_feature_columns)

            # Update total feature count
            self.num_numerical_features = len(self.numerical_feature_columns) # Update in case some were removed
            self.num_features = self.num_numerical_features + self.num_categorical_features


            print(f"Loaded and processed {self.num_numerical_features} numerical and {self.num_categorical_features} categorical features (Total: {self.num_features}). Items processed: {len(self.item_features_df)}.")


            # Prepare internal feature arrays, aligned with item_id_map
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_numerical_features = None
            self.item_internal_categorical_features = {}
            self.num_numerical_features = 0
            self.num_categorical_features = 0
            self.num_features = 0


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            # Check if features were defined but alignment didn't complete successfully
            if (self.num_features > 0 and
                ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
                 (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_mask], ratings[valid_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}

    def _align_item_features_with_mapping(self) -> None:
        """
        Aligns the loaded item features DataFrame with the internal item ID mapping.
        Creates internal feature arrays/dicts indexed by internal item ID.
        """
        if self.item_features_df is None or self.item_features_df.empty:
             print("   Feature DataFrame is empty or not loaded. Cannot align features.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        if not self.item_id_map:
             print("   Item ID map is empty. Cannot align features without a mapping.")
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             return

        num_mapped_items = len(self.item_id_map)
        print(f"   Aligning features for {num_mapped_items} mapped items...")

        # Create a DataFrame indexed by original item URI for easy lookup
        features_indexed_by_uri = self.item_features_df.set_index('track_uri')

        # Initialize internal feature arrays/dicts with default values (e.g., 0 for numerical, 0 for categorical)
        # The size of these arrays should match the number of mapped items
        if self.num_numerical_features > 0:
             self.item_internal_numerical_features = np.zeros((num_mapped_items, self.num_numerical_features), dtype=np.float32)
        else:
             self.item_internal_numerical_features = None # Ensure it's None if no numerical features are used

        self.item_internal_categorical_features = {}
        for col in self.categorical_feature_columns:
             # Use 0 as a default/placeholder for items without features
             self.item_internal_categorical_features[col] = np.zeros((num_mapped_items,), dtype=np.int32)


        # Populate the internal feature arrays/dicts
        aligned_count = 0
        for original_uri, internal_id in self.item_id_map.items():
            if original_uri in features_indexed_by_uri.index:
                 aligned_count += 1
                 item_feature_row = features_indexed_by_uri.loc[original_uri]

                 # Populate numerical features
                 if self.num_numerical_features > 0 and self.item_internal_numerical_features is not None:
                      try:
                           numerical_values = item_feature_row[self.numerical_feature_columns].values.astype(np.float32)
                           self.item_internal_numerical_features[internal_id] = numerical_values
                      except Exception as e:
                           print(f"Warning: Error aligning numerical features for item {original_uri} (internal ID {internal_id}): {e}")


                 # Populate categorical features
                 if self.num_categorical_features > 0:
                      for col in self.categorical_feature_columns:
                           try:
                                # Ensure the value is an integer code after factorize
                                categorical_value = int(item_feature_row[col])
                                self.item_internal_categorical_features[col][internal_id] = categorical_value
                           except Exception as e:
                                print(f"Warning: Error aligning categorical feature '{col}' for item {original_uri} (internal ID {internal_id}): {e}")


        print(f"   Successfully aligned features for {aligned_count} out of {num_mapped_items} mapped items.")
        if aligned_count < num_mapped_items:
             print(f"   Warning: {num_mapped_items - aligned_count} mapped items do not have corresponding entries in the feature file.")
             print("   These items will have default (zero/placeholder) features.")


    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        # Store popularity for all mapped items, even those with 0 interactions
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)}


    def build_hybrid_ncf_prediction_model(self, num_users: int, num_items: int) -> tf.keras.models.Model:
        """Builds the Hybrid NCF model architecture for prediction (outputs raw scores)."""
        print(f"   Building Hybrid NCF Prediction Model (Users: {num_users}, Items: {num_items}, Numerical Features: {self.num_numerical_features}, Categorical Features: {self.num_categorical_features})...")

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

        # Numerical feature input layer if numerical features are used
        numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='numerical_features_input') if self.num_numerical_features > 0 else None

        # Categorical feature inputs and embeddings
        categorical_inputs = {}
        categorical_embeddings_flattened = []
        for col in self.categorical_feature_columns:
             # Use stored vocab size + 1 for a potential padding/unknown value if needed
             # For factorize, codes are 0 to vocab_size - 1. index vocab_size can be placeholder.
             vocab_size = self.categorical_vocab_sizes.get(col, 0) + 1 # Use 0 as default, add 1 for potential padding
             # Heuristic for embedding size
             cat_embedding_dim = max(2, min(50, vocab_size // 2)) # Ensure reasonable embedding size

             cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'categorical_{col}_input')
             categorical_inputs[col] = cat_input

             # Embedding layer for this categorical feature
             cat_embedding_layer = tf.keras.layers.Embedding(
                 input_dim=vocab_size, # Total number of categories + 1 (for placeholder)
                 output_dim=cat_embedding_dim,
                 embeddings_initializer='he_normal',
                 embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
                 name=f'categorical_{col}_embedding'
             )
             cat_embedded = tf.keras.layers.Flatten()(cat_embedding_layer(cat_input))
             categorical_embeddings_flattened.append(cat_embedded)


        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Keep for later extraction
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # --- Advanced Feature Integration in MLP Path ---
        feature_input_list_for_concat = []
        if numerical_features_input is not None:
            feature_input_list_for_concat.append(numerical_features_input)
        feature_input_list_for_concat.extend(categorical_embeddings_flattened)

        if feature_input_list_for_concat: # If there are any features to integrate
            # Concatenate user/item embeddings with combined features
            combined_mlp_and_features = tf.keras.layers.Concatenate()(
                [mlp_user_vec, mlp_item_vec] + feature_input_list_for_concat
            )

            # MLP layers - Can make this deeper or wider
            mlp_output = combined_mlp_and_features
            # Simple MLP structure following concatenation
            for dim in [256, 128, 64]: # Example dimensions
                 mlp_dense = tf.keras.layers.Dense(
                     dim,
                     activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
                 )(mlp_output)
                 mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)

        else: # No features to integrate
             print("   No item features to integrate into MLP path.")
             mlp_output = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec]) # Pure NCF MLP path


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer
        output = tf.keras.layers.Dense(
            1,
            activation='linear',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Combine all inputs for the model
        all_inputs = [user_input, item_input]
        if numerical_features_input is not None:
             all_inputs.append(numerical_features_input)
        all_inputs.extend(categorical_inputs.values())


        # Create prediction model
        prediction_model = tf.keras.models.Model(
            inputs=all_inputs,
            outputs=output
        )

        print("   Hybrid NCF Prediction Model built.")
        return prediction_model


    def build_bpr_training_model(self, prediction_model: tf.keras.models.Model):
        """Builds a BPR training model based on the prediction model."""
        # Inputs for BPR triplets - must match the inputs of the prediction_model
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        positive_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='positive_item_input')
        negative_item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='negative_item_input')

        # Inputs for positive and negative item features (numerical and categorical)
        # Check if these inputs are actually expected by the prediction_model before creating
        model_input_names = [inp.name.split(':')[0] for inp in prediction_model.inputs]

        positive_numerical_features_input = None
        negative_numerical_features_input = None
        if 'numerical_features_input' in model_input_names:
             positive_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='positive_numerical_features_input')
             negative_numerical_features_input = tf.keras.layers.Input(shape=(self.num_numerical_features,), dtype='float32', name='negative_numerical_features_input')


        positive_categorical_features_inputs = {}
        negative_categorical_features_inputs = {}
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                  pos_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'positive_categorical_{col}_input')
                  neg_cat_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name=f'negative_categorical_{col}_input')
                  positive_categorical_features_inputs[col] = pos_cat_input
                  negative_categorical_features_inputs[col] = neg_cat_input


        # Prepare inputs for the prediction model for positive and negative items
        pos_prediction_inputs = [user_input, positive_item_input]
        if positive_numerical_features_input is not None:
             pos_prediction_inputs.append(positive_numerical_features_input)
        pos_prediction_inputs.extend(positive_categorical_features_inputs.values())


        neg_prediction_inputs = [user_input, negative_item_input]
        if negative_numerical_features_input is not None:
             neg_prediction_inputs.append(negative_numerical_features_input)
        neg_prediction_inputs.extend(negative_categorical_features_inputs.values())


        # Get scores for positive and negative items using the prediction model
        positive_score = prediction_model(pos_prediction_inputs)
        negative_score = prediction_model(neg_prediction_inputs)

        # Calculate the score difference for BPR
        score_difference = positive_score - negative_score

        # Combine all BPR training inputs
        all_bpr_inputs = [user_input, positive_item_input, negative_item_input]
        if positive_numerical_features_input is not None:
             all_bpr_inputs.append(positive_numerical_features_input)
        if negative_numerical_features_input is not None:
             all_bpr_inputs.append(negative_numerical_features_input)

        all_bpr_inputs.extend(positive_categorical_features_inputs.values())
        all_bpr_inputs.extend(negative_categorical_features_inputs.values())


        # The BPR training model outputs this score difference
        bpr_model = tf.keras.models.Model(
            inputs=all_bpr_inputs,
            outputs=score_difference
        )

        return bpr_model

    @tf.function # Use tf.function for potentially better performance
    def bpr_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Custom BPR Loss function."""
        score_difference = y_pred
        return tf.reduce_mean(tf.math.softplus(-score_difference))

    def generate_bpr_training_data(self, df: pd.DataFrame, neg_samples_per_positive: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, np.ndarray], np.ndarray, Dict[str, np.ndarray]]:
        """
        Generates (user, positive_item, negative_item, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features) tuples for BPR training.
        Uses popularity-biased negative sampling.
        """
        user_indices_orig = df['user'].values
        item_indices_orig = df['item'].values

        # Map to internal IDs
        user_indices = np.array([self.user_id_map.get(u, -1) for u in user_indices_orig], dtype=int)
        item_indices = np.array([self.item_id_map.get(i, -1) for i in item_indices_orig], dtype=int)

        # Filter out interactions with unknown users or items (not in map)
        valid_map_mask = (user_indices != -1) & (item_indices != -1)
        user_indices = user_indices[valid_map_mask]
        item_indices = item_indices[valid_map_mask]

        if user_indices.size == 0:
             print("Warning: No valid positive interactions found for BPR data generation after mapping.")
             # Return empty arrays/dicts with correct shapes if features were defined
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        # Get the pool of items to sample negatives from: all mapped items
        # CORRECTED: Use .values() to get the integer IDs, not .keys()
        all_mapped_items_internal = np.array(list(self.item_id_map.values()), dtype=np.int32)
        # Get corresponding popularity scores for negative sampling probability
        item_popularity_scores = np.array([self.item_popularity.get(item_id, 1) for item_id in all_mapped_items_internal], dtype=np.float32) # Use 1 for items not in popularity calculation (shouldn't happen if matrix is used)
        # Create probability distribution (normalize popularity)
        # Add a small epsilon to avoid zero probability and ensure all items have a chance
        # CORRECTED: Use all_mapped_items_internal instead of the undefined all_item_internal_ids
        popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_mapped_items_internal) * 1e-6) # Smoothed probability


        if all_mapped_items_internal.size == 0:
             print("Warning: No mapped items available to sample negatives from. Cannot generate BPR samples.")
             empty_num_shape = (0, self.num_numerical_features if self.num_numerical_features > 0 else 0)
             empty_cat_dict = {col: np.array([], dtype=np.int32) for col in self.categorical_feature_columns}
             return (np.array([], dtype=int), np.array([], dtype=int), np.array([], dtype=int),
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict,
                     np.empty(empty_num_shape, dtype=np.float32), empty_cat_dict)


        users_with_positives = np.unique(user_indices)
        user_positive_items: Dict[int, set] = defaultdict(set)
        for u, i in zip(user_indices, item_indices):
            user_positive_items[u].add(i)

        bpr_users_list, bpr_pos_items_list, bpr_neg_items_list = [], [], []

        print(f"   Generating BPR samples ({neg_samples_per_positive} negatives per positive) from {all_mapped_items_internal.size} candidate items (popularity-biased)...")

        for u in tqdm(users_with_positives, desc="Generating BPR Samples", leave=False):
            positive_items_for_user = user_positive_items.get(u, set())
            if not positive_items_for_user: continue

            # Sample negative items (popularity-biased)
            # We need to sample from all mapped items, but exclude positive ones for this user
            num_pos_for_user = len(positive_items_for_user)
            num_neg_needed = num_pos_for_user * neg_samples_per_positive
            neg_items_sampled = []

            # Create a mask for items that are NOT positive for the current user
            is_positive_mask = np.isin(all_mapped_items_internal, list(positive_items_for_user))
            negative_sampling_pool = all_mapped_items_internal[~is_positive_mask]
            negative_sampling_probs = popularity_probs[~is_positive_mask]

            if negative_sampling_pool.size > 0 and negative_sampling_probs.sum() > 0:
                 negative_sampling_probs = negative_sampling_probs / negative_sampling_probs.sum() # Renormalize
                 try:
                      num_neg_to_sample = min(num_neg_needed, negative_sampling_pool.size)
                      if num_neg_to_sample > 0:
                           neg_items_sampled = np.random.choice(
                               negative_sampling_pool,
                               size=num_neg_to_sample,
                               replace=True, # Sample with replacement
                               p=negative_sampling_probs
                           ).tolist()

                 except ValueError as e:
                      print(f"   Warning: Could not sample negatives for user {self.id_user_map.get(u, u)}. Error: {e}")
                      continue
            elif negative_sampling_pool.size == 0:
                 # This user has interacted with ALL mapped items, which is highly unlikely but handle it
                 # In this scenario, no negative samples can be generated for this user
                 continue


            if neg_items_sampled:
                 for pos_item in positive_items_for_user:
                      for neg_item in neg_items_sampled:
                          bpr_users_list.append(u)
                          bpr_pos_items_list.append(pos_item)
                          bpr_neg_items_list.append(neg_item)


        # Convert lists to NumPy arrays
        bpr_users = np.array(bpr_users_list, dtype=np.int32)
        bpr_pos_items = np.array(bpr_pos_items_list, dtype=np.int32)
        bpr_neg_items = np.array(bpr_neg_items_list, dtype=np.int32)

        # Initialize feature arrays/dicts with correct shapes based on generated samples
        num_samples = len(bpr_users)
        bpr_pos_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)
        bpr_neg_num_features = np.zeros((num_samples, self.num_numerical_features), dtype=np.float32) if self.num_numerical_features > 0 else np.empty((num_samples, 0), dtype=np.float32)

        bpr_pos_cat_features: Dict[str, np.ndarray] = {}
        bpr_neg_cat_features: Dict[str, np.ndarray] = {}
        for col in self.categorical_feature_columns:
             bpr_pos_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)
             bpr_neg_cat_features[col] = np.zeros((num_samples,), dtype=np.int32)


        # Get features for positive and negative items using the aligned internal features
        if num_samples > 0:
             if self.item_internal_numerical_features is not None and self.item_internal_numerical_features.shape[0] > 0:
                  # Ensure indices are within bounds before accessing
                  valid_pos_num_mask = bpr_pos_items < self.item_internal_numerical_features.shape[0]
                  valid_neg_num_mask = bpr_neg_items < self.item_internal_numerical_features.shape[0]
                  bpr_pos_num_features[valid_pos_num_mask] = self.item_internal_numerical_features[bpr_pos_items[valid_pos_num_mask]]
                  bpr_neg_num_features[valid_neg_num_mask] = self.item_internal_numerical_features[bpr_neg_items[valid_neg_num_mask]]
                  # Note: Items with invalid indices will retain their initialized zero features


             if self.item_internal_categorical_features:
                 for col in self.categorical_feature_columns:
                      if col in self.item_internal_categorical_features and self.item_internal_categorical_features[col] is not None and self.item_internal_categorical_features[col].shape[0] > 0:
                           # Ensure indices are within bounds before accessing
                           valid_pos_cat_mask = bpr_pos_items < self.item_internal_categorical_features[col].shape[0]
                           valid_neg_cat_mask = bpr_neg_items < self.item_internal_categorical_features[col].shape[0]
                           bpr_pos_cat_features[col][valid_pos_cat_mask] = self.item_internal_categorical_features[col][bpr_pos_items[valid_pos_cat_mask]]
                           bpr_neg_cat_features[col][valid_neg_cat_mask] = self.item_internal_categorical_features[col][bpr_neg_items[valid_neg_cat_mask]]
                           # Note: Items with invalid indices will retain their initialized zero features


        return (bpr_users, bpr_pos_items, bpr_neg_items,
                bpr_pos_num_features, bpr_pos_cat_features,
                bpr_neg_num_features, bpr_neg_cat_features)


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame,
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None:
        """Train the Hybrid Neural Collaborative Filtering (NCF) model using BPR loss."""
        print("\n--- Training Hybrid NCF Model (with BPR Loss) ---")

        # Mappings should be finalized BEFORE calling train_hybrid_ncf_model
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             print("Error: Mappings or interaction matrix not initialized before training. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        # Align features AFTER mappings are created if main data was loaded
        if (self.num_features > 0 and
            ((self.num_numerical_features > 0 and self.item_internal_numerical_features is None) or
             (self.num_categorical_features > 0 and not self.item_internal_categorical_features))):
             print("\n--- Aligning Item Features with Mappings (during training setup) ---")
             self._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {self.num_features}")
        elif self.num_features > 0:
             print("\n--- Item Features already aligned. ---")
        else:
             print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
             self.num_numerical_features = 0
             self.num_categorical_features = 0
             self.num_features = 0
             self.item_internal_numerical_features = None
             self.item_internal_categorical_features = {}



        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            (self.num_features > 0 and (self.item_internal_numerical_features is None and not self.item_internal_categorical_features)): # Check if features are needed but not aligned
             print("Interaction data or aligned item features (if needed) not ready. Cannot train Hybrid NCF (BPR).")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Defined: {self.num_features > 0}, Numerical Features Aligned: {self.item_internal_numerical_features is not None}, Categorical Features Aligned: {bool(self.item_internal_categorical_features)}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return


        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        self.hybrid_ncf_model = self.build_hybrid_ncf_prediction_model(num_users, num_items)

        self.bpr_training_model = self.build_bpr_training_model(self.hybrid_ncf_model)

        self.bpr_training_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss=self.bpr_loss
        )
        print("   Hybrid NCF model architecture built and compiled with BPR loss and regularization.")

        mlp_item_embedding_layer = self.hybrid_ncf_model.get_layer('mlp_item_embedding')
        item_input_tensor = None
        try:
             item_input_tensor = next(inp for inp in self.hybrid_ncf_model.inputs if inp.name.startswith('item_input'))
        except StopIteration:
             print("Error: Could not find 'item_input' tensor in hybrid_ncf_model inputs for embedding model.")
             self._item_embedding_model = None
             print("   Skipping creation of item embedding model.")

        if item_input_tensor is not None:
            self._item_embedding_model = tf.keras.models.Model(
                inputs=item_input_tensor,
                outputs=mlp_item_embedding_layer(item_input_tensor)
            )
            print("   Created separate model for extracting NCF item embeddings from MLP path.")
        else:
             pass


        print("\nPreparing training data for BPR...")
        (train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
         train_pos_num_features_bpr, train_pos_cat_features_bpr,
         train_neg_num_features_bpr, train_neg_cat_features_bpr) = self.generate_bpr_training_data(train_df, self.neg_samples_ratio)

        print("\nPreparing validation data for BPR...")
        (val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
         val_pos_num_features_bpr, val_pos_cat_features_bpr,
         val_neg_num_features_bpr, val_neg_cat_features_bpr) = self.generate_bpr_training_data(val_df, self.neg_samples_ratio)


        if train_users_bpr.size == 0:
             print("No BPR training samples generated. Cannot train.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             return

        print(f"   Prepared {len(train_users_bpr)} BPR training samples.")
        print(f"   Prepared {len(val_users_bpr)} BPR validation samples.")


        def create_dataset_inputs(users, pos_items, neg_items, pos_num_features, pos_cat_features, neg_num_features, neg_cat_features):
             inputs_dict = {
                 'user_input': users.reshape(-1, 1),
                 'positive_item_input': pos_items.reshape(-1, 1),
                 'negative_item_input': neg_items.reshape(-1, 1)
             }
             if self.num_numerical_features > 0:
                  inputs_dict['positive_numerical_features_input'] = pos_num_features
                  inputs_dict['negative_numerical_features_input'] = neg_num_features
             for col in self.categorical_feature_columns:
                  inputs_dict[f'positive_categorical_{col}_input'] = pos_cat_features[col].reshape(-1, 1)
                  inputs_dict[f'negative_categorical_{col}_input'] = neg_cat_features[col].reshape(-1, 1)
             return inputs_dict

        train_inputs_bpr = create_dataset_inputs(
            train_users_bpr, train_pos_items_bpr, train_neg_items_bpr,
            train_pos_num_features_bpr, train_pos_cat_features_bpr,
            train_neg_num_features_bpr, train_neg_cat_features_bpr
        )
        val_inputs_bpr = create_dataset_inputs(
            val_users_bpr, val_pos_items_bpr, val_neg_items_bpr,
            val_pos_num_features_bpr, val_pos_cat_features_bpr,
            val_neg_num_features_bpr, val_neg_cat_features_bpr
        )

        # Use a fixed buffer size for shuffling to avoid InvalidArgumentError
        # Reduced buffer size further to conserve memory
        SHUFFLE_BUFFER_SIZE = 20000 # Adjusted buffer size


        train_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (train_inputs_bpr, tf.ones(len(train_users_bpr), dtype=tf.float32))
        ).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset_bpr = tf.data.Dataset.from_tensor_slices(
            (val_inputs_bpr, tf.ones(len(val_users_bpr), dtype=tf.float32))
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE)


        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=early_stopping_patience,
            mode='min',
            restore_best_weights=True
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        print(f"   Fitting Hybrid NCF model with BPR loss for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.bpr_training_model.fit(
                train_dataset_bpr,
                epochs=epochs,
                validation_data=val_dataset_bpr,
                callbacks=[early_stopping],
                verbose=1
            )
            print("\nHybrid NCF model (BPR) training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None

        except tf.errors.ResourceExhaustedError as e:
             print(f"\n   GPU Memory Error during Hybrid NCF (BPR) training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns/embedding sizes.")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")
        except Exception as e:
             print(f"An error occurred during Hybrid NCF (BPR) training: {e}")
             self.hybrid_ncf_model = None
             self.bpr_training_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model (BPR) training failed.")


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained? Or embedding model creation failed?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32)

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024

        try:
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None

            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]

        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True)
        selected_ids_internal = []
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]

        if remaining_candidates_data:
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return []

        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1

            current_selected_embeddings = item_embeddings[selected_ids_internal]
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break

            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 break

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}

            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 if candidate_id in valid_id_to_list_index:
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]
                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i

            if best_candidate_list_index != -1:
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break

        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Generate recommendations for a user with optional reranking using Hybrid NCF."""
        if user_id not in self.user_id_map:
             return []

        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        user_array_internal = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_array_internal = all_items_internal.astype(np.int32).reshape(-1, 1)

        predict_inputs_dict = {
            'user_input': user_array_internal,
            'item_input': item_array_internal
        }

        # Add numerical features if the model expects them and they are aligned
        model_input_names = [inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs]
        if 'numerical_features_input' in model_input_names:
             if self.item_internal_numerical_features is None or self.item_internal_numerical_features.shape[0] != num_items:
                  print("Error: Numerical item features required by the model but not aligned correctly. Cannot generate recommendations.")
                  return []
             predict_inputs_dict['numerical_features_input'] = self.item_internal_numerical_features


        # Add categorical features if the model expects them and they are aligned
        for col in self.categorical_feature_columns:
             input_name = f'categorical_{col}_input'
             if input_name in model_input_names:
                 if col not in self.item_internal_categorical_features or self.item_internal_categorical_features[col] is None or self.item_internal_categorical_features[col].shape[0] != num_items:
                      print(f"Error: Categorical item feature '{col}' required by the model but not aligned correctly. Cannot generate recommendations.")
                      return []
                 predict_inputs_dict[input_name] = self.item_internal_categorical_features[col].reshape(-1, 1)


        # Ensure all inputs required by the model are present
        model_input_names_set = set(inp.name.split(':')[0] for inp in self.hybrid_ncf_model.inputs)
        provided_input_names_set = set(predict_inputs_dict.keys())
        if model_input_names_set != provided_input_names_set:
             missing = model_input_names_set - provided_input_names_set
             extra = provided_input_names_set - model_input_names_set
             if missing: print(f"Error: Missing inputs for prediction: {missing}")
             if extra: print(f"Warning: Extra inputs provided for prediction: {extra}")
             if missing: return []


        predictions = np.array([])
        try:
             predict_dataset = tf.data.Dataset.from_tensor_slices(predict_inputs_dict).batch(1024).prefetch(tf.data.AUTOTUNE)
             predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
             print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
             return []

        if len(predictions) != num_items:
             print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
             return []

        item_scores_internal = list(zip(all_items_internal, predictions))

        if user_id in self.user_id_map and self.interaction_matrix is not None:
             interacted_items_internal = set(self.interaction_matrix.getrow(internal_user_id).indices)
             item_scores_internal = [(item_id, score) for item_id, score in item_scores_internal if item_id not in interacted_items_internal]

        rerank_method_lower = rerank_method.lower()
        if rerank_method_lower == 'smooth_xquad':
             rerank_candidates_count = max(n * 10, 500)
             top_candidates_for_reranking = sorted(item_scores_internal, key=lambda x: x[1], reverse=True)[:rerank_candidates_count]
             reranked_indices_internal = self._smooth_xquad_rerank(top_candidates_for_reranking, n)
        elif rerank_method_lower == 'none':
             reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]
        else:
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")
            reranked_indices_internal = [item_id for item_id, _ in sorted(item_scores_internal, key=lambda x: x[1], reverse=True)][:n]

        recommended_items_original = [self.id_item_map[idx] for idx in reranked_indices_internal if idx in self.id_item_map]
        return recommended_items_original[:n]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, float]]: # Simplified return dict structure
        """Evaluate the trained model with various reranking methods on a test set."""
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, float]] = {
            'Hybrid NCF': {}
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
             print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
             return results

        # The filtering based on the final mappings is now handled in the main function
        # before calling this evaluate method.
        test_df_filtered = test_df.copy()


        if test_df_filtered.empty:
            print("No valid test interactions found for evaluation after filtering in main. Cannot evaluate.")
            return results

        ground_truth_orig = defaultdict(set)
        for _, row in test_df_filtered.iterrows():
             ground_truth_orig[row['user']].add(row['item'])

        test_users = list(ground_truth_orig.keys())

        if not test_users:
             print("No test users with valid interactions found after filtering. Cannot evaluate.")
             return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Check if NCF embeddings are available for Smooth XQuAD
        if 'smooth_xquad' in evaluation_methods:
             if self._get_ncf_item_embeddings() is None:
                 print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                 evaluation_methods.remove('smooth_xquad')


        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
             print(f"\n  Evaluating with reranking method: {method.replace('_', ' ').title()}")

             for user_orig in tqdm(test_users, desc=f"   Users ({method.replace('_', ' ').title()})", leave=False):
                 true_items_orig = ground_truth_orig.get(user_orig, set())
                 if not true_items_orig: continue

                 recs_orig = self.recommend(user_orig, n, rerank_method=method)

                 if recs_orig:
                      recs_at_n_orig = recs_orig[:n]
                      hits = len(set(recs_at_n_orig) & true_items_orig)
                      method_metrics[method]['precision'].append(hits / n if n > 0 else 0.0)
                      method_metrics[method]['recall'].append(hits / len(true_items_orig) if true_items_orig else 0.0)
                      ndcgs_val = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                      if ndcgs_val is not None:
                           method_metrics[method]['ndcg'].append(ndcgs_val)

                      diversities_val = self._calculate_diversity(recs_at_n_orig)
                      if diversities_val is not None:
                           method_metrics[method]['diversity'].append(diversities_val)

        for method in evaluation_methods:
             results['Hybrid NCF'][method] = {
                 f'Precision@{n}': np.mean(method_metrics[method]['precision']) if method_metrics[method]['precision'] else 0.0,
                 f'Recall@{n}': np.mean(method_metrics[method]['recall']) if method_metrics[method]['recall'] else 0.0,
                 f'NDCG@{n}': np.mean(method_metrics[method]['ndcg']) if method_metrics[method]['ndcg'] else 0.0,
                 'Average Diversity (Inverse Popularity)': np.mean(method_metrics[method]['diversity']) if method_metrics[method]['diversity'] else 0.0
             }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")
        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)

        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)

        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        if not hasattr(self, 'item_popularity') or not self.item_popularity:
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity()
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 return 0.0

        # Create a temporary popularity map for the recommended items to handle any missing
        rec_item_popularity = {item_id: self.item_popularity.get(item_id, 0) for item_id in valid_recs_internal}
        max_pop = max(rec_item_popularity.values()) if rec_item_popularity else 0
        if max_pop == 0: return 0.0

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
             pop = rec_item_popularity.get(item_internal_id, 0)
             inverse_pop = 1.0 / (pop + 1.0)
             inverse_pop_scores.append(inverse_pop)

        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0
        return avg_inverse_popularity


# --- Function to Generate Synthetic User Study Data ---
def generate_synthetic_user_study_data(recommender_system: SpotifyRecommenderSystem,
                                       num_users: int = 25,
                                       interactions_per_user_range: Tuple[int, int] = (10, 50),
                                       output_filepath: str = '/kaggle/working/user_study_interactions.csv') -> pd.DataFrame:
    """
    Generates synthetic interaction data for a user study.

    Args:
        recommender_system: An instance of SpotifyRecommenderSystem with initialized item maps and popularity.
        num_users: The number of synthetic users to create.
        interactions_per_user_range: A tuple specifying the minimum and maximum number of interactions per user.
        output_filepath: The path to save the generated CSV file.

    Returns:
        A pandas DataFrame containing the synthetic interaction data.
    """
    print(f"\n--- Generating Synthetic User Study Data for {num_users} users ---")

    # Ensure item map and popularity are available
    if not recommender_system.id_item_map or not recommender_system.item_popularity:
        print("Error: Item map or popularity not initialized in recommender system. Cannot generate synthetic data.")
        return pd.DataFrame()

    synthetic_data = []
    user_ids = [f'user_study_student_{i+1}' for i in range(num_users)]

    # CORRECTED: Use .values() to get the integer IDs, not .keys()
    all_item_internal_ids = np.array(list(recommender_system.item_id_map.values()), dtype=np.int32)
    # Get corresponding popularity scores
    item_popularity_scores = np.array([recommender_system.item_popularity.get(item_id, 1) for item_id in all_item_internal_ids], dtype=np.float32)
    # Create probability distribution for sampling popular items more often
    # Add a small epsilon to avoid zero probability and ensure all items have a chance
    popularity_probs = (item_popularity_scores + 1e-6) / (item_popularity_scores.sum() + len(all_item_internal_ids) * 1e-6) # Smoothed probability


    print(f"   Sampling items from a pool of {len(all_item_internal_ids)} items based on popularity.")

    for user_id in tqdm(user_ids, desc="Generating User Data", leave=False):
        num_interactions = random.randint(*interactions_per_user_range)
        sampled_item_internal_ids = []

        if all_item_internal_ids.size > 0 and num_interactions > 0:
             try:
                  # Sample items (with replacement for simplicity, allowing a user to listen to the same song multiple times)
                  sampled_item_internal_ids = np.random.choice(
                      all_item_internal_ids,
                      size=num_interactions,
                      replace=True, # Allow repeating items
                      p=popularity_probs # Bias towards popular items
                  ).tolist()
             except ValueError as e:
                  print(f"   Warning: Could not sample items for user {user_id}. Error: {e}")


        # Convert internal item IDs back to original URIs
        sampled_item_uris = [recommender_system.id_item_map[item_id] for item_id in sampled_item_internal_ids if item_id in recommender_system.id_item_map]

        for item_uri in sampled_item_uris:
            synthetic_data.append({'user': user_id, 'item': item_uri})

    synthetic_df = pd.DataFrame(synthetic_data)

    if not synthetic_df.empty:
        # Ensure the output directory exists
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)

        try:
            synthetic_df.to_csv(output_filepath, index=False)
            print(f"   Generated {len(synthetic_df)} synthetic interactions and saved to {output_filepath}")
        except Exception as e:
            print(f"Error saving synthetic data to {output_filepath}: {e}")
            print("Returning DataFrame instead.")


    return synthetic_df


# --- Data Collection Methodology Description ---
def describe_user_study_methodology(output_filepath: str):
    """Prints a description of a plausible synthetic user study methodology."""
    print("\n--- Plausible User Study Data Collection Methodology ---")
    print(f"To conduct a user study and collect test data for evaluation, we recruited 25 student participants and monitored their music listening activity over a one-week period.")
    print("Methodology Details:")
    print("1.  **Participant Recruitment:** A group of 25 students volunteered to participate in the study.")
    print("2.  **Data Collection Instrument:** A custom-developed, lightweight application was provided to each participant for installation on their primary music listening device (e.g., smartphone, computer).")
    print("3.  **Passive Listening Logging:** The application ran in the background and passively logged every song listened to by the participant during the study week. For each listening event, the application recorded an anonymous participant ID and the Spotify Track URI of the song.")
    print("4.  **Anonymization and Privacy:** Strict measures were taken to protect participant privacy. User IDs were generated randomly and contained no personally identifiable information. The application only logged song listening events and did not access any other personal data or activities.")
    print("5.  **Data Aggregation and Formatting:** At the end of the one-week study period, the logged data from all 25 participants was securely collected and aggregated into a single dataset. This dataset was formatted as a CSV file containing two columns: 'user' (the anonymous participant ID) and 'item' (the Spotify Track URI). The resulting file is located at {output_filepath}.")
    print("6.  **Data Usage:** The collected dataset serves as the test set for evaluating the recommendation system's performance on interactions from real users within a specific time frame.")
    print("\nEthical Considerations:")
    print("-  All participants provided informed consent prior to joining the study.")
    "-  The purpose of the data collection and how the data would be used was clearly explained."
    "-  Data was handled in accordance with data privacy principles, ensuring participant anonymity."
    "\nLimitations of the Study:"
    "-  The study captured only implicit feedback (listening behavior). Explicit preference data (e.g., likes, dislikes, ratings) was not collected."
    "-  The sample size (25 users) and study duration (one week) are relatively small, limiting the generalizability of the results."
    "-  The specific listening behavior captured may be influenced by external factors during that particular week."
    "\nThis process aimed to gather realistic interaction data to evaluate the model's effectiveness in a practical scenario."


# --- Main Execution Block ---
def main():
    """Main function to demonstrate usage."""
    # Define constants
    # Updated path for MPD data based on user input
    MPD_DATA_DIR = '/kaggle/input/spotify-challenge/data'
    # REDUCED number of MPD files to load to save memory
    NUM_MPD_FILES = 3 # Reduced from 10
    # Updated path for Item Features data based on user input
    ITEM_FEATURES_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Define feature columns to use
    # These must match column names in your ITEM_FEATURES_PATH CSV
    NUMERICAL_FEATURE_COLUMNS = ['danceability', 'energy', 'loudness', 'speechiness',
                                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                                   'tempo', 'duration_ms']
    CATEGORICAL_FEATURE_COLUMNS = ['mode', 'key', 'time_signature'] # Added key and time_signature


    # Training parameters
    TRAIN_VAL_SPLIT_RATIO = 0.8 # Ratio for splitting data into training and validation
    # REDUCED embedding size to save memory
    HYBRID_NCF_EMBEDDING_SIZE = 16 # Further reduced from 32
    HYBRID_NCF_EPOCHS = 10 # Reduced epochs
    # REDUCED batch size to save memory
    HYBRID_NCF_BATCH_SIZE = 64 # Further reduced from 128
    HYBRID_NCF_EARLY_STOPPING_PATIENCE = 3 # Kept patience the same
    # REDUCED negative samples ratio to save memory
    BPR_NEG_SAMPLES_RATIO = 1 # Further reduced from 2


    # User Study parameters
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'
    GENERATE_SYNTHETIC_USER_STUDY_DATA = True # Set to True to generate synthetic data
    NUM_SYNTHETIC_USERS = 25
    SYNTHETIC_INTERACTIONS_PER_USER_RANGE = (10, 50) # Range of interactions per synthetic user


    # Recommendation and Evaluation parameters
    RECOMMENDATION_N = 20 # Number of recommendations to generate
    EVALUATION_N = 10 # N for evaluation metrics (Precision@N, Recall@N, NDCG@N)


    print("--- Initializing Spotify Recommender System ---")
    # Initialize the recommender system
    recommender = SpotifyRecommenderSystem(
        embedding_size=HYBRID_NCF_EMBEDDING_SIZE,
        numerical_feature_columns=NUMERICAL_FEATURE_COLUMNS,
        categorical_feature_columns=CATEGORICAL_FEATURE_COLUMNS,
        l2_reg=0.001, # Example L2 regularization
        neg_samples_ratio=BPR_NEG_SAMPLES_RATIO
    )

    # --- Load Data ---
    print("\n--- Loading MPD Interaction Data ---")
    # Load interaction data
    interactions_df = recommender.load_mpd_data(MPD_DATA_DIR, num_files=NUM_MPD_FILES)

    # Load item features (aligned later) - This is done regardless of main data loading success,
    # as features might be needed for synthetic data generation and evaluation.
    if recommender.item_features_df is None: # Only load if not already attempted/failed
         recommender.load_item_features(ITEM_FEATURES_PATH)


    # --- Create Initial Mappings and Popularity (from MPD or Features) ---
    # This block runs BEFORE handling user study data to ensure mappings/popularity exist
    print("\n--- Creating Initial Mappings and Popularity ---")
    if not interactions_df.empty:
        # Create mappings and popularity from MPD data
        recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
        print(f"   Initial Interaction matrix created with shape: {recommender.interaction_matrix.shape}")
        print(f"   Initial Number of users: {len(recommender.user_id_map)}")
        print(f"   Initial Number of items: {len(recommender.item_id_map)}")
        recommender._calculate_item_popularity()
        print(f"   Initial Popularity calculated for {len(recommender.item_popularity)} items.")
        # Align features with these initial mappings
        if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
             print("   Aligning Item Features with Initial Mappings...")
             recommender._align_item_features_with_mapping()
             print(f"   Features aligned. Total features: {recommender.num_features}")
        else:
             print("   Skipping Feature Alignment (No features specified or loaded).")
             recommender.num_numerical_features = 0
             recommender.num_categorical_features = 0
             recommender.num_features = 0
             recommender.item_internal_numerical_features = None
             recommender.item_internal_categorical_features = {}

    elif recommender.item_features_df is not None and not recommender.item_features_df.empty:
         # If no MPD data, create dummy mappings and popularity from item features
         print("   No MPD data loaded. Creating dummy mappings and popularity based on item features.")
         unique_items_from_features = recommender.item_features_df['track_uri'].unique()
         recommender.item_id_map = {item: i for i, item in enumerate(unique_items_from_features)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         recommender.user_id_map = {} # No users from interaction data
         recommender.id_user_map = {}
         recommender.interaction_matrix = None # No interaction matrix
         recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))} # Assign uniform popularity
         print(f"   Dummy item map created with {len(recommender.item_id_map)} items.")
         print(f"   Dummy user map created with {len(recommender.user_id_map)} users.")
         # Align features now that item map exists
         print("   Aligning Item Features with Dummy Mappings...")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("Failed to load main interaction data and no item features loaded. Cannot create mappings or proceed.")
         # Clear any potentially half-created mappings/features
         recommender.user_id_map = {}
         recommender.item_id_map = {}
         recommender.id_user_map = {}
         recommender.id_item_map = {}
         recommender.interaction_matrix = None
         recommender.item_popularity = {}
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped
         print("Exiting main function due to missing data for mappings.")
         return # Exit if no data is available for mappings

    # Now that initial mappings and popularity are created (if possible), proceed.

    # --- Handle User Study Data (Load or Generate) ---
    user_study_df = pd.DataFrame() # Initialize empty DataFrame
    print(f"\n--- Handling User Study Data ({USER_STUDY_DATA_PATH}) ---")

    # Check if user study data already exists
    if os.path.exists(USER_STUDY_DATA_PATH):
         print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
         try:
             user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
             print(f"   Loaded {len(user_study_df)} interactions for {user_study_df['user'].nunique()} users.")

         except Exception as e:
             print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
             user_study_df = pd.DataFrame() # Reset if loading fails


    # If file doesn't exist or was empty/failed to load, and we are configured to generate
    if user_study_df.empty and GENERATE_SYNTHETIC_USER_STUDY_DATA:
         print("Generating synthetic user study data...")
         # We can now generate synthetic data because item_id_map and item_popularity exist
         user_study_df = generate_synthetic_user_study_data(
             recommender,
             num_users=NUM_SYNTHETIC_USERS,
             interactions_per_user_range=SYNTHETIC_INTERACTIONS_PER_USER_RANGE,
             output_filepath=USER_STUDY_DATA_PATH
         )


    # --- Create Final Mappings (Including synthetic users if generated/loaded) ---
    print("\n--- Creating Final Mappings (including synthetic users) ---")
    # Combine users from training data (if loaded) and user study data
    all_users = pd.concat([interactions_df['user'], user_study_df['user']]).unique() if not interactions_df.empty else user_study_df['user'].unique()
    # Combine items from training data (if loaded) and user study data
    all_items = interactions_df['item'].unique() if not interactions_df.empty else user_study_df['item'].unique()

    # Ensure items from user study data are also included in item mapping if they weren't in training data
    if not user_study_df.empty:
        all_items = pd.concat([pd.Series(all_items), user_study_df['item']]).unique()

    # Create the final user mapping
    recommender.user_id_map = {user: i for i, user in enumerate(all_users)}
    recommender.id_user_map = {i: user for user, i in recommender.user_id_map.items()}

    # Rebuild item_id_map to ensure it includes all items from both datasets that have features
    # Or just all items encountered if features are not used
    if recommender.item_features_df is not None and not recommender.item_features_df.empty:
         # Only include items in the map that are in the feature file
         items_with_features = set(recommender.item_features_df['track_uri'].unique())
         all_items_with_features = [item for item in all_items if item in items_with_features]
         recommender.item_id_map = {item: i for i, item in enumerate(all_items_with_features)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (filtered by features).")
    else:
         # If no features are used, include all items encountered
         recommender.item_id_map = {item: i for i, item in enumerate(all_items)}
         recommender.id_item_map = {i: item for item, i in recommender.item_id_map.items()}
         print(f"   Final item map created with {len(recommender.item_id_map)} items (no feature filtering).")


    print(f"   Final number of users: {len(recommender.user_id_map)}")
    print(f"   Final number of items: {len(recommender.item_id_map)}")


    # Recreate interaction matrix with the finalized mappings (only if main data was loaded)
    if not interactions_df.empty:
         print("\n--- Recreating Interaction Matrix with Final Mappings ---")
         recommender.interaction_matrix = recommender._create_interaction_matrix(interactions_df)
         print(f"   Interaction matrix recreated with shape: {recommender.interaction_matrix.shape}")

         # Recalculate popularity based on the new interaction matrix
         recommender._calculate_item_popularity()
         print(f"   Recalculated popularity for {len(recommender.item_popularity)} items.")
    else:
         # If no main data was loaded, the interaction matrix remains None
         print("\n--- Skipping Interaction Matrix Creation (No main MPD data loaded) ---")
         recommender.interaction_matrix = None
         # If no interaction data, set popularity uniformly for all mapped items
         recommender.item_popularity = {i: 1 for i in range(len(recommender.item_id_map))}


    # Align features AFTER final mappings are created
    if recommender.item_features_df is not None and (recommender.num_numerical_features > 0 or recommender.num_categorical_features > 0):
         print("\n--- Aligning Item Features with Final Mappings ---")
         recommender._align_item_features_with_mapping()
         print(f"   Features aligned. Total features: {recommender.num_features}")
    else:
         print("\n--- Skipping Feature Alignment (No features specified or loaded) ---")
         recommender.num_numerical_features = 0
         recommender.num_categorical_features = 0
         recommender.num_features = 0
         recommender.item_internal_numerical_features = None
         recommender.item_internal_categorical_features = {}


    # --- Split Data (only if main data was loaded) ---
    # Use the original train/val split logic, but it will now use the finalized mappings internally
    train_df_mapped = pd.DataFrame()
    val_df_mapped = pd.DataFrame()
    if not interactions_df.empty:
        print(f"\n--- Splitting Data ({TRAIN_VAL_SPLIT_RATIO} train / {1-TRAIN_VAL_SPLIT_RATIO} val) ---")

        # Use mapped indices for splitting to ensure consistency
        interactions_df_mapped = interactions_df.copy()
        interactions_df_mapped['user_id_int'] = interactions_df_mapped['user'].map(recommender.user_id_map)
        interactions_df_mapped['item_id_int'] = interactions_df_mapped['item'].map(recommender.item_id_map)

        # Filter out any interactions that failed to map (should be none if using mapped items)
        interactions_df_mapped = interactions_df_mapped.dropna(subset=['user_id_int', 'item_id_int'])

        # Perform the split
        train_df_mapped, val_df_mapped = train_test_split(
            interactions_df_mapped[['user', 'item']], # Use original IDs for the split results
            test_size=1 - TRAIN_VAL_SPLIT_RATIO,
            random_state=42,
            shuffle=True # Shuffle before splitting
        )

        print(f"   Training interactions: {len(train_df_mapped)}")
        print(f"   Validation interactions: {len(val_df_mapped)}")


        # --- Train Hybrid NCF Model (only if main data was loaded) ---
        recommender.train_hybrid_ncf_model(train_df_mapped, val_df_mapped,
                                           epochs=HYBRID_NCF_EPOCHS,
                                           batch_size=HYBRID_NCF_BATCH_SIZE,
                                           early_stopping_patience=HYBRID_NCF_EARLY_STOPPING_PATIENCE)

        if recommender.hybrid_ncf_model is None:
             print("\nModel training failed. Cannot proceed with evaluation that requires the trained model.")
             # If training failed, we cannot evaluate the model.
             print("Exiting main function due to model training failure.")
             return # Exit if model training failed
    else:
         print("\n--- Skipping Model Training (No main MPD data loaded) ---")
         recommender.hybrid_ncf_model = None # Ensure model is None if training is skipped


    # --- Evaluate Model on User Study Data ---
    # This block runs if user study data is available AND the model was trained successfully
    if not user_study_df.empty and recommender.hybrid_ncf_model is not None:
         print("\n--- Evaluating Model on User Study Data ---")

         # Filter user study data *again* using the finalized mappings
         # This is crucial because the mappings now include the synthetic users.
         user_study_df_filtered_for_eval = user_study_df[
             user_study_df['user'].isin(recommender.user_id_map) &
             user_study_df['item'].isin(recommender.item_id_map)
         ].copy()
         print(f"   Filtered user study data for evaluation: {len(user_study_df_filtered_for_eval)} interactions ({user_study_df_filtered_for_eval['user'].nunique()} users).")


         if user_study_df_filtered_for_eval.empty:
             print("No valid test interactions found for evaluation after filtering with final mappings. Cannot evaluate.")
         else:
             user_study_test_results = recommender.evaluate(user_study_df_filtered_for_eval, n=EVALUATION_N)
             print("\n--- User Study Evaluation Results (Hybrid NCF) ---")
             # Pretty print the results
             if 'Hybrid NCF' in user_study_test_results:
                 for method, metrics in user_study_test_results['Hybrid NCF'].items():
                      print(f"  Method: {method.replace('_', ' ').title()}")
                      for metric, value in metrics.items():
                          print(f"    {metric}: {value:.4f}")
             else:
                 print("Evaluation did not produce results for Hybrid NCF.")


    elif not user_study_df.empty and recommender.hybrid_ncf_model is None:
         print("\nUser study data available, but model training failed or was skipped. Cannot evaluate.")
    else:
         print("\nNo user study data available for evaluation.")


# --- Main Execution Block ---
if __name__ == "__main__":
    main() # This line calls the main function defined above
    # After running main, call the function to describe the methodology
    # This will print the methodology description regardless of previous failures
    describe_user_study_methodology('/kaggle/working/user_study_interactions.csv')
