In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree recursively and print the full path of every file,
# so the datasets attached to this notebook are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # os.walk yields bare file names; join with the directory to get a usable path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/spotify-challenge/md5sums
/kaggle/input/spotify-challenge/README.md
/kaggle/input/spotify-challenge/license.txt
/kaggle/input/spotify-challenge/stats.txt
/kaggle/input/spotify-challenge/src/check.py
/kaggle/input/spotify-challenge/src/descriptions.py
/kaggle/input/spotify-challenge/src/stats.py
/kaggle/input/spotify-challenge/src/show.py
/kaggle/input/spotify-challenge/src/deeper_stats.py
/kaggle/input/spotify-challenge/src/print.py
/kaggle/input/spotify-challenge/data/mpd.slice.35000-35999.json
/kaggle/input/spotify-challenge/data/mpd.slice.98000-98999.json
/kaggle/input/spotify-challenge/data/mpd.slice.405000-405999.json
/kaggle/input/spotify-challenge/data/mpd.slice.601000-601999.json
/kaggle/input/spotify-challenge/data/mpd.slice.567000-567999.json
/kaggle/input/spotify-challenge/data/mpd.slice.421000-421999.json
/kaggle/input/spotify-challenge/data/mpd.slice.983000-983999.json
/kaggle/input/spotify-challenge/data/mpd.slice.434000-434999.json
/kaggle/input/spotify-cha

In [2]:
!pip install cornac

Collecting cornac
  Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy>2.0.0 (from cornac)
  Downloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting powerlaw (from cornac)
  Downloading powerlaw-1.5-py3-none-any.whl.metadata (9.3 kB)
Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl (31.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m99.7 MB/s[0m eta [36m0:00:00[0

In [9]:
import numpy as np
import pandas as pd
import os
import json
import time
from collections import defaultdict
import random
from typing import List, Dict, Tuple, Optional, Union, Any
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, ndcg_score
from sklearn.metrics.pairwise import cosine_similarity # Import for similarity calculation
from sklearn.model_selection import train_test_split
import scipy.sparse as sp
from tqdm import tqdm
import tensorflow as tf


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as train_test_split_sklearn 
from sklearn.utils import resample 
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, accuracy_score



# Silence TensorFlow log records below ERROR level.
tf.get_logger().setLevel('ERROR')

# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU detected by TensorFlow.")
else:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as e:
        # Reported separately because the message points at an ordering problem.
        print(f"Error setting memory growth: {e}. Need to set it earlier if GPUs were already initialized.")
    except Exception as e:
        print(f"An unknown error occurred setting memory growth: {e}")
    else:
        print(f"Configured memory growth for {len(physical_devices)} GPU(s)")


class SpotifyRecommenderSystem:
    """
    A Spotify recommendation system based on Hybrid NCF (Interactions + Features),
    supporting relevance-only and Smooth XQuAD reranking.
    Includes methods for data loading, hybrid model training, and evaluation.
    """

    def __init__(self,
                   embedding_size: int = 128,
                   mmr_lambda: float = 0.7, # Lambda for Smooth XQuAD trade-off
                   feature_columns: Optional[List[str]] = None, # List of explicit feature columns to use
                   l2_reg: float = 0.001, # L2 regularization strength
                   neg_samples_ratio: int = 8 
                   ):
        """
        Initialize the recommender system with configurable parameters.

        Args:
            embedding_size: Dimensionality of embeddings for NCF model
            mmr_lambda: Trade-off in Smooth XQuAD (0-1). Higher means more relevance, lower means more diversity.
            feature_columns: List of column names from the item features dataset to use as input.
            l2_reg: L2 regularization strength.
            neg_samples_ratio: Negative samples generated per positive interaction for training.
        """
        # Model parameters
        self.embedding_size = embedding_size
        self.mmr_lambda = mmr_lambda
        self.l2_reg = l2_reg # Added L2 regularization parameter
        self.neg_samples_ratio = neg_samples_ratio # Added negative samples ratio parameter

        # Data structures
        self.user_id_map: Dict[Any, int] = {}
        self.item_id_map: Dict[Any, int] = {}
        self.id_user_map: Dict[int, Any] = {}
        self.id_item_map: Dict[int, Any] = {}
        self.interaction_matrix: Optional[sp.csr_matrix] = None
        self.item_popularity: Dict[int, int] = {} # Still needed for the diversity metric

        # Feature handling
        self.item_features_df: Optional[pd.DataFrame] = None # DataFrame for item features
        self.feature_columns = feature_columns if feature_columns is not None else []
        self.feature_scaler: Optional[StandardScaler] = None
        self.num_features = len(self.feature_columns)
        self.item_internal_features: Optional[np.ndarray] = None # Scaled features as numpy array, aligned by internal item ID

        # Models
        self.hybrid_ncf_model: Optional[tf.keras.models.Model] = None # Hybrid NCF model
        self._item_embedding_model: Optional[tf.keras.models.Model] = None # To extract NCF item embeddings from the hybrid model
        self._ncf_item_embeddings_cache: Optional[np.ndarray] = None # Cache for NCF embeddings


    def load_mpd_data(self, input_dir: str, num_files: int = 5) -> pd.DataFrame:
        """Load interaction data from MPD JSON files."""
        data = []
        processed_files = 0
        found_files = []

        # Iterate through potential subdirectories
        for i in range(100):
             if processed_files >= num_files: break
             slice_dir = os.path.join(input_dir, f'{i:03d}')
             json_file_subdir = os.path.join(slice_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_subdir) and os.path.getsize(json_file_subdir) > 0:
                 found_files.append(json_file_subdir)
                 processed_files += 1
                 continue

             # Check flat structure as fallback
             json_file_flat = os.path.join(input_dir, f'mpd.slice.{i*1000:05d}-{(i+1)*1000-1:05d}.json')
             if os.path.exists(json_file_flat) and os.path.getsize(json_file_flat) > 0:
                 found_files.append(json_file_flat)
                 processed_files += 1
                 continue


        if not found_files:
            print(f"No MPD slice files found in {input_dir} or its numbered subdirectories with expected naming.")
            print("Please verify the 'input_dir' path and file structure.")
            return pd.DataFrame()

        for json_file in tqdm(found_files, desc="Loading MPD slices"):
             try:
                 with open(json_file, 'r') as f:
                     raw = json.load(f)
                     for playlist in raw.get('playlists', []):
                         playlist_id = playlist.get('pid')
                         if playlist_id is not None:
                             for track in playlist.get('tracks', []):
                                 track_uri = track.get('track_uri')
                                 if track_uri:
                                     data.append({
                                         'user': playlist_id,
                                         'item': track_uri, # Use track_uri for linking
                                         'track_name': track.get('track_name', ''),
                                         'artist_name': track.get('artist_name', '')
                                     })
             except Exception as e:
                 print(f"Error processing file {json_file}: {e}")

        return pd.DataFrame(data)

    def load_item_features(self, features_filepath: str) -> None:
        """Load and preprocess item features from a specific CSV file path."""
        print("\n--- Loading Item Features ---")
        # The input is now expected to be the full file path, not just the directory
        full_path = features_filepath # Use the path directly

        if not os.path.exists(full_path):
            print(f"Error: Item features file not found at {full_path}. Skipping feature loading.")
            self.item_features_df = None
            self.item_internal_features = None # Ensure internal features are also reset
            self.num_features = 0 # Reset feature count
            return

        try:
            features_df = pd.read_csv(full_path)
            print(f"Loaded {len(features_df)} items with features from {full_path}.")

            # Drop duplicates based on track_id if any
            if 'track_id' in features_df.columns:
                 features_df = features_df.drop_duplicates(subset=['track_id']).reset_index(drop=True)
                 print(f"After dropping duplicates by track_id: {len(features_df)} items.")
            else:
                 print("Warning: 'track_id' column not found in features data. Cannot check for duplicates based on track ID.")


            # Create a Spotify URI from track_id for linking with MPD data
            # MPD uses 'spotify:track:<track_id>', need to create this format
            if 'track_id' in features_df.columns:
                 features_df['track_uri'] = 'spotify:track:' + features_df['track_id'].astype(str)
            else:
                 print("Error: 'track_id' column not found in features data. Cannot create track_uri for linking. Skipping feature processing.")
                 self.item_features_df = None
                 self.item_internal_features = None
                 self.num_features = 0
                 return

            # Select only the specified feature columns
            if not all(col in features_df.columns for col in self.feature_columns):
                 missing_cols = [col for col in self.feature_columns if col not in features_df.columns]
                 print(f"Warning: Missing specified feature columns in CSV: {missing_cols}. Adjusting feature columns.")
                 self.feature_columns = [col for col in self.feature_columns if col in features_df.columns]
                 self.num_features = len(self.feature_columns)
                 if not self.feature_columns:
                      print("No valid feature columns remaining after checking CSV columns. Skipping feature processing.")
                      self.item_features_df = None
                      self.item_internal_features = None
                      return

            # Select features and the linking column (track_uri)
            self.item_features_df = features_df[['track_uri'] + self.feature_columns].copy()

            # Handle potential missing values in feature columns (simple fillna for now)
            # Consider a more sophisticated strategy if needed
            if self.item_features_df[self.feature_columns].isnull().sum().sum() > 0:
                 print(f"Warning: Found missing values in features. Filling with 0.")
                 self.item_features_df[self.feature_columns] = self.item_features_df[self.feature_columns].fillna(0)


            # Scale numerical features
            # Identify numerical columns among the feature columns
            numerical_features = self.item_features_df[self.feature_columns].select_dtypes(include=np.number).columns.tolist()
            if numerical_features:
                 print(f"Scaling numerical features: {numerical_features}")
                 self.feature_scaler = StandardScaler()
                 self.item_features_df[numerical_features] = self.feature_scaler.fit_transform(self.item_features_df[numerical_features])
            else:
                 print("No numerical features found among specified feature columns for scaling.")


            print(f"Loaded and processed {len(self.item_features_df)} items with {self.num_features} features.")

            # Prepare internal feature array, aligned with item_id_map
            # This requires the item_id_map to be created first (usually during interaction matrix creation)
            if self.item_id_map:
                 self._align_item_features_with_mapping()
            else:
                 print("Item ID map not yet created. Will align features after interaction matrix creation.")

        except Exception as e:
            print(f"Error loading or processing item features from {full_path}: {e}")
            self.item_features_df = None
            self.item_internal_features = None
            self.num_features = 0


    def _align_item_features_with_mapping(self):
        """Align item features dataframe with the internal item ID mapping."""
        # Only align if features are loaded and item map exists
        if self.item_features_df is None or not self.item_id_map or not self.feature_columns or self.num_features == 0:
            self.item_internal_features = None
            # print("Cannot align item features: features not loaded, item map not ready, or no feature columns defined.") # Uncomment for debug
            return

        print(f"   Aligning item features with internal item ID map ({len(self.feature_columns)} features)...")
        # Use the item_id_map to create an 'internal_item_id' column in the features dataframe
        # Keep only items present in both the features dataframe and the interaction matrix mapping
        features_mapped_df = self.item_features_df.copy()
        features_mapped_df['internal_item_id'] = features_mapped_df['track_uri'].map(self.item_id_map)
        features_mapped_df = features_mapped_df.dropna(subset=['internal_item_id']) # Drop items not in interaction data
        features_mapped_df['internal_item_id'] = features_mapped_df['internal_item_id'].astype(int)

        if features_mapped_df.empty:
             print("Warning: No items with features found in the interaction matrix mapping after alignment.")
             self.item_internal_features = None
             return

        # Sort by internal item ID to ensure alignment with arrays indexed by internal ID
        features_mapped_df = features_mapped_df.sort_values('internal_item_id').reset_index(drop=True)

        # Create the numpy array of features, aligned by internal item ID
        # Initialize with zeros for items in interaction matrix but not in features data
        num_total_items = len(self.item_id_map)
        self.item_internal_features = np.zeros((num_total_items, self.num_features), dtype=np.float32)

        # Fill the feature array with data from the aligned features dataframe
        # Ensure the columns are in the correct order if the original dataframe columns were reordered somehow
        self.item_internal_features[features_mapped_df['internal_item_id'].values] = features_mapped_df[self.feature_columns].values

        print(f"   Aligned features for {len(features_mapped_df)} items. Internal feature array shape: {self.item_internal_features.shape}.")


    def _create_interaction_matrix(self, df: pd.DataFrame) -> sp.csr_matrix:
        """Create sparse interaction matrix from DataFrame."""
        # Ensure mappings are created BEFORE matrix if they don't exist
        if not self.user_id_map or not self.item_id_map:
            unique_users = df['user'].unique()
            unique_items = df['item'].unique()
            self.user_id_map = {user: i for i, user in enumerate(unique_users)}
            self.item_id_map = {item: i for i, item in enumerate(unique_items)}
            self.id_user_map = {i: user for user, i in self.user_id_map.items()}
            self.id_item_map = {i: item for item, i in self.item_id_map.items()}
            print(f"   Mapped {len(self.user_id_map)} users and {len(self.item_id_map)} items.")
            # If features were loaded but not aligned, align them now
            if self.item_features_df is not None and self.item_internal_features is None:
                 self._align_item_features_with_mapping()


        rows = df['user'].map(self.user_id_map).values
        cols = df['item'].map(self.item_id_map).values
        ratings = np.ones(len(df), dtype=np.float32)

        # Handle invalid indices (items/users in df but not in map - should not happen if map was built from df)
        valid_mask = ~(np.isnan(rows) | np.isnan(cols))
        if not np.all(valid_mask):
            print(f"Warning: Filtering out {np.sum(~valid_mask)} interactions with unknown user/item IDs during matrix creation.")
            rows, cols, ratings = rows[valid_mask], cols[valid_mask], ratings[valid_mask]

        rows, cols = rows.astype(int), cols.astype(int)

        # Further check for out-of-bounds indices (redundant if map was built from df, but safe)
        max_user_idx = len(self.user_id_map) - 1
        max_item_idx = len(self.item_id_map) - 1
        valid_range_mask = (rows >= 0) & (rows <= max_user_idx) & (cols >= 0) & (cols <= max_item_idx)
        if not np.all(valid_range_mask):
             print(f"Warning: Filtering out {np.sum(~valid_range_mask)} interactions with out-of-bounds indices after mapping.")
             rows, cols, ratings = rows[valid_range_mask], cols[valid_range_mask], ratings[valid_range_mask]


        return sp.csr_matrix((ratings, (rows, cols)),
                            shape=(len(self.user_id_map), len(self.item_id_map)))

    def _calculate_item_popularity(self) -> None:
        """Calculate popularity of each item based on interaction counts."""
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0:
            self.item_popularity = {}
            
            return

        item_counts = self.interaction_matrix.sum(axis=0).A1
        self.item_popularity = {i: int(count) for i, count in enumerate(item_counts)} # Ensure counts are int


    def train_hybrid_ncf_model(self, train_df: pd.DataFrame, val_df: pd.DataFrame, # Accept train and validation DataFrames
                                 epochs: int = 30,
                                 batch_size: int = 512,
                                 early_stopping_patience: int = 5) -> None: # Added early stopping
        """Train the Hybrid Neural Collaborative Filtering (NCF) model with features.

        Steps performed:
          1. Build the interaction matrix / id mappings from train+val if they do not
             exist yet, compute item popularity if empty, and (re)align item features.
          2. Build a Keras model with a GMF branch (element-wise product of user and
             item embeddings) and an MLP branch (user embedding, item embedding and
             explicit item features through Dense 256/128/64 with dropout), joined
             by a single sigmoid output unit.
          3. Turn the rows of ``train_df`` / ``val_df`` into positive samples, add
             negative samples from ``self._generate_negative_samples``, and fit with
             binary cross-entropy and early stopping on ``val_loss``.

        On success the trained model is in ``self.hybrid_ncf_model`` and the embedding
        extractor in ``self._item_embedding_model``. If prerequisites are missing, or
        fitting raises, both are set to None and a message is printed; exceptions
        raised by ``fit`` are not propagated.

        Args:
            train_df: Training interactions with ``user`` and ``item`` columns.
            val_df: Validation interactions with ``user`` and ``item`` columns.
            epochs: Maximum number of training epochs.
            batch_size: Batch size for both training and validation datasets.
            early_stopping_patience: Epochs without ``val_loss`` improvement before
                training stops (best weights are restored).
        """
        print("\n--- Training Hybrid NCF Model ---")

        
        # Mappings are built from train+val together so validation ids are also indexable.
        combined_df_for_mapping = pd.concat([train_df, val_df], ignore_index=True)
        if not self.user_id_map or not self.item_id_map or self.interaction_matrix is None:
             self.interaction_matrix = self._create_interaction_matrix(combined_df_for_mapping) # Mappings created here

        # Calculate popularity if needed (used by diversity metric)
        if not self.item_popularity:
             self._calculate_item_popularity()
             print(f"   Calculated popularity for {len(self.item_popularity)} items.")

        
        # Re-align when features are loaded but the aligned array is missing or its
        # shape no longer matches (number of items, number of features).
        if self.item_features_df is not None and (self.item_internal_features is None or self.item_internal_features.shape[0] != len(self.item_id_map) or self.item_internal_features.shape[1] != self.num_features):
             self._align_item_features_with_mapping()


        # Check if essential data is available for training
        if self.interaction_matrix is None or self.interaction_matrix.nnz == 0 or \
            len(self.user_id_map) == 0 or len(self.item_id_map) == 0 or \
            self.item_internal_features is None or self.num_features == 0:
             print("Interaction data or item features not ready/aligned. Cannot train Hybrid NCF.")
             print(f" Debug Info: Interaction Matrix: {self.interaction_matrix is not None}, Num Interactions: {self.interaction_matrix.nnz if self.interaction_matrix is not None else 0}, Num Users: {len(self.user_id_map)}, Num Items: {len(self.item_id_map)}, Features Ready: {self.item_internal_features is not None}, Num Features: {self.num_features}")

             self.hybrid_ncf_model = None
             self._item_embedding_model = None
             return


        # Create model architecture
        num_users = len(self.user_id_map)
        num_items = len(self.item_id_map)
        num_features = self.num_features # Use the number of specified features

        # Input layers
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')
        item_features_input = tf.keras.layers.Input(shape=(num_features,), dtype='float32', name='item_features_input') # Feature input

        # GMF path (Uses embeddings from interaction)
        gmf_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            # L2 regularization on embeddings
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_user_embedding'
        )
        gmf_item_embedding_layer = tf.keras.layers.Embedding(
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            # L2 regularization on embeddings
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='gmf_item_embedding'
        )
        gmf_user_embedding = gmf_user_embedding_layer(user_input)
        gmf_item_embedding = gmf_item_embedding_layer(item_input)

        gmf_user_vec = tf.keras.layers.Flatten()(gmf_user_embedding)
        gmf_item_vec = tf.keras.layers.Flatten()(gmf_item_embedding)
        # Element-wise product of the user and item vectors.
        gmf_layer = tf.keras.layers.Multiply()([gmf_user_vec, gmf_item_vec])

        # MLP path (Uses embeddings from interaction + Explicit Features)
        mlp_user_embedding_layer = tf.keras.layers.Embedding(
            num_users, self.embedding_size,
            embeddings_initializer='he_normal',
            # L2 regularization on embeddings
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_user_embedding'
        )
        mlp_item_embedding_layer = tf.keras.layers.Embedding( # Kept in a variable: reused by the embedding-extraction model below
            num_items, self.embedding_size,
            embeddings_initializer='he_normal',
            # L2 regularization on embeddings
            embeddings_regularizer=tf.keras.regularizers.l2(self.l2_reg),
            name='mlp_item_embedding'
        )
        mlp_user_embedding = mlp_user_embedding_layer(user_input)
        mlp_item_embedding = mlp_item_embedding_layer(item_input)

        mlp_user_vec = tf.keras.layers.Flatten()(mlp_user_embedding)
        mlp_item_vec = tf.keras.layers.Flatten()(mlp_item_embedding)

        # Concatenate MLP embeddings with item features
        mlp_features_concat = tf.keras.layers.Concatenate()([mlp_user_vec, mlp_item_vec, item_features_input])


        # MLP layers
        mlp_output = mlp_features_concat
        # Each Dense layer is L2-regularized and followed by Dropout(0.3)
        for dim in [256, 128, 64]: # Layer widths are hard-coded here
            mlp_dense = tf.keras.layers.Dense(
                dim,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
            )(mlp_output)
            mlp_output = tf.keras.layers.Dropout(0.3)(mlp_dense)


        # Combine GMF and MLP+Features outputs
        hybrid_concat = tf.keras.layers.Concatenate()([gmf_layer, mlp_output])
        # Final output layer with L2 regularization
        output = tf.keras.layers.Dense(
            1,
            activation='sigmoid',
            kernel_regularizer=tf.keras.regularizers.l2(self.l2_reg)
        )(hybrid_concat)

        # Create and compile model
        self.hybrid_ncf_model = tf.keras.models.Model(
            inputs=[user_input, item_input, item_features_input], # Three inputs: user index, item index, item feature vector
            outputs=output
        )

        self.hybrid_ncf_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), # Learning rate is hard-coded
            loss='binary_crossentropy',
            metrics=['accuracy'] # Accuracy is reported during training
        )
        print("   Hybrid NCF model architecture built and compiled with regularization.")

        # Create a separate model to get item embeddings from the MLP path (for Smooth XQuAD)
        # Note: it takes only item_input and reuses the same Embedding layer object,
        # so it shares weights with the hybrid model.
        self._item_embedding_model = tf.keras.models.Model(
            inputs=item_input,
            outputs=mlp_item_embedding_layer(item_input) # Extract embeddings from MLP item embedding layer
        )
        print("   Created separate model for extracting NCF item embeddings from MLP path.")


        # Prepare training and validation data for Hybrid NCF
        print(f"   Generating positive and negative samples for Hybrid NCF training ({self.neg_samples_ratio} negatives per positive)...")

       
        # Every row of train_df / val_df is a positive (label 1) sample.
        # NOTE(review): ids absent from the mappings map to NaN, and .astype(int) on NaN
        # does not yield a usable index; this looks safe only when the mappings were
        # built from these same frames. The "< len(id_item_map)" masks below would not
        # reliably catch such values - confirm.
        train_user_indices_pos = train_df['user'].map(self.user_id_map).values.astype(int)
        train_item_indices_pos = train_df['item'].map(self.item_id_map).values.astype(int)

        val_user_indices_pos = val_df['user'].map(self.user_id_map).values.astype(int)
        val_item_indices_pos = val_df['item'].map(self.item_id_map).values.astype(int)

        
        train_labels_pos = np.ones(len(train_user_indices_pos), dtype=np.float32)
        val_labels_pos = np.ones(len(val_user_indices_pos), dtype=np.float32)

        
        # Drop positives whose item index is beyond the known item range.
        valid_train_pos_mask = train_item_indices_pos < len(self.id_item_map)
        if not np.all(valid_train_pos_mask):
             print(f"Warning: Filtering {np.sum(~valid_train_pos_mask)} train positive interactions with items not in item map.")
             train_user_indices_pos = train_user_indices_pos[valid_train_pos_mask]
             train_item_indices_pos = train_item_indices_pos[valid_train_pos_mask]
             train_labels_pos = train_labels_pos[valid_train_pos_mask]

        valid_val_pos_mask = val_item_indices_pos < len(self.id_item_map)
        if not np.all(valid_val_pos_mask):
             print(f"Warning: Filtering {np.sum(~valid_val_pos_mask)} val positive interactions with items not in item map.")
             val_user_indices_pos = val_user_indices_pos[valid_val_pos_mask]
             val_item_indices_pos = val_item_indices_pos[valid_val_pos_mask]
             val_labels_pos = val_labels_pos[valid_val_pos_mask]


        # Look up the aligned feature row of each positive item.
        train_pos_item_features = self.item_internal_features[train_item_indices_pos]
        val_pos_item_features = self.item_internal_features[val_item_indices_pos]


        # Generate negative samples for training and validation
        # (_generate_negative_samples is defined outside this view; presumably it returns
        # parallel arrays of user and item indices - verify against its definition)
        train_neg_user_indices, train_neg_item_indices = self._generate_negative_samples(train_user_indices_pos, train_item_indices_pos, neg_samples_per_positive=self.neg_samples_ratio)
        val_neg_user_indices, val_neg_item_indices = self._generate_negative_samples(val_user_indices_pos, val_item_indices_pos, neg_samples_per_positive=self.neg_samples_ratio)


        
        # Same range check for the sampled negatives.
        valid_train_neg_mask = train_neg_item_indices < len(self.id_item_map)
        if not np.all(valid_train_neg_mask):
             print(f"Warning: Filtering {np.sum(~valid_train_neg_mask)} train negative samples with items not in item map.")
             train_neg_user_indices = train_neg_user_indices[valid_train_neg_mask]
             train_neg_item_indices = train_neg_item_indices[valid_train_neg_mask]

        valid_val_neg_mask = val_neg_item_indices < len(self.id_item_map)
        if not np.all(valid_val_neg_mask):
             print(f"Warning: Filtering {np.sum(~valid_val_neg_mask)} val negative samples with items not in item map.")
             val_neg_user_indices = val_neg_user_indices[valid_val_neg_mask]
             val_neg_item_indices = val_neg_item_indices[valid_val_neg_mask]


        train_neg_item_features = self.item_internal_features[train_neg_item_indices]
        val_neg_item_features = self.item_internal_features[val_neg_item_indices]

        # Labels for negative samples are 0
        train_labels_neg = np.zeros(len(train_neg_user_indices), dtype=np.float32)
        val_labels_neg = np.zeros(len(val_neg_user_indices), dtype=np.float32)


        
        # Stack positives followed by negatives; all four arrays stay row-parallel.
        all_train_user_indices = np.concatenate([train_user_indices_pos, train_neg_user_indices])
        all_train_item_indices = np.concatenate([train_item_indices_pos, train_neg_item_indices])
        all_train_item_features = np.concatenate([train_pos_item_features, train_neg_item_features])
        all_train_labels = np.concatenate([train_labels_pos, train_labels_neg])

        all_val_user_indices = np.concatenate([val_user_indices_pos, val_neg_user_indices])
        all_val_item_indices = np.concatenate([val_item_indices_pos, val_neg_item_indices])
        all_val_item_features = np.concatenate([val_pos_item_features, val_neg_item_features])
        all_val_labels = np.concatenate([val_labels_pos, val_labels_neg])


        # Shuffle training data (one shared permutation keeps the arrays aligned)
        # NOTE(review): the tf.data pipeline below shuffles again, so this pass looks redundant.
        train_indices = np.arange(len(all_train_user_indices))
        np.random.shuffle(train_indices)
        all_train_user_indices = all_train_user_indices[train_indices]
        all_train_item_indices = all_train_item_indices[train_indices]
        all_train_item_features = all_train_item_features[train_indices]
        all_train_labels = all_train_labels[train_indices]
        print(f"   Prepared {len(all_train_labels)} training samples for Hybrid NCF.")

        # Shuffle validation data (same shared-permutation approach)
        val_indices = np.arange(len(all_val_user_indices))
        np.random.shuffle(val_indices)
        all_val_user_indices = all_val_user_indices[val_indices]
        all_val_item_indices = all_val_item_indices[val_indices]
        all_val_item_features = all_val_item_features[val_indices]
        all_val_labels = all_val_labels[val_indices]
        print(f"   Prepared {len(all_val_labels)} validation samples for Hybrid NCF.")


        # Create tf.data.Datasets for efficient training and validation.
        # The dict keys match the names of the model's Input layers.
        train_dataset = tf.data.Dataset.from_tensor_slices(
            ({'user_input': all_train_user_indices, 'item_input': all_train_item_indices, 'item_features_input': all_train_item_features}, all_train_labels)
        ).shuffle(buffer_size=100000).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        val_dataset = tf.data.Dataset.from_tensor_slices(
            ({'user_input': all_val_user_indices, 'item_input': all_val_item_indices, 'item_features_input': all_val_item_features}, all_val_labels)
        ).batch(batch_size).prefetch(tf.data.AUTOTUNE) # No tf.data shuffle here (the arrays were already permuted above)


        # Define Early Stopping Callback
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', # Monitor validation loss
            patience=early_stopping_patience, # Number of epochs with no improvement after which training will be stopped
            mode='min', # Lower validation loss is better
            restore_best_weights=True 
        )
        print(f"   Configured Early Stopping with patience={early_stopping_patience}.")


        # Train model
        print(f"   Fitting Hybrid NCF model for up to {epochs} epochs with Early Stopping (Batch Size: {batch_size})...")
        try:
            self.hybrid_ncf_model.fit(
                train_dataset,
                epochs=epochs,
                validation_data=val_dataset, # Provide validation dataset
                callbacks=[early_stopping], # Add early stopping callback
                verbose=1
            )
            print("\nHybrid NCF model training complete (possibly stopped early).")
            self._ncf_item_embeddings_cache = None # Cached embeddings belong to the previous weights

        except tf.errors.ResourceExhaustedError as e:
             # Out-of-memory is reported with tuning hints; the models are discarded.
             print(f"\n   GPU Memory Error during Hybrid NCF training: {e}")
             print("   Try reducing 'batch_size', 'embedding_size', or number of feature columns.")
             self.hybrid_ncf_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model training failed.")
        except Exception as e:
             # Any other failure also discards the models instead of raising.
             print(f"An error occurred during Hybrid NCF training: {e}")
             self.hybrid_ncf_model = None
             self._item_embedding_model = None
             print("Hybrid NCF model training failed.")


    def _generate_negative_samples(self, user_indices_pos: np.ndarray,
                                  item_indices_pos: np.ndarray, # Use positive indices as input
                                  neg_samples_per_positive: int = 4) -> Tuple[np.ndarray, np.ndarray]:
        """Generate negative samples for training or validation from the pool of items with features."""
        neg_user_indices, neg_item_indices = [], []
        num_items = len(self.item_id_map) # Use length of mapped items

        # Get the set of all items that have features and are in the overall item_id_map
        items_with_features_internal_ids_set = set()
        if self.item_features_df is not None and 'track_uri' in self.item_features_df.columns and self.item_id_map:
             items_with_features_orig_uris = self.item_features_df['track_uri'].unique()
             items_with_features_internal_ids_set = {self.item_id_map[uri] for uri in items_with_features_orig_uris if uri in self.item_id_map}

        if not items_with_features_internal_ids_set:
             print("Warning: No items with features found in the item map. Cannot generate negatives from featured items.")
             # Fallback: sample from all items in the interaction matrix mapping
             items_with_features_internal_ids_set = set(range(num_items))


        
        user_positive_items: Dict[int, set] = defaultdict(set)
        # Corrected: Iterate through the input user_indices_pos and item_indices_pos
        for u, i in zip(user_indices_pos, item_indices_pos):
             user_positive_items[u].add(i)


        # Only generate negatives for users who had at least one positive interaction in this subset (train/val)
        unique_users_with_positives = np.unique(user_indices_pos)

        for u in tqdm(unique_users_with_positives, desc=f"Generating Negatives ({neg_samples_per_positive}x)", leave=False):
             positive_items_for_user = user_positive_items.get(u, set()) # Items this specific user interacted with in this subset

             # Candidate negative items must have features AND not be positive for this user
             negative_candidates_for_user_with_features = list(items_with_features_internal_ids_set - positive_items_for_user)


             if negative_candidates_for_user_with_features:
                 # Sample negative items for this user
                 num_pos_for_user = len(positive_items_for_user)
                 num_neg_to_sample = min(len(negative_candidates_for_user_with_features),
                                    num_pos_for_user * neg_samples_per_positive)

                 if num_neg_to_sample > 0:
                     neg_items = random.sample(negative_candidates_for_user_with_features, num_neg_to_sample)
                     neg_user_indices.extend([u] * len(neg_items))
                     neg_item_indices.extend(neg_items)

        if not neg_user_indices:
            print("Warning: No negative samples were successfully generated.")


        return np.array(neg_user_indices, dtype=np.int32), np.array(neg_item_indices, dtype=np.int32) # Ensure int32 dtype


    def _get_ncf_item_embeddings(self) -> Optional[np.ndarray]:
        """Extracts and caches NCF item embeddings from the MLP path."""
        if self._ncf_item_embeddings_cache is not None:
            return self._ncf_item_embeddings_cache

        # Embeddings are only available if the hybrid NCF model trained successfully
        if self.hybrid_ncf_model is None or self._item_embedding_model is None or len(self.item_id_map) == 0:
            print("NCF item embedding model not available (Hybrid NCF not trained?) or no items mapped.")
            return None

        num_items = len(self.item_id_map)
        item_ids_internal = np.arange(num_items, dtype=np.int32) # Use int32

        print("   Extracting and caching NCF item embeddings...")
        batch_size = 1024 # Batch size for prediction

        try:
            
            item_dataset = tf.data.Dataset.from_tensor_slices({'item_input': item_ids_internal.reshape(-1, 1)}).batch(batch_size).prefetch(tf.data.AUTOTUNE)

            # Predict using the separate item embedding model
            all_embeddings_raw = self._item_embedding_model.predict(item_dataset, verbose=0)

            # The separate model outputs the embedding directly, shape should be (num_items, embedding_size)
            if all_embeddings_raw.ndim == 2 and all_embeddings_raw.shape[0] == num_items and all_embeddings_raw.shape[1] == self.embedding_size:
                 all_embeddings = all_embeddings_raw
            elif all_embeddings_raw.ndim == 3 and all_embeddings_raw.shape[1] == 1 and all_embeddings_raw.shape[2] == self.embedding_size:
                 # Handle cases where Keras might return shape (None, 1, embedding_size)
                 all_embeddings = all_embeddings_raw[:, 0, :]
            else:
                 print(f"Unexpected item embedding prediction shape: {all_embeddings_raw.shape}")
                 return None


            self._ncf_item_embeddings_cache = all_embeddings
            print(f"   Extracted and cached embeddings of shape: {all_embeddings.shape}")
            return all_embeddings

        except Exception as e:
            print(f"Error during NCF item embedding extraction: {e}")
            return None


    def _smooth_xquad_rerank(self, initial_recommendations: List[Tuple[int, float]], k: int) -> List[int]:
        """Applies Smooth XQuAD reranking using NCF item embeddings."""
        # print("   Applying Smooth XQuAD reranking...") # Uncomment for debug
        if not initial_recommendations: return []
        num_initial = len(initial_recommendations); k = min(k, num_initial)
        if k <= 0: return []
        if num_initial <= k: return [item_id for item_id, _ in initial_recommendations[:k]]

        # Smooth XQuAD requires item embeddings for similarity calculation
        item_embeddings = self._get_ncf_item_embeddings()
        if item_embeddings is None or item_embeddings.size == 0:
             print("   Smooth XQuAD Reranking: NCF Item embeddings not available. Falling back to relevance ranking.")
             # Fallback to just returning top k by original score
             return [item_id for item_id, _ in sorted(initial_recommendations, key=lambda x: x[1], reverse=True)][:k]

      
        valid_initial_recommendations = [(item_id, score) for item_id, score in initial_recommendations if 0 <= item_id < item_embeddings.shape[0]]
        if len(valid_initial_recommendations) < k:
            print(f"   Smooth XQuAD Reranking: Not enough valid item IDs with embeddings in initial recommendations ({len(valid_initial_recommendations)}/{len(initial_recommendations)}). Returning available valid items.")
            # Return top items from the valid ones if fewer than k
            return [item_id for item_id, _ in sorted(valid_initial_recommendations, key=lambda x: x[1], reverse=True)][:min(k, len(valid_initial_recommendations))]


        # Sort initial recommendations by relevance score (descending)
        # We need to work with indices to pop efficiently
        candidate_indices_and_scores = sorted(enumerate(valid_initial_recommendations), key=lambda x: x[1][1], reverse=True) # (original_index_in_valid, (item_id, score))

        selected_ids_internal = [] # Internal IDs of selected items
        # Create a list of (internal_item_id, relevance_score) tuples for easier processing
        remaining_candidates_data = [(item_id, score) for _, (item_id, score) in candidate_indices_and_scores]


        # Select the first item (highest relevance)
        # The very first item is chosen purely on relevance
        if remaining_candidates_data: # Ensure there's at least one candidate
            first_item_id, first_item_score = remaining_candidates_data.pop(0)
            selected_ids_internal.append(first_item_id)
        else:
             return [] # No candidates to select


        while len(selected_ids_internal) < k and remaining_candidates_data:
            best_xquad_score = -np.inf
            best_candidate_list_index = -1 # Index within the remaining_candidates_data list

            # Embeddings of items already selected
            current_selected_embeddings = item_embeddings[selected_ids_internal]

            # Calculate similarity between remaining candidates and the set of selected items
            candidate_internal_ids = [item_id for item_id, _ in remaining_candidates_data]

            if not candidate_internal_ids: break # No candidates left

            # Ensure candidate_internal_ids are valid indices for item_embeddings
            valid_candidate_internal_ids = [idx for idx in candidate_internal_ids if 0 <= idx < item_embeddings.shape[0]]
            if not valid_candidate_internal_ids:
                 # print("   Smooth XQuAD Reranking: No remaining candidates with valid embeddings. Stopping.")
                 break # No candidates left with embeddings

            candidate_embeddings = item_embeddings[valid_candidate_internal_ids]

            # Calculate cosine similarity matrix
            # Shape (num_valid_remaining, num_selected)
            similarity_matrix = cosine_similarity(candidate_embeddings, current_selected_embeddings)
            # Find the maximum similarity of each remaining candidate to any item in the selected set
            max_similarity_to_selected = np.max(similarity_matrix, axis=1)

            valid_id_to_list_index = {item_id: i for i, item_id in enumerate(valid_candidate_internal_ids)}


            # Evaluate XQuAD score for each remaining candidate
            # Iterate through the remaining_candidates_data list which contains (item_id, relevance_score)
            for i, (candidate_id, relevance_score) in enumerate(remaining_candidates_data):
                 # Check if the candidate_id had valid embeddings and was included in similarity calculation
                 if candidate_id in valid_id_to_list_index:
                     # Get the correct similarity score using the map
                     diversity_penalty = max_similarity_to_selected[valid_id_to_list_index[candidate_id]]

                     xquad_score = self.mmr_lambda * relevance_score - (1 - self.mmr_lambda) * diversity_penalty

                     if xquad_score > best_xquad_score:
                         best_xquad_score = xquad_score
                         best_candidate_list_index = i # Index in the current list of remaining candidates
                 # Else: This candidate did not have valid embeddings or was already selected/invalid, skip it for XQuAD scoring


            if best_candidate_list_index != -1:
                # Select the best item based on XQuAD score
                next_item_id, _ = remaining_candidates_data.pop(best_candidate_list_index)
                selected_ids_internal.append(next_item_id)
            else:
                 # Should not happen if remaining_candidates is not empty AND at least one has valid embeddings, but as a safeguard
                 # If best_candidate_list_index is still -1, it means no remaining candidate had valid embeddings
                 if remaining_candidates_data:
                      print("   Smooth XQuAD Reranking: No remaining candidate with valid embeddings found. Stopping reranking.")
                 break


        # print(f"   Finished Smooth XQuAD reranking, selected {len(selected_ids_internal)} items.") # Uncomment for debug
        return selected_ids_internal


    def recommend(self, user_id: Any, n: int = 10,
                  rerank_method: str = 'none') -> List[Any]:
        """Score every mapped item for one user and return the best original item IDs.

        Args:
            user_id: Original (external) user identifier.
            n: Number of recommendations requested.
            rerank_method: ``'none'`` for pure relevance ranking or ``'smooth_xquad'``
                for diversity-aware reranking (case-insensitive). Any other value
                prints a warning and is treated as ``'none'``.

        Returns:
            Original item IDs, best first. The list is empty when the user is
            unknown, the model is not trained, the item features are missing or
            misaligned, or prediction fails.
        """
        if user_id not in self.user_id_map:
            return []
        internal_user_id = self.user_id_map[user_id]

        if self.hybrid_ncf_model is None:
            print("Hybrid NCF model not trained. Cannot generate recommendations.")
            return []

        num_items = len(self.item_id_map)
        all_items_internal = np.arange(num_items)

        # The hybrid model takes one feature row per item as an input.
        features_unusable = (
            self.item_internal_features is None
            or self.item_internal_features.shape[0] != num_items
            or self.num_features == 0
        )
        if features_unusable:
            print("Error: Item features not loaded or aligned correctly for prediction. Cannot generate recommendations using Hybrid NCF.")
            return []

        # Column vectors pairing this user with every item.
        user_column = np.full(num_items, internal_user_id, dtype=np.int32).reshape(-1, 1)
        item_column = all_items_internal.astype(np.int32).reshape(-1, 1)
        item_features_array = self.item_internal_features

        try:
            model_inputs = {
                'user_input': user_column,
                'item_input': item_column,
                'item_features_input': item_features_array,
            }
            predict_dataset = tf.data.Dataset.from_tensor_slices(model_inputs).batch(1024).prefetch(tf.data.AUTOTUNE)
            predictions = self.hybrid_ncf_model.predict(predict_dataset, verbose=0).flatten()
        except Exception as e:
            print(f"Error during Hybrid NCF model prediction for user {user_id}: {e}")
            return []

        if len(predictions) != num_items:
            print(f"Warning: Prediction length mismatch for user {user_id}. Expected {num_items}, got {len(predictions)}")
            return []

        scored_items = list(zip(all_items_internal, predictions))

        # Items already present in the user's interaction row are never recommended.
        if self.interaction_matrix is not None:
            already_seen = set(self.interaction_matrix.getrow(internal_user_id).indices)
            scored_items = [pair for pair in scored_items if pair[0] not in already_seen]

        method = rerank_method.lower()
        if method not in ('smooth_xquad', 'none'):
            print(f"Warning: Unknown reranking method '{rerank_method}'. Using 'none' (relevance only).")

        by_relevance = sorted(scored_items, key=lambda pair: pair[1], reverse=True)
        if method == 'smooth_xquad':
            # Hand the reranker a pool larger than n so it has room to diversify.
            pool_size = max(n * 10, 500)
            chosen_internal = self._smooth_xquad_rerank(by_relevance[:pool_size], n)
        else:
            chosen_internal = [item_id for item_id, _ in by_relevance[:n]]

        # Translate internal indices back to original item IDs.
        return [self.id_item_map[idx] for idx in chosen_internal if idx in self.id_item_map]


    def evaluate(self, test_df: pd.DataFrame, n: int = 10) -> Dict[str, Dict[str, Dict[str, float]]]:
        """Evaluate the trained Hybrid NCF model on held-out interactions.

        Each reranking method ('none' and, when item embeddings are available,
        'smooth_xquad') is scored with Precision@n, Recall@n, NDCG@n and the
        inverse-popularity diversity score, averaged over test users.

        Args:
            test_df: Interactions with ``'user'`` and ``'item'`` columns holding
                original IDs. Rows whose user or item is absent from the model's
                ID maps are ignored.
            n: Recommendation list length used for the ``@n`` metrics.

        Returns:
            ``{'Hybrid NCF': {method: {metric_name: value}}}``. The inner dict is
            empty when evaluation cannot run (model or mappings missing, or no
            usable test rows). Users for whom ``recommend`` returns nothing are
            left out of the averages; their number is printed per method.
        """
        print(f"\n--- Starting Evaluation (n={n}) ---")
        start_time = time.time()

        results: Dict[str, Dict[str, Dict[str, float]]] = {
            'Hybrid NCF': {}  # Filled with one metrics dict per reranking method
        }

        if self.hybrid_ncf_model is None or self.interaction_matrix is None or len(self.user_id_map) == 0 or len(self.item_id_map) == 0:
            print("Hybrid NCF model or mappings not initialized. Skipping evaluation.")
            return results

        # Keep only interactions whose user AND item are present in the ID maps.
        known_rows = test_df['user'].isin(self.user_id_map) & test_df['item'].isin(self.item_id_map)
        test_df_filtered = test_df[known_rows]

        if test_df_filtered.empty:
            print("No valid test interactions found for users/items seen during training mappings. Cannot evaluate.")
            return results

        # Build the ground truth column-wise. iterrows() would turn every row into
        # a Series with one common dtype, which can upcast integer IDs to float.
        ground_truth_orig = defaultdict(set)
        for user_orig, item_orig in zip(test_df_filtered['user'], test_df_filtered['item']):
            ground_truth_orig[user_orig].add(item_orig)

        test_users = list(ground_truth_orig.keys())

        if not test_users:
            print("No test users with valid interactions found after filtering. Cannot evaluate.")
            return results

        print(f"Evaluating for {len(test_users)} test users with valid interactions.")

        evaluation_methods = ['none', 'smooth_xquad']

        # Smooth XQuAD needs item embeddings; drop it up front when they are unavailable.
        if 'smooth_xquad' in evaluation_methods:
            if self._get_ncf_item_embeddings() is None:
                print("Warning: NCF Item embeddings not available for Smooth XQuAD evaluation. Skipping Smooth XQuAD.")
                evaluation_methods.remove('smooth_xquad')

        # Per-user metric values, collected separately for each reranking method.
        method_metrics: Dict[str, Dict[str, List[float]]] = {
            method: {'precision': [], 'recall': [], 'ndcg': [], 'diversity': []}
            for method in evaluation_methods
        }

        for method in evaluation_methods:
            method_title = method.replace('_', ' ').title()
            print(f"\n  Evaluating with reranking method: {method_title}")
            collected = method_metrics[method]
            users_without_recs = 0

            for user_orig in tqdm(test_users, desc=f"   Users ({method_title})", leave=False):
                true_items_orig = ground_truth_orig.get(user_orig, set())
                if not true_items_orig:
                    continue

                recs_orig = self.recommend(user_orig, n, rerank_method=method)
                if not recs_orig:
                    # Excluded from the averages, but counted so the exclusion is visible.
                    users_without_recs += 1
                    continue

                recs_at_n_orig = recs_orig[:n]
                hits = len(set(recs_at_n_orig) & true_items_orig)
                collected['precision'].append(hits / n if n > 0 else 0.0)
                collected['recall'].append(hits / len(true_items_orig))

                ndcg_value = self._calculate_ndcg(recs_at_n_orig, true_items_orig, n)
                if ndcg_value is not None:
                    collected['ndcg'].append(ndcg_value)

                diversity_value = self._calculate_diversity(recs_orig)
                if diversity_value is not None:
                    collected['diversity'].append(diversity_value)

            if users_without_recs:
                print(f"   {users_without_recs} of {len(test_users)} users received no recommendations and were excluded from the '{method}' averages.")

        def _mean_or_zero(values: List[float]) -> float:
            """Average of ``values``, or 0.0 when no value was collected."""
            return float(np.mean(values)) if values else 0.0

        for method in evaluation_methods:
            collected = method_metrics[method]
            results['Hybrid NCF'][method] = {
                f'Precision@{n}': _mean_or_zero(collected['precision']),
                f'Recall@{n}': _mean_or_zero(collected['recall']),
                f'NDCG@{n}': _mean_or_zero(collected['ndcg']),
                'Average Diversity (Inverse Popularity)': _mean_or_zero(collected['diversity'])
            }

        end_time = time.time()
        print(f"\nEvaluation finished in {end_time - start_time:.2f} seconds.")

        return results

    def _calculate_ndcg(self, recommendations_orig: List[Any],
                        true_items_orig: set,
                        k: int) -> float:
        """Calculate NDCG@k using original item IDs."""
        if not recommendations_orig or k <= 0:
            return 0.0

        # Truncate recommendations to k
        recommendations_at_k_orig = recommendations_orig[:k]

        dcg = 0.0
        # Calculate DCG
        for i, item_orig in enumerate(recommendations_at_k_orig):
            if item_orig in true_items_orig:
                 dcg += 1.0 / np.log2(i + 2)


        
        valid_true_items_internal = {self.item_id_map[item] for item in true_items_orig if item in self.item_id_map}
        num_relevant_at_k = min(len(valid_true_items_internal), k)


        idcg = 0.0
        for i in range(num_relevant_at_k):
            idcg += 1.0 / np.log2(i + 2)

        return dcg / idcg if idcg > 0 else 0.0

    def _calculate_diversity(self, recommendations_orig: List[Any]) -> float:
        """Calculate diversity of recommendations based on inverse popularity."""
        if not recommendations_orig or not self.item_id_map:
            return 0.0

        # Filter recommendations to those in the item_id_map and get their internal IDs
        valid_recs_internal = [self.item_id_map[item] for item in recommendations_orig if item in self.item_id_map]

        if not valid_recs_internal:
             return 0.0

        
        if not hasattr(self, 'item_popularity') or not self.item_popularity:
    
    
            if self.interaction_matrix is not None and self.interaction_matrix.nnz > 0:
                 self._calculate_item_popularity() # Calculate if interaction matrix is available and not empty
            if not hasattr(self, 'item_popularity') or not self.item_popularity:
                 # print("Warning: Cannot calculate item popularity for diversity metric.") # Uncomment for debug
                 return 0.0 # Return 0 if popularity still can't be calculated


        max_pop = max(self.item_popularity.values()) if self.item_popularity else 0 # Handle case where popularity is empty
        if max_pop == 0: return 0.0 # Should be handled before, but safe check

        inverse_pop_scores = []
        for item_internal_id in valid_recs_internal:
           
             pop = self.item_popularity.get(item_internal_id, 0) # Default popularity to 0 if somehow missing
             # Use a smoothing factor or add epsilon to avoid division by zero/infinity for unseen items
             inverse_pop = 1.0 / (pop + 1.0) # Add 1 to popularity to avoid div by zero
             inverse_pop_scores.append(inverse_pop)


        # Calculate average inverse popularity as the diversity metric
        avg_inverse_popularity = np.mean(inverse_pop_scores) if inverse_pop_scores else 0.0

        return avg_inverse_popularity


    # Removed the demonstrate_feature_importance_analysis method from the class
    # The code is now a standalone block in the main function


def _print_reranking_report(results, results_heading, comparison_scope):
    """Print per-reranking metrics and the Smooth XQuAD vs. None comparison.

    Args:
        results: Mapping returned by ``recommender.evaluate``; expected to hold a
            ``'Hybrid NCF'`` entry that maps reranking method name -> metric dict.
            Falsy values (``None``, ``{}``) print nothing.
        results_heading: Text inserted into the "Results on ...:" heading.
        comparison_scope: Text inserted into the comparison banner.
    """
    if not results:
        return

    print(f"\nResults on {results_heading}:")
    if 'Hybrid NCF' not in results:
        return

    ncf_results = results['Hybrid NCF']
    print("\nHybrid NCF Model:")
    for method_name, metrics in ncf_results.items():
        print(f"  Reranking: {method_name.replace('_', ' ').title()}")
        for metric, value in metrics.items():
            print(f"    {metric}: {value:.4f}")

    # The relative comparison needs both the baseline and the reranked metrics.
    if 'none' not in ncf_results or 'smooth_xquad' not in ncf_results:
        return

    print(f"\n--- Hybrid NCF Reranking Comparison (Smooth XQuAD vs None) on {comparison_scope} (%) ---")
    none_metrics = ncf_results['none']
    smooth_xquad_metrics = ncf_results['smooth_xquad']
    for metric, none_value in none_metrics.items():
        if metric not in smooth_xquad_metrics:
            continue
        smooth_xquad_value = smooth_xquad_metrics[metric]
        if none_value != 0:
            improvement = ((smooth_xquad_value - none_value) / none_value) * 100
            print(f"  {metric}: {improvement:+.2f}% change")
        else:
            # A percentage change is undefined against a zero baseline.
            print(f"  {metric}: 'None' baseline value is 0. Smooth XQuAD value: {smooth_xquad_value:.4f}")


def _run_feature_importance_analysis(recommender, interactions_df):
    """Train a RandomForest on item features to rank which ones predict interaction.

    Positive samples are the features of interacted items; negative samples are
    the features of randomly drawn items (with features) that the same user did
    not interact with. Feature importances and classifier metrics are printed.
    Any exception is reported and swallowed so the surrounding run can finish.

    Args:
        recommender: Recommender exposing ``item_features_df`` (with a
            ``track_uri`` column) and ``feature_columns``.
        interactions_df: Interaction frame with at least ``user`` and ``item`` columns.
    """
    print("\n--- Starting Explicit Feature Importance Analysis ---")

    # Check if necessary data is available before proceeding with analysis
    if recommender is None or recommender.item_features_df is None or recommender.item_features_df.empty or not recommender.feature_columns:
        print("Item features or recommender object not ready. Cannot perform feature importance analysis.")
        return
    if interactions_df is None or interactions_df.empty:
        print("Interactions data not ready. Cannot perform feature importance analysis.")
        return

    try:
        print('Preparing data for feature importance model...')
        feature_lookup = recommender.item_features_df[['track_uri'] + recommender.feature_columns]

        # 1. Positive samples: interactions whose item also has features.
        #    errors='ignore' keeps the analysis alive when the interaction frame
        #    does not carry the optional track_name / artist_name columns.
        positive_interactions_with_features = interactions_df.merge(
            feature_lookup,
            left_on='item', right_on='track_uri', how='inner'
        ).drop(columns=['track_uri', 'track_name', 'artist_name'], errors='ignore')  # Keep user and item columns for sampling
        positive_interactions_with_features['label'] = 1

        if positive_interactions_with_features.empty:
            print('No positive interaction samples with features found. Cannot perform this analysis.')
            return
        print(f'Generated {len(positive_interactions_with_features)} positive interaction samples with features.')

        # 2. Generate negative samples
        # Pool of all items *with features*
        items_with_features_set = set(recommender.item_features_df['track_uri'].unique())

        # Group the positive items per user in one pass instead of rescanning
        # every (user, item) pair for each user.
        interacted_items_lookup = {}
        for pos_user, pos_item in zip(positive_interactions_with_features['user'],
                                      positive_interactions_with_features['item']):
            interacted_items_lookup.setdefault(pos_user, set()).add(pos_item)

        users_for_neg_sampling = positive_interactions_with_features['user'].unique()

        neg_samples_ratio_fi = 1  # Ratio for Feature Importance analysis (can be different from NCF training)
        negative_samples_list_fi = []

        print('Generating negative samples for feature importance analysis...')

        for user_id in tqdm(users_for_neg_sampling, desc='Generating Negatives (FI)', leave=False):
            interacted_items_by_user = interacted_items_lookup.get(user_id, set())
            # Candidate negative items: all items *with features* MINUS items this user interacted with
            negative_candidates_for_user_fi = list(items_with_features_set - interacted_items_by_user)
            if not negative_candidates_for_user_fi:
                continue

            # Sample as many negatives as positives (times the ratio), capped by the pool size
            num_pos_for_user_fi = len(interacted_items_by_user)
            num_neg_to_sample_fi = min(len(negative_candidates_for_user_fi),
                                       num_pos_for_user_fi * neg_samples_ratio_fi)

            if num_neg_to_sample_fi > 0:
                neg_items_fi = random.sample(negative_candidates_for_user_fi, num_neg_to_sample_fi)
                for item_id in neg_items_fi:
                    negative_samples_list_fi.append({'user': user_id, 'item': item_id, 'label': 0})

        if not negative_samples_list_fi:
            print('No negative samples generated for feature importance analysis. Cannot proceed.')
            return

        negative_samples_df_fi = pd.DataFrame(negative_samples_list_fi)
        print(f'Generated {len(negative_samples_df_fi)} negative samples for feature importance analysis.')

        # Link negative samples to item features
        negative_samples_with_features_fi = negative_samples_df_fi.merge(
            feature_lookup,
            left_on='item', right_on='track_uri', how='inner'  # Ensure negative samples have features
        ).drop(columns=['track_uri', 'user', 'item'])  # Drop user/item as we predict from features

        if negative_samples_with_features_fi.empty:
            print('No negative samples found with linked item features for analysis. Cannot proceed.')
            return

        negative_samples_with_features_fi['label'] = 0  # Ensure label is 0
        print(f'Linked {len(negative_samples_with_features_fi)} negative samples with features for analysis.')

        model_columns_fi = recommender.feature_columns + ['label']
        positive_features_only_fi = positive_interactions_with_features[model_columns_fi]
        negative_features_only_fi = negative_samples_with_features_fi[model_columns_fi]

        all_samples_features_fi = pd.concat([positive_features_only_fi, negative_features_only_fi], ignore_index=True)

        if all_samples_features_fi.empty:
            print('Combined samples dataset (features only) is empty for analysis. Cannot proceed.')
            return
        print(f'Combined positive and negative samples (features only) for analysis: {len(all_samples_features_fi)}')

        X_fi = all_samples_features_fi[recommender.feature_columns]
        y_fi = all_samples_features_fi['label']

        X_train_clf_fi, X_test_clf_fi, y_train_clf_fi, y_test_clf_fi = train_test_split_sklearn(X_fi, y_fi, test_size=0.25, random_state=42, stratify=y_fi)

        # 3. Initialize and train a classifier model
        print('Training RandomForestClassifier for feature importance...')
        clf_model_fi = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced', n_jobs=-1)
        clf_model_fi.fit(X_train_clf_fi, y_train_clf_fi)
        print('RandomForestClassifier training complete.')

        # 4. Get feature importance scores
        importance_df_fi = pd.DataFrame({
            'Feature': recommender.feature_columns,
            'Importance': clf_model_fi.feature_importances_,
        })
        importance_df_fi = importance_df_fi.sort_values('Importance', ascending=False).reset_index(drop=True)

        print('\nFeature Importance Scores (from RandomForestClassifier):')
        print(importance_df_fi)

        print('\nEvaluating Classifier Model for Feature Importance Analysis...')
        y_pred_clf_fi = clf_model_fi.predict(X_test_clf_fi)
        y_pred_proba_clf_fi = clf_model_fi.predict_proba(X_test_clf_fi)[:, 1]

        accuracy_fi = accuracy_score(y_test_clf_fi, y_pred_clf_fi)
        print(f'Accuracy: {accuracy_fi:.4f}')

        print('\nClassification Report:')
        print(classification_report(y_test_clf_fi, y_pred_clf_fi))

        # Check AUC and Average Precision
        try:
            roc_auc_fi = roc_auc_score(y_test_clf_fi, y_pred_proba_clf_fi)
            print(f'ROC AUC Score: {roc_auc_fi:.4f}')
        except ValueError:  # Handles case where only one class is present in y_test
            print('ROC AUC score cannot be calculated as only one class is present in test samples for FI analysis.')

        try:
            avg_precision_fi = average_precision_score(y_test_clf_fi, y_pred_proba_clf_fi)
            print(f'Average Precision Score: {avg_precision_fi:.4f}')
        except ValueError:  # Handles case where only one class is present in y_test
            print('Average Precision score cannot be calculated as only one class is present in test samples for FI analysis.')

        # Confusion Matrix
        cm_fi = confusion_matrix(y_test_clf_fi, y_pred_clf_fi)
        print('\nConfusion Matrix:')
        print(cm_fi)

        print('\nInterpretation of Feature Importance Scores:')
        print('- Higher scores indicate features that were more influential in predicting whether a user interacted with an item.')
        print('- This gives you insight into which explicit track characteristics are associated with user engagement in the MPD dataset.')
        print('- Compare these scores to the implicit features learned by NCF and how they relate to the embedding space.')

    except Exception as e:
        # Best-effort analysis: report the failure instead of aborting the run.
        print(f'An error occurred during explicit feature importance analysis: {e}')
        print('Please ensure all necessary libraries are installed and the code structure is correct.')


def main():
    """Run the end-to-end Hybrid NCF experiment.

    Steps, in order: load MPD interactions and item features, split the
    interactions into train/validation/test, train the hybrid NCF model,
    evaluate it on the test split and (if the CSV exists) on user-study data,
    print the metrics, show example recommendations, and finally run the
    explicit feature importance analysis. Returns ``None``; returns early if
    interactions cannot be loaded or split.
    """
    # Configure TensorFlow logging (optional, but can help if you want less verbose output)
    tf.get_logger().setLevel('ERROR')  # Set TensorFlow logger level to ERROR or WARN

    # --- Configuration Parameters ---
    # MPD Interaction Data: directory holding the MPD JSON slices
    MPD_INPUT_DIR = '/kaggle/input/spotify-challenge/data'

    # Spotify Tracks Feature Data: FULL FILE PATH to the CSV
    FEATURES_FILE_PATH = '/kaggle/input/-spotify-tracks-dataset/dataset.csv'

    # Model Hyperparameters
    EMBEDDING_SIZE = 128  # Embedding size for NCF
    L2_REGULARIZATION = 0.001  # L2 regularization strength

    # Hybrid NCF Model Parameters
    NCF_EPOCHS = 30  # Max training epochs (early stopping may end sooner)
    NCF_BATCH_SIZE = 512
    EARLY_STOPPING_PATIENCE = 5  # Early stopping patience
    NEG_SAMPLES_RATIO = 8  # Negative samples per positive interaction during training

    # Reranking Parameters
    # Trade-off for Smooth XQuAD (0-1), passed to the recommender as mmr_lambda.
    # Higher means more relevance.
    SMOOTH_XQUAD_LAMBDA = 0.7

    # Explicit Feature Columns to use in the Hybrid Model.
    # Numerical features plus the binary 'mode' and 'explicit' flags are used directly;
    # 'key', 'time_signature' and 'track_genre' would need one-hot/embedding handling.
    SELECTED_FEATURE_COLUMNS = [
        'popularity', 'duration_ms', 'danceability', 'energy', 'loudness',
        'speechiness', 'acousticness', 'instrumentalness', 'liveness',
        'valence', 'tempo', 'mode', 'explicit'
    ]

    # Data Loading Parameters
    NUM_MPD_FILES_TO_LOAD = 5  # Number of MPD slice files to load from the directory

    # Data Splitting Parameters (Manual split for train, validation, test)
    VAL_SET_FRACTION = 0.15  # Fraction of (train+val) data for validation set
    TEST_SET_FRACTION = 0.20  # Fraction of total data for the main test set

    # User Study Data File Path; evaluation on it only runs if the file exists.
    USER_STUDY_DATA_PATH = '/kaggle/working/user_study_interactions.csv'

    # Recommendation & Evaluation Parameters
    N_RECOMMENDATIONS = 10

    # --- Data Loading ---
    print("Loading data...")
    recommender = SpotifyRecommenderSystem(
        embedding_size=EMBEDDING_SIZE,
        mmr_lambda=SMOOTH_XQUAD_LAMBDA,
        feature_columns=SELECTED_FEATURE_COLUMNS,
        l2_reg=L2_REGULARIZATION,
        neg_samples_ratio=NEG_SAMPLES_RATIO
    )

    # Load interaction data first
    interactions_df = recommender.load_mpd_data(
        input_dir=MPD_INPUT_DIR,
        num_files=NUM_MPD_FILES_TO_LOAD
    )

    if interactions_df.empty:
        print("Error: Failed to load interaction data. Cannot proceed.")
        return

    print(f"Loaded {len(interactions_df)} total interactions.")

    # Load and process item feature data
    recommender.load_item_features(
        features_filepath=FEATURES_FILE_PATH
    )

    # If item features didn't load or align, warn but keep going; training is
    # expected to cope with missing features on its own.
    if recommender.item_internal_features is None or recommender.item_internal_features.shape[1] == 0:
        print("\nWarning: Item features not available or aligned. Hybrid NCF training will likely fail or run without features.")

    # --- Data Splitting (Train, Validation, Test) ---
    print("\n--- Splitting interaction data into train, validation, and test sets ---")
    try:
        # Split into train+validation and test first
        train_val_df, test_df = train_test_split(
            interactions_df,
            test_size=TEST_SET_FRACTION,
            random_state=42,
            shuffle=True
        )

        # Then split train+validation into train and validation
        train_df, val_df = train_test_split(
            train_val_df,
            test_size=VAL_SET_FRACTION,  # This fraction is *of the train_val_df*
            random_state=42,
            shuffle=True
        )

        print(f"Interactions for training: {len(train_df)}")
        print(f"Interactions for validation: {len(val_df)}")
        print(f"Interactions for testing: {len(test_df)}")

    except ValueError as e:
        print(f"Error splitting data: {e}. Make sure you have enough interactions.")
        return
    except Exception as e:
        print(f"An unexpected error occurred during data splitting: {e}")
        return

    # --- Train Hybrid NCF Model ---
    recommender.train_hybrid_ncf_model(
        train_df,
        val_df,
        epochs=NCF_EPOCHS,
        batch_size=NCF_BATCH_SIZE,
        early_stopping_patience=EARLY_STOPPING_PATIENCE
    )

    # --- Evaluate Model on Main Test Set ---
    main_test_evaluation_results = {}
    if recommender.hybrid_ncf_model is not None:  # Check if Hybrid NCF model trained successfully
        print("\n--- Evaluating Hybrid NCF Model on Main Test Set ---")
        main_test_evaluation_results = recommender.evaluate(test_df, n=N_RECOMMENDATIONS)
    else:
        print("\nSkipping main test set evaluation as the Hybrid NCF model did not train successfully.")

    # --- Evaluate Model on User Study Data (if available) ---
    user_study_test_results = {}
    # Bound up front: the example-recommendation step below reads this name even
    # when the file is missing or fails to load.
    user_study_df = None
    print(f"\n--- Evaluating Hybrid NCF Model on User Study Data ({USER_STUDY_DATA_PATH}) ---")
    if os.path.exists(USER_STUDY_DATA_PATH):
        print(f"Loading user study data from {USER_STUDY_DATA_PATH}...")
        try:
            user_study_df = pd.read_csv(USER_STUDY_DATA_PATH)
            print(f"Loaded {len(user_study_df)} interactions from user study.")
            if not user_study_df.empty:
                # Evaluate the SAME trained hybrid model on the user study data.
                # User study users/items might not be in the original MPD mappings.
                user_study_test_results = recommender.evaluate(user_study_df, n=N_RECOMMENDATIONS)
            else:
                print("User study data file is empty. Skipping evaluation.")
        except Exception as e:
            print(f"Error loading user study data from {USER_STUDY_DATA_PATH}: {e}")
            print("Skipping user study evaluation.")
    else:
        print(f"User study data file not found at {USER_STUDY_DATA_PATH}. Skipping evaluation.")
        print("Please collect data from your 25 users and save it to this path in a compatible CSV format (at least 'user' and 'item' columns).")

    # --- Print Evaluation Results (Combined) ---
    print("\n--- Final Evaluation Results ---")
    _print_reranking_report(main_test_evaluation_results, "Main MPD Test Set", "Main Test Set")
    _print_reranking_report(user_study_test_results, "User Study Test Data", "User Study Data")

    if not main_test_evaluation_results and not user_study_test_results:
        print("No evaluation results available.")

    # --- Generate Example Recommendations (Optional) ---
    print("\n--- Example Recommendations ---")
    # Use test users from main test set or user study if available
    example_users_df = None
    if test_df is not None and not test_df.empty:
        example_users_df = test_df
        print("Using users from Main Test Set for examples.")
    elif user_study_df is not None and not user_study_df.empty:  # Use user study users if main test set is empty
        example_users_df = user_study_df
        print("Using users from User Study Data for examples.")
    else:
        print("No test users available for example recommendations.")

    # Keep only users the recommender has a mapping for, and show at most three
    example_users_orig = []
    if example_users_df is not None and not example_users_df['user'].empty:
        valid_example_users = example_users_df['user'].unique()
        valid_example_users_in_map = [user for user in valid_example_users if user in recommender.user_id_map]
        example_users_orig = valid_example_users_in_map[:min(3, len(valid_example_users_in_map))]

    if example_users_orig and recommender.hybrid_ncf_model is not None:  # Only generate if users and model are available
        for user in example_users_orig:
            print(f"\nRecommendations for User: {user}")
            recs_none = recommender.recommend(user, n=N_RECOMMENDATIONS, rerank_method='none')
            print(f"  Hybrid NCF (None Reranking): {recs_none}")

            # Only try Smooth XQuAD if embeddings are available
            if recommender._get_ncf_item_embeddings() is not None:
                recs_smooth_xquad = recommender.recommend(user, n=N_RECOMMENDATIONS, rerank_method='smooth_xquad')
                print(f"  Hybrid NCF (Smooth XQuAD Reranking): {recs_smooth_xquad}")
            else:
                print("  Smooth XQuAD Reranking skipped: NCF item embeddings not available.")
    else:
        print("No users or trained Hybrid NCF model available for example recommendations.")

    # --- Explicit Feature Importance Analysis ---
    # Uses a separate classifier on the item features; independent of the NCF model.
    _run_feature_importance_analysis(recommender, interactions_df)


    


# Entry point: run the full pipeline only when this code is executed directly
# (e.g. as a notebook cell or script), not when it is imported as a module.
if __name__ == "__main__":
    main()

Configured memory growth for 1 GPU(s)
Loading data...


Loading MPD slices: 100%|██████████| 5/5 [00:01<00:00,  4.74it/s]


Loaded 333697 total interactions.

--- Loading Item Features ---
Loaded 114000 items with features from /kaggle/input/-spotify-tracks-dataset/dataset.csv.
After dropping duplicates by track_id: 89741 items.
Scaling numerical features: ['popularity', 'duration_ms', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'mode']
Loaded and processed 89741 items with 13 features.
Item ID map not yet created. Will align features after interaction matrix creation.


--- Splitting interaction data into train, validation, and test sets ---
Interactions for training: 226913
Interactions for validation: 40044
Interactions for testing: 66740

--- Training Hybrid NCF Model ---
   Mapped 5000 users and 91484 items.
   Aligning item features with internal item ID map (13 features)...
   Aligned features for 3085 items. Internal feature array shape: (91484, 13).
   Calculated popularity for 91484 items.
   Hybrid NCF model architecture

                                                                                

   Prepared 2026649 training samples for Hybrid NCF.
   Prepared 359948 validation samples for Hybrid NCF.
   Configured Early Stopping with patience=5.
   Fitting Hybrid NCF model for up to 30 epochs with Early Stopping (Batch Size: 512)...
Epoch 1/30
[1m3959/3959[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 7ms/step - accuracy: 0.9683 - loss: 0.2468 - val_accuracy: 0.9886 - val_loss: 0.0961
Epoch 2/30
[1m3959/3959[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6ms/step - accuracy: 0.9885 - loss: 0.0974 - val_accuracy: 0.9886 - val_loss: 0.0828
Epoch 3/30
[1m3959/3959[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6ms/step - accuracy: 0.9884 - loss: 0.0887 - val_accuracy: 0.9886 - val_loss: 0.0795
Epoch 4/30
[1m3959/3959[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6ms/step - accuracy: 0.9885 - loss: 0.0877 - val_accuracy: 0.9886 - val_loss: 0.0791
Epoch 5/30
[1m3959/3959[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6ms/step - accurac

                                                                    


  Evaluating with reranking method: Smooth Xquad


                                                                            


Evaluation finished in 3240.76 seconds.

--- Evaluating Hybrid NCF Model on User Study Data (/kaggle/working/user_study_interactions.csv) ---
Loading user study data from /kaggle/working/user_study_interactions.csv...
Loaded 641 interactions from user study.

--- Starting Evaluation (n=10) ---
No valid test interactions found for users/items seen during training mappings. Cannot evaluate.

--- Final Evaluation Results ---

Results on Main MPD Test Set:

Hybrid NCF Model:
  Reranking: None
    Precision@10: 0.0008
    Recall@10: 0.0007
    NDCG@10: 0.0009
    Average Diversity (Inverse Popularity): 0.1599
  Reranking: Smooth Xquad
    Precision@10: 0.0012
    Recall@10: 0.0010
    NDCG@10: 0.0012
    Average Diversity (Inverse Popularity): 0.2062

--- Hybrid NCF Reranking Comparison (Smooth XQuAD vs None) on Main Test Set (%) ---
  Precision@10: +51.28% change
  Recall@10: +38.26% change
  NDCG@10: +33.88% change
  Average Diversity (Inverse Popularity): +28.94% change

Results on User

                                                                              

Generated 34102 negative samples for feature importance analysis.
Linked 34102 negative samples with features for analysis.
Combined positive and negative samples (features only) for analysis: 68680
Training RandomForestClassifier for feature importance...
RandomForestClassifier training complete.

Feature Importance Scores (from RandomForestClassifier):
             Feature  Importance
0         popularity    0.608127
1        duration_ms    0.054477
2   instrumentalness    0.053629
3       acousticness    0.044969
4            valence    0.036858
5           loudness    0.034412
6       danceability    0.033766
7             energy    0.033125
8        speechiness    0.032162
9              tempo    0.029945
10          liveness    0.029643
11          explicit    0.004574
12              mode    0.004311

Evaluating Classifier Model for Feature Importance Analysis...
Accuracy: 0.9533

Classification Report:
              precision    recall  f1-score   support

           0       0.