In [1]:

import os

In [2]:
os.chdir("../")

In [3]:
%pwd

'd:\\recommendation-engine'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    implicit_model_path: Path
    nn_model_path: Path
    scaler_path: Path
    encoders_path: Path
    all_params: dict
    metrics_file_name: Path
    k: int

In [5]:
from src.hybrid_recommender.constants import *
from src.hybrid_recommender.utils.common import read_yaml, create_directories, save_json

In [6]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        params = self.params.HybridRecommender

        create_directories([config.root_dir])

        return ModelEvaluationConfig(
            root_dir=config.root_dir,
            test_data_path=config.test_data_path,
            implicit_model_path=config.implicit_model_path,
            nn_model_path=config.nn_model_path,
            scaler_path=config.scaler_path,
            encoders_path=config.encoders_path,
            all_params=params,
            metrics_file_name=config.metrics_file_name,
            k=params.k
        )

In [None]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from typing import Dict, Tuple, List
import json
from scipy.sparse import csr_matrix
from src.hybrid_recommender import logger

In [None]:
class HybridRecommenderEvaluator:
    def __init__(self, config: ModelEvaluationConfig):
        self.config = config
        self.load_models()
        
    def load_models(self):
        """Load all required models and encoders"""
        self.implicit_model = joblib.load(self.config.implicit_model_path)
        self.nn_model = tf.keras.models.load_model(self.config.nn_model_path)
        self.scaler = joblib.load(self.config.scaler_path)
        encoders = joblib.load(self.config.encoders_path)
        
        self.user_encoder = encoders['user_encoder']
        self.item_encoder = encoders['item_encoder']
        self.organizer_encoder = encoders['organizer_encoder']
        
        # Create inverse mappings
        self.user_decoder = {i: u for u, i in self.user_encoder.items()}
        self.item_decoder = {i: m for m, i in self.item_encoder.items()}
        self.organizer_decoder = {i: o for o, i in self.organizer_encoder.items()}

    def evaluate_recommendations(self, test_data: pd.DataFrame) -> Dict[str, float]:
        """Evaluate recommendation quality using multiple metrics"""
        # Filter test data to only include known users/events
        test_data = test_data[
            test_data['user_id'].isin(self.user_encoder) & 
            test_data['event_id'].isin(self.item_encoder)
        ]
        
        # Group by user and get actual interactions
        user_events = test_data.groupby('user_id')['event_id'].apply(set).to_dict()
        
        # Calculate metrics
        metrics = {
            'precision@k': [],
            'recall@k': [],
            'ndcg@k': [],
            'map@k': [],
            'coverage': self.calculate_coverage(test_data),
            'popularity_bias': self.calculate_popularity_bias(test_data)
        }
        
        for user_id, actual_events in user_events.items():
            recommended = self._recommend(user_id)
            metrics['precision@k'].append(self._precision(actual_events, recommended))
            metrics['recall@k'].append(self._recall(actual_events, recommended))
            metrics['ndcg@k'].append(self._ndcg(actual_events, recommended))
            metrics['map@k'].append(self._average_precision(actual_events, recommended))
        
        # Aggregate metrics
        results = {
            'precision@k': np.mean(metrics['precision@k']),
            'recall@k': np.mean(metrics['recall@k']),
            'ndcg@k': np.mean(metrics['ndcg@k']),
            'map@k': np.mean(metrics['map@k']),
            'coverage': metrics['coverage'],
            'popularity_bias': metrics['popularity_bias'],
            'f1_score': 2 * (metrics['precision@k'] * metrics['recall@k']) / 
                        (metrics['precision@k'] + metrics['recall@k'])
                        if (metrics['precision@k'] + metrics['recall@k']) > 0 else 0
        }
        
        return results

    def _precision(self, actual: set, recommended: list) -> float:
        """Calculate precision@k"""
        relevant = len(set(recommended) & actual)
        return relevant / len(recommended) if recommended else 0

    def _recall(self, actual: set, recommended: list) -> float:
        """Calculate recall@k"""
        relevant = len(set(recommended) & actual)
        return relevant / len(actual) if actual else 0

    def _ndcg(self, actual: set, recommended: list) -> float:
        """Calculate Normalized Discounted Cumulative Gain"""
        relevances = [1 if event in actual else 0 for event in recommended]
        discounts = np.log2(np.arange(2, len(relevances) + 2))
        dcg = np.sum(relevances / discounts)
        ideal_relevances = [1] * min(len(actual), len(recommended))
        idcg = np.sum(ideal_relevances / np.log2(np.arange(2, len(ideal_relevances) + 2)))
        return dcg / idcg if idcg > 0 else 0

    def _average_precision(self, actual: set, recommended: list) -> float:
        """Calculate Average Precision"""
        relevant = []
        for i, event in enumerate(recommended):
            if event in actual:
                relevant.append(self._precision(actual, recommended[:i+1]))
        return np.mean(relevant) if relevant else 0

    def calculate_coverage(self, test_data: pd.DataFrame) -> float:
        """Calculate what percentage of events can be recommended"""
        all_events = set(self.item_decoder.values())
        recommended_events = set()
        
        for user_id in test_data['user_id'].unique():
            recommended_events.update(self._recommend(user_id))
        
        return len(recommended_events) / len(all_events)

    def calculate_popularity_bias(self, test_data: pd.DataFrame) -> float:
        """Calculate how biased recommendations are toward popular events"""
        event_popularity = test_data['event_id'].value_counts().to_dict()
        recommendations_popularity = []
        
        for user_id in test_data['user_id'].unique():
            for event_id in self._recommend(user_id):
                recommendations_popularity.append(event_popularity.get(event_id, 0))
        
        avg_rec_pop = np.mean(recommendations_popularity) if recommendations_popularity else 0
        avg_all_pop = np.mean(list(event_popularity.values()))
        
        return avg_rec_pop / avg_all_pop if avg_all_pop > 0 else 0

    def _recommend(self, user_id: int) -> List[str]:
        """Generate recommendations for a single user"""
        user_encoded = self.user_encoder[user_id]
        
        user_items = csr_matrix((1, len(self.item_encoder)), dtype=np.float32)
        
        # Get implicit recommendations
        implicit_recs = self.implicit_model.recommend(
            userid=user_encoded,
            user_items=user_items,
            N=self.config.k * 3,
            filter_already_liked_items=False
        )
        
        # Get organizer IDs for recommended events
        recommended_events = [self.item_decoder[item] for item in implicit_recs[0]]
        organizer_ids = [self._get_organizer_for_event(event) for event in recommended_events]
        organizer_encoded = [self.organizer_encoder.get(o, 0) for o in organizer_ids]
        
        # Score with neural network
        user_array = np.array([user_encoded] * len(implicit_recs[0]))
        event_array = np.array(implicit_recs[0])
        organizer_array = np.array(organizer_encoded)
        
        nn_scores = self.nn_model.predict(
            [user_array, event_array, organizer_array], 
            verbose=0
        )
        nn_scores = self.scaler.inverse_transform(nn_scores.reshape(-1, 1)).flatten()
        
        # Combine and sort
        combined_scores = implicit_recs[1] * 0.6 + nn_scores * 0.4
        top_indices = np.argsort(combined_scores)[::-1][:self.config.k]
        
        return [self.item_decoder[implicit_recs[0][i]] for i in top_indices]

    def _get_organizer_for_event(self, event_id: str) -> str:
        """Helper method to get organizer for an event"""
        # Implement actual organizer lookup here
        return str(hash(event_id) % 1000)

    def save_results(self):
        """Run evaluation and save metrics"""
        test_data = pd.read_csv(self.config.test_data_path)
        metrics = self.evaluate_recommendations(test_data)
        
        full_results = {
            **metrics,
            "model_parameters": self.config.all_params,
            "num_users": len(self.user_encoder),
            "num_events": len(self.item_encoder),
            "num_organizers": len(self.organizer_encoder),
            "evaluation_time": pd.Timestamp.now().isoformat()
        }
        
        with open(self.config.metrics_file_name, 'w') as f:
            json.dump(full_results, f, indent=4)
        
        logger.info(f"Evaluation results saved to {self.config.metrics_file_name}")

In [9]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    evaluator = HybridRecommenderEvaluator(config=model_evaluation_config)
    evaluator.save_results()
except Exception as e:
    logger.exception("Error during model evaluation")
    raise e

[2025-06-30 23:12:39,373: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-30 23:12:39,375: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-30 23:12:39,376: INFO: common: created directory at: artifacts]
[2025-06-30 23:12:39,377: INFO: common: created directory at: artifacts/model_evaluation]


  from .autonotebook import tqdm as notebook_tqdm


[2025-06-30 23:12:39,778: ERROR: 1754327610: Error during model evaluation]
Traceback (most recent call last):
  File "C:\Users\newst\AppData\Local\Temp\ipykernel_4072\1754327610.py", line 5, in <module>
    evaluator.save_results()
  File "C:\Users\newst\AppData\Local\Temp\ipykernel_4072\548256017.py", line 157, in save_results
    metrics = self.evaluate_recommendations(test_data)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\newst\AppData\Local\Temp\ipykernel_4072\548256017.py", line 39, in evaluate_recommendations
    'coverage': self.calculate_coverage(test_data),
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\newst\AppData\Local\Temp\ipykernel_4072\548256017.py", line 98, in calculate_coverage
    recommended_events.update(self._recommend(user_id))
                              ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\newst\AppData\Local\Temp\ipykernel_4072\548256017.py", line 121, in _recommend
    implicit_recs = self.implicit_mod

ValueError: user_items needs to be a CSR sparse matrix