In [1]:
import os

In [2]:
# Move the working directory one level up so that the relative paths and the
# `src.` imports used by later cells resolve from the parent directory.
# NOTE: this is relative to the current directory, so running the cell a
# second time without restarting the kernel moves up another level.
os.chdir(os.pardir)

In [3]:
%pwd

'd:\\recommendation-engine'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the hybrid recommender trainer.

    The first three fields come from the ``model_trainer`` section of the
    config file; the remaining ones come from the ``HybridRecommender``
    section of the params file.
    """

    # Directory the trained models, scaler and encoders are written to / read from.
    root_dir: Path
    # CSV file loaded as the training set.
    train_data_path: Path
    # Model name taken from the config file.
    model_name: str
    # Width of the user/event embeddings; also the ALS factor count.
    embedding_dim: int
    # Number of NN training epochs; also the ALS iteration count.
    epochs: int
    # Step size handed to the Adam optimizer.
    learning_rate: float
    # Passed to the ALS model as its regularization strength.
    alpha: float
    # Taken from the params file.
    k: int

In [5]:
from src.hybrid_recommender.constants import *
from src.hybrid_recommender.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's YAML files and turns them into typed config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH):
        """Load the config, params and schema files and create the artifacts root.

        Args:
            config_filepath: Path of the main config YAML.
            params_filepath: Path of the hyperparameter YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below this directory, so make sure it exists up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the trainer settings and ensure the trainer output directory exists.

        Returns:
            A ``ModelTrainerConfig`` combining the ``model_trainer`` section of
            the config file with the ``HybridRecommender`` section of the
            params file.
        """
        trainer_section = self.config.model_trainer
        hyperparams = self.params.HybridRecommender

        create_directories([trainer_section.root_dir])

        settings = dict(
            # Locations and naming come from the config file ...
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            model_name=trainer_section.model_name,
            # ... tunable values come from the params file.
            embedding_dim=hyperparams.embedding_dim,
            epochs=hyperparams.epochs,
            learning_rate=hyperparams.learning_rate,
            alpha=hyperparams.alpha,
            k=hyperparams.k,
        )
        return ModelTrainerConfig(**settings)

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import bm25_weight
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import joblib
from src.hybrid_recommender import logger

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
class HybridRecommender:
    """Two-stage event recommender: implicit ALS candidates re-scored by a neural net.

    ``train`` fits both models on the CSV at ``config.train_data_path`` and
    writes the models, the target scaler and the ID encoders under
    ``config.root_dir``. ``recommend`` reloads those artifacts and blends the
    two models' scores.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the trainer configuration and create empty encoder state.

        Args:
            config: Settings object providing ``root_dir``, ``train_data_path``,
                ``embedding_dim``, ``epochs``, ``learning_rate`` and ``alpha``.
        """
        self.config = config
        self.user_encoder = None
        self.item_encoder = None
        self.organizer_encoder = None
        self.user_decoder = None
        self.item_decoder = None
        self.organizer_decoder = None
        # Encoded event index -> encoded organizer index, filled by prepare_data.
        self.event_organizer = None
        self.scaler = MinMaxScaler()

    def prepare_data(self, df):
        """Encode IDs and derive the interaction score for event data.

        Args:
            df: DataFrame with ``user_id``, ``event_id``, ``organizer_id``,
                ``is_booked``, ``num_bookings``, ``is_liked``, ``num_likes``,
                ``is_commented`` and ``num_comments`` columns.

        Returns:
            A copy of ``df`` with ``user_id_encoded``, ``event_id_encoded``,
            ``organizer_id_encoded`` and ``interaction_score`` columns added.
            The frame passed in is not modified.
        """
        # Work on a copy so the caller's frame is not silently extended.
        df = df.copy()

        # Create encoders (raw ID -> contiguous index in first-seen order)
        self.user_encoder = {u: i for i, u in enumerate(df['user_id'].unique())}
        self.item_encoder = {m: i for i, m in enumerate(df['event_id'].unique())}
        self.organizer_encoder = {o: i for i, o in enumerate(df['organizer_id'].unique())}

        # Create inverse mappings
        self.user_decoder = {i: u for u, i in self.user_encoder.items()}
        self.item_decoder = {i: m for m, i in self.item_encoder.items()}
        self.organizer_decoder = {i: o for o, i in self.organizer_encoder.items()}

        # Map IDs to encoded values
        df['user_id_encoded'] = df['user_id'].map(self.user_encoder)
        df['event_id_encoded'] = df['event_id'].map(self.item_encoder)
        df['organizer_id_encoded'] = df['organizer_id'].map(self.organizer_encoder)

        # Remember which organizer each event belongs to so that inference can
        # feed the network real organizer indices. If an event appears with
        # several organizers, the first occurrence wins.
        event_rows = df.drop_duplicates('event_id_encoded')
        self.event_organizer = dict(zip(
            event_rows['event_id_encoded'].tolist(),
            event_rows['organizer_id_encoded'].tolist(),
        ))

        # Create interaction score (weighted combination of different interactions)
        df['interaction_score'] = (
            df['is_booked'] * 0.5 +
            df['num_bookings'] * 0.1 +
            df['is_liked'] * 0.2 +
            df['num_likes'] * 0.05 +
            df['is_commented'] * 0.3 +
            df['num_comments'] * 0.1
        )

        return df

    def build_hybrid_model(self, n_users, n_items, n_organizers):
        """Build the neural scoring model with organizer information.

        Args:
            n_users: Size of the user embedding vocabulary.
            n_items: Size of the event embedding vocabulary.
            n_organizers: Size of the organizer embedding vocabulary.

        Returns:
            A compiled Keras model taking ``[user, event, organizer]`` index
            inputs and producing one sigmoid output per row.
        """
        # User embedding path
        user_input = Input(shape=(1,), name='user_input')
        user_embedding = Embedding(n_users, self.config.embedding_dim, name='user_embedding')(user_input)
        user_vec = Flatten(name='user_flatten')(user_embedding)

        # Event embedding path
        event_input = Input(shape=(1,), name='event_input')
        event_embedding = Embedding(n_items, self.config.embedding_dim, name='event_embedding')(event_input)
        event_vec = Flatten(name='event_flatten')(event_embedding)

        # Organizer embedding path (half the width of the user/event embeddings)
        organizer_input = Input(shape=(1,), name='organizer_input')
        organizer_embedding = Embedding(n_organizers, self.config.embedding_dim//2, name='organizer_embedding')(organizer_input)
        organizer_vec = Flatten(name='organizer_flatten')(organizer_embedding)

        # Dot product of user and event embeddings
        dot_product = Dot(axes=1, name='dot_product')([user_vec, event_vec])

        # Combine with organizer information
        merged = Concatenate()([dot_product, organizer_vec])

        # Add dense layers
        dense = Dense(32, activation='relu')(merged)
        dense = Dense(16, activation='relu')(dense)

        # Final output
        output = Dense(1, activation='sigmoid', name='output')(dense)

        model = Model(inputs=[user_input, event_input, organizer_input], outputs=output)
        # NOTE(review): the training targets are min-max scaled scores, not
        # 0/1 labels, so the 'accuracy' metric is not informative here; a
        # regression metric would be a better monitor. Left unchanged to keep
        # the saved model's compile configuration as it was.
        model.compile(optimizer=Adam(learning_rate=self.config.learning_rate),
                     loss='binary_crossentropy',
                     metrics=['accuracy'])

        return model

    def train_implicit_model(self, user_items):
        """Train the ALS model from implicit on a user x item matrix.

        Args:
            user_items: Sparse user x item matrix of interaction scores.

        Returns:
            The fitted ``AlternatingLeastSquares`` model.
        """
        # Apply BM25 weighting; convert the result to CSR before fitting.
        weighted = bm25_weight(user_items, K1=100, B=0.8).tocsr()

        # Initialize model
        model = AlternatingLeastSquares(
            factors=self.config.embedding_dim,
            iterations=self.config.epochs,
            regularization=self.config.alpha,
            random_state=42
        )

        # Train model
        model.fit(weighted)
        return model

    def train(self):
        """Train both models and persist every artifact needed for inference.

        Reads the CSV at ``config.train_data_path`` and writes
        ``implicit_model.joblib``, ``nn_model.h5``, ``scaler.joblib`` and
        ``encoders.joblib`` into ``config.root_dir``.
        """
        # Load and prepare data
        df = pd.read_csv(self.config.train_data_path)
        df = self.prepare_data(df)

        n_users = len(self.user_encoder)
        n_items = len(self.item_encoder)
        n_organizers = len(self.organizer_encoder)

        # Create user-item matrix for implicit
        user_items = csr_matrix(
            (df['interaction_score'].values,
             (df['user_id_encoded'], df['event_id_encoded'])),
            shape=(n_users, n_items)
        )

        # Train implicit ALS model
        logger.info("Training implicit ALS model...")
        implicit_model = self.train_implicit_model(user_items)

        # Train neural network model
        logger.info("Training neural network hybrid model...")
        nn_model = self.build_hybrid_model(n_users, n_items, n_organizers)

        # Prepare data for NN
        X_user = df['user_id_encoded'].values
        X_event = df['event_id_encoded'].values
        X_organizer = df['organizer_id_encoded'].values
        y = df['interaction_score'].values

        # Normalize interaction scores into [0, 1] to match the sigmoid output
        y = self.scaler.fit_transform(y.reshape(-1, 1)).flatten()

        # Train NN
        nn_model.fit(
            [X_user, X_event, X_organizer], y,
            epochs=self.config.epochs,
            batch_size=64,
            validation_split=0.1,
            verbose=1
        )

        # Save models
        logger.info("Saving models...")
        models_dir = self.config.root_dir
        joblib.dump(implicit_model, os.path.join(models_dir, 'implicit_model.joblib'))
        nn_model.save(os.path.join(models_dir, 'nn_model.h5'))
        joblib.dump(self.scaler, os.path.join(models_dir, 'scaler.joblib'))
        joblib.dump({
            'user_encoder': self.user_encoder,
            'item_encoder': self.item_encoder,
            'organizer_encoder': self.organizer_encoder,
            # Needed at inference time to look up each candidate's organizer.
            'event_organizer': self.event_organizer
        }, os.path.join(models_dir, 'encoders.joblib'))

        logger.info("Training completed!")

    def recommend(self, user_id, n_recommendations=10):
        """Generate recommendations for a user from the saved artifacts.

        Args:
            user_id: Raw user ID as it appeared in the training data.
            n_recommendations: Maximum number of events to return.

        Returns:
            A list of ``(event_id, score)`` tuples sorted by descending
            combined score, or an empty list when the user was not seen
            during training or no candidates are produced.
        """
        # Load models
        models_dir = self.config.root_dir
        implicit_model = joblib.load(os.path.join(models_dir, 'implicit_model.joblib'))
        nn_model = tf.keras.models.load_model(os.path.join(models_dir, 'nn_model.h5'))
        scaler = joblib.load(os.path.join(models_dir, 'scaler.joblib'))
        encoders = joblib.load(os.path.join(models_dir, 'encoders.joblib'))

        self.user_encoder = encoders['user_encoder']
        self.item_encoder = encoders['item_encoder']
        self.organizer_encoder = encoders['organizer_encoder']
        # Encoder files written before the mapping was stored do not have this key.
        self.event_organizer = encoders.get('event_organizer')
        self.user_decoder = {i: u for u, i in self.user_encoder.items()}
        self.item_decoder = {i: m for m, i in self.item_encoder.items()}
        self.organizer_decoder = {i: o for o, i in self.organizer_encoder.items()}

        # Get user encoded ID
        user_encoded = self.user_encoder.get(user_id)
        if user_encoded is None:
            return []  # Return empty list for cold-start users

        # Get implicit recommendations. An empty one-row CSR matrix is passed
        # because the training interactions are not persisted; consequently
        # events the user already interacted with are not filtered out.
        n_items = len(self.item_encoder)
        item_ids, als_scores = implicit_model.recommend(
            user_encoded,
            csr_matrix((1, n_items)),
            N=n_recommendations * 3,  # Get more candidates for NN to score
            filter_already_liked_items=False
        )
        item_ids = np.asarray(item_ids)
        als_scores = np.asarray(als_scores)
        if item_ids.size == 0:
            return []

        # Look up the organizer of every candidate event
        if self.event_organizer:
            organizer_array = np.array(
                [self.event_organizer.get(int(item), 0) for item in item_ids]
            )
        else:
            # Deterministic fallback for artifacts saved without the mapping.
            logger.warning(
                "No event->organizer mapping in encoders.joblib; "
                "scoring all candidates with organizer index 0. Retrain to fix."
            )
            organizer_array = np.zeros(len(item_ids), dtype=int)

        # Score with neural network
        user_array = np.full(len(item_ids), user_encoded)

        nn_scores = nn_model.predict([user_array, item_ids, organizer_array])
        # Undo the min-max scaling applied to the training targets
        nn_scores = scaler.inverse_transform(nn_scores.reshape(-1, 1)).flatten()

        # Combine and sort (weighted combination of implicit and NN scores)
        combined_scores = als_scores * 0.6 + nn_scores * 0.4
        top_indices = np.argsort(combined_scores)[::-1][:n_recommendations]

        # Return recommendations with scores
        return [
            (self.item_decoder[int(item_ids[idx])], combined_scores[idx])
            for idx in top_indices
        ]

In [9]:
# Build the trainer configuration from the YAML files, then fit and persist
# the hybrid recommender. `recommender` is reused by the next cell.
try:
    config = ConfigurationManager()
    recommender_config = config.get_model_trainer_config()
    recommender = HybridRecommender(config=recommender_config)
    recommender.train()
except Exception:
    # Record the full traceback in the project log, then let the failure
    # surface in the notebook instead of swallowing it.
    logger.exception("Error in training hybrid recommender")
    raise

[2025-06-30 23:18:58,497: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-30 23:18:58,500: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-30 23:18:58,502: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-06-30 23:18:58,503: INFO: common: created directory at: artifacts]
[2025-06-30 23:18:58,503: INFO: common: created directory at: artifacts/model_trainer]
[2025-06-30 23:18:58,787: INFO: 302607816: Training implicit ALS model...]


  check_blas_config()
100%|██████████| 20/20 [00:00<00:00, 51.95it/s]

[2025-06-30 23:18:59,320: INFO: 302607816: Training neural network hybrid model...]





Epoch 1/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 8ms/step - accuracy: 0.2967 - loss: 0.3707 - val_accuracy: 0.3015 - val_loss: 0.3006
Epoch 2/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.3032 - loss: 0.2784 - val_accuracy: 0.3015 - val_loss: 0.3148
Epoch 3/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.3019 - loss: 0.2548 - val_accuracy: 0.3015 - val_loss: 0.3110
Epoch 4/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.3014 - loss: 0.2418 - val_accuracy: 0.3015 - val_loss: 0.3177
Epoch 5/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.3014 - loss: 0.2419 - val_accuracy: 0.3015 - val_loss: 0.3171
Epoch 6/20
[1m2110/2110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.3007 - loss: 0.2418 - val_accuracy: 0.3015 - val_loss: 0.3165
Epoch 7/20

In [10]:
# Ask the trained recommender for the ten best events for one sample user.
# An empty list comes back when that user ID was not in the training data.
recommendations = recommender.recommend(
    user_id=12345,
    n_recommendations=10,
)


