In [1]:
import os

In [2]:
# Work from the project root so that the `src.*` imports and the relative
# config/artifact paths used below resolve.
# Guarded on the presence of the `src` package directory so that re-running
# this cell does not climb one directory further on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

In [3]:
%pwd

'd:\\recommendation-engine'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Attributes:
        root_dir: Directory into which the trained artifacts are written.
        train_data_path: CSV file holding the training interactions.
        model_name: Name of the model artifact as given in the config file.
        embedding_dim: Size of the latent vectors (Keras embeddings and ALS factors).
        epochs: Number of Keras epochs; also used as the ALS iteration count.
        learning_rate: Step size handed to the Adam optimizer.
        alpha: Value passed to ALS as its regularization strength.
        k: Integer hyper-parameter read from the params file.
    """

    # --- locations / naming (from the config file) ---
    root_dir: Path
    train_data_path: Path
    model_name: str

    # --- hyper-parameters (from the params file) ---
    embedding_dim: int
    epochs: int
    learning_rate: float
    alpha: float
    k: int

In [5]:
from src.hybrid_recommender.constants import *
from src.hybrid_recommender.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects.

    On construction the config, params and schema files are loaded (in that
    order) and the top-level artifacts directory is created.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
            schema_filepath: Path of the schema YAML.
        """
        yaml_sources = (config_filepath, params_filepath, schema_filepath)
        self.config, self.params, self.schema = (
            read_yaml(source) for source in yaml_sources
        )

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the model-trainer settings and create its output directory.

        Returns:
            ModelTrainerConfig: paths/names taken from the ``model_trainer``
            section of the config file, hyper-parameters taken from the
            ``HybridRecommender`` section of the params file.
        """
        trainer_section = self.config.model_trainer
        hyperparams = self.params.HybridRecommender

        # The trainer writes its artifacts here, so it must exist up front.
        create_directories([trainer_section.root_dir])

        location_kwargs = {
            "root_dir": trainer_section.root_dir,
            "train_data_path": trainer_section.train_data_path,
            "model_name": trainer_section.model_name,
        }
        tuning_kwargs = {
            "embedding_dim": hyperparams.embedding_dim,
            "epochs": hyperparams.epochs,
            "learning_rate": hyperparams.learning_rate,
            "alpha": hyperparams.alpha,
            "k": hyperparams.k,
        }
        return ModelTrainerConfig(**location_kwargs, **tuning_kwargs)

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import bm25_weight
from scipy.sparse import coo_matrix  # Added import
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import joblib
from src.hybrid_recommender import logger

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
class HybridRecommender:
    """Two-stage recommender: ALS candidate generation re-scored by a Keras model.

    ``train`` fits an implicit-feedback ALS model and a user/item embedding
    network on the same interactions and saves both, together with the rating
    scaler and the id encoders, under ``config.root_dir``.  ``recommend``
    reloads those artifacts, asks ALS for candidates and re-ranks them with
    the network's predicted rating.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the configuration and create an unfitted rating scaler.

        Args:
            config: Training settings (paths and hyper-parameters).
        """
        self.config = config
        # raw id -> contiguous index, filled by prepare_data() / recommend()
        self.user_encoder = None
        self.item_encoder = None
        # contiguous index -> raw id (inverse of the encoders)
        self.user_decoder = None
        self.item_decoder = None
        self.scaler = MinMaxScaler()

    def prepare_data(self, df):
        """Build id encoders/decoders and add encoded id columns to ``df``.

        Args:
            df: DataFrame with ``'User-ID'`` and ``'ISBN'`` columns.

        Returns:
            The same DataFrame object, with ``'user_id'`` and ``'item_id'``
            columns added.  Note that ``df`` is modified in place.
        """
        # Contiguous indices in order of first appearance
        self.user_encoder = {u: i for i, u in enumerate(df['User-ID'].unique())}
        self.item_encoder = {m: i for i, m in enumerate(df['ISBN'].unique())}

        # Inverse mappings, used to translate model output back to raw ids
        self.user_decoder = {i: u for u, i in self.user_encoder.items()}
        self.item_decoder = {i: m for m, i in self.item_encoder.items()}

        # Map raw ids to encoded values
        df['user_id'] = df['User-ID'].map(self.user_encoder)
        df['item_id'] = df['ISBN'].map(self.item_encoder)

        return df

    def build_hybrid_model(self, n_users, n_items):
        """Build and compile the user/item embedding network.

        Args:
            n_users: Number of distinct encoded users (embedding vocabulary size).
            n_items: Number of distinct encoded items (embedding vocabulary size).

        Returns:
            A compiled Keras ``Model`` taking ``[user_input, item_input]`` and
            producing a single sigmoid output.
        """
        # User embedding path
        user_input = Input(shape=(1,), name='user_input')
        user_embedding = Embedding(n_users, self.config.embedding_dim, name='user_embedding')(user_input)
        user_vec = Flatten(name='user_flatten')(user_embedding)

        # Item embedding path
        item_input = Input(shape=(1,), name='item_input')
        item_embedding = Embedding(n_items, self.config.embedding_dim, name='item_embedding')(item_input)
        item_vec = Flatten(name='item_flatten')(item_embedding)

        # Dot product of user and item embeddings
        dot_product = Dot(axes=1, name='dot_product')([user_vec, item_vec])

        # Additional features can be added here
        merged = dot_product

        # Final output, squashed to (0, 1) to match the MinMax-scaled target
        output = Dense(1, activation='sigmoid', name='output')(merged)

        model = Model(inputs=[user_input, item_input], outputs=output)
        # NOTE(review): the target is a scaled rating rather than a class label
        # (see train()), so 'accuracy' may not be a meaningful metric here;
        # consider tracking MAE/RMSE instead.
        model.compile(optimizer=Adam(learning_rate=self.config.learning_rate),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        return model

    def train_implicit_model(self, user_items):
        """Fit an ALS model on the BM25-weighted interaction matrix.

        Args:
            user_items: Sparse users x items interaction matrix.

        Returns:
            The fitted ``AlternatingLeastSquares`` model.
        """
        # Apply BM25 weighting, then hand ALS the CSR layout explicitly
        # instead of relying on an implicit conversion inside fit().
        weighted = bm25_weight(user_items, K1=100, B=0.8).tocsr()

        # embedding_dim / epochs / alpha are reused as factors / iterations /
        # regularization respectively.
        model = AlternatingLeastSquares(
            factors=self.config.embedding_dim,
            iterations=self.config.epochs,
            regularization=self.config.alpha,
            random_state=42
        )

        model.fit(weighted)
        return model

    def train(self):
        """Train both models on ``config.train_data_path`` and save all artifacts.

        Writes ``implicit_model.joblib``, ``nn_model.h5``, ``scaler.joblib``
        and ``encoders.joblib`` into ``config.root_dir``.
        """
        # Load and prepare data
        df = pd.read_csv(self.config.train_data_path)
        df = self.prepare_data(df)

        n_users = len(self.user_encoder)
        n_items = len(self.item_encoder)

        # Binary user-item matrix for implicit: one entry per interaction row
        user_items = coo_matrix(
            (np.ones(len(df)),
             (df['user_id'], df['item_id'])),
            shape=(n_users, n_items)
        )

        # Train implicit ALS model
        logger.info("Training implicit ALS model...")
        implicit_model = self.train_implicit_model(user_items)

        # Train neural network model
        logger.info("Training neural network hybrid model...")
        nn_model = self.build_hybrid_model(n_users, n_items)

        # Prepare data for NN
        X_user = df['user_id'].values
        X_item = df['item_id'].values
        y = df['Book-Rating'].values

        # Scale ratings to [0, 1]; the fitted scaler is saved so that
        # recommend() can map predictions back to the rating scale.
        y = self.scaler.fit_transform(y.reshape(-1, 1)).flatten()

        # Train NN
        nn_model.fit(
            [X_user, X_item], y,
            epochs=self.config.epochs,
            batch_size=64,
            validation_split=0.1,
            verbose=1
        )

        # Save models
        logger.info("Saving models...")
        models_dir = self.config.root_dir
        joblib.dump(implicit_model, os.path.join(models_dir, 'implicit_model.joblib'))
        nn_model.save(os.path.join(models_dir, 'nn_model.h5'))
        joblib.dump(self.scaler, os.path.join(models_dir, 'scaler.joblib'))
        joblib.dump({
            'user_encoder': self.user_encoder,
            'item_encoder': self.item_encoder
        }, os.path.join(models_dir, 'encoders.joblib'))

        logger.info("Training completed!")

    def recommend(self, user_id, n_recommendations=10):
        """Generate recommendations for a user from the saved artifacts.

        Args:
            user_id: Raw user id, as it appears in the ``'User-ID'`` column.
            n_recommendations: Maximum number of items to return.

        Returns:
            A list of raw item ids (ISBNs), best first.  Empty when
            ``n_recommendations`` is not positive, when the user was not seen
            during training (cold start), or when ALS yields no candidates.
        """
        if n_recommendations <= 0:
            return []

        models_dir = self.config.root_dir

        # Load the encoders first so unknown users return before the heavier
        # model artifacts are deserialised.
        encoders = joblib.load(os.path.join(models_dir, 'encoders.joblib'))
        self.user_encoder = encoders['user_encoder']
        self.item_encoder = encoders['item_encoder']
        self.user_decoder = {i: u for u, i in self.user_encoder.items()}
        self.item_decoder = {i: m for m, i in self.item_encoder.items()}

        # Get user encoded ID
        user_encoded = self.user_encoder.get(user_id)
        if user_encoded is None:
            return []  # Return empty list for cold-start users

        implicit_model = joblib.load(os.path.join(models_dir, 'implicit_model.joblib'))
        nn_model = tf.keras.models.load_model(os.path.join(models_dir, 'nn_model.h5'))
        scaler = joblib.load(os.path.join(models_dir, 'scaler.joblib'))

        # The training interactions are not among the saved artifacts, so
        # already-rated items cannot be filtered out here.  Pass a single
        # empty CSR row (one row for the one requested user) and switch the
        # filter off explicitly; a COO / full-shape matrix is rejected by
        # implicit's recommend() when filtering is enabled.
        no_history = coo_matrix((1, len(self.item_encoder))).tocsr()
        implicit_recs = implicit_model.recommend(
            user_encoded,
            no_history,
            N=n_recommendations * 2,  # over-fetch so the NN re-ranking has a choice
            filter_already_liked_items=False
        )

        item_array = np.asarray(implicit_recs[0])
        if item_array.size == 0:
            return []
        als_scores = np.asarray(implicit_recs[1])

        # Score the candidates with the neural network
        user_array = np.full(len(item_array), user_encoded)
        nn_scores = nn_model.predict([user_array, item_array])
        # Undo the MinMax scaling applied to the ratings during training
        nn_scores = scaler.inverse_transform(nn_scores.reshape(-1, 1)).flatten()

        # Combine and sort (highest combined score first)
        combined_scores = als_scores * nn_scores
        top_indices = np.argsort(combined_scores)[::-1][:n_recommendations]

        # Translate encoded item indices back to raw ids
        return [self.item_decoder[item] for item in item_array[top_indices]]

In [9]:
# Build the trainer configuration, then fit and save both models.
try:
    config = ConfigurationManager()
    recommender_config = config.get_model_trainer_config()
    recommender = HybridRecommender(config=recommender_config)
    recommender.train()
except Exception:
    # Record the failure with its traceback, then let the original
    # exception propagate unchanged.
    logger.exception("Error in training hybrid recommender")
    raise

[2025-06-17 06:21:36,164: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-17 06:21:36,166: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-17 06:21:36,166: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-06-17 06:21:36,166: INFO: common: created directory at: artifacts]
[2025-06-17 06:21:36,171: INFO: common: created directory at: artifacts/model_trainer]
[2025-06-17 06:21:40,506: INFO: 1614537693: Training implicit ALS model...]


  check_blas_config()
100%|██████████| 20/20 [00:03<00:00,  5.26it/s]


[2025-06-17 06:21:46,308: INFO: 1614537693: Training neural network hybrid model...]
Epoch 1/20
[1m 2527/10876[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m23:18[0m 168ms/step - accuracy: 0.6229 - loss: 0.6339

KeyboardInterrupt: 