In [63]:
#!/usr/bin/env python3
"""
Deep Learning Models for Customer Satisfaction Prediction
Implements and compares 5 different deep learning architectures
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, roc_auc_score
from sklearn.metrics import precision_recall_fscore_support
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Dense, LSTM, Bidirectional, Conv1D, MaxPooling1D, GlobalMaxPooling1D,
    Embedding, Dropout, Input, concatenate, Attention, MultiHeadAttention,
    LayerNormalization, Add, GlobalAveragePooling1D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
import pickle
import warnings
import os
import json
from datetime import datetime
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [64]:


# Load preprocessed data
print("Loading preprocessed data...")
data = np.load('data/preprocessed_data.npz')

X_num_train = data['X_num_train']
X_num_val = data['X_num_val']
X_num_test = data['X_num_test']
X_text_train = data['X_text_train']
X_text_val = data['X_text_val']
X_text_test = data['X_text_test']
X_num_train_balanced = data['X_num_train_balanced']
X_text_train_balanced = data['X_text_train_balanced']
y_train = data['y_train']
y_val = data['y_val']
y_test = data['y_test']
y_train_balanced = data['y_train_balanced']

# Load metadata
with open('data/metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

vocab_size = metadata['vocab_size']
max_len = metadata['max_sequence_length']
num_features = len(metadata['feature_columns'])
num_classes = metadata['num_classes']
class_weights = metadata['class_weights']
class_names = metadata['class_names']

print(f"Vocab size: {vocab_size}")
print(f"Max sequence length: {max_len}")
print(f"Number of features: {num_features}")
print(f"Number of classes: {num_classes}")
print(f"Training with balanced data: {X_text_train_balanced.shape[0]} samples")


Loading preprocessed data...
Vocab size: 27319
Max sequence length: 100
Number of features: 14
Number of classes: 5
Training with balanced data: 19868 samples


In [65]:
from tensorflow.keras.layers import (
    Input, Embedding, Conv1D, MaxPooling1D, LSTM, Dense, 
    Dropout, concatenate, BatchNormalization
)
from tensorflow.keras.models import Model
class DeepLearningModels:
    def __init__(self, vocab_size, max_len, num_features, num_classes, embedding_dim=100):
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.num_features = num_features
        self.num_classes = num_classes
        self.embedding_dim = embedding_dim
        self.models = {}
        self.histories = {}
        
    def create_lstm_model(self):
        """Model 1: LSTM-based RNN for sequential text processing"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True)(text_input)
        text_lstm = LSTM(64, dropout=0.3, recurrent_dropout=0.3)(text_embedding)
        
        # Numerical input branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(num_input)
        num_dropout = Dropout(0.3)(num_dense)
        
        # Combine branches
        combined = concatenate([text_lstm, num_dropout])
        hidden = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(combined)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(32, activation='relu')(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
    
    def create_bilstm_attention_model(self):
        """Model 2: Bidirectional LSTM with attention mechanism"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True)(text_input)
        
        # Bidirectional LSTM
        bilstm = Bidirectional(LSTM(64, dropout=0.3, recurrent_dropout=0.3, return_sequences=True))(text_embedding)
        
        # Self-attention mechanism
        attention = MultiHeadAttention(num_heads=4, key_dim=64)(bilstm, bilstm)
        attention = Add()([bilstm, attention])
        attention = LayerNormalization()(attention)
        
        # Global pooling
        text_features = GlobalAveragePooling1D()(attention)
        
        # Numerical input branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(num_input)
        num_dropout = Dropout(0.3)(num_dense)
        
        # Combine branches
        combined = concatenate([text_features, num_dropout])
        hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(combined)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(64, activation='relu')(hidden)
        hidden = Dropout(0.3)(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model

    def create_fast_bilstm_attention_model(self):
        """Optimized BiLSTM with Attention Model for Speed"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        
        # Optimized embedding with smaller dimension
        text_embedding = Embedding(
            self.vocab_size, 
            min(128, self.embedding_dim),  # Reduced embedding size
            mask_zero=True
        )(text_input)
        
        # Faster Bidirectional LSTM
        bilstm = Bidirectional(
            LSTM(64,  # Maintained size but optimized settings
                dropout=0.2,  # Reduced dropout
                recurrent_dropout=0.1,  # Reduced recurrent dropout
                return_sequences=True,
                activation='tanh',  # Faster than default
                recurrent_activation='sigmoid')  # Faster than default
        )(text_embedding)
        
        # Optimized attention mechanism
        attention = MultiHeadAttention(
            num_heads=2,  # Reduced heads
            key_dim=64,  # Matches LSTM units
            dropout=0.1  # Added attention dropout
        )(bilstm, bilstm)
        
        # Simplified residual connection
        attention = Add()([bilstm, attention])
        
        # Removed LayerNorm for speed (optional: can keep if critical for performance)
        
        # Efficient context extraction
        text_features = GlobalAveragePooling1D()(attention)
        
        # Numerical input branch (optimized)
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu')(num_input)  # Removed regularizer
        
        # Combine branches
        combined = concatenate([text_features, num_dense])
        
        # Optimized classifier head
        hidden = Dense(64, activation='relu')(combined)  # Smaller layer
        hidden = Dropout(0.3)(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
        
    def create_robust_model(self):
        # Text
        text_input = Input(shape=(self.max_len,))
        x = Embedding(self.vocab_size, 128)(text_input)
        x = Bidirectional(LSTM(64))(x)
        
        # Numerical
        num_input = Input(shape=(self.num_features,))
        y = Dense(64)(num_input)
        
        # Combined
        z = concatenate([x, y])
        z = Dense(128, activation='relu')(z)
        output = Dense(self.num_classes, activation='softmax')(z)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    
    from keras.layers import Attention  # Use Keras' built-in attention

    def create_accurate_bilstm_attention_model(self):
        """High-performance BiLSTM with Attention"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(
            self.vocab_size,
            self.embedding_dim * 2,  # Increased capacity
            mask_zero=True
        )(text_input)
        
        # Enhanced Bidirectional LSTM
        bilstm = Bidirectional(
            LSTM(128,  # Doubled units
                dropout=0.3,
                recurrent_dropout=0.25,
                return_sequences=True,
                kernel_regularizer=l2(1e-4))  # Added regularization
        )(text_embedding)
        bilstm = BatchNormalization()(bilstm)  # Stabilizes training
        
        # Powerful attention mechanism
        attention = MultiHeadAttention(
            num_heads=8,  # More attention heads
            key_dim=128,  # Matches LSTM units
            dropout=0.2,
            kernel_regularizer=l2(1e-4)
        )(bilstm, bilstm)
        
        # Residual connection with layer norm
        attention = Add()([bilstm, attention])
        attention = LayerNormalization()(attention)
        
        # Context extraction
        text_features = GlobalAveragePooling1D()(attention)
        
        # Enhanced numerical branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(64, activation='relu', kernel_regularizer=l2(1e-4))(num_input)
        num_dense = BatchNormalization()(num_dense)
        
        # Feature fusion
        combined = concatenate([text_features, num_dense])
        combined = Dropout(0.4)(combined)
        
        # Deep classifier head
        hidden = Dense(256, activation='relu', kernel_regularizer=l2(1e-4))(combined)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(128, activation='relu')(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
          
    def create_cnn_model(self):
        """Model 3: CNN for text classification with multiple filter sizes"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim)(text_input)
        
        # Multiple CNN branches with different filter sizes
        conv_branches = []
        filter_sizes = [3, 4, 5]
        
        for filter_size in filter_sizes:
            conv = Conv1D(64, filter_size, activation='relu', padding='same')(text_embedding)
            conv = Dropout(0.3)(conv)
            conv = MaxPooling1D(2)(conv)
            conv = Conv1D(32, filter_size, activation='relu', padding='same')(conv)
            conv = GlobalMaxPooling1D()(conv)
            conv_branches.append(conv)
        
        # Combine CNN branches
        if len(conv_branches) > 1:
            text_features = concatenate(conv_branches)
        else:
            text_features = conv_branches[0]
        
        # Numerical input branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(num_input)
        num_dropout = Dropout(0.3)(num_dense)
        
        # Combine branches
        combined = concatenate([text_features, num_dropout])
        hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(combined)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(64, activation='relu')(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
    
    def create_transformer_model(self):
        """Model 4: Transformer-based model (simplified BERT-like architecture)"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim)(text_input)
        
        # Positional encoding (simplified)
        positions = tf.range(start=0, limit=self.max_len, delta=1)
        position_embedding = Embedding(self.max_len, self.embedding_dim)(positions)
        text_embedded = text_embedding + position_embedding
        
        # Transformer blocks
        for _ in range(2):  # 2 transformer blocks
            # Multi-head attention
            attention = MultiHeadAttention(num_heads=8, key_dim=self.embedding_dim//8)(
                text_embedded, text_embedded
            )
            attention = Dropout(0.1)(attention)
            attention = Add()([text_embedded, attention])
            attention = LayerNormalization()(attention)
            
            # Feed forward
            ff = Dense(256, activation='relu')(attention)
            ff = Dropout(0.1)(ff)
            ff = Dense(self.embedding_dim)(ff)
            ff = Add()([attention, ff])
            text_embedded = LayerNormalization()(ff)
        
        # Global pooling
        text_features = GlobalAveragePooling1D()(text_embedded)
        
        # Numerical input branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(num_input)
        num_dropout = Dropout(0.3)(num_dense)
        
        # Combine branches
        combined = concatenate([text_features, num_dropout])
        hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(combined)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(64, activation='relu')(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
    
    def create_hybrid_cnn_lstm_model(self):
        """Model 5: Hybrid CNN-LSTM model"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim)(text_input)
        
        # CNN feature extraction
        conv1 = Conv1D(64, 3, activation='relu', padding='same')(text_embedding)
        conv1 = Dropout(0.3)(conv1)
        conv2 = Conv1D(64, 5, activation='relu', padding='same')(text_embedding)
        conv2 = Dropout(0.3)(conv2)
        
        # Combine CNN features
        conv_combined = concatenate([conv1, conv2])
        
        # LSTM on top of CNN features
        lstm_out = LSTM(64, dropout=0.3, recurrent_dropout=0.3)(conv_combined)
        
        # Numerical input branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(num_input)
        num_dropout = Dropout(0.3)(num_dense)
        
        # Combine branches
        combined = concatenate([lstm_out, num_dropout])
        hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(combined)
        hidden = Dropout(0.4)(hidden)
        hidden = Dense(64, activation='relu')(hidden)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
    
    def create_hybrid_cnn_lstm_model_modified(self):
        """Fixed version with all required imports"""
        # Text input branch
        text_input = Input(shape=(self.max_len,), name='text_input')
        text_embedding = Embedding(self.vocab_size, self.embedding_dim)(text_input)
        
        # CNN with MaxPooling
        conv1 = Conv1D(128, 3, activation='relu', padding='same')(text_embedding)
        conv1 = MaxPooling1D(2)(conv1)
        conv2 = Conv1D(128, 5, activation='relu', padding='same')(text_embedding)
        conv2 = MaxPooling1D(2)(conv2)
        
        conv_combined = concatenate([conv1, conv2])
        conv_combined = BatchNormalization()(conv_combined)  # Now properly imported
        
        # LSTM
        lstm_out = LSTM(128, dropout=0.2, recurrent_dropout=0.2)(conv_combined)
        
        # Numerical branch
        num_input = Input(shape=(self.num_features,), name='numerical_input')
        num_dense = Dense(32, activation='relu')(num_input)
        
        # Combine branches
        combined = concatenate([lstm_out, num_dense])
        hidden = Dense(128, activation='relu')(combined)
        output = Dense(self.num_classes, activation='softmax')(hidden)
        
        model = Model(inputs=[text_input, num_input], outputs=output)
        return model
    
    def compile_model(self, model, learning_rate=0.001):
        """Compile model with appropriate optimizer and loss function"""
        model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model
    
    def create_callbacks(self):
        """Create training callbacks"""
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
        
        reduce_lr = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-7
        )
        
        return [early_stopping, reduce_lr]
    
    def train_model(self, model, model_name, X_text_train, X_num_train, y_train,
                   X_text_val, X_num_val, y_val, class_weights, epochs=100):
        """Train a model with given data"""
        print(f"\nTraining {model_name}...")
        
        callbacks = self.create_callbacks()
        
        history = model.fit(
            [X_text_train, X_num_train], y_train,
            validation_data=([X_text_val, X_num_val], y_val),
            epochs=epochs,
            batch_size=16,
            class_weight=class_weights,
            callbacks=callbacks,
            verbose=1
        )
        
        self.models[model_name] = model
        self.histories[model_name] = history
        
        return model, history
    
    def evaluate_model(self, model, model_name, X_text_test, X_num_test, y_test, class_names):
        """Evaluate model performance"""
        print(f"\nEvaluating {model_name}...")
        
        # Make predictions
        y_pred_proba = model.predict([X_text_test, X_num_test])
        y_pred = np.argmax(y_pred_proba, axis=1)
        
        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        f1_weighted = f1_score(y_test, y_pred, average='weighted')
        f1_macro = f1_score(y_test, y_pred, average='macro')
        
        # Multi-class ROC AUC
        try:
            auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='weighted')
        except:
            auc_score = 0.0
        
        # Precision, Recall, F1 per class
        precision, recall, f1, support = precision_recall_fscore_support(y_test, y_pred, average=None)
        
        # Classification report
        report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
        
        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        
        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'f1_weighted': f1_weighted,
            'f1_macro': f1_macro,
            'auc_score': auc_score,
            'precision': precision,
            'recall': recall,
            'f1_per_class': f1,
            'support': support,
            'classification_report': report,
            'confusion_matrix': cm,
            'y_pred': y_pred,
            'y_pred_proba': y_pred_proba
        }
        
        return results
    
    def plot_training_history(self, model_name):
        """Plot training history"""
        if model_name not in self.histories:
            return
        
        history = self.histories[model_name]
        
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
        
        # Plot accuracy
        ax1.plot(history.history['accuracy'], label='Training Accuracy')
        ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
        ax1.set_title(f'{model_name} - Accuracy')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Accuracy')
        ax1.legend()
        ax1.grid(True)
        
        # Plot loss
        ax2.plot(history.history['loss'], label='Training Loss')
        ax2.plot(history.history['val_loss'], label='Validation Loss')
        ax2.set_title(f'{model_name} - Loss')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Loss')
        ax2.legend()
        ax2.grid(True)
        
        plt.tight_layout()
        plt.savefig(f'charts/{model_name.lower().replace(" ", "_")}_training_history.png', 
                   dpi=300, bbox_inches='tight')
        plt.close()
    
    def plot_confusion_matrix(self, results, class_names):
        """Plot confusion matrix"""
        cm = results['confusion_matrix']
        model_name = results['model_name']
        
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                   xticklabels=class_names, yticklabels=class_names)
        plt.title(f'{model_name} - Confusion Matrix')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.tight_layout()
        plt.savefig(f'charts/{model_name.lower().replace(" ", "_")}_confusion_matrix.png', 
                   dpi=300, bbox_inches='tight')
        plt.close()

    def save_all_models(self, all_results):
        """Save all trained models with their evaluation results and supporting files for API use"""
        # Create directories if they don't exist
        os.makedirs('api_models', exist_ok=True)
        os.makedirs('api_models/data', exist_ok=True)

        # Actual features from your Temu reviews dataset
        feature_columns = [
            'ReviewCount', 'UserCountry_encoded',
            'text_length', 'word_count', 'avg_word_length',
            'exclamation_count', 'question_count', 'upper_case_ratio',
            'title_text_length', 'title_word_count', 'title_avg_word_length',
            'title_exclamation_count', 'title_question_count', 'title_upper_case_ratio'
        ]

        # Class names based on ReviewRating (1-5 stars)
        class_names = [
            '1 Star - Very Poor',
            '2 Stars - Poor',
            '3 Stars - Average',
            '4 Stars - Good',
            '5 Stars - Excellent'
        ]

        # Create a package for each model that contains everything needed for serving
        for result in all_results:
            model_name = result['model_name']
            if model_name in self.models:
                # Create a directory for this model
                model_dir = os.path.join('api_models', model_name.lower().replace(' ', '_'))
                os.makedirs(model_dir, exist_ok=True)

                # 1. Save the model in SavedModel format
                model_path = os.path.join(model_dir, 'model.keras')
                self.models[model_name].save(model_path)

                # 2. Save metadata needed for preprocessing
                metadata = {
                    'max_sequence_length': self.max_len,
                    'feature_columns': feature_columns,
                    'class_names': class_names,
                    'input_details': {
                        'text_input': {
                            'shape': [None, self.max_len],
                            'dtype': 'int32',
                            'description': 'Tokenized review text from ReviewText column'
                        },
                        'numerical_input': {
                            'shape': [None, len(feature_columns)],
                            'dtype': 'float32',
                            'description': f'Numerical features in order: {", ".join(feature_columns)}'
                        }
                    },
                    'output_details': {
                        'description': 'Probability scores for each rating level (1-5 stars)',
                        'class_order': class_names
                    },
                    'data_source': 'temu_reviews_cleaned.csv',
                    'text_columns_used': ['ReviewText', 'ReviewTitle'],  # Which text columns were used
                    'model_format': 'keras'  # Indicate the saved format
                }

                with open(os.path.join(model_dir, 'metadata.json'), 'w') as f:
                    json.dump(metadata, f, indent=2)

                print(f"✅ Saved API-ready {model_name} package to {model_dir}")

                # Update the result with the path
                result['api_model_path'] = model_dir
            else:
                print(f"⚠️ Model {model_name} not found in trained models")

        # Save tokenizer if exists
        if hasattr(self, 'tokenizer'):
            tokenizer_path = os.path.join('api_models', 'tokenizer.pkl')
            with open(tokenizer_path, 'wb') as f:
                pickle.dump(self.tokenizer, f)
            print(f"✅ Saved tokenizer to {tokenizer_path}")

        # Save complete results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_path = os.path.join('api_models', 'data', f'model_results_{timestamp}.pkl')
        with open(results_path, 'wb') as f:
            pickle.dump(all_results, f)

        print(f"\nAll models saved in API-ready format.")
        print(f"You can now deploy any model by copying its directory to your API server.")
        return results_path



In [66]:
# model_configs = [
#    ('LSTM Model', model_builder.create_lstm_model),
#    ('BiLSTM with Attention', model_builder.create_bilstm_attention_model),
#    ('CNN Model', model_builder.create_cnn_model),
#    ('Transformer Model', model_builder.create_transformer_model),
#    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# ]


# Initialize model builder
model_builder = DeepLearningModels(vocab_size, max_len, num_features, num_classes)

#'LSTM Model', model_builder.create_lstm_model)
# Define models to train
model_configs = [
    ('LSTM Model', model_builder.create_lstm_model)
]

# Train and evaluate all models
all_results = []

for model_name, model_func in model_configs:
    print(f"\n{'='*50}")
    print(f"Building and training {model_name}")
    print('='*50)
    
    # Create and compile model
    model = model_func()
    model = model_builder.compile_model(model)
    
    print(f"\n{model_name} Architecture:")
    model.summary()
    
    # Train model
    model, history = model_builder.train_model(
        model, model_name,
        X_text_train_balanced, X_num_train_balanced, y_train_balanced,
        X_text_val, X_num_val, y_val,
        class_weights, epochs=50
    )
    
    # Plot training history
    model_builder.plot_training_history(model_name)
    
    # Evaluate model
    results = model_builder.evaluate_model(
        model, model_name, X_text_test, X_num_test, y_test, class_names
    )
    
    # Plot confusion matrix
    model_builder.plot_confusion_matrix(results, class_names)
    
    all_results.append(results)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"F1-Score (Weighted): {results['f1_weighted']:.4f}")
    print(f"F1-Score (Macro): {results['f1_macro']:.4f}")
    print(f"AUC Score: {results['auc_score']:.4f}")
    
    # Save results
    with open('data/model_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    



Building and training LSTM Model

LSTM Model Architecture:



Training LSTM Model...
Epoch 1/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 117ms/step - accuracy: 0.2561 - loss: 2.6519 - val_accuracy: 0.1875 - val_loss: 2.4667 - learning_rate: 0.0010
Epoch 2/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 113ms/step - accuracy: 0.3404 - loss: 2.0127 - val_accuracy: 0.2324 - val_loss: 2.0706 - learning_rate: 0.0010
Epoch 3/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 113ms/step - accuracy: 0.4347 - loss: 1.8319 - val_accuracy: 0.4096 - val_loss: 1.8242 - learning_rate: 0.0010
Epoch 4/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 123ms/step - accuracy: 0.4944 - loss: 1.7354 - val_accuracy: 0.4515 - val_loss: 1.7426 - learning_rate: 0.0010
Epoch 5/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 113ms/step - accuracy: 0.5408 - loss: 1.6491 - val_accuracy: 0.4574 - val_loss: 1.8233 - learning_rate: 0.0010
Epoch 6/50
[1m1

In [67]:
# model_configs = [
#    ('LSTM Model', model_builder.create_lstm_model),
#    ('BiLSTM with Attention', model_builder.create_bilstm_attention_model),
#    ('CNN Model', model_builder.create_cnn_model),
#    ('Transformer Model', model_builder.create_transformer_model),
#    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# ]


# Initialize model builder
model_builder = DeepLearningModels(vocab_size, max_len, num_features, num_classes)

#'BiLSTM with Attention', model_builder.create_bilstm_attention_model)
# Define models to train
model_configs = [
    ('BiLSTM with Attention', model_builder.create_robust_model)
]

# Train and evaluate all models
for model_name, model_func in model_configs:
    print(f"\n{'='*50}")
    print(f"Building and training {model_name}")
    print('='*50)
    
    # Create and compile model
    model = model_func()
    model = model_builder.compile_model(model)
    
    print(f"\n{model_name} Architecture:")
    model.summary()
    
    # Train model
    model, history = model_builder.train_model(
        model, model_name,
        X_text_train_balanced, X_num_train_balanced, y_train_balanced,
        X_text_val, X_num_val, y_val,
        class_weights, epochs=50
    )
    
    # Plot training history
    model_builder.plot_training_history(model_name)
    
    # Evaluate model
    results = model_builder.evaluate_model(
        model, model_name, X_text_test, X_num_test, y_test, class_names
    )
    
    # Plot confusion matrix
    model_builder.plot_confusion_matrix(results, class_names)
    
    all_results.append(results)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"F1-Score (Weighted): {results['f1_weighted']:.4f}")
    print(f"F1-Score (Macro): {results['f1_macro']:.4f}")
    print(f"AUC Score: {results['auc_score']:.4f}")
    
    # Save results
    with open('data/model_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    




Building and training BiLSTM with Attention

BiLSTM with Attention Architecture:



Training BiLSTM with Attention...
Epoch 1/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 101ms/step - accuracy: 0.3215 - loss: 2.0184 - val_accuracy: 0.4419 - val_loss: 1.6866 - learning_rate: 0.0010
Epoch 2/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 103ms/step - accuracy: 0.5119 - loss: 1.6319 - val_accuracy: 0.4346 - val_loss: 1.9196 - learning_rate: 0.0010
Epoch 3/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 99ms/step - accuracy: 0.6029 - loss: 1.3997 - val_accuracy: 0.4449 - val_loss: 2.1414 - learning_rate: 0.0010
Epoch 4/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 98ms/step - accuracy: 0.6674 - loss: 1.1775 - val_accuracy: 0.4757 - val_loss: 2.0609 - learning_rate: 0.0010
Epoch 5/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 98ms/step - accuracy: 0.7148 - loss: 0.9478 - val_accuracy: 0.5066 - val_loss: 2.1444 - learning_rate: 0.0010
Epoch 6/

In [68]:
# model_configs = [
#    ('LSTM Model', model_builder.create_lstm_model),
#    ('BiLSTM with Attention', model_builder.create_bilstm_attention_model),
#    ('CNN Model', model_builder.create_cnn_model),
#    ('Transformer Model', model_builder.create_transformer_model),
#    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# ]


# Initialize model builder
model_builder = DeepLearningModels(vocab_size, max_len, num_features, num_classes)

#'CNN Model', model_builder.create_cnn_model)
# Define models to train
model_configs = [
    ('CNN Model', model_builder.create_cnn_model)
]

# Train and evaluate all models

for model_name, model_func in model_configs:
    print(f"\n{'='*50}")
    print(f"Building and training {model_name}")
    print('='*50)
    
    # Create and compile model
    model = model_func()
    model = model_builder.compile_model(model)
    
    print(f"\n{model_name} Architecture:")
    model.summary()
    
    # Train model
    model, history = model_builder.train_model(
        model, model_name,
        X_text_train_balanced, X_num_train_balanced, y_train_balanced,
        X_text_val, X_num_val, y_val,
        class_weights, epochs=50
    )
    
    # Plot training history
    model_builder.plot_training_history(model_name)
    
    # Evaluate model
    results = model_builder.evaluate_model(
        model, model_name, X_text_test, X_num_test, y_test, class_names
    )
    
    # Plot confusion matrix
    model_builder.plot_confusion_matrix(results, class_names)
    
    all_results.append(results)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"F1-Score (Weighted): {results['f1_weighted']:.4f}")
    print(f"F1-Score (Macro): {results['f1_macro']:.4f}")
    print(f"AUC Score: {results['auc_score']:.4f}")
    
    # Save results
    with open('data/model_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    




Building and training CNN Model

CNN Model Architecture:



Training CNN Model...
Epoch 1/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 36ms/step - accuracy: 0.2584 - loss: 2.6991 - val_accuracy: 0.0985 - val_loss: 2.1314 - learning_rate: 0.0010
Epoch 2/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 35ms/step - accuracy: 0.3746 - loss: 1.9317 - val_accuracy: 0.4390 - val_loss: 1.6459 - learning_rate: 0.0010
Epoch 3/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 35ms/step - accuracy: 0.5073 - loss: 1.6871 - val_accuracy: 0.4346 - val_loss: 1.6381 - learning_rate: 0.0010
Epoch 4/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 36ms/step - accuracy: 0.5852 - loss: 1.4805 - val_accuracy: 0.4765 - val_loss: 1.7161 - learning_rate: 0.0010
Epoch 5/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 35ms/step - accuracy: 0.6423 - loss: 1.2846 - val_accuracy: 0.4610 - val_loss: 1.6083 - learning_rate: 0.0010
Epoch 6/50
[1m1242/1242[0

In [69]:
# model_configs = [
#    ('LSTM Model', model_builder.create_lstm_model),
#    ('BiLSTM with Attention', model_builder.create_bilstm_attention_model),
#    ('CNN Model', model_builder.create_cnn_model),
#    ('Transformer Model', model_builder.create_transformer_model),
#    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# ]


# Initialize model builder
model_builder = DeepLearningModels(vocab_size, max_len, num_features, num_classes)

#'Transformer Model', model_builder.create_transformer_model)
# Define models to train
model_configs = [
    ('Transformer Model', model_builder.create_transformer_model)
]

# Train and evaluate all models

for model_name, model_func in model_configs:
    print(f"\n{'='*50}")
    print(f"Building and training {model_name}")
    print('='*50)
    
    # Create and compile model
    model = model_func()
    model = model_builder.compile_model(model)
    
    print(f"\n{model_name} Architecture:")
    model.summary()
    
    # Train model
    model, history = model_builder.train_model(
        model, model_name,
        X_text_train_balanced, X_num_train_balanced, y_train_balanced,
        X_text_val, X_num_val, y_val,
        class_weights, epochs=50
    )
    
    # Plot training history
    model_builder.plot_training_history(model_name)
    
    # Evaluate model
    results = model_builder.evaluate_model(
        model, model_name, X_text_test, X_num_test, y_test, class_names
    )
    
    # Plot confusion matrix
    model_builder.plot_confusion_matrix(results, class_names)
    
    all_results.append(results)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"F1-Score (Weighted): {results['f1_weighted']:.4f}")
    print(f"F1-Score (Macro): {results['f1_macro']:.4f}")
    print(f"AUC Score: {results['auc_score']:.4f}")
    
    # Save results
    with open('data/model_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    




Building and training Transformer Model

Transformer Model Architecture:



Training Transformer Model...
Epoch 1/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 119ms/step - accuracy: 0.2573 - loss: 2.8454 - val_accuracy: 0.2868 - val_loss: 1.8903 - learning_rate: 0.0010
Epoch 2/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 116ms/step - accuracy: 0.3511 - loss: 1.9867 - val_accuracy: 0.4316 - val_loss: 1.6883 - learning_rate: 0.0010
Epoch 3/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 117ms/step - accuracy: 0.4330 - loss: 1.8469 - val_accuracy: 0.5868 - val_loss: 1.3917 - learning_rate: 0.0010
Epoch 4/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 117ms/step - accuracy: 0.4962 - loss: 1.7549 - val_accuracy: 0.4368 - val_loss: 1.7976 - learning_rate: 0.0010
Epoch 5/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 117ms/step - accuracy: 0.5257 - loss: 1.6704 - val_accuracy: 0.3956 - val_loss: 2.0351 - learning_rate: 0.0010
Epoch 6/5

In [70]:
# model_configs = [
#    ('LSTM Model', model_builder.create_lstm_model),
#    ('BiLSTM with Attention', model_builder.create_bilstm_attention_model),
#    ('CNN Model', model_builder.create_cnn_model),
#    ('Transformer Model', model_builder.create_transformer_model),
#    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# ]


# Initialize model builder
model_builder = DeepLearningModels(vocab_size, max_len, num_features, num_classes)

#'Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model)
# Define models to train
model_configs = [
    ('Hybrid CNN-LSTM', model_builder.create_hybrid_cnn_lstm_model_modified)
]

# Train and evaluate all models

for model_name, model_func in model_configs:
    print(f"\n{'='*50}")
    print(f"Building and training {model_name}")
    print('='*50)
    
    # Create and compile model
    model = model_func()
    model = model_builder.compile_model(model)
    
    print(f"\n{model_name} Architecture:")
    model.summary()
    
    # Train model
    model, history = model_builder.train_model(
        model, model_name,
        X_text_train_balanced, X_num_train_balanced, y_train_balanced,
        X_text_val, X_num_val, y_val,
        class_weights, epochs=50
    )
    
    # Plot training history
    model_builder.plot_training_history(model_name)
    
    # Evaluate model
    results = model_builder.evaluate_model(
        model, model_name, X_text_test, X_num_test, y_test, class_names
    )
    
    # Plot confusion matrix
    model_builder.plot_confusion_matrix(results, class_names)
    
    all_results.append(results)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"F1-Score (Weighted): {results['f1_weighted']:.4f}")
    print(f"F1-Score (Macro): {results['f1_macro']:.4f}")
    print(f"AUC Score: {results['auc_score']:.4f}")
    
    # Save results
    with open('data/model_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    




Building and training Hybrid CNN-LSTM

Hybrid CNN-LSTM Architecture:



Training Hybrid CNN-LSTM...
Epoch 1/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 161ms/step - accuracy: 0.2671 - loss: 2.1110 - val_accuracy: 0.1110 - val_loss: 2.6376 - learning_rate: 0.0010
Epoch 2/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 158ms/step - accuracy: 0.3372 - loss: 1.9020 - val_accuracy: 0.1949 - val_loss: 2.5058 - learning_rate: 0.0010
Epoch 3/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 161ms/step - accuracy: 0.4104 - loss: 1.7548 - val_accuracy: 0.4125 - val_loss: 1.7920 - learning_rate: 0.0010
Epoch 4/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 158ms/step - accuracy: 0.5228 - loss: 1.5784 - val_accuracy: 0.4941 - val_loss: 1.7985 - learning_rate: 0.0010
Epoch 5/50
[1m1242/1242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 159ms/step - accuracy: 0.5841 - loss: 1.4384 - val_accuracy: 0.5294 - val_loss: 1.6735 - learning_rate: 0.0010
Epoch 6/50


In [71]:
# Create comparison summary
summary_data = []
for result in all_results:
    summary_data.append({
        'Model': result['model_name'],
        'Accuracy': result['accuracy'],
        'F1-Weighted': result['f1_weighted'],
        'F1-Macro': result['f1_macro'],
        'AUC Score': result['auc_score']
    })

summary_df = pd.DataFrame(summary_data)
print(all_results)
#summary_df = summary_df.sort_values('F1-Weighted', ascending=False)


[{'model_name': 'LSTM Model', 'accuracy': 0.5277675616035307, 'f1_weighted': 0.5829629168629785, 'f1_macro': 0.3485824084333385, 'auc_score': np.float64(0.8413423767976255), 'precision': array([0.88469185, 0.        , 0.09256198, 0.17560976, 0.77281947]), 'recall': array([0.62853107, 0.        , 0.43410853, 0.49090909, 0.48596939]), 'f1_per_class': array([0.73492981, 0.        , 0.15258856, 0.25868263, 0.59671104]), 'support': array([1416,  170,  129,  220,  784]), 'classification_report': {'Rating 1': {'precision': 0.8846918489065606, 'recall': 0.6285310734463276, 'f1-score': 0.7349298100743188, 'support': 1416.0}, 'Rating 2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170.0}, 'Rating 3': {'precision': 0.09256198347107437, 'recall': 0.43410852713178294, 'f1-score': 0.15258855585831063, 'support': 129.0}, 'Rating 4': {'precision': 0.17560975609756097, 'recall': 0.4909090909090909, 'f1-score': 0.25868263473053893, 'support': 220.0}, 'Rating 5': {'precision': 0.7728194

In [73]:
# Create comparison summary
summary_data = []
for result in all_results:
    summary_data.append({
        'Model': result['model_name'],
        'Accuracy': result['accuracy'],
        'F1-Weighted': result['f1_weighted'],
        'F1-Macro': result['f1_macro'],
        'AUC Score': result['auc_score']
    })

summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values('F1-Weighted', ascending=False)

print(f"\n{'='*80}")
print("MODEL PERFORMANCE COMPARISON")
print('='*80)
print(summary_df.to_string(index=False, float_format='%.4f'))

# Save summary
summary_df.to_csv('data/model_comparison_summary.csv', index=False)

# Plot comparison
plt.figure(figsize=(14, 8))

metrics = ['Accuracy', 'F1-Weighted', 'F1-Macro', 'AUC Score']
x = np.arange(len(summary_df))
width = 0.2

for i, metric in enumerate(metrics):
    plt.bar(x + i*width, summary_df[metric], width, label=metric, alpha=0.8)

plt.xlabel('Models')
plt.ylabel('Score')
plt.title('Deep Learning Models Performance Comparison')
plt.xticks(x + width*1.5, summary_df['Model'], rotation=45, ha='right')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('charts/model_comparison.png', dpi=300, bbox_inches='tight')
plt.close()

print(f"\nBest performing model: {summary_df.iloc[0]['Model']}")
print(f"Best F1-Weighted Score: {summary_df.iloc[0]['F1-Weighted']:.4f}")

# After training all models, save everything
results_path = model_builder.save_all_models(all_results)
print(f"All models and results saved. Results path: {results_path}")
    
    




MODEL PERFORMANCE COMPARISON
                Model  Accuracy  F1-Weighted  F1-Macro  AUC Score
    Transformer Model    0.5535       0.6152    0.3667     0.8853
            CNN Model    0.5388       0.5901    0.3388     0.8425
      Hybrid CNN-LSTM    0.5362       0.5848    0.3367     0.8150
           LSTM Model    0.5278       0.5830    0.3486     0.8413
BiLSTM with Attention    0.4200       0.4980    0.3047     0.8495

Best performing model: Transformer Model
Best F1-Weighted Score: 0.6152
⚠️ Model LSTM Model not found in trained models
⚠️ Model BiLSTM with Attention not found in trained models
⚠️ Model CNN Model not found in trained models
⚠️ Model Transformer Model not found in trained models
✅ Saved API-ready Hybrid CNN-LSTM package to api_models\hybrid_cnn-lstm

All models saved in API-ready format.
You can now deploy any model by copying its directory to your API server.
All models and results saved. Results path: api_models\data\model_results_20250808_122546.pkl
