BLOCK 1: Setup & Configuration

In [None]:
#Import libraries and functions
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from keras.models import Sequential, Model
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout
from keras.layers import Input, LSTM, Concatenate, RepeatVector, Permute, BatchNormalization
from keras.optimizers import Adam

# Configuration class
class Config:
    """Central experiment settings for the CVD prediction notebook."""

    # --- Dataset ---
    DATA_PATH = 'Dataset.csv'  # CSV file read by the data-loading cell
    TARGET_COL = "target"  # name of the label column
    # Columns that are one-hot encoded and excluded from standard scaling
    CATEGORICAL_COLS = ["chest pain type", "resting ecg", "ST slope"]

    # --- Training ---
    RANDOM_STATE = 42  # seed passed to NumPy, TensorFlow, SMOTE and the CV splitters
    N_SPLITS = 5  # number of cross-validation folds
    EPOCHS = 100  # epochs per fold for both models
    CNN_BATCH_SIZE = 16
    LSTM_BATCH_SIZE = 32
    CNN_LEARNING_RATE = 1e-4  # Adam learning rate for the CNN
    LSTM_LEARNING_RATE = 1e-3  # Adam learning rate for the LSTM

    # --- Architecture ---
    CNN_FILTERS = [64, 128, 256]  # filters of the three Conv1D layers, in order
    CNN_KERNEL_SIZES = [5, 3, 1]  # kernel sizes of the three Conv1D layers, in order
    LSTM_UNITS = [256, 128]  # units of the first and second LSTM layer

# Set random seeds for reproducibility.
# Python's built-in `random` is seeded alongside NumPy and TensorFlow: this is
# the same trio that keras.utils.set_random_seed is documented to seed, and
# leaving one generator unseeded can make weight initialisation differ between runs.
import random
import tensorflow as tf

random.seed(Config.RANDOM_STATE)
np.random.seed(Config.RANDOM_STATE)
tf.random.set_seed(Config.RANDOM_STATE)

print("Setup complete. Configuration loaded.")

BLOCK 2: Data Loading & Exploration

In [None]:
print("Loading dataset...")

# Read the raw CSV into the module-level frame used by the later cells
df = pd.read_csv(Config.DATA_PATH)
n_rows = len(df)
print(f"Dataset shape: {df.shape}")
print(f"Features: {df.columns.tolist()}")

# High-level summary of the frame
print("\nDataset Overview:")
print("-" * 50)
print(f"Total samples: {n_rows}")
print(f"Number of features: {df.shape[1] - 1}")  # the target column is not counted
print(f"Target variable: '{Config.TARGET_COL}'")
print(f"Missing values: {df.isnull().sum().sum()}")

# Class balance of the target; a class absent from the data is reported as 0
target_counts = df[Config.TARGET_COL].value_counts()
print("\nTarget Distribution:")
for class_id, class_name in ((0, "No CVD"), (1, "CVD")):
    class_size = target_counts.get(class_id, 0)
    print(f"  Class {class_id} ({class_name}): {class_size} samples ({class_size/n_rows*100:.1f}%)")

# Peek at the first rows
print("\nData Preview (first 5 rows):")
print(df.head())

print("\nData loading complete.")

BLOCK 3: Data Preprocessing Pipeline

In [None]:
def preprocess_data(df):
    """
    Clean, scale and encode the cardiovascular disease dataset.

    Steps:
    1. Drop rows containing null values
    2. Standard-scale the numerical feature columns
    3. Drop rows where any numerical feature has |Z| > 3
    4. One-hot encode the categorical columns
    5. Separate features and target

    "Numerical" means every column that is neither listed in
    ``Config.CATEGORICAL_COLS`` nor the target. Only those columns take part in
    the outlier test: a Z-score on integer category codes or on the class label
    has no magnitude meaning and would discard rows merely for belonging to a
    rare category.

    Args:
        df (DataFrame): Raw dataset. It is not modified; all work is done on a
            copy produced by ``dropna``.

    Returns:
        X (DataFrame): Preprocessed features
        y (Series): Target variable
        df_encoded (DataFrame): Fully processed dataframe (features and target)
    """
    print("Starting data preprocessing...")

    # Step 1: Remove null values
    initial_count = len(df)
    df_clean = df.dropna().reset_index(drop=True)
    print(f"   Removed {initial_count - len(df_clean)} records with null values")

    # Step 2: Standard scaling (Z-score normalization)
    # NOTE(review): the scaler is fit on the whole dataset, before the
    # cross-validation splits made later in the notebook, so held-out rows
    # influence the scaling statistics - consider fitting per fold.
    non_numerical = set(Config.CATEGORICAL_COLS) | {Config.TARGET_COL}
    numerical_cols = [col for col in df_clean.columns if col not in non_numerical]

    if numerical_cols:
        scaler = StandardScaler()
        df_clean[numerical_cols] = scaler.fit_transform(df_clean[numerical_cols])
        print(f"  Scaled {len(numerical_cols)} numerical features")

    # Step 3: Outlier detection and removal (|Z| > 3) on numerical features only
    numeric = df_clean[numerical_cols]
    z_scores = (numeric - numeric.mean()) / numeric.std()
    # NaN Z-scores (constant column) compare False and therefore never flag a row
    is_outlier = (z_scores.abs() > 3).any(axis=1)
    df_no_outliers = df_clean[~is_outlier].reset_index(drop=True)
    print(f"  Removed {len(df_clean) - len(df_no_outliers)} outliers (|Z| > 3)")

    # Step 4: One-hot encoding for categorical variables
    df_encoded = pd.get_dummies(df_no_outliers, columns=Config.CATEGORICAL_COLS)
    print(f"  One-hot encoded {len(Config.CATEGORICAL_COLS)} categorical variables")

    # Step 5: Separate features and target
    X = df_encoded.drop(Config.TARGET_COL, axis=1)
    y = df_encoded[Config.TARGET_COL]

    print(f"  Final shape: {X.shape[0]} samples, {X.shape[1]} features")
    print("Preprocessing complete.")

    return X, y, df_encoded

# Apply preprocessing
X, y, df_processed = preprocess_data(df)

# Display processed data info
print("\nProcessed Data Summary:")
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
# Show at most five column names. The label is printed in both branches; the
# previous one-line conditional dropped it when there were five or fewer columns.
feature_names = list(X.columns)
if len(feature_names) > 5:
    print(f"Feature names: {feature_names[:5]}...")
else:
    print(f"Feature names: {feature_names}")

BLOCK 4: Data Balancing with SMOTE

In [None]:
def apply_smote_balancing(X, y):
    """
    Resample the dataset with SMOTE and report the class sizes before and after.

    Args:
        X: Features
        y: Target variable

    Returns:
        X_balanced: Resampled features as a float32 numpy array
        y_balanced: Resampled target as a float32 numpy array
    """
    def _distribution(labels):
        # One (class, count, percentage) triple per distinct label value
        classes, sizes = np.unique(labels, return_counts=True)
        return [(cls, size, size / len(labels) * 100) for cls, size in zip(classes, sizes)]

    print("\nApplying SMOTE for class balancing...")

    # Class sizes before resampling
    before = _distribution(y)
    print("  Initial class distribution:")
    for cls, count, share in before:
        print(f"    Class {cls}: {count} samples ({share:.1f}%)")

    # Resample with the shared seed
    sampler = SMOTE(random_state=Config.RANDOM_STATE)
    X_resampled, y_resampled = sampler.fit_resample(X, y)

    # Both outputs are cast to float32 numpy arrays
    X_balanced = np.array(X_resampled, dtype=np.float32)
    y_balanced = np.array(y_resampled, dtype=np.float32)

    # Class sizes after resampling
    after = _distribution(y_balanced)
    print("\n  After SMOTE balancing:")
    for cls, count, share in after:
        print(f"    Class {cls}: {count} samples ({share:.1f}%)")

    print(f"\nBalancing complete. New shape: {X_balanced.shape}")

    return X_balanced, y_balanced

# Apply SMOTE balancing to the preprocessed features/target; the resampled
# arrays are what both models are trained and evaluated on below.
# NOTE(review): resampling happens on the full dataset, before the
# cross-validation splits, so synthetic rows interpolated from a fold's test
# samples can end up in that fold's training set - consider resampling inside
# each training fold instead.
X_balanced, y_balanced = apply_smote_balancing(X, y)

BLOCK 5: CNN Model Definition & Training

In [None]:
def create_cnn_model(input_shape):
    """
    Build and compile the 1D CNN used for binary classification.

    Layer stack, in order:
    - Three Conv1D (ReLU) layers, each followed by MaxPooling1D(pool_size=2);
      filters and kernel sizes come from Config.CNN_FILTERS / Config.CNN_KERNEL_SIZES
    - Flatten
    - Dense layers of 512, 256, 128, 64 and 32 units (ReLU), each followed by Dropout(0.5)
    - Dense(1) with sigmoid activation

    Args:
        input_shape: Number of features; each sample is fed as (input_shape, 1)

    Returns:
        model: Sequential model compiled with Adam, binary cross-entropy and accuracy
    """
    stack = []

    # Convolutional stages; only the first one declares the input shape
    for stage in range(3):
        conv_kwargs = {
            'filters': Config.CNN_FILTERS[stage],
            'kernel_size': Config.CNN_KERNEL_SIZES[stage],
            'activation': 'relu',
        }
        if stage == 0:
            conv_kwargs['input_shape'] = (input_shape, 1)
        stack.append(Conv1D(**conv_kwargs))
        stack.append(MaxPooling1D(pool_size=2))

    # Classifier head: flatten, then a narrowing dense stack with dropout
    stack.append(Flatten())
    for units in (512, 256, 128, 64, 32):
        stack.append(Dense(units, activation='relu'))
        stack.append(Dropout(0.5))

    # Single sigmoid unit for the binary output
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)
    model.compile(
        optimizer=Adam(learning_rate=Config.CNN_LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return model

def train_cnn_model(X, y):
    """
    Train and evaluate the CNN with stratified k-fold cross-validation.

    The number of folds is ``Config.N_SPLITS``. For every fold a fresh model is
    built, fit for ``Config.EPOCHS`` epochs with the held-out fold passed as
    validation data (used only to record the curves), and then scored on that
    held-out fold. Training curves for all folds are drawn on one figure.

    Args:
        X: Features, convertible to a 2-D array (samples x features)
        y: Target variable, convertible to a 1-D array

    Returns:
        results_dict: Per-fold lists under the keys 'fold', 'accuracy',
            'precision', 'recall', 'f1' and 'loss' (final-epoch validation loss)
        histories: Keras training history for each fold
    """
    # Derive the label from the configuration so the banners stay correct
    # when N_SPLITS is changed
    cv_label = f"{Config.N_SPLITS}-Fold"

    print("\n" + "="*60)
    print(f"TRAINING CNN MODEL ({cv_label} Cross-Validation)")
    print("="*60)

    # Convert to numpy arrays so the fold indices can index them directly
    X_array = np.array(X)
    y_array = np.array(y)

    # Initialize results storage
    results_dict = {
        'fold': [], 'accuracy': [], 'precision': [],
        'recall': [], 'f1': [], 'loss': []
    }
    histories = []

    # Setup cross-validation
    skf = StratifiedKFold(n_splits=Config.N_SPLITS,
                          shuffle=True,
                          random_state=Config.RANDOM_STATE)

    # Setup visualization: accuracy on the top axis, loss on the bottom axis
    fig, axes = plt.subplots(2, 1, figsize=(10, 8))
    plt.suptitle(f'CNN Model: Training Progress ({cv_label} CV)', fontsize=14, y=1.02)

    # Cross-validation loop
    for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array), 1):
        print(f"\nFold {fold}/{Config.N_SPLITS}")
        print("-" * 40)

        # Split data
        X_train, X_test = X_array[train_idx], X_array[test_idx]
        y_train, y_test = y_array[train_idx], y_array[test_idx]

        # Reshape for CNN (add channel dimension) and cast to float32
        X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1).astype('float32')
        X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1).astype('float32')

        # Create and train a fresh model for this fold
        model = create_cnn_model(X_train_cnn.shape[1])
        history = model.fit(
            X_train_cnn, y_train,
            epochs=Config.EPOCHS,
            batch_size=Config.CNN_BATCH_SIZE,
            validation_data=(X_test_cnn, y_test),
            verbose=0
        )
        histories.append(history)

        # Plot training curves (solid = train, dashed = validation)
        axes[0].plot(history.history['accuracy'],
                    label=f'Fold {fold} Train', alpha=0.7, linewidth=1)
        axes[0].plot(history.history['val_accuracy'],
                    label=f'Fold {fold} Val', alpha=0.7, linewidth=1, linestyle='--')

        axes[1].plot(history.history['loss'],
                    label=f'Fold {fold} Train', alpha=0.7, linewidth=1)
        axes[1].plot(history.history['val_loss'],
                    label=f'Fold {fold} Val', alpha=0.7, linewidth=1, linestyle='--')

        # Evaluate model: round the sigmoid outputs to class labels and flatten
        # the (n, 1) prediction column so it matches the 1-D y_test
        y_pred = np.round(model.predict(X_test_cnn, verbose=0)).astype(int).ravel()

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Store results
        results_dict['fold'].append(fold)
        results_dict['accuracy'].append(accuracy)
        results_dict['precision'].append(precision)
        results_dict['recall'].append(recall)
        results_dict['f1'].append(f1)
        results_dict['loss'].append(history.history['val_loss'][-1])

        # Print fold results
        print(f"  Accuracy:  {accuracy:.3f}")
        print(f"  Precision: {precision:.3f}")
        print(f"  Recall:    {recall:.3f}")
        print(f"  F1-Score:  {f1:.3f}")

    # Configure plots
    axes[0].set_title('Model Accuracy', fontsize=12)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Accuracy')
    axes[0].legend(loc='lower right', fontsize=9)
    axes[0].grid(True, alpha=0.3)

    axes[1].set_title('Model Loss', fontsize=12)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Loss')
    axes[1].legend(loc='upper right', fontsize=9)
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print average results
    print("\n" + "="*60)
    print(f"CNN MODEL - AVERAGE RESULTS ({cv_label} CV)")
    print("="*60)
    print(f"Average Accuracy:  {np.mean(results_dict['accuracy']):.3f}")
    print(f"Average Precision: {np.mean(results_dict['precision']):.3f}")
    print(f"Average Recall:    {np.mean(results_dict['recall']):.3f}")
    print(f"Average F1-Score:  {np.mean(results_dict['f1']):.3f}")
    print("="*60)

    return results_dict, histories

# Train CNN model on the SMOTE-balanced arrays; keeps the per-fold metrics and
# the Keras histories for the comparison cell
cnn_results, cnn_histories = train_cnn_model(X_balanced, y_balanced)

BLOCK 6: LSTM Model Definition & Training

In [None]:
def create_lstm_model(input_shape, dropout_rate=0.3):
    """
    Build and compile the two-layer LSTM used for binary classification.

    Graph, in order:
    - Input of shape (1, input_shape), i.e. one timestep per sample
    - LSTM (Config.LSTM_UNITS[0], returning sequences), Dropout, BatchNormalization
    - "Attention" branch: Dense(1, tanh) score, flattened, repeated
      ``input_shape`` times and permuted, then concatenated to the first
      LSTM's output along the last axis
    - LSTM (Config.LSTM_UNITS[1]) over the concatenated tensor
    - Dense(1) with sigmoid activation

    Args:
        input_shape: Number of features in input data
        dropout_rate: Dropout rate applied after the first LSTM layer

    Returns:
        model: Functional model compiled with Adam, binary cross-entropy and accuracy
    """
    sequence_in = Input(shape=(1, input_shape))

    # Recurrent encoder with regularization
    encoded = LSTM(Config.LSTM_UNITS[0], return_sequences=True)(sequence_in)
    encoded = Dropout(dropout_rate)(encoded)
    encoded = BatchNormalization()(encoded)

    # Scalar tanh score, tiled so it can be joined with the encoder output
    score = Dense(1, activation='tanh')(encoded)
    score = Flatten()(score)
    score = RepeatVector(input_shape)(score)
    score = Permute((2, 1))(score)

    # Join encoder output and tiled score, then summarise with the second LSTM
    merged = Concatenate(axis=-1)([encoded, score])
    summary = LSTM(Config.LSTM_UNITS[1])(merged)

    # Binary output
    probability = Dense(1, activation='sigmoid')(summary)

    model = Model(inputs=sequence_in, outputs=probability)
    model.compile(
        optimizer=Adam(learning_rate=Config.LSTM_LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return model

def train_lstm_model(X, y):
    """
    Train and evaluate the LSTM with stratified k-fold cross-validation.

    The number of folds is ``Config.N_SPLITS``. The splitter is a shuffled
    ``StratifiedKFold`` seeded with ``Config.RANDOM_STATE`` - the same
    configuration ``train_cnn_model`` uses - so that, given the same ``y``,
    both models are scored on the same partitions and their per-fold metrics
    can be compared fold by fold.

    For every fold a fresh model is built, fit for ``Config.EPOCHS`` epochs
    with the held-out fold passed as validation data (used only to record the
    curves), and then scored on that held-out fold.

    Args:
        X: Features, convertible to a 2-D float32 array (samples x features)
        y: Target variable, convertible to a 1-D float32 array

    Returns:
        results_dict: Per-fold lists under the keys 'fold', 'accuracy',
            'precision', 'recall', 'f1' and 'loss' (final-epoch validation loss)
        histories: Keras training history for each fold
    """
    # Derive the label from the configuration so the banners stay correct
    # when N_SPLITS is changed
    cv_label = f"{Config.N_SPLITS}-Fold"

    print("\n" + "="*60)
    print(f"TRAINING LSTM MODEL ({cv_label} Cross-Validation)")
    print("="*60)

    # Convert to numpy arrays with explicit float32 dtype
    X_array = np.array(X, dtype=np.float32)
    y_array = np.array(y, dtype=np.float32)

    # Report the data types and shapes actually used for training
    print(f"Data type check:")
    print(f"  X_array dtype: {X_array.dtype}, shape: {X_array.shape}")
    print(f"  y_array dtype: {y_array.dtype}, shape: {y_array.shape}")

    # NaN features are replaced with 0 (after a warning) rather than aborting
    if np.isnan(X_array).any():
        print("  Warning: X_array contains NaN values")
        X_array = np.nan_to_num(X_array)  # Replace NaN with 0

    # Initialize results storage
    results_dict = {
        'fold': [], 'accuracy': [], 'precision': [],
        'recall': [], 'f1': [], 'loss': []
    }
    histories = []

    # Setup cross-validation (stratified, matching the CNN evaluation)
    skf = StratifiedKFold(n_splits=Config.N_SPLITS,
                          shuffle=True,
                          random_state=Config.RANDOM_STATE)

    # Setup visualization: accuracy on the top axis, loss on the bottom axis
    fig, axes = plt.subplots(2, 1, figsize=(10, 8))
    plt.suptitle(f'LSTM Model: Training Progress ({cv_label} CV)', fontsize=14, y=1.02)

    # Cross-validation loop
    for fold, (train_idx, test_idx) in enumerate(skf.split(X_array, y_array), 1):
        print(f"\nFold {fold}/{Config.N_SPLITS}")
        print("-" * 40)

        # Split data
        X_train, X_test = X_array[train_idx], X_array[test_idx]
        y_train, y_test = y_array[train_idx], y_array[test_idx]

        # Reshape for LSTM (add timestep dimension)
        X_train_lstm = X_train.reshape(-1, 1, X_train.shape[1]).astype(np.float32)
        X_test_lstm = X_test.reshape(-1, 1, X_test.shape[1]).astype(np.float32)

        print(f"  Training set shape: {X_train_lstm.shape}")
        print(f"  Test set shape: {X_test_lstm.shape}")

        # Create and train a fresh model for this fold
        model = create_lstm_model(X_train.shape[1])
        history = model.fit(
            X_train_lstm, y_train,
            epochs=Config.EPOCHS,
            batch_size=Config.LSTM_BATCH_SIZE,
            validation_data=(X_test_lstm, y_test),
            verbose=0
        )
        histories.append(history)

        # Plot training curves (solid = train, dashed = validation)
        axes[0].plot(history.history['accuracy'],
                    label=f'Fold {fold} Train', alpha=0.7, linewidth=1)
        axes[0].plot(history.history['val_accuracy'],
                    label=f'Fold {fold} Val', alpha=0.7, linewidth=1, linestyle='--')

        axes[1].plot(history.history['loss'],
                    label=f'Fold {fold} Train', alpha=0.7, linewidth=1)
        axes[1].plot(history.history['val_loss'],
                    label=f'Fold {fold} Val', alpha=0.7, linewidth=1, linestyle='--')

        # Evaluate model: threshold the sigmoid outputs at 0.5 and flatten the
        # (n, 1) prediction column so it matches the 1-D y_test
        y_pred = model.predict(X_test_lstm, verbose=0)
        y_pred_class = (y_pred > 0.5).astype(int).ravel()

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred_class)
        precision = precision_score(y_test, y_pred_class)
        recall = recall_score(y_test, y_pred_class)
        f1 = f1_score(y_test, y_pred_class)

        # Store results
        results_dict['fold'].append(fold)
        results_dict['accuracy'].append(accuracy)
        results_dict['precision'].append(precision)
        results_dict['recall'].append(recall)
        results_dict['f1'].append(f1)
        results_dict['loss'].append(history.history['val_loss'][-1])

        # Print fold results
        print(f"  Accuracy:  {accuracy:.3f}")
        print(f"  Precision: {precision:.3f}")
        print(f"  Recall:    {recall:.3f}")
        print(f"  F1-Score:  {f1:.3f}")

    # Configure plots
    axes[0].set_title('Model Accuracy', fontsize=12)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Accuracy')
    axes[0].legend(loc='lower right', fontsize=9)
    axes[0].grid(True, alpha=0.3)

    axes[1].set_title('Model Loss', fontsize=12)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Loss')
    axes[1].legend(loc='upper right', fontsize=9)
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print average results
    print("\n" + "="*60)
    print(f"LSTM MODEL - AVERAGE RESULTS ({cv_label} CV)")
    print("="*60)
    print(f"Average Accuracy:  {np.mean(results_dict['accuracy']):.3f}")
    print(f"Average Precision: {np.mean(results_dict['precision']):.3f}")
    print(f"Average Recall:    {np.mean(results_dict['recall']):.3f}")
    print(f"Average F1-Score:  {np.mean(results_dict['f1']):.3f}")
    print("="*60)

    return results_dict, histories

# Train LSTM model on the same SMOTE-balanced data as the CNN. The inputs are
# wrapped in np.array before the call; train_lstm_model converts them to
# float32 arrays again internally.
lstm_results, lstm_histories = train_lstm_model(np.array(X_balanced), np.array(y_balanced))

BLOCK 7: Results Comparison

In [None]:
def compare_results(cnn_results, lstm_results):
    """
    Print and plot a side-by-side comparison of the CNN and LSTM runs.

    Output, in order: a table of fold-averaged metrics, a 2x2 figure
    (accuracy by fold, F1 by fold, averaged metrics as bars, LSTM-minus-CNN
    differences as bars) and a short text summary of which model leads on
    accuracy and F1 and which has the lower accuracy standard deviation.

    Args:
        cnn_results: Results dictionary from CNN model; the keys 'fold',
            'accuracy', 'precision', 'recall' and 'f1' are read
        lstm_results: Results dictionary from LSTM model, same keys
    """
    metric_keys = ('accuracy', 'precision', 'recall', 'f1')
    metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

    print("\n" + "="*60)
    print("MODEL COMPARISON: CNN vs LSTM")
    print("="*60)

    # Fold-averaged value of every metric, per model
    cnn_avg = {key: np.mean(cnn_results[key]) for key in metric_keys}
    lstm_avg = {key: np.mean(lstm_results[key]) for key in metric_keys}

    # Comparison table: one row per model, one column per metric
    table = {'Model': ['CNN', 'LSTM']}
    for title, key in zip(metrics, metric_keys):
        table[title] = [cnn_avg[key], lstm_avg[key]]
    comparison_df = pd.DataFrame(table)

    print("\nAverage Performance Metrics:")
    print("-" * 65)
    print(comparison_df.to_string(index=False))
    print("-" * 65)

    # Visualizations
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    plt.suptitle('CNN vs LSTM: Performance Comparison', fontsize=16, y=1.02)

    # Top row: accuracy (left) and F1-Score (right) across folds
    for ax, key, title in ((axes[0, 0], 'accuracy', 'Accuracy'),
                           (axes[0, 1], 'f1', 'F1-Score')):
        ax.plot(cnn_results['fold'], cnn_results[key],
                'o-', label='CNN', linewidth=2, markersize=8)
        ax.plot(lstm_results['fold'], lstm_results[key],
                's--', label='LSTM', linewidth=2, markersize=8)
        ax.set_title(f'{title} by Fold', fontsize=12)
        ax.set_xlabel('Fold')
        ax.set_ylabel(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Bottom left: grouped bars of the averaged metrics
    cnn_vals = [cnn_avg[key] for key in metric_keys]
    lstm_vals = [lstm_avg[key] for key in metric_keys]
    x = np.arange(len(metrics))
    width = 0.35

    avg_ax = axes[1, 0]
    avg_ax.bar(x - width/2, cnn_vals, width, label='CNN', alpha=0.8)
    avg_ax.bar(x + width/2, lstm_vals, width, label='LSTM', alpha=0.8)
    avg_ax.set_title('Average Performance Metrics', fontsize=12)
    avg_ax.set_xlabel('Metric')
    avg_ax.set_ylabel('Score')
    avg_ax.set_xticks(x)
    avg_ax.set_xticklabels(metrics)
    avg_ax.legend()
    avg_ax.grid(True, alpha=0.3, axis='y')

    # Bottom right: LSTM minus CNN per metric (green if positive, red otherwise)
    diffs = [lstm_avg[key] - cnn_avg[key] for key in metric_keys]
    colors = ['green' if delta > 0 else 'red' for delta in diffs]

    diff_ax = axes[1, 1]
    diff_ax.bar(metrics, diffs, color=colors, alpha=0.7)
    diff_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    diff_ax.set_title('Performance Difference: LSTM - CNN', fontsize=12)
    diff_ax.set_xlabel('Metric')
    diff_ax.set_ylabel('Difference (LSTM - CNN)')
    diff_ax.grid(True, alpha=0.3, axis='y')

    # Value labels: above the bar for non-negative values, below for negative ones
    for position, delta in enumerate(diffs):
        if delta >= 0:
            offset, anchor = 0.01, 'bottom'
        else:
            offset, anchor = -0.02, 'top'
        diff_ax.text(position, delta + offset, f'{delta:.3f}', ha='center', va=anchor)

    plt.tight_layout()
    plt.show()

    # Text summary
    print("\nKEY INSIGHTS:")
    print("-" * 60)

    # Which model leads on accuracy, then on F1-Score
    for key, noun in (('accuracy', 'accuracy'), ('f1', 'F1-Score')):
        lstm_score, cnn_score = lstm_avg[key], cnn_avg[key]
        if lstm_score > cnn_score:
            print(f"• LSTM has higher {noun} by {lstm_score - cnn_score:.3f}")
        elif cnn_score > lstm_score:
            print(f"• CNN has higher {noun} by {cnn_score - lstm_score:.3f}")
        else:
            print(f"• Both models have equal {noun}")

    # Spread of the per-fold accuracy as a consistency measure
    cnn_std = np.std(cnn_results['accuracy'])
    lstm_std = np.std(lstm_results['accuracy'])

    print("\nModel Stability (Lower STD = More Consistent):")
    print(f"   CNN Accuracy STD:  {cnn_std:.4f}")
    print(f"   LSTM Accuracy STD: {lstm_std:.4f}")

    if cnn_std < lstm_std:
        steadier = "   • CNN shows more consistent performance across folds"
    elif lstm_std < cnn_std:
        steadier = "   • LSTM shows more consistent performance across folds"
    else:
        steadier = "   • Both models show equal consistency"
    print(steadier)


# Compare results: prints the metric table and insights and shows the 2x2
# comparison figure for the two cross-validated runs
compare_results(cnn_results, lstm_results)
# Closing separator line for the notebook output
print("\n" + "="*60)