# Variant of model 2: the number of hidden blocks is increased from 3 to 6.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import openpyxl
import os

# Try to import imblearn, provide install instructions if it fails
try:
    from imblearn.over_sampling import SMOTE
except ImportError:
    print("Error: The 'imbalanced-learn' library is required but not installed.")
    print("Please install it by running the following command in your terminal:")
    print("pip install imbalanced-learn")
    exit()


# --- Configuration ---
# Path of the CSV file loaded by the main block; it is relative, so it is
# resolved against the current working directory at run time.
DATA_FILE = '../classifier_data.csv' 

# Columns treated as prediction targets. The main block trains one separate
# classifier per entry and uses every remaining CSV column as an input feature.
CLASSIFIER_TARGET_COLUMNS = [
    'incident_type', 'incident_mechanism_1', 'incident_mechanism_2',
    'incident_mechanism_3', 'eap_enacted_y_n_due_to_incident',
    'fatalities_number', 'other_infrastructure_impacts', 'response',
    'incident_report_produced'
]

# --- Main Processing Function ---
def train_and_evaluate_model(X, y, target_name, metrics_list):
    """
    Train a dense neural-network classifier for one target and record its results.

    The function drops classes that have a single sample, one-hot encodes and
    scales the features, makes a stratified 80/20 train/test split, oversamples
    the training portion with SMOTE when every class has at least two training
    samples, trains a six-block fully connected network, and evaluates it on
    the held-out test split.

    Args:
        X: pandas DataFrame of input features (rows aligned with ``y``).
        y: pandas Series with the target labels for the rows of ``X``.
        target_name: Name of the target; used in log messages and as the stem
            of every output file name.
        metrics_list: List that receives one dict of performance metrics for
            this target (mutated in place).

    Returns:
        None. If fewer than two classes remain after cleaning, the function
        returns early without training or appending anything.

    Side effects:
        Writes ``{target_name}.h5`` (trained model),
        ``confusion_matrix_{target_name}.svg`` and ``report_{target_name}.xlsx``
        to the current working directory, and prints progress messages.
    """
    print(f"--- Processing target: {target_name} ---")

    # --- Pre-split Data Cleaning ---
    # The stratified split below needs at least two samples per class, so
    # classes with fewer than two samples are removed first.
    value_counts = y.value_counts()
    single_sample_classes = value_counts[value_counts < 2].index

    if not single_sample_classes.empty:
        print(f"Warning for target '{target_name}': Removing classes with only 1 sample: {list(single_sample_classes)}")
        mask = ~y.isin(single_sample_classes)
        # Count the rows actually dropped rather than the number of classes.
        removed_rows = int((~mask).sum())
        X = X[mask].copy()
        y = y[mask].copy()
        print(f"Removed {removed_rows} rows.")

    if y.nunique() < 2:
        print(f"Skipping '{target_name}' because it has fewer than 2 valid classes.\n")
        return

    # --- Preprocessing ---
    categorical_features = X.select_dtypes(include=['object', 'category']).columns
    numerical_features = X.select_dtypes(include=np.number).columns
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ],
        remainder='passthrough',
        # Always produce a dense array: Keras' validation_split does not accept
        # scipy sparse matrices, which ColumnTransformer may otherwise return.
        sparse_threshold=0
    )
    # Encode labels as integer codes; class_names maps a code back to its label.
    y_series = pd.Series(y).astype('category')
    y_codes = y_series.cat.codes
    class_names = y_series.cat.categories.tolist()
    X_train, X_test, y_train, y_test = train_test_split(X, y_codes, test_size=0.2, random_state=42, stratify=y_codes)
    # Fit the preprocessing on the training split only to avoid test leakage.
    X_train_processed = preprocessor.fit_transform(X_train)
    X_test_processed = preprocessor.transform(X_test)

    # --- SMOTE for Imbalanced Data ---
    min_class_samples = pd.Series(y_train).value_counts().min()
    if y_series.nunique() > 1 and min_class_samples > 1:
        # SMOTE needs k_neighbors < size of the smallest class.
        k_neighbors = min(5, min_class_samples - 1)
        print(f"Applying SMOTE... Using k_neighbors={k_neighbors}.")
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train_processed, y_train)
    else:
        print(f"Skipping SMOTE for '{target_name}'.")
        X_train_resampled, y_train_resampled = X_train_processed, y_train

    # Keras takes the validation split from the *last* rows, before shuffling.
    # SMOTE output keeps the original rows first and stacks the synthetic rows
    # after them, so without this shuffle the validation set would consist
    # mostly of synthetic minority samples that are then missing from training.
    shuffle_order = np.random.default_rng(42).permutation(X_train_resampled.shape[0])
    X_train_resampled = X_train_resampled[shuffle_order]
    y_train_resampled = np.asarray(y_train_resampled)[shuffle_order]

    # --- UPGRADED: Build and Train Model ---
    l2_reg = 0.001 # L2 regularization factor
    # (units, dropout rate) for each Dense -> BatchNorm -> ReLU -> Dropout block.
    hidden_blocks = [(256, 0.4), (128, 0.4), (64, 0.3), (64, 0.3), (64, 0.3), (64, 0.3)]

    layers = [tf.keras.layers.Input(shape=(X_train_resampled.shape[1],))]
    for units, dropout_rate in hidden_blocks:
        layers.extend([
            tf.keras.layers.Dense(units, kernel_regularizer=tf.keras.regularizers.l2(l2_reg)),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Dropout(dropout_rate),
        ])
    # Output Layer: one softmax unit per class.
    layers.append(tf.keras.layers.Dense(len(class_names), activation='softmax'))
    model = tf.keras.Sequential(layers)

    # UPGRADED: Optimizer with a specific learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    print("Training the upgraded model...")

    # UPGRADED: Add a learning rate scheduler
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

    model.fit(X_train_resampled, y_train_resampled, epochs=150, validation_split=0.2, callbacks=[early_stopping, reduce_lr], verbose=0)

    model_filename = f'{target_name}.h5'
    model.save(model_filename)
    print(f"Model saved as {model_filename}")

    # --- Evaluation ---
    y_pred = np.argmax(model.predict(X_test_processed), axis=1)
    # Pass every label explicitly so the matrix is square even when a class
    # is absent from the test split or never predicted.
    all_class_labels = range(len(class_names))
    cm = confusion_matrix(y_test, y_pred, labels=all_class_labels)

    # --- Calculate and Collect Performance Metrics ---
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    metrics_result = {
        'Model Output': target_name,
        'Accuracy': accuracy,
        'Precision (Weighted)': precision,
        'Recall (Weighted)': recall,
        'F1-Score (Weighted)': f1
    }

    if len(class_names) == 2:
        # Binary target: expose the four confusion-matrix cells individually.
        tn, fp, fn, tp = cm.ravel()
        metrics_result['TP (Actual 1, Predicted 1)'] = tp
        metrics_result['TN (Actual 0, Predicted 0)'] = tn
        metrics_result['FP (Actual 0, Predicted 1)'] = fp
        metrics_result['FN (Actual 1, Predicted 0)'] = fn

    metrics_list.append(metrics_result)
    print(f"✅ Performance metrics for '{target_name}' collected.")

    # --- Generate Other Reports ---
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
    svg_filename = f'confusion_matrix_{target_name}.svg'
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix for {target_name}')
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.tight_layout()
    plt.savefig(svg_filename, format='svg')
    plt.close()
    print(f"Confusion matrix plot saved as {svg_filename}")

    # Per-row report: raw test inputs next to the actual and predicted labels.
    report_filename = f'report_{target_name}.xlsx'
    results_df = X_test.copy()
    results_df['actual_outcome'] = y.loc[X_test.index]
    results_df['predicted_outcome'] = [class_names[i] for i in y_pred]
    results_df.to_excel(report_filename, sheet_name='Test_Inputs_and_Predictions')
    print(f"Detailed report saved as {report_filename}")
    print("-" * 40 + "\n")


# --- Main Execution ---
if __name__ == "__main__":
    # Load the dataset, train one classifier per configured target column and
    # write the collected metrics of all trained models to a single workbook.
    try:
        df = pd.read_csv(DATA_FILE)
    except FileNotFoundError:
        print(f"Error: The data file '{DATA_FILE}' was not found.")
        # exit() is an interactive helper injected by the site module; raise
        # SystemExit directly and report failure with a non-zero status.
        raise SystemExit(1) from None

    # Every column that is not a prediction target is used as an input feature.
    input_cols = [col for col in df.columns if col not in CLASSIFIER_TARGET_COLUMNS]

    model_metrics_data = []

    for target in CLASSIFIER_TARGET_COLUMNS:
        if target not in df.columns:
            print(f"Warning: Target column '{target}' not found. Skipping.")
            continue

        # Rows without a label for this target cannot be used for training.
        temp_df = df.dropna(subset=[target])
        X_filtered = temp_df[input_cols]
        y = temp_df[target]

        if y.nunique() < 2:
            print(f"Skipping '{target}' because it has less than 2 unique values.")
            continue

        train_and_evaluate_model(X_filtered, y, target, model_metrics_data)

    # Only write the summary workbook when at least one model produced metrics.
    if model_metrics_data:
        metrics_df = pd.DataFrame(model_metrics_data)
        metrics_filename = 'model_performance_metrics.xlsx'
        metrics_df.to_excel(metrics_filename, index=False)
        print(f"✅ All performance metrics saved to '{metrics_filename}'.")

    print("\nAll tasks complete.")


--- Processing target: incident_type ---
Removed 1 rows.
Applying SMOTE... Using k_neighbors=5.

Training the upgraded model...




  saving_api.save_model(


Model saved as incident_type.h5
✅ Performance metrics for 'incident_type' collected.
Confusion matrix plot saved as confusion_matrix_incident_type.svg
Detailed report saved as report_incident_type.xlsx
----------------------------------------

--- Processing target: incident_mechanism_1 ---
Removed 3 rows.
Applying SMOTE... Using k_neighbors=1.
Training the upgraded model...


  saving_api.save_model(


Model saved as incident_mechanism_1.h5
✅ Performance metrics for 'incident_mechanism_1' collected.
Confusion matrix plot saved as confusion_matrix_incident_mechanism_1.svg
Detailed report saved as report_incident_mechanism_1.xlsx
----------------------------------------

--- Processing target: incident_mechanism_2 ---
Removed 5 rows.
Applying SMOTE... Using k_neighbors=1.
Training the upgraded model...


  saving_api.save_model(


Model saved as incident_mechanism_2.h5
✅ Performance metrics for 'incident_mechanism_2' collected.
Confusion matrix plot saved as confusion_matrix_incident_mechanism_2.svg
Detailed report saved as report_incident_mechanism_2.xlsx
----------------------------------------

--- Processing target: incident_mechanism_3 ---
Removed 7 rows.
Applying SMOTE... Using k_neighbors=1.
Training the upgraded model...


  saving_api.save_model(


Model saved as incident_mechanism_3.h5
✅ Performance metrics for 'incident_mechanism_3' collected.
Confusion matrix plot saved as confusion_matrix_incident_mechanism_3.svg
Detailed report saved as report_incident_mechanism_3.xlsx
----------------------------------------

--- Processing target: eap_enacted_y_n_due_to_incident ---
Applying SMOTE... Using k_neighbors=1.
Training the upgraded model...


  saving_api.save_model(


Model saved as eap_enacted_y_n_due_to_incident.h5
✅ Performance metrics for 'eap_enacted_y_n_due_to_incident' collected.
Confusion matrix plot saved as confusion_matrix_eap_enacted_y_n_due_to_incident.svg
Detailed report saved as report_eap_enacted_y_n_due_to_incident.xlsx
----------------------------------------

--- Processing target: fatalities_number ---
Removed 14 rows.
Skipping SMOTE for 'fatalities_number'.
Training the upgraded model...


  saving_api.save_model(


Model saved as fatalities_number.h5
✅ Performance metrics for 'fatalities_number' collected.
Confusion matrix plot saved as confusion_matrix_fatalities_number.svg
Detailed report saved as report_fatalities_number.xlsx
----------------------------------------

--- Processing target: other_infrastructure_impacts ---
Removed 7 rows.
Applying SMOTE... Using k_neighbors=1.
Training the upgraded model...


  saving_api.save_model(


Model saved as other_infrastructure_impacts.h5
✅ Performance metrics for 'other_infrastructure_impacts' collected.
Confusion matrix plot saved as confusion_matrix_other_infrastructure_impacts.svg
Detailed report saved as report_other_infrastructure_impacts.xlsx
----------------------------------------

--- Processing target: response ---
Removed 204 rows.
Skipping SMOTE for 'response'.
Training the upgraded model...


  saving_api.save_model(


Model saved as response.h5
✅ Performance metrics for 'response' collected.
Confusion matrix plot saved as confusion_matrix_response.svg
Detailed report saved as report_response.xlsx
----------------------------------------

--- Processing target: incident_report_produced ---
Removed 1 rows.
Applying SMOTE... Using k_neighbors=3.
Training the upgraded model...


  saving_api.save_model(


Model saved as incident_report_produced.h5
✅ Performance metrics for 'incident_report_produced' collected.
Confusion matrix plot saved as confusion_matrix_incident_report_produced.svg
Detailed report saved as report_incident_report_produced.xlsx
----------------------------------------

✅ All performance metrics saved to 'model_performance_metrics.xlsx'.

All tasks complete.
