In [21]:
import os
import random
from glob import glob
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import ImageFile
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm

In [22]:
# Seed every random number generator used in this notebook (TensorFlow,
# NumPy and Python's `random`) with the same value so runs are repeatable.
seed = 42
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(seed)

# Tell PIL to load image files whose data is truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [23]:
# Constants
# Dataset root; images are discovered as <ROOT_DIR>/<class folder>/*.jpg.
ROOT_DIR = "Datasets/corrected_wildfires_dataset"
# Spatial size used both as the generators' target_size and the model input.
IMAGE_SIZE = (224, 224)
# NOTE(review): not referenced by any of the visible cells.
NUM_CLASSES = 2
# Upper bound on epochs per fit() call; early stopping may end training sooner.
EPOCHS = 10
# Number of splits for the StratifiedKFold cross-validation.
NUM_FOLDS = 10
# Generator batch size for the architecture search and k-fold cells.
BATCH_SIZE = 32

In [24]:
# Prepare dataset and labels
# glob() returns paths in a filesystem-dependent order. Sorting makes the
# seeded sampling and splitting further down select the same files on every
# machine and every run.
image_paths = sorted(glob(f"{ROOT_DIR}/*/*.jpg"))
class_labels = {"fire": 0, "nofire": 1}

# Keep only images whose parent folder is a known class, and record the
# integer label of that class alongside each kept path.
filtered_image_paths = []
labels = []
for p in image_paths:
    label_name = os.path.basename(os.path.dirname(p))
    if label_name in class_labels:
        filtered_image_paths.append(p)
        labels.append(class_labels[label_name])

# Fail early with an actionable message instead of an opaque error in the
# balancing step when the dataset folder is missing or empty.
if not filtered_image_paths:
    raise FileNotFoundError(
        f"No .jpg images found under {ROOT_DIR!r} in class folders {sorted(class_labels)}"
    )

# Convert to numpy arrays (enables index-array selection later on)
image_paths = np.array(filtered_image_paths)
labels = np.array(labels)

# Check class distribution before balancing
unique, counts = np.unique(labels, return_counts=True)
class_counts = dict(zip(unique, counts))
print("Class distribution before balancing:", class_counts)

Class distribution before balancing: {0: 469, 1: 990}


In [25]:
# Undersample so that every class keeps as many images as the rarest class.
min_count = min(class_counts.values())

sampled_paths, sampled_labels = [], []
for class_label in np.unique(labels):
    # Positions of all images of this class, then a random subset of them
    # drawn without replacement.
    candidates = np.flatnonzero(labels == class_label)
    chosen = np.random.choice(candidates, min_count, replace=False)
    sampled_paths += list(image_paths[chosen])
    sampled_labels += [class_label] * min_count

# Apply one random ordering to paths and labels together so the classes are
# interleaved instead of stored in contiguous runs.
order = np.arange(len(sampled_paths))
np.random.shuffle(order)
balanced_image_paths = np.array(sampled_paths)[order]
balanced_labels = np.array(sampled_labels)[order]

# From here on `image_paths` / `labels` refer to the balanced dataset.
image_paths, labels = balanced_image_paths, balanced_labels

# Report the class distribution after balancing.
unique, counts = np.unique(labels, return_counts=True)
class_counts = dict(zip(unique, counts))
print("Class distribution after balancing:", class_counts)

Class distribution after balancing: {0: 469, 1: 469}


In [26]:
# Hold out 20% of the data for validation, keeping class proportions equal
# in both parts (stratified) and the split repeatable (fixed random_state).
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=seed,
)

# Inverse of class_labels: integer label -> class name.
rev_class_labels = {index: name for name, index in class_labels.items()}

# The generators are fed DataFrames with a file path column and a string
# class-name column.
train_class_names = [rev_class_labels[label_id] for label_id in train_labels]
val_class_names = [rev_class_labels[label_id] for label_id in val_labels]

train_df = pd.DataFrame({'filename': train_paths, 'class': train_class_names})
val_df = pd.DataFrame({'filename': val_paths, 'class': val_class_names})


In [27]:
# Both generators rescale pixel values by 1/255.
pixel_scale = 1.0 / 255

# Random perturbations applied to training images only.
augmentation_options = dict(
    rotation_range=10,
    horizontal_flip=True,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=[0.9, 1.1],
)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=pixel_scale)

In [28]:
# Candidate CNN architectures, one spec dict per model.
# Each row of the table is:
#   (name, filters per conv layer, kernel size per conv layer,
#    units in the dense layer, dropout rate)
# 'num_conv_layers' is the number of entries in the filter list.
architectures = [
    {
        'name': name,
        'num_conv_layers': len(filters),
        'num_filters': filters,
        'kernel_sizes': kernels,
        'fc_units': fc_units,
        'dropout_rate': dropout,
    }
    for name, filters, kernels, fc_units, dropout in (
        ('Arch1', [32], [3], 64, 0.2),
        ('Arch2', [32, 64], [3, 3], 128, 0.3),
        ('Arch3', [64, 128], [5, 5], 256, 0.5),
        ('Arch4', [128, 256], [5, 5], 512, 0.5),
        ('Arch5', [64, 64], [3, 3], 128, 0.3),
        ('Arch6', [32, 64, 128], [3, 3, 3], 128, 0.3),
        ('Arch7', [64, 128, 256], [3, 3, 3], 256, 0.5),
        ('Arch8', [32, 64, 64], [5, 5, 5], 128, 0.4),
        ('Arch9', [64, 128, 256], [5, 3, 3], 256, 0.3),
        ('Arch10', [32, 64, 128, 256], [5, 3, 3, 3], 128, 0.5),
        ('Arch11', [32, 64, 128, 256], [3, 3, 3, 3], 128, 0.4),
    )
]

In [29]:
def create_model(arch):
    """Build an uncompiled Sequential CNN from an architecture spec.

    Args:
        arch: dict with the keys 'name', 'num_conv_layers', 'num_filters',
            'kernel_sizes', 'fc_units' and 'dropout_rate'. The first
            'num_conv_layers' entries of 'num_filters' and 'kernel_sizes'
            configure the convolution blocks.

    Returns:
        A Sequential model named arch['name'] that takes
        (IMAGE_SIZE[0], IMAGE_SIZE[1], 3) inputs and ends in a single
        sigmoid unit. The caller is responsible for compiling it.
    """
    model = models.Sequential(name=arch['name'])

    layer_stack = [layers.InputLayer(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))]

    # One Conv -> BatchNorm -> 2x2 MaxPool block per configured conv layer.
    for block_idx in range(arch['num_conv_layers']):
        layer_stack += [
            layers.Conv2D(
                arch['num_filters'][block_idx],
                arch['kernel_sizes'][block_idx],
                padding='same',
                activation='relu',
            ),
            layers.BatchNormalization(),
            layers.MaxPooling2D(pool_size=(2, 2)),
        ]

    # Classifier head: flatten, one dense layer with dropout, sigmoid output.
    layer_stack += [
        layers.Flatten(),
        layers.Dense(arch['fc_units'], activation='relu'),
        layers.Dropout(arch['dropout_rate']),
        layers.Dense(1, activation='sigmoid'),
    ]

    for layer in layer_stack:
        model.add(layer)
    return model


In [30]:
# Evaluate architectures
# Every candidate in `architectures` is trained on train_df and scored on
# val_df; one metrics dict per architecture is appended to `results`.
results = []
learning_rate = 0.001

print("\nStarting Architecture Evaluation...")
for arch in architectures:
    print(f"\nEvaluating Architecture: {arch['name']}")
    model = create_model(arch)
    model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Create generators for this architecture
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary'
    )

    # shuffle=False keeps predictions in the same order as `.classes`.
    val_generator = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )

    # Stop when val_loss has not improved for 3 epochs and roll back to the
    # weights of the best epoch.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Training
    history = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=val_generator,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluation: threshold the sigmoid outputs at 0.5 to get hard labels.
    val_generator.reset()
    preds = model.predict(val_generator)
    y_pred = (preds > 0.5).astype(int).flatten()
    y_true = val_generator.classes

    # Pin labels so the matrix is always 2x2; without this the four-way
    # unpacking below raises if only a single class shows up.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Compute metrics.
    # NOTE(review): class_labels maps "nofire" to 1, so the "positive" class
    # here is nofire and specificity is the fire-class detection rate —
    # assumes the generator's class indices match class_labels; confirm via
    # val_generator.class_indices.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # Sensitivity
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    roc_auc = roc_auc_score(y_true, preds) if not np.isnan(preds).any() else 0.0

    # Print metrics
    print(f"\nMetrics for Architecture {arch['name']}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}")
    print(f"Sensitivity (Recall): {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Area Under ROC Curve: {roc_auc:.4f}")

    # Store results (the trained model is kept so it can be retrieved later)
    results.append({
        'architecture': arch['name'],
        'model': model,
        'accuracy': accuracy,
        'tp': tp,
        'fp': fp,
        'tn': tn,
        'fn': fn,
        'sensitivity': recall,
        'specificity': specificity,
        'precision': precision,
        'f1_score': f1,
        'roc_auc': roc_auc
    })


Starting Architecture Evaluation...

Evaluating Architecture: Arch1
Found 750 validated image filenames belonging to 2 classes.
Found 188 validated image filenames belonging to 2 classes.




Epoch 1/10


  self._warn_if_super_not_called()
2024-11-20 16:12:31.540979: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 3 of 8
2024-11-20 16:12:41.554865: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 5 of 8
2024-11-20 16:12:59.105016: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 6s/step - accuracy: 0.6261 - loss: 20.7845 - val_accuracy: 0.7713 - val_loss: 0.5665
Epoch 2/10


2024-11-20 16:15:37.541424: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 3 of 8
2024-11-20 16:15:53.278604: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 6 of 8
2024-11-20 16:16:03.608045: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 6s/step - accuracy: 0.6846 - loss: 1.5306 - val_accuracy: 0.5160 - val_loss: 0.6883
Epoch 3/10


2024-11-20 16:18:25.140126: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 2 of 8
2024-11-20 16:18:37.110531: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 4 of 8
2024-11-20 16:18:49.466037: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 6 of 8
2024-11-20 16:19:04.902539: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 6s/step - accuracy: 0.6507 - loss: 1.0013 - val_accuracy: 0.6277 - val_loss: 0.6494
Epoch 4/10


2024-11-20 16:21:24.910401: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 2 of 8
2024-11-20 16:21:36.109292: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:2: Filling up shuffle buffer (this may take a while): 4 of 8
2024-11-20 16:21:55.180584: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 5s/step - accuracy: 0.6652 - loss: 0.7444 - val_accuracy: 0.6809 - val_loss: 0.6097
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 6s/step

Metrics for Architecture Arch1:
Accuracy: 0.7713
TP: 90, FP: 39, TN: 55, FN: 4
Sensitivity (Recall): 0.9574
Specificity: 0.5851
Precision: 0.6977
F1 Score: 0.8072
Area Under ROC Curve: 0.9056

Evaluating Architecture: Arch2
Found 750 validated image filenames belonging to 2 classes.
Found 188 validated image filenames belonging to 2 classes.




Epoch 1/10


  self._warn_if_super_not_called()
2024-11-20 16:25:10.274162: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:24: Filling up shuffle buffer (this may take a while): 3 of 8
2024-11-20 16:25:22.841603: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:24: Filling up shuffle buffer (this may take a while): 5 of 8
2024-11-20 16:25:39.353301: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


KeyboardInterrupt: 

In [None]:
# Tabulate the per-architecture results and show the headline metrics.
results_df = pd.DataFrame(results)
summary_columns = ['architecture', 'accuracy', 'precision', 'sensitivity', 'specificity', 'f1_score', 'roc_auc']
print("\nArchitectures Evaluation Results:")
print(results_df[summary_columns])

# Pick the winner by validation accuracy (highest first).
ranked_by_accuracy = results_df.sort_values(by='accuracy', ascending=False)
best_architecture_name = ranked_by_accuracy.iloc[0]['architecture']

# Look up the winner's result row, its trained model and its spec dict.
is_best = results_df['architecture'] == best_architecture_name
best_model_info = results_df[is_best].iloc[0]
best_model = best_model_info['model']
best_architecture = next(
    spec for spec in architectures if spec['name'] == best_architecture_name
)

print(f"\nBest Architecture: {best_architecture_name}")

In [None]:
# Stratified K-Fold Cross Validation on Best Architecture
# A fresh model of the best architecture is trained and scored on each of
# NUM_FOLDS stratified splits of the balanced dataset.
print(f"\nConducting K-Fold Validation on Best Architecture: {best_architecture_name}")

kfold = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=seed)
fold_metrics = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(image_paths, labels)):
    print(f"\nFold {fold+1}/{NUM_FOLDS}")

    # A new model is built every fold; clear Keras' global state so memory
    # held for earlier folds does not accumulate across iterations.
    tf.keras.backend.clear_session()

    train_paths_fold, val_paths_fold = image_paths[train_idx], image_paths[val_idx]
    train_labels_fold, val_labels_fold = labels[train_idx], labels[val_idx]

    # Create DataFrames for generators with string labels
    train_df_fold = pd.DataFrame({
        'filename': train_paths_fold,
        'class': [rev_class_labels[label] for label in train_labels_fold]
    })

    val_df_fold = pd.DataFrame({
        'filename': val_paths_fold,
        'class': [rev_class_labels[label] for label in val_labels_fold]
    })

    # Create generators
    train_generator_fold = train_datagen.flow_from_dataframe(
        dataframe=train_df_fold,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary'
    )

    # shuffle=False keeps predictions in the same order as `.classes`.
    val_generator_fold = val_datagen.flow_from_dataframe(
        dataframe=val_df_fold,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )

    # Create model
    model = create_model(best_architecture)
    model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and roll back to the
    # weights of the best epoch.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Training
    history = model.fit(
        train_generator_fold,
        epochs=EPOCHS,
        validation_data=val_generator_fold,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluation: threshold the sigmoid outputs at 0.5 to get hard labels.
    val_generator_fold.reset()
    preds = model.predict(val_generator_fold)
    y_pred = (preds > 0.5).astype(int).flatten()
    y_true = val_generator_fold.classes

    # Pin labels so the matrix is always 2x2; without this the four-way
    # unpacking below raises if only a single class shows up in a fold.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Compute metrics.
    # NOTE(review): class_labels maps "nofire" to 1, so the "positive" class
    # here is nofire and specificity is the fire-class detection rate —
    # confirm via val_generator_fold.class_indices.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # Sensitivity
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    roc_auc = roc_auc_score(y_true, preds) if not np.isnan(preds).any() else 0.0

    # Print metrics
    print(f"\nMetrics for Fold {fold+1}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}")
    print(f"Sensitivity (Recall): {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Area Under ROC Curve: {roc_auc:.4f}")

    # Store metrics
    fold_metrics.append({
        'fold': fold + 1,
        'accuracy': accuracy,
        'tp': tp,
        'fp': fp,
        'tn': tn,
        'fn': fn,
        'sensitivity': recall,
        'specificity': specificity,
        'precision': precision,
        'f1_score': f1,
        'roc_auc': roc_auc
    })

# Average metrics over folds
kfold_results_df = pd.DataFrame(fold_metrics)
avg_metrics = kfold_results_df.mean()
print("\nK-Fold Cross-Validation Results:")
print(kfold_results_df[['fold', 'accuracy', 'precision', 'sensitivity', 'specificity', 'f1_score', 'roc_auc']])
print("\nAverage Metrics over all folds:")
print(avg_metrics[['accuracy', 'precision', 'sensitivity', 'specificity', 'f1_score', 'roc_auc']])


In [None]:
# Hyperparameter tuning on the best architecture
# Grid search over learning rate, dropout rate and batch size. Each
# combination trains a fresh model on train_df and is scored on val_df.
print("\nPerforming Hyperparameter Tuning on the Best Architecture")

# Define hyperparameters for tuning
learning_rates = [0.0001, 0.001, 0.01]
dropout_rates = [0.3, 0.5]
batch_sizes = [16, 32, 64]

tuning_results = []

for lr, dr, bs in product(learning_rates, dropout_rates, batch_sizes):
    print(f"\nEvaluating Hyperparameters: LR={lr}, Dropout={dr}, Batch Size={bs}")

    # A new model is built for every combination; clear Keras' global state
    # so memory held for earlier combinations does not accumulate.
    tf.keras.backend.clear_session()

    # Copy the spec so the dropout override does not modify best_architecture.
    arch = best_architecture.copy()
    arch['dropout_rate'] = dr

    # Adjust batch size and create generators
    train_generator_tune = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=bs,
        class_mode='binary'
    )

    # shuffle=False keeps predictions in the same order as `.classes`.
    val_generator_tune = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='filename',
        y_col='class',
        target_size=IMAGE_SIZE,
        batch_size=bs,
        class_mode='binary',
        shuffle=False
    )

    # Create model
    model = create_model(arch)
    model.compile(optimizer=optimizers.Adam(learning_rate=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Stop when val_loss has not improved for 3 epochs and roll back to the
    # weights of the best epoch.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Training
    history = model.fit(
        train_generator_tune,
        epochs=EPOCHS,
        validation_data=val_generator_tune,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluation: threshold the sigmoid outputs at 0.5 to get hard labels.
    val_generator_tune.reset()
    preds = model.predict(val_generator_tune)
    y_pred = (preds > 0.5).astype(int).flatten()
    y_true = val_generator_tune.classes

    # Pin labels so the matrix is always 2x2; without this the four-way
    # unpacking below raises if only a single class shows up.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Compute metrics
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # Sensitivity
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    roc_auc = roc_auc_score(y_true, preds) if not np.isnan(preds).any() else 0.0

    # Print metrics
    print(f"\nMetrics for Hyperparameters LR={lr}, Dropout={dr}, Batch Size={bs}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}")
    print(f"Sensitivity (Recall): {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Area Under ROC Curve: {roc_auc:.4f}")

    # Store results
    tuning_results.append({
        'learning_rate': lr,
        'dropout_rate': dr,
        'batch_size': bs,
        'accuracy': accuracy,
        'tp': tp,
        'fp': fp,
        'tn': tn,
        'fn': fn,
        'sensitivity': recall,
        'specificity': specificity,
        'precision': precision,
        'f1_score': f1,
        'roc_auc': roc_auc
    })

# Create a DataFrame with the tuning results
tuning_results_df = pd.DataFrame(tuning_results)
print("\nHyperparameter Tuning Results:")
print(tuning_results_df[['learning_rate', 'dropout_rate', 'batch_size', 'accuracy', 'precision', 'sensitivity', 'specificity', 'f1_score', 'roc_auc']])

# Find the best hyperparameters based on your preferred metric (e.g., highest specificity with high accuracy)
# NOTE(review): class_labels maps "fire" to 0, so specificity here is the
# fire-class detection rate — confirm this is the intended criterion.
best_tuning_index = tuning_results_df.sort_values(by=['specificity', 'accuracy'], ascending=False).index[0]
best_hyperparams = tuning_results_df.loc[best_tuning_index]
print(f"\nBest Hyperparameters:")
print(best_hyperparams)

In [None]:
# Final Training with Best Hyperparameters on Full Dataset
print("\nTraining Final Model with Best Hyperparameters on Full Dataset")

# Update architecture and hyperparameters.
# best_hyperparams is one row of a DataFrame that mixes int and float
# columns, so pandas hands every value back as a float (batch size 32
# becomes 32.0). Cast back to plain Python types; the generator needs an
# integer batch size to slice its batches.
final_architecture = best_architecture.copy()
final_architecture['dropout_rate'] = float(best_hyperparams['dropout_rate'])
final_learning_rate = float(best_hyperparams['learning_rate'])
final_batch_size = int(best_hyperparams['batch_size'])

# Create DataFrame with string labels
full_df = pd.DataFrame({
    'filename': image_paths,
    'class': [rev_class_labels[label] for label in labels]
})

# Create generator on full dataset (augmented and shuffled, for training)
train_generator_full = train_datagen.flow_from_dataframe(
    dataframe=full_df,
    x_col='filename',
    y_col='class',
    target_size=IMAGE_SIZE,
    batch_size=final_batch_size,
    class_mode='binary'
)

# Create model
final_model = create_model(final_architecture)
final_model.compile(optimizer=optimizers.Adam(learning_rate=final_learning_rate),
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# No validation data is passed to this fit, so early stopping watches the
# training loss instead of val_loss.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

# Training
history = final_model.fit(
    train_generator_full,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    verbose=1
)

# Save the final model
final_model.save('final_model.h5')

In [None]:
# Evaluate Final Model on Training Data
# The training generator shuffles (and augments) its images, so its
# predictions come back in a different order than `.classes`, and comparing
# the two pairs predictions with the wrong labels. Score the model with an
# unshuffled, unaugmented generator over the same DataFrame instead.
eval_generator_full = val_datagen.flow_from_dataframe(
    dataframe=full_df,
    x_col='filename',
    y_col='class',
    target_size=IMAGE_SIZE,
    batch_size=int(final_batch_size),
    class_mode='binary',
    shuffle=False
)
preds = final_model.predict(eval_generator_full)
y_pred = (preds > 0.5).astype(int).flatten()
y_true = eval_generator_full.classes

# Pin labels so the matrix is always 2x2 for the four-way unpacking below.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# Compute metrics
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # Sensitivity
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
roc_auc = roc_auc_score(y_true, preds) if not np.isnan(preds).any() else 0.0

# Print metrics
print("\nFinal Model Evaluation Metrics on Full Dataset:")
print(f"Accuracy: {accuracy:.4f}")
print(f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}")
print(f"Sensitivity (Recall): {recall:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Precision: {precision:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Area Under ROC Curve: {roc_auc:.4f}")

In [None]:
# Plot the final model's training accuracy and loss per epoch, side by side.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: accuracy
ax_accuracy.plot(history.history['accuracy'], label='Training Accuracy')
ax_accuracy.set_title('Training Accuracy Over Epochs')
ax_accuracy.set_xlabel('Epochs')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.legend()

# Right panel: loss
ax_loss.plot(history.history['loss'], label='Training Loss')
ax_loss.set_title('Training Loss Over Epochs')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()


In [None]:
# Plot Confusion Matrix
# Rows/columns of `cm` follow the integer class order, and class_labels maps
# "fire" to 0 and "nofire" to 1, so the first tick must be "Fire".
cm_tick_labels = ['Fire', 'No Fire']

plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=cm_tick_labels, yticklabels=cm_tick_labels)
plt.title('Confusion Matrix on Full Dataset')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()