### Importing key libraries

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import optuna
# from sklearn.utils.class_weight import compute_class_weight
# from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay, classification_report)

# Set seeds for reproducibility: Python's `random`, NumPy and TensorFlow each
# keep their own generator, so every one of them has to be seeded explicitly.
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Allocate GPU memory on demand instead of reserving it all up front.
# The loop is a no-op on CPU-only machines (empty list) and applies the same
# setting to every GPU, which TensorFlow requires when several are visible.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # suppress tensorflow warnings
# TF1-style session carrying the same allow_growth option.
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
session = tf.compat.v1.Session(config=config)

### Hyperparameters

In [None]:
# Side length in pixels: images are loaded as IMG_SIZE x IMG_SIZE and the
# model's default input shape is (IMG_SIZE, IMG_SIZE, 3).
IMG_SIZE = 224
# Number of images per batch produced by the dataset loader.
BATCH_SIZE = 32

### Dataset splitting

In [None]:
!pip install -q split-folders

In [None]:
import splitfolders

# Source folder holding the raw images and destination folder for the split copy
dataset_dir = r'dataset'
output_dir = r'splitted_data'

# Create the destination up front; an already existing folder is not an error
os.makedirs(output_dir, exist_ok=True)

# Copy the images into the split folders with a fixed seed so the split is repeatable
splitfolders.ratio(
    dataset_dir,
    output=output_dir,
    ratio=(0.75, 0.15, 0.1),
    seed=1337,
)
print(f"Dataset successfully splitted into: {os.listdir(output_dir)}")

### Dataset config

In [None]:
def helper_ds(partition, shuffle_status=True):
    """Load one partition of the split dataset as a batched image dataset.

    Args:
        partition: Name of the sub-folder of `output_dir` to read.
        shuffle_status: Whether the loader shuffles the samples.

    Returns:
        The dataset returned by `tf.keras.utils.image_dataset_from_directory`,
        with images resized to IMG_SIZE x IMG_SIZE, batches of BATCH_SIZE and
        binary labels inferred from the directory structure.
    """
    partition_dir = os.path.join(output_dir, partition)
    loader_options = dict(
        labels='inferred',
        label_mode='binary',
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=shuffle_status,
        seed=1337,
    )
    return tf.keras.utils.image_dataset_from_directory(partition_dir, **loader_options)

# Build the three partitions; only the test set is loaded without shuffling
train_ds = helper_ds(partition='train', shuffle_status=True)
test_ds = helper_ds(partition='test', shuffle_status=False)
val_ds = helper_ds(partition='val', shuffle_status=True)

### Data visualization

In [None]:
# Show up to 12 images from the first training batch in a 3x4 grid.
plt.figure(figsize=(10, 8))
for images, labels in train_ds.take(1):
    # Guard against a batch that holds fewer images than the grid has cells.
    n_shown = min(12, int(images.shape[0]))
    for i in range(n_shown):
        ax = plt.subplot(3, 4, i + 1)
        plt.imshow(np.array(images[i]).astype("uint8"))
        # NOTE(review): this assumes label 1 means "active". Inferred labels
        # follow the alphanumeric order of the class folders, so confirm the
        # mapping against train_ds.class_names.
        # .item() extracts the scalar from the single-element label.
        state = "active" if int(np.asarray(labels[i]).item()) else "drowsy"
        plt.title(state)
        plt.axis("off")
# Lay the grid out once, after every subplot exists.
plt.tight_layout()

In [None]:
# Keep the class names in a variable of their own so they can be re-attached
# to train_ds after later tf.data transformations replace the dataset object.
class_names = train_ds.class_names
print(f"Class names: {class_names}")

### Data augmentation example

In [None]:
# Augmentation layers used for the preview below. They are called directly on
# image tensors, so no `input_shape` is needed (passing it to a bare layer is
# deprecated in recent Keras releases).
data_aug = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomContrast(0.2),
    tf.keras.layers.RandomBrightness([-0.3, 0.1])
]

def data_augmentation(images):
    """Apply every layer of `data_aug`, in order, to a batch of images.

    Args:
        images: Batch of images to augment.

    Returns:
        The batch after all augmentation layers have been applied.
    """
    for layer in data_aug:
        # Random layers only perturb their input in training mode; request it
        # explicitly rather than relying on the call-time default.
        images = layer(images, training=True)
    return images

# Preview eight random augmentations of the first image of one training batch.
plt.figure(figsize=(10, 6))
for images, _ in train_ds.take(1):
    # Only the first image is displayed, so augment just that one.
    sample_image = images[:1]
    for i in range(8):
        augmented_images = data_augmentation(sample_image)
        ax = plt.subplot(2, 4, i + 1)
        plt.imshow(np.array(augmented_images[0]).astype("uint8"))
        plt.title(f"augmented {i+1}")
        plt.axis("off")
# Lay the grid out once, after every subplot exists.
plt.tight_layout()

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Normalize pixel values
# normalization_layer = tf.keras.layers.Rescaling(1./255)
# train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE)
# val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE)
# test_ds = test_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE)

# Enable prefetching so input loading overlaps with training.
# No extra .shuffle() here: the loader already shuffles train_ds (shuffle=True),
# and a 10000-element buffer placed after batching would hold up to 10000
# decoded image batches in memory.
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Re-attach the class names saved earlier: train_ds was reassigned to the
# result of further tf.data transformations, which is a new dataset object.
train_ds.class_names = class_names
print(train_ds.class_names)

In [None]:
# Sanity check: train_ds must still expose class_names at this point
try:
    retained_names = train_ds.class_names
except AttributeError:
    print("Error: train_ds does not have class_names. Verify dataset creation.")
else:
    print("Class Names:", retained_names)

### Stacking models

In [None]:
def stacked_model(input_shape=[IMG_SIZE, IMG_SIZE, 3], LEARNING_RATE=0.0001, DROPOUT_PROB=0.3, OPTIMIZER='Adam'):
    """Build and compile a binary classifier on top of three frozen ImageNet backbones.

    DenseNet121, EfficientNetV2B2 and MobileNetV2 each extract features from
    the same augmented input. The features are global-average-pooled,
    concatenated and passed through Dense(1024, relu) -> Dropout ->
    Dense(1, sigmoid).

    Args:
        input_shape: Shape (height, width, channels) of the input images,
            given as raw 0-255 pixel values.
        LEARNING_RATE: Learning rate passed to the optimizer.
        DROPOUT_PROB: Dropout rate applied before the output layer.
        OPTIMIZER: 'Adam', 'RMSprop' or 'SGD' (matched case-insensitively).

    Returns:
        A compiled `tf.keras.Model` trained with binary cross-entropy and
        reporting accuracy, AUC, precision and recall.

    Raises:
        ValueError: If OPTIMIZER is not one of the supported names.
    """
    input_shape = tuple(input_shape)

    # Resolve the optimizer first so an unknown name fails before any weights
    # are loaded. Matching is case-insensitive, so both 'RMSprop' and
    # 'RMSProp' are accepted.
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
        'sgd': tf.keras.optimizers.SGD,
    }
    optimizer_key = str(OPTIMIZER).lower()
    if optimizer_key not in optimizer_classes:
        raise ValueError(
            f"Unsupported OPTIMIZER {OPTIMIZER!r}; expected one of 'Adam', 'RMSprop', 'SGD'."
        )
    optim = optimizer_classes[optimizer_key](learning_rate=LEARNING_RATE)

    d_net = tf.keras.applications.DenseNet121(include_top=False, weights='imagenet', input_shape=input_shape)
    e_net = tf.keras.applications.EfficientNetV2B2(include_top=False, weights='imagenet', input_shape=input_shape)
    m_net = tf.keras.applications.MobileNetV2(include_top=False, weights='imagenet', input_shape=input_shape)

    # Freeze the backbones: only the classification head is trained
    for backbone in (d_net, e_net, m_net):
        backbone.trainable = False

    # Input layer
    inputs = tf.keras.Input(shape=input_shape)

    # Data augmentation (built from `input_shape` so custom shapes work too)
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.2),
        tf.keras.layers.RandomZoom(0.2),
        tf.keras.layers.RandomContrast(0.2),
        tf.keras.layers.RandomBrightness([-0.3, 0.1])
    ])

    augmented_inputs = data_augmentation(inputs)

    # Each backbone expects its own input scaling. DenseNet and MobileNetV2
    # need their `preprocess_input`; EfficientNetV2 rescales internally and
    # takes raw 0-255 pixels.
    d_net_inputs = tf.keras.applications.densenet.preprocess_input(augmented_inputs)
    m_net_inputs = tf.keras.applications.mobilenet_v2.preprocess_input(augmented_inputs)

    # Extracted features
    d_net_features = d_net(d_net_inputs)
    e_net_features = e_net(augmented_inputs)
    m_net_features = m_net(m_net_inputs)

    # Global average pooling
    d_net_pooling = tf.keras.layers.GlobalAveragePooling2D()(d_net_features)
    e_net_pooling = tf.keras.layers.GlobalAveragePooling2D()(e_net_features)
    m_net_pooling = tf.keras.layers.GlobalAveragePooling2D()(m_net_features)

    # Combine the outputs of all three backbones
    combined_outputs = tf.keras.layers.concatenate([d_net_pooling, e_net_pooling, m_net_pooling])
    # REFERENCE: https://stackoverflow.com/a/71170687/23011800
    outputs_pre = tf.keras.layers.Dense(1024, activation='relu')(combined_outputs)
    outputs_pre = tf.keras.layers.Dropout(DROPOUT_PROB)(outputs_pre)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(outputs_pre)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Explicit metric names keep the log keys ('auc', 'precision', 'recall')
    # stable no matter how many models are built in one session.
    model.compile(optimizer=optim,
                  loss='binary_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.AUC(name='auc'),
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.Recall(name='recall')])
    return model

### Defining callbacks

In [None]:
# NOTE(review): the two commented-out callbacks below reference PATIENCE, LR_FACTOR,
# LR_PATIENCE and MIN_LR, none of which are defined in the visible notebook.
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
# lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=LR_FACTOR, patience=LR_PATIENCE, min_lr=MIN_LR)
# Writes the model to checkpoints/best_model.keras, monitoring val_loss and
# keeping only the best one (save_best_only=True).
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("checkpoints/best_model.keras", monitor='val_loss', save_best_only=True, verbose=1)

# Custom training curve callback
from IPython.display import clear_output

# Per-epoch history of the most recent training run, filled by TrainingCurveCallback
train_losses = []
val_losses = []
precision_scores = []
recall_scores = []

class TrainingCurveCallback(tf.keras.callbacks.Callback):
    """Redraw loss and precision/recall curves live after every epoch.

    The history is kept in the module-level lists `train_losses`, `val_losses`,
    `precision_scores` and `recall_scores`. They are emptied when training
    starts, so each `fit` call (for example each Optuna trial) plots only its
    own epochs.
    """

    def on_train_begin(self, logs=None):
        """Reset the shared history so a previous run does not leak into this one."""
        for history in (train_losses, val_losses, precision_scores, recall_scores):
            history.clear()

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and redraw the figure.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Metric dict from Keras; must contain 'loss', 'val_loss',
                'precision' and 'recall'.
        """
        train_losses.append(logs['loss'])
        val_losses.append(logs['val_loss'])
        precision_scores.append(logs['precision'])
        recall_scores.append(logs['recall'])

        clear_output(wait=True) # clear output before plotting

        fig, ax1 = plt.subplots(figsize=(10, 5)) # loss on the left axis, precision/recall on the right
        fig.suptitle('Training Curves')

        # loss curve (ax1 - left y axis)
        ax1.plot(train_losses, label='Train Loss')
        ax1.plot(val_losses, label='Validation Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_xlim(left=0)
        ax1.set_ylabel('Loss function')

        # left y axis range: lowest/highest loss across both curves with 0.1 padding,
        # the lower bound never going below 0
        y_ax1_min = max(0, min(min(train_losses), min(val_losses)) - 0.1)
        y_ax1_max = max(max(train_losses), max(val_losses)) + 0.1
        ax1.set_ylim(y_ax1_min, y_ax1_max)

        # Best epoch = lowest validation loss, the same quantity the
        # checkpoint / early-stopping callbacks monitor
        best_epoch = int(np.argmin(val_losses))
        best_loss = val_losses[best_epoch]

        # add vertical line for best epoch
        ax1.vlines(best_epoch, ymin=y_ax1_min, ymax=y_ax1_max, linestyles='dashed', colors='black',
                   label=f'best epoch={best_epoch}\nval_loss={best_loss:.3f}')
        ax1.legend(loc='upper left')

        # create right y axis for precision/recall curves
        ax2 = ax1.twinx()

        # Plot precision/recall (right y-axis)
        ax2.plot(precision_scores, label=f'Precision\n{precision_scores[best_epoch]:.3f} @ {best_epoch}', color='red')
        ax2.plot(recall_scores, label=f'Recall\n{recall_scores[best_epoch]:.3f} @ {best_epoch}', color='green')
        ax2.set_ylabel('Precision / Recall')
        ax2.set_ylim(0, 1)
        ax2.legend(loc='upper right')
        plt.tight_layout()
        plt.show()
        # One figure is created per epoch; release it so they do not pile up
        plt.close(fig)

### Initializing the model

In [None]:
# Baseline ensemble built with stacked_model's default arguments
ensemble_model = stacked_model()

### Model summary (optional)

In [None]:
# ensemble_model.summary()

### Training the model

In [None]:
def objective(trial):
    """Optuna objective: train a fresh stacked model and return its best validation loss.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The lowest `val_loss` reached during this trial's training run.
    """
    params = {
        'NUM_EPOCHS': trial.suggest_int('NUM_EPOCHS', 10, 100, step=10),
        'LEARNING_RATE': trial.suggest_float('LEARNING_RATE', 1e-5, 1e-3, log=True),
        # Spelled exactly as stacked_model's optimizer check ('RMSProp')
        'OPTIMIZER': trial.suggest_categorical('OPTIMIZER', ["Adam", "RMSProp", "SGD"]),
        'BATCH_SIZE': trial.suggest_categorical('BATCH_SIZE', [16, 32, 64, 128]),
        'DROPOUT_PROB': trial.suggest_float('DROPOUT_PROB', 0.1, 0.7, step=0.1),
        'PATIENCE': trial.suggest_int('PATIENCE', 3, 10, step=1),
        'LR_PATIENCE': trial.suggest_int('LR_PATIENCE', 3, 10, step=1),
    }

    # Drop Keras state left over from the previous trial so memory does not
    # grow across the study.
    tf.keras.backend.clear_session()

    model = stacked_model(LEARNING_RATE=params['LEARNING_RATE'],
                         DROPOUT_PROB=params['DROPOUT_PROB'],
                         OPTIMIZER=params['OPTIMIZER'])

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=params['PATIENCE'], restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=params['LR_PATIENCE'], min_lr=1e-6)

    # train_ds is already batched, so re-batch it to apply the sampled batch size.
    trial_train_ds = train_ds.unbatch().batch(params['BATCH_SIZE']).prefetch(tf.data.AUTOTUNE)

    # Train the model built for THIS trial (not the global ensemble_model), so
    # the sampled hyperparameters take effect and every trial starts from
    # fresh head weights. `shuffle` is omitted because Keras ignores it for
    # tf.data inputs.
    history = model.fit(trial_train_ds, validation_data=val_ds, epochs=params['NUM_EPOCHS'],
                        callbacks=[early_stopping, reduce_lr, TrainingCurveCallback()])

    return min(history.history['val_loss'])

In [None]:
# Seed the sampler (TPE is Optuna's default) so the sequence of suggested
# hyperparameters is repeatable across notebook runs.
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

In [None]:
# history = ensemble_model.fit(train_ds, validation_data=val_ds, epochs=NUM_EPOCHS,
#                              callbacks=[early_stopping, lr_scheduler, model_checkpoint, TrainingCurveCallback()], shuffle=True)