# 1. Age Estimator

### 1.6 Core Adjustments of the CNN baseline models

(a) Import libraries

(b) Load the saved training/validation/testing datasets

(c) Check GPU availability with TensorFlow

(d) Decision on what hyperparameters to tune for the baeline CNN models

(e) Define reusable CNN building functions for Regression amd Classification

(f) Define a function to free up memory for GPU after model training

(g) Define 2 functions to create 2 sfuffled datasets for tuning

(h) Define 2 functions to create 2 sfuffle datasets for final training and evaluation

In [1]:
# (a) Import libraries
# --------------------------------------------------------------------
import tensorflow as tf                             # includes Keras and all its functionalities. It is used for building and training neural networks models such as CNNs.

# Enable GPU memory growth before importing other libraries
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)     # Prevent TensorFlow from allocating all GPU memory at once, avoiding out-of-memory errors (= crashes during training)

# Clear previous sessions to free up resources (before starting new model training) to avoid potential out-of-memory errors (= crashes during training)
tf.keras.backend.clear_session()

import gc                           # for garbage collection to free up memory

import matplotlib.pyplot as plt     # for creating visualizations in Python.
import pandas as pd                 # for data manipulation and analysis, particularly for working with DataFrames.  
import numpy as np                  # for numerical computing (e.g. arrays and  mathematical functions).
import os, pathlib                  # for interacting with the operating system and handling file paths.


# from tensorflow import keras                      # not needed since tf includes keras
# from tensorflow.keras import layers, models       # not needed since tf includes keras

# --------------------------------------------------------------------

In [2]:
# (b) Load the saved training/validation/testing datasets
# --------------------------------------------------------------------

# training/validation/testing datasets for normalised images (Feature inputs: pixel values)
X_train_norm = np.load('X_train_norm.npy')              # scaled images between 0 and 1 
X_val_norm = np.load('X_val_norm.npy')
X_test_norm = np.load('X_test_norm.npy')

# training/validation/testing datasets for regression (Target features labels: age in years)
y_train_reg = np.load('y_train_reg.npy')                # converted from default int64 to float32
y_val_reg = np.load('y_val_reg.npy')
y_test_reg = np.load('y_test_reg.npy')

# training/validation/testing datasets for classification (Target features labels: age groups)
y_train_cls_ohe = np.load('y_train_cls_ohe.npy')        # transformed from integer labels to one-hot encoded format (2D arrays with binary values for each class)
y_val_cls_ohe = np.load('y_val_cls_ohe.npy')
y_test_cls_ohe = np.load('y_test_cls_ohe.npy')
# --------------------------------------------------------------------

# to reduce memory usage, convert X data splits to float16
X_train_norm = X_train_norm.astype('float16')
X_val_norm   = X_val_norm.astype('float16')
X_test_norm  = X_test_norm.astype('float16')

In [None]:
# (c) Check GPU availability with TensorFlow
# --------------------------------------------------------------------

# Print TF version
print("TensorFlow version:", tf.__version__)

# List available GPUs
gpus = tf.config.list_physical_devices('GPU')
print("GPUs available:", gpus)

# Check if built with CUDA
print("Built with CUDA:", tf.test.is_built_with_cuda())
# ---------------------------------------------------------------------

# Check GPU availability with PyTorch
#import torch
#print(torch.cuda.is_available())
#print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")

# ---------------------------------------------------------------------

# NOTE on GPU usage with TensorFlow in this environment:

# TensorFlow cannot use the GPU in the current environment apparently for 2 reasons:
# 1. Python 3.11 is installed, but the last Windows GPU-enabled TensorFlow (2.10) only supports Python 3.7–3.9. So TF 2.10 cannot be installed.
# 2. TensorFlow 2.11+ removed all GPU support for Windows entirely. # Result: Any TensorFlow version installed in this environment is CPU-only.

# ---> As TensorFlow will be use for this assignment,the solution is to create a new conda environment with Python 3.9 and install TensorFlow 2.10 with GPU support

# On Anaconda Prompt:
# conda create -n adl_tf_gpu_env python=3.9 -y
# conda activate adl_tf_gpu_env
# pip install tensorflow==2.10
# pip install matplotlib pandas scikit-learn

# Separately:
# download and install CUDA Toolkit 11.2 and cuDNN 8.1.0 for CUDA 11.2 from NVIDIA website.

TensorFlow version: 2.10.0
GPUs available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Built with CUDA: True


In [None]:
# (d) Decision on what hyperparameters to tune with TensorFlow for the baeline CNN models both for regression and classification
# --------------------------------------------------------------------

# For the baseline model, we should only tune the hyperparameters that influence training stability and general model behaviour, 
# avoiding tuning anything that would change the model architecture or its capacity to learn complex patterns.

# Therefore, we will tune the following basic hyperparameters:

# 1. Learning Rate: This controls how much to change the model in response to the estimated error each time the model weights are updated.
# 2. Batch Size: This defines the number of samples that will be propagated through the network at once.
# 3. Number of Epochs: This defines the number of complete passes through the training dataset.
# 4. Optimizer type: Different optimizers can have different effects on training dynamics.
# 5. Dropout Rate: If dropout layers are used, tuning the dropout rate can help prevent overfitting.
# 6. Early Stopping Patience: This defines how many epochs to wait for an improvement in validation loss before stopping training early.

# For the baseline model, we will NOT tune:

# CNN Kernel Variations: 
    # 1. Activation Functions: We will keep the default ReLU for hidden layers and linear/softmax for output layers (as per regression/classification).
    # 2. Kernel size: We will keep the default kernel sizes for convolutional layers (3x3).

# Network Architecture Variations:
    # 3. Network Depth and Width: We will keep the default number of layers and number of filters/neurons per layer (32 and 64 filters (unique type of image features detectable) in conv layers; 128 neurons in dense layers).
    # 4. Pooling presence Vs absence: We will keep the default max pooling layers after convolutional layers (2x2).
    # 5. Skip connections presence Vs absence: We will not add skip connections in the baseline model.
    # 6. Fully connected layers variations: We will keep the default of 2 dense layers after flattening the conv layers output (hidden dense layer with 128 neurons and the output layer).

# Dataset Variations:
    # 7. 1 vs 3 channel input images: We will use 3-channel RGB images for the baseline model.
    # 8. Data Augmentation: We will not apply data augmentation techniques for the baseline model.
# --------------------------------------------------------------------


# --------------------------------------------------------------------
# Hyperparameter ranges for tuning
# --------------------------------------------------------------------
# Learning rate: Common starting point for Adam is 1e-3(0.001); for SGD usually 1e-2(0.01) to 1e-4(0.0001)

# Batch size: 32-128 is common; larger batches require more GPU memory
# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
# https://www.tensorflow.org/tutorials/keras/overfit_and_underfit

# Epochs: 50-200 is typical; early stopping is used to prevent overfitting.

# Dropout rate: 0.3-0.5 is common for dense layers
# https://www.tensorflow.org/tutorials/keras/overfit_and_underfit

# EarlyStopping patience: 3-10 epochs is standard for validation monitoring

# Optimizer: 'adam' (adaptive) vs 'sgd' (classic) are standard choices for CNNs

learning_rates = [1e-4, 1e-3, 1e-2]     # Tunable learning rates
batch_sizes = [32, 64, 128]             # Tunable batch sizes
dropout_rates = [0.3, 0.5]              # Dropout rates for regularization
optimizers = ['adam']                   # It is decided to only use 'adam' optimizer for tuning to reduce computational load (also sgd requires more careful learning rate tuning)
early_stopping_patiences = [3, 5, 8]    # Patience for EarlyStopping
num_epochs_options = [50, 80, 120]      # Max number of epochs to train


In [None]:
# (e) Define 2 functions to build the baseline CNN model for regression and classification
# --------------------------------------------------------------------

# In CNNs, convolutional layers with ReLU activation are typically followed by pooling layers to reduce spatial dimensions and extract dominant features (such as edges, textures, shapes) from images.
# After 2 conv+pooling layers, the 2D feature maps are flattened into 1D feature vectors to be processed by fully connected (dense) layers.
# Dropout layers are usually added before the output layer to prevent overfitting by randomly dropping a fraction of neurons during training.

    # Convolutional layers use small learnable filters (kernels), 3×3 matrices, which slide over the input image to detect visual features.
        # Each filter has its own set of weights and learns to detect a specific pattern (edges, textures, shapes, etc.).

        # ReLU (Rectified Linear Unit) activation is used to introduce non-linearity, allowing the model to learn non-linear patterns (curves, edges, textures, shape)).
    
    # Pooling layers (2x2 matrices of max values of previously learned weights)reduce the spatial dimensions of feature maps, retaining the most important features 
        # while reducing computational load and helping to prevent overfitting. 

    # Flatten layers convert 2D feature maps (the output of the last convolutional/pooling layer) into 1D vectors for input into dense layers.

    # Dense layers learn high-level representations and relationships between features (such as combinations of edges, textures, shapes) extracted by convolutional layers. 
        # Each layer is fully connected to the previous layer, allowing complex combinations of features to be learned.
        # These layers operate on abstracted features rather than raw pixels.

    # Dropout layers randomly deactivate a fraction (specified by the dropout rate) of neurons (the less important ones based on their weights) during training.

    # The output layer, for regression tasks, has a single neuron with linear activation to predict continuous values (between 0 and 110 for age).
    # The output layer, for classification tasks, has neurons equal to the number of classes with softmax activation to output class probabilities (between 0 and 1 for each class).

def build_base_cnn_reg(input_shape=(128, 128, 3), dropout_rate=None):
    """
    Builds a base CNN model for regression (predicting age).
    
    Layers:
    - Conv2D + MaxPooling2D for feature extraction of images
    - Flatten the 2D feature maps into 1D feature vectors
    - Dense 128 neurons + Dropout (before the output layer) for regularization (adding constraints to a model to reduce overfitting and improve its ability to generalize to new, unseen data)
    - Dense 1 neuron for regression output
    
    Args:
        input_shape (tuple): Shape of input images (height, width, channels)
        dropout_rate (float): Dropout rate to prevent overfitting
        
    Returns:
        tf.keras.Sequential model
    """
    layers = [
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
    ]

    if dropout_rate is not None:
        layers.append(tf.keras.layers.Dropout(dropout_rate))        # Insert dropout before the output layer
    
    layers.append(tf.keras.layers.Dense(1))  # Regression output
    
    return tf.keras.Sequential(layers)


def build_base_cnn_cls(input_shape=(128, 128, 3), dropout_rate=None, num_classes=y_test_cls_ohe.shape[1]):  # number of classes inferred from one-hot encoded labels
    """
    Builds a base CNN model for classification (predicting age category).
    
    Layers:
    - Conv2D + MaxPooling2D for feature extraction
    - Flatten
    - Dense 128 neurons + Dropout for regularization
    - Dense num_classes neurons with softmax activation
    
    Args:
        input_shape (tuple): Shape of input images (height, width, channels)
        dropout_rate (float): Dropout rate to prevent overfitting
        num_classes (int): Number of output classes
        
    Returns:
        tf.keras.Sequential model
    """
    layers = [
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu')
    ]

    if dropout_rate is not None:
        layers.append(tf.keras.layers.Dropout(dropout_rate))            # Insert dropout before the output layer

    layers.append(tf.keras.layers.Dense(num_classes, activation='softmax'))  # Classification output

    return tf.keras.Sequential(layers)

In [5]:
# (f) Define a function to free up memory for GPU after model training
# --------------------------------------------------------------------

# To avoid out-of-memory errors during hyperparameter tuning with multiple model trainings,
# we define a function to clear GPU memory after each model training.
def clear_gpu_memory():
    """Function to clear GPU memory after model training to avoid out-of-memory errors."""
    tf.keras.backend.clear_session()            # Clear Keras session
    gc.collect()                                # Run garbage collection (such as unused variables, tensors, etc., that are no longer needed and use GPU memory) 
# --------------------------------------------------------------------

In [None]:
# (g) Define 2 functions to create 2 sfuffled datasets for tuning (regression and classification)
# --------------------------------------------------------------------

# training dataset is shuffled to improve generalization by exposing the model to different data orders each epoch; 
# it prevents the model from learning patterns in the order of the data.

# validation dataset is not shuffled as we want the same inputs in the same order every time during valuation; unlike the training set, the model does not update weights from validation data, 
# so shuffling is unnecessary

# .prefetch tells TensorFlow to prepare the next n batches in the background while the model is training on the current batch;
# prefetch(1) is usually enough for small dataset;
# prefetching more batches consumes more memory: keeping prefetch(1) is safest to avoid out-of-memory error using GPU.

def create_datasets_tuning_reg (X_train_norm, y_train_reg, X_val_norm, y_val_reg, batch_size):
    train_dataset_reg = tf.data.Dataset.from_tensor_slices((X_train_norm, y_train_reg))
    train_dataset_reg = train_dataset_reg.shuffle(buffer_size=1000).batch(batch_size).prefetch(1)     # shuffle training dataset to improve generalization  
    
    val_dataset_reg = tf.data.Dataset.from_tensor_slices((X_val_norm, y_val_reg))
    val_dataset_reg = val_dataset_reg.batch(batch_size)                                             # validation dataset is not shuffled as randomness is not desired during evaluation
    
    return train_dataset_reg, val_dataset_reg

def create_datasets_tuning_cls (X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, batch_size):
    train_dataset_cls = tf.data.Dataset.from_tensor_slices((X_train_norm, y_train_cls_ohe))
    train_dataset_cls = train_dataset_cls.shuffle(buffer_size=1000).batch(batch_size).prefetch(1)
    
    val_dataset_cls = tf.data.Dataset.from_tensor_slices((X_val_norm, y_val_cls_ohe))
    val_dataset_cls = val_dataset_cls.batch(batch_size)                                                        
    
    return train_dataset_cls, val_dataset_cls

# --------------------------------------------------------------------
# (h) Define 2 functions to create 2 sfuffle datasets for final training and evaluation (regression and classification)
# --------------------------------------------------------------------
def create_datasets_final_reg (X_train_norm, y_train_reg, X_val_norm, y_val_reg, X_test_norm, y_test_reg, batch_size):
    train_dataset_reg = tf.data.Dataset.from_tensor_slices((X_train_norm, y_train_reg))
    train_dataset_reg = train_dataset_reg.shuffle(buffer_size=1000).batch(batch_size).prefetch(1)

    val_dataset_reg = tf.data.Dataset.from_tensor_slices((X_val_norm, y_val_reg))
    val_dataset_reg = val_dataset_reg.batch(batch_size)
    
    test_dataset_reg = tf.data.Dataset.from_tensor_slices((X_test_norm, y_test_reg))
    test_dataset_reg = test_dataset_reg.batch(batch_size)
    
    return train_dataset_reg, val_dataset_reg, test_dataset_reg

def create_datasets_final_cls (X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, X_test_norm, y_test_cls_ohe, batch_size):
    train_dataset_cls = tf.data.Dataset.from_tensor_slices((X_train_norm, y_train_cls_ohe))
    train_dataset_cls = train_dataset_cls.shuffle(buffer_size=1000).batch(batch_size).prefetch(1)

    val_dataset_cls = tf.data.Dataset.from_tensor_slices((X_val_norm, y_val_cls_ohe))
    val_dataset_cls = val_dataset_cls.batch(batch_size)
    
    test_dataset_cls = tf.data.Dataset.from_tensor_slices((X_test_norm, y_test_cls_ohe))
    test_dataset_cls = test_dataset_cls.batch(batch_size)
    
    return train_dataset_cls, val_dataset_cls, test_dataset_cls

#### 1.6.1 Tune Basic Hyperparameters: learning_rates, batch_sizes
Batch + LR change how the model learns

In [None]:
# Tune Basic Hyperparameters with TensorFlow: learning rate, batch size, number of epochs = 20
# Combined hyperparameter tuning for regression and classification models instead of separate tuning loops to cut the time of execution by half.

# Define hyperparameters to tune
learning_rates = [0.001, 0.0005]            # 0.001 means the model weights are updated by 0.1% of the calculated gradient at each step
batch_sizes = [32, 64]                      # Number of samples (independent units of data (images) being processed together) per gradient update (in each training iteration)
num_epochs = 20                             # Number of complete passes through the training dataset to update model weights

# These hyperparameter ranges are chosen based on common practices for CNN training and to balance computational efficiency with effective model training.

# Track best results

# --- REGRESSION MODEL ---
best_val_loss_reg = float('inf')            # Initialize best validation loss to infinity(for minimization, as lower is better)
best_hyperparams_reg = {}                   # Dictionary to store best hyperparameters for regression

# --- CLASSIFICATION MODEL ---
best_val_loss_cls = float('inf')            # Initialize best validation loss to infinity(for minimization, as lower is better)
best_hyperparams_cls = {}                   # Dictionary to store best hyperparameters for classification

# Combined Tuning Loop for REGRESSION MODEL

for lr_reg in learning_rates:
        for batch_size in batch_sizes:
            print(f'Tuning with Learning Rate (Reg): {lr_reg}, Batch Size: {batch_size}')
                        
            # Clear previous models and free up memory to avoid potential out-of-memory errors during training
            clear_gpu_memory()

            # Create tf.data datasets (training + validation) using the function to avoid OOM (out-of-memory) errors
            train_dataset_reg, val_dataset_reg = create_datasets_tuning_reg(X_train_norm, y_train_reg, X_val_norm, y_val_reg, batch_size)


            # Build (using the function) and compile regression model
            model_reg = build_base_cnn_reg(dropout_rate=None)           # Build regression model without dropout as it will be tuned later

            model_reg.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_reg),
                                loss='mean_squared_error',
                                metrics=['mean_absolute_error'])                     
            
            # Train regression model
            history_reg = model_reg.fit(train_dataset_reg,                      # history object stores training history (loss and metrics values at each epoch)
                                        validation_data=val_dataset_reg,
                                        epochs=num_epochs,
                                        verbose=2)
            
            # Evaluate regression model on validation set
            val_loss_reg = min(history_reg.history['val_loss'])                  # Get the minimum validation loss from the list of validation losses for all epochs of that run
            print(f'Regression Validation Loss: {val_loss_reg}')
            
            # Update best regression hyperparameters if improved at end of every run (tuning iteration)
            if val_loss_reg < best_val_loss_reg:
                best_val_loss_reg = val_loss_reg
                best_hyperparams_reg = {'learning_rate': lr_reg, 'batch_size': batch_size}
            
            # Clear previous models and free up memory
            clear_gpu_memory()

# Combined Tuning Loop for CLASSIFICATION MODEL
            
for lr_cls in learning_rates:
        for batch_size in batch_sizes:
            print (f'Tuning with Learning Rate (Cls): {lr_cls}, Batch Size: {batch_size}')
                        
            # Clear previous models and free up memory            
            clear_gpu_memory()

            # Create tf.data datasets (training + validation) using the function to avoid OOM (out-of-memory) errors
            train_dataset_cls, val_dataset_cls = create_datasets_tuning_cls(X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, batch_size)
                        
            # Build (using the function) and compile classification model
            model_cls = build_base_cnn_cls(dropout_rate=None)          # Build classification model without dropout as it will be tuned later
            
            model_cls.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_cls),
                                loss='categorical_crossentropy',
                                metrics=['accuracy'])
            
            # Train classification model
            history_cls = model_cls.fit(train_dataset_cls,
                                        validation_data=val_dataset_cls,
                                        epochs=num_epochs,
                                        verbose=2)
            
            # Evaluate classification model on validation set
            val_loss_cls = min(history_cls.history['val_loss'])
            print(f'Classification Validation Loss: {val_loss_cls}')
            
            # Update best classification hyperparameters if improved
            if val_loss_cls < best_val_loss_cls:
                best_val_loss_cls = val_loss_cls
                best_hyperparams_cls = {'learning_rate': lr_cls, 'batch_size': batch_size}
            
            # Clear models and free GPU memory after each full iteration
            clear_gpu_memory()

# Display best hyperparameters found
print("Best Hyperparameters for Regression:")
print(best_hyperparams_reg)
print(f"Best Validation Loss for Regression: {best_val_loss_reg}")

print("\nBest Hyperparameters for Classification:")
print(best_hyperparams_cls)
print(f"Best Validation Loss for Classification: {best_val_loss_cls}")

Tuning Basic Parameters Result:

Best Hyperparameters for Regression: {'learning_rate': 0.0005, 'batch_size': 32}

Best Validation Loss for Regression: 614.8016967773438

Best Hyperparameters for Classification: {'learning_rate': 0.0005, 'batch_size': 32}

Best Validation Loss for Classification: 1.349984884262085


#### 1.6.2 Tune Basic Hyperparameters: num_epochs and early_stopping_patiences
Epochs + patience control how long the model learns.

In [None]:
# Tune Basic Hyperparameters with TensorFlow: num_epochs and early_stopping_patiences

# Define hyperparameters to tune
early_stopping_patiences = [3, 5, 8]        # Number of epochs with no improvement after which training will be stopped
num_epochs = [50, 80, 120]                  # Number of complete passes through the training dataset to update model weights

# Previous best results from learning rate and batch size tuning for regression and classification
best_learning_rate_reg = 0.0005
best_batch_size_reg = 32
best_learning_rate_cls = 0.0005
best_batch_size_cls = 32

dropout_rates = None                        # No dropout for this tuning phase as it will be tuned later

# Track best results
# --- REGRESSION MODEL ---
best_val_loss_reg = float('inf')            # Initialize best validation loss to infinity(for minimization, as lower is better)
best_hyperparams_reg = {}                   # Dictionary to store best hyperparameters for regression   

# --- CLASSIFICATION MODEL ---
best_val_loss_cls = float('inf')            
best_hyperparams_cls = {}                   

# Combined Tuning Loop for REGRESSION MOODEL
for patience in early_stopping_patiences:
    for epochs in num_epochs:
        print(f'Tuning with Early Stopping Patience: {patience}, Number of Epochs: {epochs}')
        
        # Clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

        # Create (using the function) tf.data datasets (training + validation)
        train_dataset_reg, val_dataset_reg = create_datasets_tuning_reg(X_train_norm, y_train_reg, X_val_norm, y_val_reg, best_batch_size_reg)  
        
        # Build and compile regression model
        model_reg = build_base_cnn_reg(dropout_rate=None)           # Build regression model without dropout as it will be tuned later

        model_reg.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_learning_rate_reg),
                          loss='mean_squared_error',
                          metrics=['mean_absolute_error'])
        
        # Define Early Stopping callback
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        
        # Train regression model
        history_reg = model_reg.fit(train_dataset_reg,
                                    validation_data=val_dataset_reg,
                                    epochs=epochs,
                                    callbacks=[early_stopping],
                                    verbose=2)
        
        # Evaluate regression model on validation set
        val_loss_reg = min(history_reg.history['val_loss'])                  # Get the minimum validation loss from the list of validation losses for all epochs of that run
        print(f'Regression Validation Loss: {val_loss_reg}')
        
        # Update best regression hyperparameters if improved at end of every run (tuning iteration)
        if val_loss_reg < best_val_loss_reg:
            best_val_loss_reg = val_loss_reg
            best_hyperparams_reg = {'early_stopping_patience': patience, 'num_epochs': epochs}
        
        # Clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

# Combined Tuning Loop for CLASSIFICATION MODEL
for patience in early_stopping_patiences:
    for epochs in num_epochs:
        print(f'Tuning with Early Stopping Patience: {patience}, Number of Epochs: {epochs}')

        # Each time before building a new model, clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

        # Create (using the function) tf.data datasets (training + validation)
        train_dataset_cls, val_dataset_cls = create_datasets_tuning_cls(X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, best_batch_size_cls)  

        # Build and compile classification model
        model_cls = build_base_cnn_cls(dropout_rate=None)          # Build classification model without dropout as it will be tuned later

        model_cls.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_learning_rate_cls),
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])
        
        # Define Early Stopping callback
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        
        # Train classification model
        history_cls = model_cls.fit(train_dataset_cls,
                                    validation_data=val_dataset_cls,
                                    epochs=epochs,
                                    callbacks=[early_stopping],
                                    verbose=2)
        
        # Evaluate classification model on validation set 
        val_loss_cls = min(history_cls.history['val_loss'])                  # Get the minimum validation loss from the list of validation losses for all epochs of that run
        print(f'Classification Validation Loss: {val_loss_cls}')
        # Update best classification hyperparameters if improved at end of every run (tuning iteration)
        if val_loss_cls < best_val_loss_cls:
            best_val_loss_cls = val_loss_cls
            best_hyperparams_cls = {'early_stopping_patience': patience, 'num_epochs': epochs}


        # Clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

# Display best hyperparameters found
print("Best Hyperparameters for Regression:")
print(best_hyperparams_reg)
print(f"Best Validation Loss for Regression: {best_val_loss_reg}")

print("\nBest Hyperparameters for Classification:")
print(best_hyperparams_cls)
print(f"Best Validation Loss for Classification: {best_val_loss_cls}")

# Expected number of training runs: 3 (patience) * 3 (epochs) = 9 runs per model = 18 total
# Expected time per epoch with CPU: ~20-40 seconds
# Expected time per epoch with GPU: ~4-10 seconds
# Total expected time with CPU : 18*20*20 seconds = ~2-4 hours (with early stopping, actual time will eventually be lower, even more than half)
# Tota l expected time with GPU : 18*20*4 seconds = ~ 24 minutes - 48 minutes (with early stopping, actual time will eventually be lower, even more than half)    

Best Hyperparameters for Regression: {'early_stopping_patience': 3, 'num_epochs': 50}

Best Validation Loss for Regression: 764.4470825195312

Best Hyperparameters for Classification: {'early_stopping_patience': 8, 'num_epochs': 120}

Best Validation Loss for Classification: 1.1832056045532227

#### 1.6.3 Tune Basic Hyperparameters: Optimizer + Dropout

In [None]:
# Tune Basic Hyperparameters with TensorFlow: dropout rate with Adam optimizer

# Define hyperparameters to tune
optimizers = ['adam']                       # Only 'adam' optimizer to reduce computational load
dropout_rates = [0.3, 0.5]                  # Different dropout rates to try

# the alternative optimizer 'sgd' is not used here to reduce computational load (also sgd requires more careful learning rate tuning)
# sgd would require 

# Previous best results from learning rate and batch size tuning for regression and classification
best_learning_rate_reg = 0.0005
best_batch_size_reg = 32
best_learning_rate_cls = 0.0005
best_batch_size_cls = 32
best_early_stopping_patience_reg = 3
best_early_stopping_patience_cls = 8
best_num_epochs_reg = 50
best_num_epochs_cls = 120

# Track best results
# --- REGRESSION MODEL ---
best_val_loss_reg = float('inf')            # Initialize best validation loss to infinity(for minimization, as lower is better)
best_hyperparams_reg = {}                   # Dictionary to store best hyperparameters for regression
# --- CLASSIFICATION MODEL ---
best_val_loss_cls = float('inf')            # Initialize best validation loss to infinity(for minimization, as lower is better)
best_hyperparams_cls = {}                   # Dictionary to store best hyperparameters for classification

# Combined Tuning Loop for REGRESSION MODEL
for opt in optimizers:
    for dropout_rate in dropout_rates:
        print(f'Tuning with Optimizer: {opt}, Dropout Rate: {dropout_rate}')
        
        # Clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()
        
        # Create tf.data datasets (training + validation) using the function to avoid OOM (out-of-memory) errors
        train_dataset_reg, val_dataset_reg = create_datasets_tuning_reg(X_train_norm, y_train_reg, X_val_norm, y_val_reg, best_batch_size_reg)

        # Build (using the function) and compile regression model
        model_reg = build_base_cnn_reg(dropout_rate=dropout_rate)
        
        # Choose optimizer
        if opt == 'adam':
            optimizer_reg = tf.keras.optimizers.Adam(learning_rate=best_learning_rate_reg)
        elif opt == 'sgd':
            optimizer_reg = tf.keras.optimizers.SGD(learning_rate=best_learning_rate_reg)
        
        model_reg.compile(optimizer=optimizer_reg,
                          loss='mean_squared_error',
                          metrics=['mean_absolute_error'])
        
        # Define Early Stopping callback
        early_stopping = tf.keras.callbacks.EarlyStopping(          # it stops training when the validation loss stops improving after a certain number of epochs defined by patience
            monitor='val_loss',
            patience=best_early_stopping_patience_reg,
            restore_best_weights=True)
        
        # Train regression model
        history_reg = model_reg.fit(train_dataset_reg,
                                    validation_data=val_dataset_reg,
                                    epochs=best_num_epochs_reg,
                                    verbose=2)
        
        # Evaluate regression model on validation set
        val_loss_reg = min(history_reg.history['val_loss'])                  # Get the minimum validation loss from the list of validation losses for all epochs of that run
        print(f'Regression Validation Loss: {val_loss_reg}')
        
        # Update best regression hyperparameters if improved at end of every run (tuning iteration)
        if val_loss_reg < best_val_loss_reg:
            best_val_loss_reg = val_loss_reg
            best_hyperparams_reg = {
                'optimizer': opt,
                'dropout_rate': dropout_rate,
            }
        # Clear previous models and free up memory
        clear_gpu_memory()

# Combined Tuning Loop for CLASSIFICATION MODEL
for opt in optimizers:
    for dropout_rate in dropout_rates:
        print(f'Tuning with Optimizer: {opt}, Dropout Rate: {dropout_rate}')

        # Each time before building a new model, clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

        # Create tf.data datasets (training + validation) using the function to avoid OOM (out-of-memory) errors
        train_dataset_cls, val_dataset_cls = create_datasets_tuning_cls(X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, best_batch_size_cls)

        # Build (using the function) and compile classification model   
        model_cls = build_base_cnn_cls(dropout_rate=dropout_rate)

        # Choose optimizer
        if opt == 'adam':
            optimizer_cls = tf.keras.optimizers.Adam(learning_rate=best_learning_rate_cls)
        elif opt == 'sgd':
            optimizer_cls = tf.keras.optimizers.SGD(learning_rate=best_learning_rate_cls)
        
        # Compile classification model
        model_cls.compile(optimizer=optimizer_cls,
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
        
        # Define Early Stopping callback
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=best_early_stopping_patience_cls,
            restore_best_weights=True)
        
        # Train classification model
        history_cls = model_cls.fit(train_dataset_cls,
                                    validation_data=val_dataset_cls,
                                    epochs=best_num_epochs_cls,
                                    verbose=2)
        
        # Evaluate classification model on validation set
        val_loss_cls = min(history_cls.history['val_loss'])                  # Get the minimum validation loss from the list of validation losses for all epochs of that run
        print(f'Classification Validation Loss: {val_loss_cls}')
        
        # Update best classification hyperparameters if improved at end of every run (tuning iteration)
        if val_loss_cls < best_val_loss_cls:
            best_val_loss_cls = val_loss_cls
            best_hyperparams_cls = {
                'optimizer': opt,
                'dropout_rate': dropout_rate,
            }
        
        # Clear previous models and free up memory to avoid potential out-of-memory errors during training
        clear_gpu_memory()

# Display best hyperparameters found
print("Best Hyperparameters for Regression:")
print(best_hyperparams_reg)
print(f"Best Validation Loss for Regression: {best_val_loss_reg}")

print("Best Hyperparameters for Classification:")
print(best_hyperparams_cls)
print(f"Best Validation Loss for Classification: {best_val_loss_cls}")


Best Hyperparameters for Regression: {'optimizer': 'adam', 'dropout_rate': 0.3}

Best Validation Loss for Regression: 603.141845703125

Best Hyperparameters for Classification: {'optimizer': 'adam', 'dropout_rate': 0.5}

Best Validation Loss for Classification: 1.122312068939209

#### 1.7 Final CNN Baseline Models with best hyperparameters

##### 1.7.1. Improve the final CNN builders

In [None]:
# Improve Final CNN Builders (Regression + Classification)

# https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
# Batach Normalization normalizes the activations of the previous layer at each batch,)
# i.e., applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
# This helps stabilize and speed up the training process by reducing internal covariate shift.

# https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout
# Dropout is a regularization technique that randomly sets a fraction of input units to 0 at each update during training time,
# which helps prevent overfitting by reducing reliance on specific neurons.

# --- REGRESSION MODEL ---
def final_build_base_cnn_reg(input_shape=(128, 128, 3), dropout_rate=None):
    """
    Improved CNN for final regression model.
    Adds Batch Normalization and slightly deeper feature extraction while 
    keeping the same interface and variable names.
    """

    layers = [

        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Conv layer to stabilize activations
        tf.keras.layers.MaxPooling2D((2, 2)),

        # --- Block 2 ---
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Conv layer to stabilize activations
        tf.keras.layers.MaxPooling2D((2, 2)),
    
        tf.keras.layers.Flatten(),                          # Flatten 2D feature maps to 1D feature vector

        # Dense layer
        tf.keras.layers.Dense(128, activation='relu'),      # Fully connected layer with 128 neurons    
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Dense layer to stabilize activations
        tf.keras.layers.Dropout(dropout_rate),              # Dropout for regularization (prevent overfitting)
        
        # Output layer
        tf.keras.layers.Dense(1)                            # Regression output
    ]
    
    return tf.keras.Sequential(layers)

# --- CLASSIFICATION MODEL ---
def final_build_base_cnn_cls(input_shape=(128, 128, 3), dropout_rate=None, num_classes=y_test_cls_ohe.shape[1]):
    """
    Improved CNN for final classification model.
    Adds Batch Normalization and slightly deeper feature extraction while 
    keeping the same interface and variable names.
    """

    layers = [

        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                               input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Conv layer to stabilize activations
        tf.keras.layers.MaxPooling2D((2, 2)),

        # --- Block 2 ---
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Conv layer to stabilize activations
        tf.keras.layers.MaxPooling2D((2, 2)),
    
        tf.keras.layers.Flatten(),                          # Flatten 2D feature maps to 1D feature vector

        # Dense layer
        tf.keras.layers.Dense(128, activation='relu'),      # Fully connected layer with 128 neurons    
        tf.keras.layers.BatchNormalization(),               # BatchNorm after Dense layer to stabilize activations
        tf.keras.layers.Dropout(dropout_rate),              # Dropout for regularization (prevent overfitting)
        
        # Output layer
        tf.keras.layers.Dense(num_classes, activation='softmax')    # Classification output
    ]
    
    return tf.keras.Sequential(layers)



##### 1.7.2 Final Model Training and Evaluation with Best Hyperparameters

In [None]:
# Final CNN Baseline Models for Regression and Classification with the best hyperparameters found

# Previous best results from learning rate and batch size tuning for regression and classification
best_learning_rate_reg = 0.0005
best_batch_size_reg = 32
best_learning_rate_cls = 0.0005
best_batch_size_cls = 32
best_early_stopping_patience_reg = 8
best_early_stopping_patience_cls = 8
best_num_epochs_reg = 80
best_num_epochs_cls = 80
optimizer_reg = 'adam'       
optimizer_cls = 'adam'
best_dropout_rate_reg = 0.3
best_dropout_rate_cls = 0.5

# --- REGRESSION MODEL ---

# Clear previous models to free GPU memory
clear_gpu_memory()

# Create tf.data datasets (training + validation + test) to reduce memory usage with the function
train_dataset_reg, val_dataset_reg, test_dataset_reg = create_datasets_final_reg(X_train_norm, y_train_reg, X_val_norm, y_val_reg, X_test_norm, y_test_reg, best_batch_size_reg)

# Build (with the function) and compile regression model
model_reg = final_build_base_cnn_reg(dropout_rate=best_dropout_rate_reg)

# Choose optimizer
if optimizer_reg == 'adam':                                                     # optimier is used to update model weights based on loss gradient during training
    optimizer_reg_final  = tf.keras.optimizers.Adam(learning_rate=best_learning_rate_reg)  # Adam optimizer adapts learning rate for each parameter during training
else:
    optimizer_reg_final = tf.keras.optimizers.SGD(learning_rate=best_learning_rate_reg)   # Standard SGD uses a fixed learning rate to learn from data

# Compile regression model
model_reg.compile(optimizer=optimizer_reg_final,                          # configuring the model for training, specifying the optimizer, loss function, and evaluation metrics
                      loss='mean_squared_error',
                      metrics=['mean_absolute_error'])

# Define Early Stopping callback
early_stopping_reg = tf.keras.callbacks.EarlyStopping(monitor='val_loss',                       # stop training if validation loss does not improve for specified number of epochs
                                                      patience=best_early_stopping_patience_reg, 
                                                      restore_best_weights=True) 

# Train regression model
history_reg = model_reg.fit(train_dataset_reg,                      # .fit method trains the model for a fixed number of epochs (iterations on a dataset)
                            epochs=best_num_epochs_reg, 
                            validation_data=val_dataset_reg, 
                            callbacks=[early_stopping_reg])         # early stopping of the epochs if validation loss does not improve, to prevent overfitting

# Evaluate regression model on test set
test_loss_reg, test_mae_reg = model_reg.evaluate(test_dataset_reg)          # .evaluate method computes the loss and metrics on the test dataset

# Clear previous models and free up memory
clear_gpu_memory()


# --- CLASSIFICATION MODEL ---

# Clear previous models to free GPU memory
clear_gpu_memory()

# Create tf.data datasets (for training + validation, test) to reduce memory usage with the function
train_dataset_cls, val_dataset_cls, test_dataset_cls = create_datasets_final_cls(X_train_norm, y_train_cls_ohe, X_val_norm, y_val_cls_ohe, X_test_norm, y_test_cls_ohe, best_batch_size_cls)

# Build (using the function) and compile classification model
model_cls = final_build_base_cnn_cls(dropout_rate=best_dropout_rate_cls)

# Choose optimizer
if optimizer_cls == 'adam':
    optimizer_cls_final = tf.keras.optimizers.Adam(learning_rate=best_learning_rate_cls)
else:
    optimizer_cls_final = tf.keras.optimizers.SGD(learning_rate=best_learning_rate_cls)

# Compile classification model
model_cls.compile(optimizer=optimizer_cls_final,  
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

# Define Early Stopping callback
early_stopping_cls = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=best_early_stopping_patience_cls, restore_best_weights=True)

# Train classification model
history_cls = model_cls.fit(train_dataset_cls, epochs=best_num_epochs_cls, validation_data=val_dataset_cls, callbacks=[early_stopping_cls])

# Evaluate classification model on test set
test_loss_cls, test_accuracy_cls = model_cls.evaluate(test_dataset_cls)

# Clear previous models and free up memory
clear_gpu_memory()

# Display final test results
print("Final Test Results for Regression Model:")
print(f"Test Loss (MSE): {test_loss_reg:.4f}")
print(f"Test MAE: {test_mae_reg:.4f}")

print("Final Test Results for Classification Model:")
print(f"Test Loss: {test_loss_cls:.4f}")
print(f"Test Accuracy: {test_accuracy_cls:.4f}")

print ("Final Regression Model Summary:")
model_reg.summary()
print ("Final Classification Model Summary:")
model_cls.summary()

# Save the final models
model_reg.save('final_base_cnn_regression_model.keras')
model_cls.save('final_base_cnn_classification_model.keras')


Final Test Results for Regression Model:
- Test Loss (MSE): 394.7842
- Test MAE: 15.8970

Final Test Results for Classification Model:
- Test Loss: 1.4021
- Test Accuracy: 0.5595

