In [1]:
import tensorflow as tf
import numpy as np
import seaborn as sns
import mlflow
import dagshub
import json
import os
import keras
import matplotlib.pyplot as plt
from dagshub import dagshub_logger
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
from sklearn.metrics import confusion_matrix, classification_report
from kerastuner.tuners import RandomSearch

2025-05-17 11:25:43.783655: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-17 11:25:43.812734: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747473943.837566   29517 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747473943.844566   29517 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747473943.871131   29517 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [None]:
# Defining mlflow experiment parameter
TRIAL_NAME = "best_trial_"
MLFLOW_MAIN_RUN = "Main Tunining Run: Best Model"
TUNER_DIRECTORY = "logs/best_tuner"
TUNER_PROJECT_NAME = "FingerSpellIT -  Best Model"
MODEL_NAME = "model_fingerspelling_v4"
EVALUATION_MLFLOW_RUN = "Evaluating Best Model"
REPORT_NAME = "best_classification_report"
CM_NAME="best_confusion_matrix_best_model"

In [3]:
# MLflow - Dagshub initialization
mlflow.set_tracking_uri("https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow")

dagshub.init(repo_owner='alfoCaiazza', repo_name='FingerSpellIT', mlflow=True)
dagshub_log = dagshub_logger(metrics_path="metrics", hparams_path="params")

In [4]:
# To avoid OOM errors, setting GPU Memory Consuption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print(f"GPU: {gpu}")
    tf.config.experimental.set_memory_growth(gpu, True) # Keeping the use of memory limited to prevent errors

GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [5]:
train_dir = '../data/processed/train'
val_dir = '../data/processed/val'
test_dir = '../data/processed/test'

# Automatically creates a dataset form the referred directory. Load the full dataset, shuffle = True ensures randomness
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(224, 224),
    batch_size=32,
    shuffle=True,
    seed=123
)

class_names = train_ds.class_names

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=(224, 224),
    batch_size=32,
    shuffle=True,
    seed=123
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=(224, 224),
    batch_size=32,
    shuffle=True,
    seed=123
)

Found 4489 files belonging to 22 classes.


I0000 00:00:1747473952.162547   29517 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1753 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Found 962 files belonging to 22 classes.
Found 977 files belonging to 22 classes.


In [6]:
# Pre-processing sequential model
preprocessing_model = tf.keras.Sequential([
    # Normalizing the images for ResNet50 model (0,1)
    tf.keras.layers.Rescaling(1./255), 

    # Data augmentation:
    tf.keras.layers.RandomFlip("horizontal"),  # Simulating left hand
    tf.keras.layers.RandomRotation(0.05),       # Rotations, max ±5%
    tf.keras.layers.RandomZoom(0.05),

    # Gaussian noise
    tf.keras.layers.Lambda(lambda x: tf.clip_by_value(
        x + tf.random.normal(tf.shape(x), mean=0.0, stddev=0.03), 0.0, 1.0
    ))
])

train_ds = train_ds.map(lambda x, y: (preprocessing_model(x), y))

normalization = tf.keras.layers.Rescaling(1./255)

val_ds = val_ds.map(lambda x, y: (normalization(x), y))
test_ds = test_ds.map(lambda x, y: (normalization(x), y))

In [7]:
# Handling data batch size automatically 
AUTOTUNE = tf.data.AUTOTUNE

train_ds= train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds= val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Importing net for transfer learning
def build_model(hp):
    base_model = ResNet50(
        weights='imagenet',
        include_top=False, # Excludes the first layer
        input_shape=(224,224, 3) # Specifing input shape
    )

    # Unfreezing net layers
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    # Adding more layer to the net from its output and adapting it to a multi-class classification task 
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(128, activation='sigmoid')(x)
    x = Dropout(0.4)(x)

    predictions = Dense(22, activation='softmax')(x) 

    # Creiamo il modello finale
    model = Model(inputs=base_model.input, outputs=predictions)

    # Listing the optimizers
    optimizer = keras.optimizers.RMSprop(
        learning_rate=0.0034900399558852437,
        rho=0.872327542833833
    )
   

    # Compiliamo il modello
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy']
    )

    return model

In [9]:
callbacks_base = [
    EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-5)
]

# Customized Callback for tracking single trial parmas with MLflow
class MLflowCallback(tf.keras.callbacks.Callback):
    def __init__(self, trial_hyperparameters, trial_id):
        super().__init__()
        self.trial_id = trial_id
        self.trial_hyperparameters = trial_hyperparameters

    # Create a nested mlrun to register the trial
    def on_train_begin(self, logs=None):
        self.run = mlflow.start_run(run_name=f"{TRIAL_NAME}_{self.trial_id}", nested=True)
        
        # Tracks all trial params
        mlflow.log_param('trial_id', self.trial_id)
        for param_name, param_value in self.trial_hyperparameters.values.items():
            mlflow.log_param(param_name, param_value)

    # Tracks all trial metrics
    def on_epoch_end(self, epoch, logs=None):
        if logs is not None:
            for metric_name, value in logs.items():
                mlflow.log_metric(metric_name, value, step=epoch)

    # Close the mlrun 
    def on_train_end(self, logs=None):
        if self.run:
            mlflow.end_run()

In [10]:
# Subclassed RandomSerach tuner which uses customized MLflow callback
class MLflowTuner(RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        callbacks = callbacks_base + [MLflowCallback(trial.hyperparameters, trial.trial_id)]
        kwargs['callbacks'] = callbacks
        return super().run_trial(trial, *args, **kwargs)

In [11]:
with mlflow.start_run(run_name=f"{MLFLOW_MAIN_RUN}"):
    epochs = 50
    mlflow.log_param('epochs', epochs)

    tuner = MLflowTuner(
        build_model,
        objective='sparse_categorical_accuracy',
        max_trials=5,
        executions_per_trial=1,
        directory=f'{TUNER_DIRECTORY}',
        project_name=f'{TUNER_PROJECT_NAME}'
    )
    
    tuner.search(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]

    for param, value in best_hps.values.items():
        mlflow.log_param(param, value)

    # Log metriche finali del trial
    for metric_name, metric_data in best_trial.metrics.metrics.items():
        if metric_data and isinstance(metric_data, dict):
            values = metric_data.get('value', [])
            if values:
                mlflow.log_metric(metric_name, values[-1])

    # Salving the model
    model_path = f"../model/{MODEL_NAME}.h5"
    best_model.save(model_path)
    mlflow.log_artifact(model_path)

mlflow.end_run()

Trial 5 Complete [00h 40m 41s]
sparse_categorical_accuracy: 0.12831366062164307

Best sparse_categorical_accuracy So Far: 0.9057696461677551
Total elapsed time: 03h 03m 50s


  saveable.load_own_variables(weights_store.get(inner_path))


🏃 View run Main Tunining Run: Augmented Data at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0/runs/75a24b4187344641aaee3b5308854520
🧪 View experiment at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0


In [12]:
test_loss, test_acc = best_model.evaluate(test_ds)
print(f"Evaluation accuracy: {test_acc:.3f}")

y_true = []
y_pred = []

for images, labels in test_ds:
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(best_model.predict(images), axis=1))

y_true = np.array(y_true)
y_pred = np.array(y_pred)

[1m30/31[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 88ms/step - loss: 0.3128 - sparse_categorical_accuracy: 0.9045










[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 356ms/step - loss: 0.3149 - sparse_categorical_accuracy: 0.9037
Evaluation accuracy: 0.892


2025-05-17 14:30:53.350682: W tensorflow/core/kernels/data/prefetch_autotuner.cc:52] Prefetch autotuner tried to allocate 33554688 bytes after encountering the first element of size 33554688 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-05-17 14:31:04.159221: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [13]:
with mlflow.start_run(run_name=f"{EVALUATION_MLFLOW_RUN}"):
    report_path =f'../model/artifacts/{REPORT_NAME}.json'
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print("Classification Report", json.dumps(report, indent=4))

    with open(report_path, "w") as f:
        json.dump(report, f, indent=4)

    mlflow.log_artifact(report_path, "evaluation_metrics")

    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    cm_local_path = os.path.join("../model/artifacts", f"{CM_NAME}.png")
    plt.savefig(cm_local_path, dpi=300, bbox_inches='tight')
    
    mlflow.log_figure(plt.gcf(), f"evaluation_plots/{CM_NAME}.png")

    plt.close()


Classification Report {
    "a": {
        "precision": 0.9459459459459459,
        "recall": 0.9459459459459459,
        "f1-score": 0.9459459459459459,
        "support": 37.0
    },
    "b": {
        "precision": 0.8918918918918919,
        "recall": 0.8461538461538461,
        "f1-score": 0.868421052631579,
        "support": 39.0
    },
    "c": {
        "precision": 0.868421052631579,
        "recall": 0.8461538461538461,
        "f1-score": 0.8571428571428571,
        "support": 39.0
    },
    "d": {
        "precision": 0.8333333333333334,
        "recall": 0.975609756097561,
        "f1-score": 0.898876404494382,
        "support": 41.0
    },
    "e": {
        "precision": 0.9130434782608695,
        "recall": 0.9333333333333333,
        "f1-score": 0.9230769230769231,
        "support": 45.0
    },
    "f": {
        "precision": 0.84375,
        "recall": 0.627906976744186,
        "f1-score": 0.72,
        "support": 43.0
    },
    "h": {
        "precision": 0.975609