In [1]:
import tensorflow as tf
import numpy as np
import seaborn as sns
import mlflow
import dagshub
import json
import os
import keras
import matplotlib.pyplot as plt
from dagshub import dagshub_logger
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report
from kerastuner.tuners import RandomSearch

2025-05-27 10:32:04.262733: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-27 10:32:04.283945: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748334724.304167   30715 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748334724.310042   30715 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748334724.330746   30715 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Defining mlflow experiment parameter
TRIAL_NAME = "trial_"
MLFLOW_MAIN_RUN = "Main Tunining Run: In-Vitro Image Model "
TUNER_DIRECTORY = "logs/tuner"
TUNER_PROJECT_NAME = "FingerSpellIT - In-Vitro Image Model"
MODEL_NAME = "invitro_image_model_v1"
EVALUATION_MLFLOW_RUN = "Evaluating In-Vitro Image Model"
REPORT_NAME = "invitro_image_model_classification_report"
CM_NAME="invitro_image_model_confusion_matrix_best_model"

In [3]:
# Dagshub Initialization
mlflow.set_tracking_uri("https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow")

dagshub.init(repo_owner="alfoCaiazza", repo_name="FingerSpellIT", mlflow=True)
dagshub_log = dagshub_logger(metrics_path="metrics", hparams_path="params")

In [4]:
# To avoid OOM errors, setting GPU Memory Consuption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print(f"GPU: {gpu}")
    tf.config.experimental.set_memory_growth(gpu, True) # Keeping the use of memory limited to prevent errors

GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [5]:
train_dir = '../data/raw_imgs/splits/train'
test_dir = '../data/raw_imgs/splits/test'
val_dir = '../data/raw_imgs/splits/val'

# Creating dataset with keras method
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(224,224),
    batch_size=32,
    shuffle=True,
    seed=123
)

class_names = train_ds.class_names

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=(224,224),
    batch_size=32,
    shuffle=True,
    seed=123
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=(224,224),
    batch_size=32,
    shuffle=True,
    seed=123
)



Found 5654 files belonging to 24 classes.


I0000 00:00:1748334729.846853   30715 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1753 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Found 719 files belonging to 24 classes.
Found 705 files belonging to 24 classes.


In [6]:
# Pre-processing sequential model
# Normalizing the images for ResNet50 model (0,1)
normalization = tf.keras.layers.Rescaling(1./255)

train_ds  = train_ds.map(lambda x, y: (normalization(x), y))
val_ds = val_ds.map(lambda x, y: (normalization(x), y))
test_ds = test_ds.map(lambda x, y: (normalization(x), y))

In [7]:
# Handling data batch size automatically 
AUTOTUNE = tf.data.AUTOTUNE

train_ds= train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds= val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [8]:
#Building parametrized ResNet50 Transfer Learning Model
def build_model(hp):
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(224,224,3)
    )

    # Unfreezing last layers
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    activation = hp.Choice('activation', ['relu', 'sigmoid', 'tanh'])

    # Building output layers
    x = GlobalAveragePooling2D()(base_model.output)
    for i in range(hp.Int('num_dense_layers', 1, 3)):
        x = Dense(hp.Int('num_units', 128, 512, step=128))(x)
        x = Dropout(hp.Float('dropout_rate', 0.2, 0.5))(x)

    predictions = Dense(24, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # Parametrizing optimizers
    optimizer_name = hp.Choice('optimizer', ['Adam', 'RMSprop', 'SGD'])

    if optimizer_name == 'Adam':
        optimizer = Adam(
            learning_rate = hp.Float('adam_lr', min_value=1e-5, max_value=1e-2, sampling='log')
    )
    elif optimizer_name == 'RMSprop':
        optimizer = RMSprop(
               learning_rate = hp.Float('rmsprop_lr', min_value=1e-5, max_value=1e-2, sampling='log'),
               rho = hp.Float('rmsprop_rho', min_value=0.8, max_value=0.99)
    )
    elif optimizer_name == 'SGD':
            optimizer = SGD(
                learning_rate=hp.Float('sgd_lr', min_value=1e-4, max_value=1e-2, sampling='log'),
                momentum=hp.Float('sgd_momentum', min_value=0.0, max_value=0.99)
    )
            
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics = ['sparse_categorical_accuracy']
    )

    return model

In [9]:
base_callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, min_lr=1e-5)
]

# Tracking MLflow Callback
class MLflowCallback(Callback):
    def __init__(self, trial_hyperparameters, trial_id):
        super().__init__()
        self.trial_id = trial_id
        self.trial_hyperparameters =trial_hyperparameters

    def on_train_begin(self, logs=None):
        self.run = mlflow.start_run(run_name=f"{TRIAL_NAME}_{self.trial_id}", nested=True)

        # Tracks trial parameters
        mlflow.log_param('trial_id', self.trial_id)
        for param_name, param_value in self.trial_hyperparameters.values.items():
            mlflow.log_param(param_name, param_value)

    # Tracks trial metrics
    def on_epoch_end(self, epoch, logs=None):
        if logs is not None:
            for metric_name, value in logs.items():
                mlflow.log_metric(metric_name, value, step=epoch)

    # Close trial mlrun 
    def on_train_end(self, logs=None):
        if self.run:
            mlflow.end_run()



In [10]:
# Sublassed RandomSearch Tuner that implements MLflow tracking
class MLflowTuner(RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        callbacks = base_callbacks + [MLflowCallback(trial.hyperparameters, trial.trial_id)]
        kwargs['callbacks'] = callbacks

        return super().run_trial(trial, *args, **kwargs)

In [None]:
with mlflow.start_run(run_name=f"{MLFLOW_MAIN_RUN}"):
    epochs = 50

    mlflow.set_tag("stage", "hyperparameter_tuning")
    mlflow.set_tag("model_architecture", "ResNet50")
    mlflow.log_param('epochs', epochs)

    tuner = MLflowTuner(
        build_model,
        objective='sparse_categorical_accuracy',
        max_trials=10,
        executions_per_trial=1,
        directory=f'{TUNER_DIRECTORY}',
        project_name=f'{TUNER_PROJECT_NAME}'
    )
    
    tuner.search(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]

    for param, value in best_hps.values.items():
        mlflow.log_param(param, value)

    # Log metriche finali del trial
    for metric_name, metric_data in best_trial.metrics.metrics.items():
        if metric_data and isinstance(metric_data, dict):
            values = metric_data.get('value', [])
            if values:
                mlflow.log_metric(metric_name, values[-1])

    # Log best score
    mlflow.log_metric("best_sparse_categorical_accuracy", best_trial.score)

    # Salving the model
    model_path = f"../models/{MODEL_NAME}.keras"
    best_model.save(model_path)
    mlflow.log_artifact(model_path)

    # Log hyperparameters space and model structure
    mlflow.log_dict(best_hps.get_config(), "search_space.json")
    with open("model_structure.json", "w") as f:
        f.write(best_model.to_json())
    mlflow.log_artifact("model_structure.json")


mlflow.end_run()

Trial 1 Complete [00h 04m 50s]
sparse_categorical_accuracy: 0.9326140880584717

Best sparse_categorical_accuracy So Far: 0.9326140880584717
Total elapsed time: 00h 04m 50s


  saveable.load_own_variables(weights_store.get(inner_path))


🏃 View run Main Tunining Run: In-Vitro Image Model  at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0/runs/fd1851f9b0c94f1d98dc13c81befed78
🧪 View experiment at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0


In [12]:
test_loss, test_acc = best_model.evaluate(test_ds)
print(f"Evaluation accuracy: {test_acc:.3f}")

y_true, y_pred = [], []

for images, labels in test_ds:
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(best_model.predict(images), axis=1))

y_true = np.array(y_true)
y_pred = np.array(y_pred)

[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 0.7607 - sparse_categorical_accuracy: 0.7760











[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 890ms/step - loss: 0.7622 - sparse_categorical_accuracy: 0.7760
Evaluation accuracy: 0.776


2025-05-27 10:38:31.281148: W tensorflow/core/kernels/data/prefetch_autotuner.cc:52] Prefetch autotuner tried to allocate 33554688 bytes after encountering the first element of size 33554688 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m-2s[0m -2325927us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

2025-05-27 10:38:43.230836: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [13]:
with mlflow.start_run(run_name=f"{EVALUATION_MLFLOW_RUN}"):
    report_path =f'../artifacts/{REPORT_NAME}.json'
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print("Classification Report", json.dumps(report, indent=4))

    with open(report_path, "w") as f:
        json.dump(report, f, indent=4)

    with open(f"../artifacts/{REPORT_NAME}.txt", "w") as f:
        f.write(classification_report(y_true, y_pred, target_names=class_names))

    mlflow.log_artifact(report_path, "evaluation_metrics")

    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    cm_local_path = os.path.join("../artifacts", f"{CM_NAME}.png")
    plt.savefig(cm_local_path, dpi=300, bbox_inches='tight')
    
    mlflow.log_figure(plt.gcf(), f"evaluation_plots/{CM_NAME}.png")

    plt.close()


Classification Report {
    "a": {
        "precision": 1.0,
        "recall": 0.88,
        "f1-score": 0.9361702127659575,
        "support": 25.0
    },
    "b": {
        "precision": 1.0,
        "recall": 0.46153846153846156,
        "f1-score": 0.631578947368421,
        "support": 26.0
    },
    "c": {
        "precision": 0.5652173913043478,
        "recall": 1.0,
        "f1-score": 0.7222222222222222,
        "support": 26.0
    },
    "d": {
        "precision": 0.6585365853658537,
        "recall": 0.9642857142857143,
        "f1-score": 0.782608695652174,
        "support": 28.0
    },
    "e": {
        "precision": 0.7692307692307693,
        "recall": 1.0,
        "f1-score": 0.8695652173913043,
        "support": 30.0
    },
    "f": {
        "precision": 0.65,
        "recall": 0.896551724137931,
        "f1-score": 0.7536231884057971,
        "support": 29.0
    },
    "g": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "suppor