In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import mlflow
import dagshub
import json
import os
import keras
import matplotlib.pyplot as plt
from dagshub import dagshub_logger
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
from sklearn.metrics import confusion_matrix, classification_report
from kerastuner.tuners import RandomSearch

2025-05-22 07:34:43.943569: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-22 07:34:43.958227: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747892083.975195   45011 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747892083.980410   45011 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747892083.993869   45011 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
df = pd.read_csv('../data/landmarked/landmarked_dataset.csv')
df.head()

Unnamed: 0,letter,landmark_0_x,landmark_0_y,landmark_0_z,landmark_1_x,landmark_1_y,landmark_1_z,landmark_2_x,landmark_2_y,landmark_2_z,...,landmark_17_z,landmark_18_x,landmark_18_y,landmark_18_z,landmark_19_x,landmark_19_y,landmark_19_z,landmark_20_x,landmark_20_y,landmark_20_z
0,t,0.421081,0.626581,4.136987e-07,0.463961,0.494464,-0.002851,0.543352,0.413993,-0.054677,...,-0.246295,0.589068,0.633021,-0.228213,0.563941,0.639064,-0.179231,0.517077,0.635068,-0.158235
1,t,0.370567,0.94484,1.408928e-07,0.448386,0.758689,0.038205,0.504371,0.596686,0.006353,...,-0.214565,0.559574,0.741031,-0.214092,0.532368,0.796494,-0.179976,0.484589,0.791432,-0.163417
2,t,0.364367,0.941906,2.749499e-07,0.427728,0.784828,0.005007,0.490351,0.619426,-0.023863,...,-0.155276,0.558961,0.782013,-0.169657,0.532717,0.830628,-0.144721,0.485125,0.819545,-0.127289
3,t,0.387544,0.654715,-1.154244e-08,0.42156,0.501985,0.03924,0.483446,0.362526,0.018951,...,-0.174659,0.577703,0.564937,-0.153963,0.544353,0.595212,-0.12223,0.506335,0.580663,-0.112222
4,t,0.436042,0.768666,4.424058e-07,0.434004,0.664704,-0.015632,0.452978,0.54819,-0.053242,...,-0.152488,0.581925,0.712983,-0.162865,0.564003,0.724012,-0.136979,0.529893,0.740341,-0.118821


In [3]:
label_encoder = LabelEncoder()
scaler = StandardScaler()

classes = sorted(df['letter'].unique())

label_encoder.fit(classes)

y = df['letter']
y = label_encoder.transform(y)

X = df.drop('letter', axis=1)

# 70% train, 15% val, 15% test
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=(15/85), random_state=42, stratify=y_temp)

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [24]:
import joblib

joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [4]:
# Defining mlflow experiment parameter
TRIAL_NAME = "initial_trial_"
MLFLOW_MAIN_RUN = "Main Tunining Run: Landmark Model"
TUNER_DIRECTORY = "logs_lm_model/initial_tuner"
TUNER_PROJECT_NAME = "FingerSpellIT -  Landmark Best Model"
MODEL_NAME = "model_landmarked_v1"
EVALUATION_MLFLOW_RUN = "Evaluating Landmark Best Model"
REPORT_NAME = "initial_classification_report"
CM_NAME="initial_confusion_matrix"

In [5]:
# MLflow - Dagshub initialization
mlflow.set_tracking_uri("https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow")

dagshub.init(repo_owner='alfoCaiazza', repo_name='FingerSpellIT', mlflow=True)
dagshub_log = dagshub_logger(metrics_path="metrics", hparams_path="params")

In [6]:
# To avoid OOM errors, setting GPU Memory Consuption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print(f"GPU: {gpu}")
    tf.config.experimental.set_memory_growth(gpu, True) # Keeping the use of memory limited to prevent errors

GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [7]:
def build_model(hp):
    # Input Layer
    input_layer = layers.Input(shape=(63,))
    x = input_layer
    
    # First Dense Layer
    activation = hp.Choice('initial_activation', ['relu', 'tanh', 'sigmoid'])
    x = layers.Dense(64, activation=activation)(x)
    x = layers.Dropout(hp.Float('initial_dropout', 0.1, 0.5, step=0.1))(x)
    
    # Tunable Hidden Layers
    for i in range(hp.Int('num_layers', 1, 4)):
        units = hp.Int(f'units_{i}', 128, 512, step=128)
        x = layers.Dense(units, activation=activation)(x)

        # Optional BatchNorm
        if hp.Boolean(f'use_batchnorm_{i}'):
            x = layers.BatchNormalization()(x)
        x = layers.Activation(activation)(x)
            
        x = layers.Dropout(hp.Float(f'dropout_{i}', 0.1, 0.5, step=0.1))(x)
    
    # Output Layer
    prediction = layers.Dense(24, activation='softmax')(x)
    
    # Compile Model
    optimizer_name = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd'])
    
    if optimizer_name == 'adam':
        optimizer = keras.optimizers.Adam(
            learning_rate=hp.Float('adam_lr', 1e-5, 1e-2, sampling='log')
        )
    elif optimizer_name == 'rmsprop':
        optimizer = keras.optimizers.RMSprop(
            learning_rate=hp.Float('rmsprop_lr', 1e-5, 1e-2, sampling='log'),
            rho=hp.Float('rmsprop_rho', 0.8, 0.99)
        )
    elif optimizer_name == 'sgd':
        optimizer = keras.optimizers.SGD(
            learning_rate=hp.Float('sgd_lr', 1e-4, 1e-1, sampling='log'),  # Range più ampio per SGD
            momentum=hp.Float('sgd_momentum', 0.0, 0.99)
        )
    
    model = Model(inputs=input_layer, outputs=prediction)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_accuracy']
    )
    
    return model

In [8]:
base_callbacks = [
    EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, min_lr=1e-5)
]

# MLflow tracking callback
class MLflowCallback(Callback):
    def __init__(self, trial_hyperparameters, trial_id):
        super().__init__()
        self.trial_id = trial_id
        self.trial_hyperparameters = trial_hyperparameters

    def on_train_begin(self, logs=None):
        self.run =  mlflow.start_run(run_name=f"{TRIAL_NAME}_{self.trial_id}", nested=True)

        mlflow.log_param('trail_id', self.trial_id)
        for param_name, param_value in self.trial_hyperparameters.values.items():
            mlflow.log_param(param_name, param_value)


    def on_epoch_end(self, epoch, logs=None):
        if logs is not None:
            for metric_name, value in logs.items():
                mlflow.log_metric(metric_name, value, step=epoch)

    def on_train_end(self, logs=None):
        if self.run:
            mlflow.end_run()

In [9]:
# Subclassed RandomSerach tuner which uses customized MLflow callback
class MLflowTuner(RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        callbacks = base_callbacks + [MLflowCallback(trial.hyperparameters, trial.trial_id)]
        kwargs['callbacks'] = callbacks
        return super().run_trial(trial, *args, **kwargs)

In [10]:
# Initializing the Tuner
with mlflow.start_run(run_name=f"{MLFLOW_MAIN_RUN}"):
    epochs = 50
    mlflow.log_param('epochs', epochs)

    tuner = MLflowTuner(
        build_model,
        objective='sparse_categorical_accuracy',
        max_trials=10, 
        executions_per_trial=1,
        directory=f"{TUNER_DIRECTORY}",
        project_name=f"{TUNER_PROJECT_NAME}"
    )

    tuner.search(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs
    )

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]

    for param, value in best_hps.values.items():
        mlflow.log_param(param, value)

    for metric, value in best_trial.metrics.metrics.items():
        if metric and isinstance(metric, dict):
            values = value.get('value', [])
            if values:
                mlflow.log_metric(metric, values[-1])

    model_path = f"../model/{MODEL_NAME}.h5"
    best_model.save(model_path)
    mlflow.log_artifact(model_path)

mlflow.end_run()

Trial 10 Complete [00h 05m 20s]
sparse_categorical_accuracy: 0.05420944467186928

Best sparse_categorical_accuracy So Far: 0.9965092539787292
Total elapsed time: 00h 52m 36s


  saveable.load_own_variables(weights_store.get(inner_path))


🏃 View run Main Tunining Run: Landmark Model at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0/runs/c7afc298db4d418295cc89816677a101
🧪 View experiment at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0


In [20]:
results = best_model.evaluate(X_test, y_test)
print("Evaluation results:", results)

y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

y_true = np.array(y_test)

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.9989 - loss: 0.0025 - sparse_categorical_accuracy: 0.9989
Evaluation results: [0.0023601590655744076, 0.9990421533584595, 0.9990421533584595]
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [22]:
with mlflow.start_run(run_name=f"{EVALUATION_MLFLOW_RUN}"):
    class_names = label_encoder.classes_.tolist()
    report_path =f'../model/artifacts/{REPORT_NAME}.json'
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print("Classification Report", json.dumps(report, indent=4))

    with open(report_path, "w") as f:
        json.dump(report, f, indent=4)

    mlflow.log_artifact(report_path, "evaluation_metrics")

    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    cm_local_path = os.path.join("../model/artifacts", f"{CM_NAME}.png")
    plt.savefig(cm_local_path, dpi=300, bbox_inches='tight')
    
    mlflow.log_figure(plt.gcf(), f"evaluation_plots/{CM_NAME}.png")

    plt.close()


Classification Report {
    "a": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 37.0
    },
    "b": {
        "precision": 1.0,
        "recall": 0.9705882352941176,
        "f1-score": 0.9850746268656716,
        "support": 34.0
    },
    "c": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 38.0
    },
    "d": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 35.0
    },
    "e": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 44.0
    },
    "f": {
        "precision": 0.975609756097561,
        "recall": 1.0,
        "f1-score": 0.9876543209876543,
        "support": 40.0
    },
    "g": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 60.0
    },
    "h": {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 4