In [5]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import mlflow
import dagshub
import json
import os
import keras
import joblib
import matplotlib.pyplot as plt
from dagshub import dagshub_logger
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
from sklearn.metrics import confusion_matrix, classification_report
from kerastuner.tuners import RandomSearch

In [6]:
df = pd.read_csv('../data/landmarked/landmarked_dataset_plus.csv')
df.head()

Unnamed: 0,letter,filename,hand_side,source_id,landmark_0_x,landmark_0_y,landmark_0_z,landmark_1_x,landmark_1_y,landmark_1_z,...,landmark_17_z,landmark_18_x,landmark_18_y,landmark_18_z,landmark_19_x,landmark_19_y,landmark_19_z,landmark_20_x,landmark_20_y,landmark_20_z
0,t,src/data/processed/augmented_plus_II/t/aug_2_t...,unknown,unknown,0.419696,0.727407,-1.564469e-07,0.473776,0.610037,0.008192,...,-0.111799,0.569647,0.609724,-0.122497,0.54786,0.632496,-0.10924,0.512128,0.626944,-0.101184
1,t,src/data/processed/augmented_plus_II/t/flipped...,unknown,unknown,0.523883,0.762541,-1.024669e-06,0.50622,0.621727,-0.014814,...,-0.083038,0.414163,0.607353,-0.089796,0.422482,0.629771,-0.078052,0.458447,0.644775,-0.07247
2,t,src/data/processed/augmented_plus_II/t/aug_0_t...,unknown,unknown,0.441069,0.70399,-5.111158e-08,0.479444,0.558128,0.021133,...,-0.095609,0.494831,0.457364,-0.108516,0.513345,0.503679,-0.087823,0.486147,0.534728,-0.070633
3,t,src/data/processed/augmented_plus_II/t/aug_0_t...,unknown,unknown,0.492606,0.777506,7.281697e-08,0.490462,0.662237,1.7e-05,...,-0.130283,0.630471,0.616495,-0.141886,0.616912,0.653249,-0.117373,0.578293,0.671864,-0.099797
4,t,src/data/processed/augmented_plus_II/t/aug_0_t...,unknown,unknown,0.489544,0.522346,2.205826e-07,0.534607,0.415722,-0.006797,...,-0.178201,0.67383,0.535563,-0.184725,0.644136,0.541281,-0.150853,0.595937,0.535056,-0.130216


In [7]:
label_encoder = LabelEncoder()
scaler = StandardScaler()

classes = sorted(df['letter'].unique())

label_encoder.fit(classes)

y = df['letter']
y = label_encoder.transform(y)

X = df.drop(columns=['letter', 'filename','hand_side','source_id'], axis=1)

# 70% train, 15% val, 15% test
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=(15/85), random_state=42, stratify=y_temp)

print(f"Partitions shape:\nTrain{X_train.shape}\nValidation: {X_val.shape}\nTest:{X_test.shape}")

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

Partitions shape:
Train(23277, 63)
Validation: (4988, 63)
Test:(4989, 63)


In [8]:
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [9]:
# Defining mlflow experiment parameter
TRIAL_NAME = "v2_augmented_trial_"
MLFLOW_MAIN_RUN = "Main Tunining Run: Landmark Model"
TUNER_DIRECTORY = "logs_lm_model/v2_augmented_tuner"
TUNER_PROJECT_NAME = "FingerSpellIT - v2_Augmented Landmark Best Model"
MODEL_NAME = "model_landmarked_v3"
EVALUATION_MLFLOW_RUN = "Evaluating v2_Augmented Landmark Best Model"
REPORT_NAME = "v2_augmented_classification_report"
CM_NAME="v2_augmented_confusion_matrix"

In [10]:
# MLflow - Dagshub initialization
mlflow.set_tracking_uri("https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow")

dagshub.init(repo_owner='alfoCaiazza', repo_name='FingerSpellIT', mlflow=True)
dagshub_log = dagshub_logger(metrics_path="metrics", hparams_path="params")

In [11]:
# To avoid OOM errors, setting GPU Memory Consuption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print(f"GPU: {gpu}")
    tf.config.experimental.set_memory_growth(gpu, True) # Keeping the use of memory limited to prevent errors

GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [12]:
def build_model(hp):
    # Input Layer
    input_layer = layers.Input(shape=(63,))
    x = input_layer
    
    # First Dense Layer
    activation = hp.Choice('initial_activation', ['relu', 'tanh', 'sigmoid'])
    x = layers.Dense(64, activation=activation)(x)
    x = layers.Dropout(hp.Float('initial_dropout', 0.1, 0.5, step=0.1))(x)
    
    # Tunable Hidden Layers
    for i in range(hp.Int('num_layers', 1, 4)):
        units = hp.Int(f'units_{i}', 128, 512, step=128)
        x = layers.Dense(units, activation=activation)(x)

        # Optional BatchNorm
        if hp.Boolean(f'use_batchnorm_{i}'):
            x = layers.BatchNormalization()(x)
        x = layers.Activation(activation)(x)
            
        x = layers.Dropout(hp.Float(f'dropout_{i}', 0.1, 0.5, step=0.1))(x)
    
    # Output Layer
    prediction = layers.Dense(24, activation='softmax')(x)
    
    # Compile Model
    optimizer_name = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd'])
    
    if optimizer_name == 'adam':
        optimizer = keras.optimizers.Adam(
            learning_rate=hp.Float('adam_lr', 1e-5, 1e-2, sampling='log')
        )
    elif optimizer_name == 'rmsprop':
        optimizer = keras.optimizers.RMSprop(
            learning_rate=hp.Float('rmsprop_lr', 1e-5, 1e-2, sampling='log'),
            rho=hp.Float('rmsprop_rho', 0.8, 0.99)
        )
    elif optimizer_name == 'sgd':
        optimizer = keras.optimizers.SGD(
            learning_rate=hp.Float('sgd_lr', 1e-4, 1e-1, sampling='log'),
            momentum=hp.Float('sgd_momentum', 0.0, 0.99)
        )
    
    model = Model(inputs=input_layer, outputs=prediction)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_accuracy']
    )
    
    return model

In [13]:
base_callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, min_lr=1e-5)
]

# MLflow tracking callback
class MLflowCallback(Callback):
    def __init__(self, trial_hyperparameters, trial_id):
        super().__init__()
        self.trial_id = trial_id
        self.trial_hyperparameters = trial_hyperparameters

    def on_train_begin(self, logs=None):
        self.run =  mlflow.start_run(run_name=f"{TRIAL_NAME}_{self.trial_id}", nested=True)

        mlflow.log_param('trail_id', self.trial_id)
        for param_name, param_value in self.trial_hyperparameters.values.items():
            mlflow.log_param(param_name, param_value)


    def on_epoch_end(self, epoch, logs=None):
        if logs is not None:
            for metric_name, value in logs.items():
                mlflow.log_metric(metric_name, value, step=epoch)

    def on_train_end(self, logs=None):
        if self.run:
            mlflow.end_run()

In [14]:
# Subclassed RandomSerach tuner which uses customized MLflow callback
class MLflowTuner(RandomSearch):
    def run_trial(self, trial, *args, **kwargs):
        callbacks = base_callbacks + [MLflowCallback(trial.hyperparameters, trial.trial_id)]
        kwargs['callbacks'] = callbacks
        return super().run_trial(trial, *args, **kwargs)

In [15]:
# Initializing the Tuner
with mlflow.start_run(run_name=f"{MLFLOW_MAIN_RUN}"):
    epochs = 50
    mlflow.log_param('epochs', epochs)

    tuner = MLflowTuner(
        build_model,
        objective='sparse_categorical_accuracy',
        max_trials=10, 
        executions_per_trial=1,
        directory=f"{TUNER_DIRECTORY}",
        project_name=f"{TUNER_PROJECT_NAME}"
    )

    tuner.search(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs
    )

    best_model = tuner.get_best_models(num_models=1)[0]
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]

    for param, value in best_hps.values.items():
        mlflow.log_param(param, value)

    for metric, value in best_trial.metrics.metrics.items():
        if metric and isinstance(metric, dict):
            values = value.get('value', [])
            if values:
                mlflow.log_metric(metric, values[-1])

    model_path = f"../model/{MODEL_NAME}.h5"
    best_model.save(model_path)
    mlflow.log_artifact(model_path)

mlflow.end_run()

Trial 10 Complete [00h 06m 19s]
sparse_categorical_accuracy: 0.9729776382446289

Best sparse_categorical_accuracy So Far: 0.9729776382446289
Total elapsed time: 00h 58m 21s


  saveable.load_own_variables(weights_store.get(inner_path))


üèÉ View run Main Tunining Run: Landmark Model at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0/runs/f34be81f54004fe6abacf2a3c947598c
üß™ View experiment at: https://dagshub.com/alfoCaiazza/FingerSpellIT.mlflow/#/experiments/0


In [16]:
results = best_model.evaluate(X_test, y_test)
print("Evaluation results:", results)

y_pred_probs = best_model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

y_true = np.array(y_test)

[1m146/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ[0m [1m0s[0m 5ms/step - accuracy: 0.9817 - loss: 0.0945 - sparse_categorical_accuracy: 0.9817






[1m156/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m5s[0m 29ms/step - accuracy: 0.9817 - loss: 0.0935 - sparse_categorical_accuracy: 0.9817
Evaluation results: [0.07870367914438248, 0.9819602966308594, 0.9819602966308594]
[1m156/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 5ms/step


In [17]:
with mlflow.start_run(run_name=f"{EVALUATION_MLFLOW_RUN}"):
    class_names = label_encoder.classes_.tolist()
    report_path =f'../model/artifacts/{REPORT_NAME}.json'
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print("Classification Report", json.dumps(report, indent=4))

    with open(report_path, "w") as f:
        json.dump(report, f, indent=4)

    mlflow.log_artifact(report_path, "evaluation_metrics")

    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    cm_local_path = os.path.join("../model/artifacts", f"{CM_NAME}.png")
    plt.savefig(cm_local_path, dpi=300, bbox_inches='tight')
    
    mlflow.log_figure(plt.gcf(), f"evaluation_plots/{CM_NAME}.png")

    plt.close()


Classification Report {
    "a": {
        "precision": 0.9836956521739131,
        "recall": 0.9890710382513661,
        "f1-score": 0.9863760217983651,
        "support": 183.0
    },
    "b": {
        "precision": 0.9319727891156463,
        "recall": 0.9927536231884058,
        "f1-score": 0.9614035087719298,
        "support": 138.0
    },
    "c": {
        "precision": 0.9890710382513661,
        "recall": 0.9945054945054945,
        "f1-score": 0.9917808219178083,
        "support": 182.0
    },
    "d": {
        "precision": 0.9808917197452229,
        "recall": 0.9506172839506173,
        "f1-score": 0.9655172413793104,
        "support": 162.0
    },
    "e": {
        "precision": 1.0,
        "recall": 0.9908675799086758,
        "f1-score": 0.9954128440366973,
        "support": 219.0
    },
    "f": {
        "precision": 0.9803921568627451,
        "recall": 0.9615384615384616,
        "f1-score": 0.970873786407767,
        "support": 208.0
    },
    "g": {
        "