# Notebook 01: Addestramento e Valutazione dei Modelli

**Scopo:** Questo notebook carica i dati pre-processati dal Notebook 00, definisce le architetture delle reti neurali, orchestra un ciclo di esperimenti per addestrare e valutare diverse combinazioni di modelli e ottimizzatori, e salva gli artefatti migliori per l'analisi successiva.

**Input:**
- Dati pre-processati da `../data/processed/` (`X_train.npy`, `y_train.npy`, etc.)

**Output (salvati in `../models/` e `../reports/`):**
- I modelli migliori per ogni esperimento (es. `UNet_Lite_Adam.keras`).
- Un file di riepilogo con le metriche di performance (es. `training_summary.csv`).
- (Opzionale) Le storie di training salvate.

In [13]:
# ===================================================================
# CELLA 1: SETUP, IMPORTS E CARICAMENTO DATI
# ===================================================================

import os
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import time
import traceback

import tensorflow as tf
import keras as keras
from keras import layers, models, optimizers, callbacks, regularizers
from keras.utils import to_categorical

# --- Configurazione Globale ---
PROCESSED_DATA_PATH = '../data/processed/'
MODELS_PATH = '../models/ale/'
REPORTS_PATH = '../reports/'
RANDOM_STATE = 42

os.makedirs(MODELS_PATH, exist_ok=True)
os.makedirs(REPORTS_PATH, exist_ok=True)

# 1. GPU e Mixed Precision
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✅ GPU(s) Trovata/e: {[tf.config.experimental.get_device_details(g)['device_name'] for g in gpus]}")
        policy = keras.mixed_precision.Policy('mixed_float16')
        keras.mixed_precision.set_global_policy(policy)
        print(f"✅ Politica di Mixed Precision impostata su: {keras.mixed_precision.global_policy().name}")
    except RuntimeError as e: print(f"⚠️ Errore durante l'inizializzazione della GPU: {e}")
else: print("❌ NESSUNA GPU TROVATA. L'allenamento sarà su CPU.")

# 2. Caricamento Dati Pre-processati
print("\n🔄 Caricamento dei dati pre-processati...")
try:
    X_train = np.load(os.path.join(PROCESSED_DATA_PATH, 'X_train.npy'))
    y_train = np.load(os.path.join(PROCESSED_DATA_PATH, 'y_train.npy'))
    X_val = np.load(os.path.join(PROCESSED_DATA_PATH, 'X_val.npy'))
    y_val = np.load(os.path.join(PROCESSED_DATA_PATH, 'y_val.npy'))
    X_test = np.load(os.path.join(PROCESSED_DATA_PATH, 'X_test.npy'))
    y_test = np.load(os.path.join(PROCESSED_DATA_PATH, 'y_test.npy'))
    
    with open(os.path.join(PROCESSED_DATA_PATH, 'label_encoder.pkl'), 'rb') as f:
        label_encoder = pickle.load(f)

    # Conversione in formato categorico
    num_classes = len(label_encoder.classes_)
    y_train_cat = to_categorical(y_train, num_classes=num_classes)
    y_val_cat = to_categorical(y_val, num_classes=num_classes)
    y_test_cat = to_categorical(y_test, num_classes=num_classes)
    
    print("\n✅ Dati caricati con successo.")
    print(f"   - Shape X_train: {X_train.shape} | Shape y_train_cat: {y_train_cat.shape}")
    print(f"   - Numero di classi: {num_classes}")
except FileNotFoundError:
    print("❌ ERRORE: File di dati non trovati. Eseguire prima il notebook '00_Setup_and_Data_Preparation.ipynb'.")

✅ GPU(s) Trovata/e: ['NVIDIA GeForce RTX 4070']
✅ Politica di Mixed Precision impostata su: mixed_float16

🔄 Caricamento dei dati pre-processati...

✅ Dati caricati con successo.
   - Shape X_train: (5990, 128, 128, 1) | Shape y_train_cat: (5990, 10)
   - Numero di classi: 10


In [14]:
# ===================================================================
# CELLA 2: MODELFACTORY PER IL "GRAND TOURNAMENT"
# ===================================================================
from tensorflow.keras import layers, models, regularizers

class ModelFactory:
    """
    Contiene le factory per i nostri tre migliori candidati.
    Ogni modello rappresenta un diverso compromesso tra semplicità e potenza.
    """
    
    @staticmethod
    def _se_block(input_tensor, ratio=16):
        channels = input_tensor.shape[-1]
        se = layers.GlobalAveragePooling2D()(input_tensor)
        se = layers.Reshape((1, 1, channels))(se)
        se = layers.Dense(channels // ratio, activation='relu')(se)
        se = layers.Dense(channels, activation='sigmoid')(se)
        return layers.Multiply()([input_tensor, se])

    # --- Candidato 1: Il Campione Stabile ---
    @staticmethod
    def build_se_audio_cnn(input_shape, num_classes):
        """Architettura VGG-style potenziata con SE. Semplice, veloce, robusta."""
        inputs = layers.Input(shape=input_shape)
        # Blocco 1
        x = layers.Conv2D(32, (3, 3), padding='same', use_bias=False)(inputs)
        x = layers.BatchNormalization()(x); x = layers.Activation('relu')(x)
        x = layers.Conv2D(32, (3, 3), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x); x = layers.Activation('relu')(x)
        x = layers.MaxPooling2D((2, 2))(x)
        x = ModelFactory._se_block(x)
        x = layers.Dropout(0.25)(x)
        # Blocco 2
        x = layers.Conv2D(64, (3, 3), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x); x = layers.Activation('relu')(x)
        x = layers.Conv2D(64, (3, 3), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x); x = layers.Activation('relu')(x)
        x = layers.MaxPooling2D((2, 2))(x)
        x = ModelFactory._se_block(x)
        x = layers.Dropout(0.25)(x)
        # Testa
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dropout(0.5)(x)
        outputs = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
        return models.Model(inputs=inputs, outputs=outputs, name='SE_AudioCNN')

    # --- Candidato 2: L'Equilibrio di Potenza ---
    @staticmethod
    def _res_se_block(input_tensor, filters, stride=1):
        shortcut = input_tensor
        x = layers.Conv2D(filters, 3, strides=stride, padding='same', use_bias=False)(input_tensor)
        x = layers.BatchNormalization()(x); x = layers.PReLU(shared_axes=[1, 2])(x)
        x = layers.Conv2D(filters, 3, padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x); x = ModelFactory._se_block(x)
        if stride > 1 or shortcut.shape[-1] != filters:
            shortcut = layers.Conv2D(filters, 1, strides=stride, use_bias=False)(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)
        x = layers.Add()([shortcut, x]); x = layers.PReLU(shared_axes=[1, 2])(x)
        return x

    @staticmethod
    def build_res_se_audio_cnn(input_shape, num_classes):
        """Architettura ResNet-style con SE e PReLU. Potente e stabile."""
        inputs = layers.Input(shape=input_shape)
        x = layers.Conv2D(32, 3, strides=1, padding='same', use_bias=False)(inputs)
        x = layers.BatchNormalization()(x); x = layers.PReLU(shared_axes=[1, 2])(x)
        x = ModelFactory._res_se_block(x, 64, stride=2)
        x = ModelFactory._res_se_block(x, 128, stride=2)
        x = ModelFactory._res_se_block(x, 256, stride=2)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dropout(0.5)(x)
        outputs = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
        return models.Model(inputs=inputs, outputs=outputs, name='ResSE_AudioCNN')

    # --- Candidato 3: La Punta di Diamante ---
    @staticmethod
    def _resnext_se_block(input_tensor, filters, stride=1, cardinality=8):
        shortcut = input_tensor
        if stride > 1 or shortcut.shape[-1] != filters:
            shortcut = layers.Conv2D(filters, 1, strides=stride, use_bias=False)(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)
        x = layers.Conv2D(filters // 2, 1, use_bias=False)(input_tensor)
        x = layers.BatchNormalization()(x); x = layers.PReLU(shared_axes=[1, 2])(x)
        group_filters = (filters // 2) // cardinality
        groups = [layers.Conv2D(group_filters, 3, strides=stride, padding='same', use_bias=False)(x) for _ in range(cardinality)]
        x = layers.Concatenate()(groups)
        x = layers.BatchNormalization()(x); x = layers.PReLU(shared_axes=[1, 2])(x)
        x = layers.Conv2D(filters, 1, use_bias=False)(x)
        x = layers.BatchNormalization()(x); x = ModelFactory._se_block(x)
        x = layers.Add()([shortcut, x]); x = layers.PReLU(shared_axes=[1, 2])(x)
        return x

    @staticmethod
    def build_resnext_se_audio_cnn(input_shape, num_classes):
        """Architettura ResNeXt-style. La nostra opzione più potente."""
        inputs = layers.Input(shape=input_shape)
        x = layers.Conv2D(64, 3, strides=1, padding='same', use_bias=False)(inputs)
        x = layers.BatchNormalization()(x); x = layers.PReLU(shared_axes=[1, 2])(x)
        x = ModelFactory._resnext_se_block(x, 128, stride=2)
        x = ModelFactory._resnext_se_block(x, 256, stride=2)
        x = ModelFactory._resnext_se_block(x, 512, stride=2)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dropout(0.5)(x)
        outputs = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
        return models.Model(inputs=inputs, outputs=outputs, name='ResNeXt_SE_AudioCNN')

print("✅ ModelFactory pronta per il torneo con i 3 migliori candidati.")

✅ ModelFactory pronta per il torneo con i 3 migliori candidati.


In [None]:
# ===================================================================
# CELLA 3: FRAMEWORK DI TRAINING PER IL "GRAND TOURNAMENT"
# ===================================================================
import os
import pandas as pd
import traceback
import tensorflow as tf
from tensorflow.keras import optimizers, callbacks

# Funzione SpecAugment (invariata)
@tf.function
def spec_augment_tf(spectrogram, label):
    aug_spec = tf.identity(spectrogram)
    freq_bins = tf.shape(aug_spec)[0]
    time_steps = tf.shape(aug_spec)[1]
    # Mascheramento Frequenza
    f_param = tf.cast(tf.cast(freq_bins, tf.float32) * 0.2, tf.int32)
    if f_param > 1:
        f = tf.random.uniform(shape=(), minval=1, maxval=f_param, dtype=tf.int32)
        f0 = tf.random.uniform(shape=(), minval=0, maxval=freq_bins - f, dtype=tf.int32)
        mask_freq_values = tf.concat([tf.ones((f0,)), tf.zeros((f,)), tf.ones((freq_bins - f0 - f,))], axis=0)
        mask_freq = tf.reshape(tf.cast(mask_freq_values, aug_spec.dtype), (freq_bins, 1, 1))
        aug_spec *= mask_freq
    # Mascheramento Tempo
    t_param = tf.cast(tf.cast(time_steps, tf.float32) * 0.2, tf.int32)
    if t_param > 1:
        t = tf.random.uniform(shape=(), minval=1, maxval=t_param, dtype=tf.int32)
        t0 = tf.random.uniform(shape=(), minval=0, maxval=time_steps - t, dtype=tf.int32)
        mask_time_values = tf.concat([tf.ones((t0,)), tf.zeros((t,)), tf.ones((time_steps - t0 - t,))], axis=0)
        mask_time = tf.reshape(tf.cast(mask_time_values, aug_spec.dtype), (1, time_steps, 1))
        aug_spec *= mask_time
    return aug_spec, label

# Classe per orchestrare l'esperimento comparativo
class ModelEvaluator:
    def __init__(self, class_names):
        self.class_names = class_names
        self.results = []

    def run_experiments(self, model_factories, train_data, val_data, test_data, epochs, batch_size):
        for model_name, model_factory in model_factories.items():
            print(f"\n{'='*80}\nARCHITETTURA IN TEST: '{model_name}'\n{'='*80}")
            try:
                model = model_factory()
                # Usiamo solo Adam con un setup semplice e robusto
                optimizer = optimizers.Adam(learning_rate=1e-3)
                model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
                
                callbacks_list = [
                    callbacks.EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1),
                    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=1),
                    callbacks.ModelCheckpoint(os.path.join(MODELS_PATH, f"{model_name}_best.keras"), 
                                              monitor='val_accuracy', save_best_only=True)
                ]
                
                history = model.fit(train_data, epochs=epochs, validation_data=val_data, callbacks=callbacks_list, verbose=2)
                
                test_loss, test_acc = model.evaluate(test_data, verbose=0)
                self.results.append({
                    'Model': model_name,
                    'Test_Accuracy': test_acc,
                    'Test_Loss': test_loss,
                    'Best_Val_Accuracy': max(history.history['val_accuracy']),
                    'Epochs_Run': len(history.history['val_accuracy']),
                })
            except Exception:
                print(f"❌ ERRORE durante il training di [{model_name}]:")
                traceback.print_exc()
        return pd.DataFrame(self.results)

# -------------------------------------------------------------------
# ESECUZIONE DEL CICLO DI TRAINING
# -------------------------------------------------------------------
AUTOTUNE = tf.data.AUTOTUNE
BATCH_SIZE = 64
EPOCHS = 50

tf.keras.mixed_precision.set_global_policy('float32')

# Assicuriamoci che i tipi di dato siano corretti
y_train_cat = y_train_cat.astype('float32')
y_val_cat = y_val_cat.astype('float32')
y_test_cat = y_test_cat.astype('float32')

# Pipeline di dati (solo SpecAugment, niente Mixup per un test più pulito)
train_pipeline = (tf.data.Dataset.from_tensor_slices((X_train, y_train_cat)).cache().shuffle(len(X_train))
                  .map(spec_augment_tf, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE))
val_pipeline = (tf.data.Dataset.from_tensor_slices((X_val, y_val_cat)).cache().batch(BATCH_SIZE).prefetch(AUTOTUNE))
test_pipeline = (tf.data.Dataset.from_tensor_slices((X_test, y_test_cat)).cache().batch(BATCH_SIZE).prefetch(AUTOTUNE))

# Factory dei modelli per il torneo
input_shape = X_train.shape[1:]
num_classes = y_train_cat.shape[1]
model_factories = {
    'SE_AudioCNN': lambda: ModelFactory.build_se_audio_cnn(input_shape, num_classes),
    'ResSE_AudioCNN': lambda: ModelFactory.build_res_se_audio_cnn(input_shape, num_classes),
    'ResNeXt_SE_AudioCNN': lambda: ModelFactory.build_resnext_se_audio_cnn(input_shape, num_classes),
}

# Esecuzione del torneo
evaluator = ModelEvaluator(class_names=label_encoder.classes_)
results_df = evaluator.run_experiments(
    model_factories, train_pipeline, val_pipeline, test_pipeline, EPOCHS, BATCH_SIZE
)

# Salvataggio e Report dei risultati del torneo
if not results_df.empty:
    results_df.to_csv(os.path.join(REPORTS_PATH, 'training_summary.csv'), index=False)
    print("\n🎉 CHAMPIONSHIP RUN COMPLETATO 🎉")
    print("\nRisultati Finali:")
    print(results_df.to_markdown(index=False))


ARCHITETTURA IN TEST: 'SE_AudioCNN'
Epoch 1/50





94/94 - 21s - 220ms/step - accuracy: 0.2997 - loss: 1.9117 - val_accuracy: 0.2035 - val_loss: 2.1381 - learning_rate: 1.0000e-03
Epoch 2/50
94/94 - 3s - 32ms/step - accuracy: 0.4083 - loss: 1.5999 - val_accuracy: 0.2335 - val_loss: 2.0751 - learning_rate: 1.0000e-03
Epoch 3/50
94/94 - 3s - 32ms/step - accuracy: 0.4584 - loss: 1.4955 - val_accuracy: 0.2915 - val_loss: 1.8429 - learning_rate: 1.0000e-03
Epoch 4/50
94/94 - 3s - 33ms/step - accuracy: 0.5015 - loss: 1.3805 - val_accuracy: 0.3105 - val_loss: 1.9694 - learning_rate: 1.0000e-03
Epoch 5/50
94/94 - 3s - 32ms/step - accuracy: 0.5611 - loss: 1.2618 - val_accuracy: 0.4755 - val_loss: 1.5652 - learning_rate: 1.0000e-03
Epoch 6/50
94/94 - 3s - 32ms/step - accuracy: 0.5922 - loss: 1.1579 - val_accuracy: 0.3295 - val_loss: 2.1600 - learning_rate: 1.0000e-03
Epoch 7/50
94/94 - 3s - 33ms/step - accuracy: 0.6105 - loss: 1.1114 - val_accuracy: 0.5340 - val_loss: 1.3237 - learning_rate: 1.0000e-03
Epoch 8/50
94/94 - 3s - 32ms/step - accurac








94/94 - 59s - 629ms/step - accuracy: 0.4297 - loss: 1.6043 - val_accuracy: 0.1595 - val_loss: 3.7259 - learning_rate: 1.0000e-03
Epoch 2/50
94/94 - 12s - 125ms/step - accuracy: 0.5701 - loss: 1.1754 - val_accuracy: 0.0845 - val_loss: 6.3565 - learning_rate: 1.0000e-03
Epoch 3/50
94/94 - 12s - 125ms/step - accuracy: 0.6616 - loss: 0.9843 - val_accuracy: 0.1485 - val_loss: 4.5496 - learning_rate: 1.0000e-03
Epoch 4/50
94/94 - 12s - 128ms/step - accuracy: 0.7002 - loss: 0.8541 - val_accuracy: 0.3485 - val_loss: 3.1679 - learning_rate: 1.0000e-03
Epoch 5/50
94/94 - 12s - 129ms/step - accuracy: 0.7536 - loss: 0.7137 - val_accuracy: 0.4140 - val_loss: 2.1374 - learning_rate: 1.0000e-03
Epoch 6/50
94/94 - 12s - 128ms/step - accuracy: 0.7888 - loss: 0.6080 - val_accuracy: 0.6730 - val_loss: 1.1723 - learning_rate: 1.0000e-03
Epoch 7/50
94/94 - 12s - 125ms/step - accuracy: 0.8277 - loss: 0.5152 - val_accuracy: 0.4850 - val_loss: 2.9013 - learning_rate: 1.0000e-03
Epoch 8/50
94/94 - 12s - 126ms/

ImportError: Missing optional dependency 'tabulate'.  Use pip or conda to install tabulate.

In [20]:
if not results_df.empty:
    results_df.to_csv(os.path.join(REPORTS_PATH, 'training_summary_CHAMPIONSHIP_RUN.csv'), index=False)
    print("\n🎉 CHAMPIONSHIP RUN COMPLETATO 🎉")
    print("\nRisultati Finali:")
    print(results_df.to_markdown(index=False))


🎉 CHAMPIONSHIP RUN COMPLETATO 🎉

Risultati Finali:
| Model               |   Test_Accuracy |   Test_Loss |   Best_Val_Accuracy |   Epochs_Run |
|:--------------------|----------------:|------------:|--------------------:|-------------:|
| SE_AudioCNN         |          0.696  |    0.936001 |              0.7235 |           50 |
| ResSE_AudioCNN      |          0.7805 |    0.926292 |              0.793  |           42 |
| ResNeXt_SE_AudioCNN |          0.7795 |    0.99514  |              0.7675 |           46 |
