## Script assurdo

Il modello è un Seq2Seq con meccanismo di attenzione, che utilizza LSTM sia come encoder sia come decoder. Il layer di attenzione è personalizzato (implementato tramite `AttentionLayer`).

In [1]:
import os
from keras import backend as K
from keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
import pandas as pd

from utils import (
    evaluate_rouge,
    evaluate_wer,
    evaluate_cosine_similarity,
    plot_rouge,
    plot_wer,
    plot_cosine_similarity,
    generate_summaries,
    create_hyperparameter_grid,
    prepare_data,
)


def save_metrics_results(df_summaries, model_name, results_path):
    """Write the per-summary metric scores to a CSV file.

    The file is written to
    ``{results_path}/csv/{model_name}_metrics_scores.csv`` without the
    index column. The target directory is created when it does not exist
    yet, so the function also works on a fresh results folder.

    Args:
        df_summaries: Object exposing ``to_csv(path, index=False)``
            (a pandas DataFrame in this script).
        model_name: Name used as the prefix of the CSV file name.
        results_path: Root results directory for the model.
    """
    metrics_file_path = f"{results_path}/csv/{model_name}_metrics_scores.csv"
    # Writing into a missing directory fails, so create it up front.
    os.makedirs(os.path.dirname(metrics_file_path), exist_ok=True)
    df_summaries.to_csv(metrics_file_path, index=False)
    print(f"Metrics results saved to {metrics_file_path}")


def plot_training_history(history, model_name, save_path):
    """Plot the training and validation loss curves and save them as a PNG.

    Args:
        history: Mapping with the keys ``"loss"`` and ``"val_loss"``, each
            holding the sequence of values to plot.
        model_name: Name shown in the plot title and used as the prefix of
            the output file (``{model_name}_lossplot.png``).
        save_path: Directory receiving the image; created if missing.
    """
    # (history key, legend label) for each curve, drawn in this order.
    curves = (("loss", "train"), ("val_loss", "test"))
    for history_key, legend_label in curves:
        plt.plot(history[history_key], label=legend_label)

    plt.legend()
    plt.title(f"Model Loss Over Epochs - {model_name}")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.grid(True)

    # Make sure the output directory exists, then write the figure to disk.
    os.makedirs(save_path, exist_ok=True)
    output_file = os.path.join(save_path, f"{model_name}_lossplot.png")
    plt.savefig(output_file, dpi=300, bbox_inches="tight")

    # Close the current figure so the next call starts from a clean one.
    plt.close()


def save_model(model, model_name, save_path, save_full_model=True):
    """Persist a trained model under ``save_path``.

    The directory is always created. The model itself is written (via its
    ``save`` method) to ``{model_name}_full_model.h5`` only when
    ``save_full_model`` is true; weights-only saving is not performed.

    Args:
        model: Object exposing ``save(path)``.
        model_name: Prefix of the output file name.
        save_path: Destination directory.
        save_full_model: When false, nothing is written besides creating
            the directory.
    """
    os.makedirs(save_path, exist_ok=True)

    if not save_full_model:
        return

    full_model_file = os.path.join(save_path, f"{model_name}_full_model.h5")
    model.save(full_model_file)


def _teacher_forcing_batch(x_padded, y_padded):
    """Build the ``(inputs, targets)`` pair passed to ``fit`` for one split.

    The second model input is the target sequence without its last step;
    the expected output is the target sequence without its first step,
    reshaped so that it carries a trailing axis of size 1.
    """
    decoder_input = y_padded[:, :-1]
    decoder_target = y_padded.reshape(
        y_padded.shape[0], y_padded.shape[1], 1
    )[:, 1:]
    return [x_padded, decoder_input], decoder_target


def _build_training_callbacks():
    """Create the early-stopping and learning-rate callbacks used for training."""
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        verbose=1,
        patience=3,
        restore_best_weights=True,
    )

    def lr_schedule(epoch, lr):
        # Multiply the current rate by decay_rate every decay_step epochs,
        # leaving the very first epoch untouched.
        decay_rate = 0.95
        decay_step = 1
        if epoch % decay_step == 0 and epoch != 0:
            return lr * decay_rate
        return lr

    learning_rate_scheduler = LearningRateScheduler(lr_schedule, verbose=1)

    # NOTE(review): this callback and the scheduler above both adjust the
    # learning rate; confirm that combining them is intended.
    reduce_lr_on_plateau = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=2,
        verbose=1,
        min_lr=1e-6,
    )

    return [early_stopping, learning_rate_scheduler, reduce_lr_on_plateau]


def train_model(
    model_instance,
    hyperparams,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    save_path,
    save_trained_model=False,
):
    """Train ``model_instance`` and save its loss plot.

    Args:
        model_instance: Architecture wrapper exposing ``change_optimizer``,
            ``add_callbacks``, ``get_callbacks``, ``get_model`` and ``name``.
        hyperparams: Mapping providing ``optimizer_class``, ``learning_rate``,
            ``epochs`` and ``batch_size``. Other keys are ignored here.
        x_training_padded: Encoder inputs of the training split.
        y_training_padded: Target sequences of the training split; indexed
            as a 2-D array.
        x_validation_padded: Encoder inputs of the validation split.
        y_validation_padded: Target sequences of the validation split.
        save_path: Root directory for the outputs. The loss plot goes to
            ``<save_path>/graphs`` and the model, when saved, to
            ``<save_path>/weights``.
        save_trained_model: When true, the trained model is written with
            ``save_model``. Defaults to false, matching the previous
            hard-coded behaviour.

    Returns:
        The ``history`` dictionary of the object returned by ``fit``.
    """
    # NOTE(review): the caller has already built model_instance when this
    # runs; confirm that clearing the backend session here is intended.
    K.clear_session()

    # Create the optimizer and hand it, together with the callbacks, to the
    # architecture wrapper.
    optimizer = hyperparams["optimizer_class"](
        learning_rate=hyperparams["learning_rate"]
    )
    model_instance.change_optimizer(optimizer)
    model_instance.add_callbacks(_build_training_callbacks())

    model = model_instance.get_model()

    # Train model
    train_inputs, train_targets = _teacher_forcing_batch(
        x_training_padded, y_training_padded
    )
    history = model.fit(
        train_inputs,
        train_targets,
        epochs=hyperparams["epochs"],
        batch_size=hyperparams["batch_size"],
        validation_data=_teacher_forcing_batch(
            x_validation_padded, y_validation_padded
        ),
        callbacks=model_instance.get_callbacks(),
    )

    # Save results
    model_name = model_instance.name
    if save_trained_model:
        save_model(model, model_name, os.path.join(save_path, "weights"))

    # NOTE(review): the calling script creates "<save_path>/media/graphs",
    # while the loss plot is written to "<save_path>/graphs"; confirm which
    # location is the intended one.
    plot_training_history(
        history.history, model_name, os.path.join(save_path, "graphs")
    )

    return history.history


# Define hyperparameter grid
hyperparameter_grid = create_hyperparameter_grid()

# Architectures to train; uncomment an entry to include it in the run.
model_classes = [
    # Seq2SeqGRU,
    # Seq2SeqLSTM,
    Seq2SeqLSTMGlove,
    # Seq2SeqBiLSTM,
    # Seq2Seq3BiLSTM,
    # Seq2SeqLSTMTransformer,
]

# Switches controlling which optional artifacts are written.
TO_SAVE_MODEL_ARCHITECTURE = False
TO_SAVE_METRICS_RESULTS = True
TO_SAVE_PLOTS = False

# Training loop
results_path = "results/"
os.makedirs(results_path, exist_ok=True)

for model_class in model_classes:
    print("\n" + "=" * 50)
    print(f"Training: {model_class.__name__}")

    # From here on results_path points at this architecture's own folder.
    results_path = f"results/{model_class.__name__}"
    os.makedirs(results_path, exist_ok=True)

    # Create the subdirectories
    for subdirectory in (
        "weights",
        "media/graphs",
        "media/architectures",
        "csv",
        "histories",
    ):
        os.makedirs(f"{results_path}/{subdirectory}", exist_ok=True)

    for hyperparams in hyperparameter_grid:
        # Get prepared data
        # NOTE(review): prepare_data() takes no arguments and is called once
        # per hyperparameter combination; if its result does not change
        # between calls it could be loaded once before the loops.
        (
            x_voc,
            y_voc,
            x_tokenizer,
            y_tokenizer,
            x_training_padded,
            y_training_padded,
            x_validation_padded,
            y_validation_padded,
            max_text_len,
            max_summary_len,
        ) = prepare_data()

        # Suffix encoding the hyperparameters, appended to the model name.
        name_suffix = (
            f"_optimizer{hyperparams['optimizer_class'].__name__}"
            f"_lr{hyperparams['learning_rate']}"
            f"_ed{hyperparams['embedding_dim']}"
            f"_ld{hyperparams['latent_dim']}"
            f"_do{hyperparams['decoder_dropout']}"
            f"_drdo{hyperparams['decoder_recurrent_dropout']}"
            f"_edo{hyperparams['encoder_dropout']}"
            f"_erdo{hyperparams['encoder_recurrent_dropout']}"
        )

        # Create the model instance
        model_instance = model_class(
            x_voc=x_voc,
            y_voc=y_voc,
            max_text_len=max_text_len,
            max_summary_len=max_summary_len,
            x_tokenizer=x_tokenizer,
            y_tokenizer=y_tokenizer,
            name_additional_info=name_suffix,
            latent_dim=hyperparams["latent_dim"],
            embedding_dim=hyperparams["embedding_dim"],
            encoder_dropout=hyperparams["encoder_dropout"],
            encoder_recurrent_dropout=hyperparams["encoder_recurrent_dropout"],
            decoder_dropout=hyperparams["decoder_dropout"],
            decoder_recurrent_dropout=hyperparams["decoder_recurrent_dropout"],
        )

        # Plot the model architecture
        if TO_SAVE_MODEL_ARCHITECTURE:
            plot_model(
                model_instance.get_model(),
                to_file=f"{results_path}/media/architectures/{model_instance.name}_architecture.png",
                show_shapes=True,
            )

        print(f"Training {model_instance.name} with hyperparameters {hyperparams}")
        history = train_model(
            model_instance,
            hyperparams,
            x_training_padded,
            y_training_padded,
            x_validation_padded,
            y_validation_padded,
            results_path,
        )

        # Save training history (appended, so earlier runs are kept)
        history_path = os.path.join(
            results_path, f"histories/{model_instance.name}_history.txt"
        )
        with open(history_path, "a", encoding="utf-8") as f:
            f.write(f"Hyperparameters: {hyperparams}\n")
            f.write(f"History: {history}\n\n")
            # Write the loss and val_loss of the last epoch
            f.write(
                f"Last epoch loss: {history['loss'][-1]}, val_loss: {history['val_loss'][-1]}\n"
            )

        # Generate and save summaries
        # NOTE(review): summaries are generated from the training split, so
        # the metrics below are computed on data the model was trained on;
        # confirm that this is intended.
        print(f"Generating summaries for {model_instance.name}")
        summaries_path = os.path.join(results_path, "csv")
        df_summaries = generate_summaries(
            model_instance,
            x_training_padded,
            y_training_padded,
            max_text_len,
            n_summaries=1000,
            save_path=summaries_path,
        )

        df_summaries, mean_scores_rouge = evaluate_rouge(df_summaries)
        df_summaries, mean_score_wer = evaluate_wer(df_summaries)
        df_summaries, mean_score_cosine_similarity = evaluate_cosine_similarity(
            df_summaries
        )

        # Save evaluation results
        if TO_SAVE_METRICS_RESULTS:
            save_metrics_results(df_summaries, model_instance.name, results_path)

        # Append the mean scores to the history file
        with open(history_path, "a", encoding="utf-8") as f:
            f.write(f"Mean ROUGE scores: {mean_scores_rouge}\n")
            f.write(f"Mean WER score: {mean_score_wer}\n")
            f.write(f"Mean Cosine Similarity score: {mean_score_cosine_similarity}\n\n")

        # Plot evaluation results
        if TO_SAVE_PLOTS:
            graphs_path = f"{results_path}/media/graphs"

            # One distribution plot per ROUGE variant.
            for rouge_metric, rouge_label in (
                ("rouge1", "ROUGE-1"),
                ("rouge2", "ROUGE-2"),
                ("rougeL", "ROUGE-L"),
            ):
                plot_rouge(
                    df_summaries,
                    graphs_path,
                    model_instance,
                    metric=rouge_metric,
                    title=f"{rouge_label} Score Distribution - {model_instance.name}",
                    color="blue",
                )

            plot_wer(
                df_summaries,
                graphs_path,
                model_instance,
                title=f"WER Score Distribution - {model_instance.name}",
                color="red",
            )
            plot_cosine_similarity(
                df_summaries,
                graphs_path,
                model_instance,
                title=f"Cosine Similarity Distribution - {model_instance.name}",
                color="green",
            )

2025-02-26 08:21:30.690878: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-26 08:21:30.699872: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740554490.710013    8386 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740554490.712791    8386 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-26 08:21:30.723685: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Number of hyperparameter combinations: 2

Training: Seq2SeqLSTMGlove


[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


GloVe embeddings already downloaded.
Extracting ./architectures/weightsGLOVE/glove.6B.zip...
Extraction complete.
GloVe embeddings already downloaded.
Extracting ./architectures/weightsGLOVE/glove.6B.zip...
Extraction complete.


I0000 00:00:1740554521.600524    8386 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5807 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060, pci bus id: 0000:01:00.0, compute capability: 8.9


AttributeError: 'Seq2SeqLSTMGlove' object has no attribute 'encoder_dropout'