# Training and Evaluation Scripts

## Training

In [1]:
# Feature flags for the training cell.
# When True, the trained model's weights are written to disk after fitting.
TO_SAVE_MODEL = True
# When True, each model's architecture diagram is rendered to a PNG with plot_model.
TO_SAVE_MODEL_ARCHITECTURE = True
# When True, summaries are generated from the validation data after each training run.
TO_GENERATE_SUMMARIES = True

from utils import (
    generate_summaries,
    create_hyperparameter_grid,
    prepare_data,
)
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

# Hyperparameter values to combine into training runs. Judging by the recorded
# run output, every listed value is combined with every other one (here
# 2 embedding sizes x 2 latent sizes = 4 runs per model), and each optimizer
# entry is flattened into an "optimizer_class" and a "learning_rate" key.
hyperparameter_grid = create_hyperparameter_grid(
    embedding_dim=[512, 256],
    latent_dim=[256, 128],
    encoder_dropout=[0.2],
    encoder_recurrent_dropout=[0.2],
    decoder_dropout=[0.2],
    decoder_recurrent_dropout=[0.2],
    optimizers=[{"class": Adam, "learning_rate": 0.001}],
    epochs=[50],
    batch_size=[128],
)

from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
from architectures.Seq2SeqBiLSTMImproved import Seq2SeqBiLSTMImproved

# Architectures to train, in this order; commented-out entries are skipped
# for this run.
model_classes = [
    # Seq2SeqLSTMGlove,
    Seq2Seq3BiLSTM,
    Seq2SeqBiLSTM,
    Seq2SeqLSTM,
    Seq2SeqGRU,
    # Seq2SeqLSTMTransformer,
    # Seq2SeqBiLSTMImproved,
]

import os
from keras import backend as K
from keras.utils import plot_model
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
import pandas as pd


def save_metrics_results(df_summaries, model_name, results_path):
    """Write a metrics table to ``<results_path>/csv/<model_name>_metrics_scores.csv``.

    Args:
        df_summaries: pandas DataFrame (or any object exposing ``to_csv``) to
            persist; the index is not written.
        model_name: Prefix used for the output file name.
        results_path: Root results directory; the file goes into its ``csv``
            sub-directory.
    """
    csv_dir = f"{results_path}/csv"
    # Create the target directory on demand, like the other save helpers do,
    # so the call does not fail when the results tree has not been built yet.
    os.makedirs(csv_dir, exist_ok=True)

    metrics_file_path = f"{csv_dir}/{model_name}_metrics_scores.csv"
    df_summaries.to_csv(metrics_file_path, index=False)
    print(f"Metrics results saved to {metrics_file_path}")


def plot_training_history(history, model_name, save_path):
    """Plot the training and validation loss curves and save them as a PNG.

    Args:
        history: Mapping with ``"loss"`` and ``"val_loss"`` sequences (the
            caller passes the ``history`` dict of a Keras ``fit`` result).
        model_name: Used in the plot title and as the file-name prefix.
        save_path: Directory that receives ``<model_name>_lossplot.png``;
            created if it does not exist.
    """
    # Draw on a dedicated figure rather than pyplot's implicit current one, so
    # artists left behind by other code cannot leak into this plot.
    fig, ax = plt.subplots()
    try:
        ax.plot(history["loss"], label="train")
        # "val_loss" is measured on the validation data, so label it as such.
        ax.plot(history["val_loss"], label="validation")
        ax.legend()
        ax.set_title(f"Model Loss Over Epochs - {model_name}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.grid(True)

        # Save the plot to a file
        os.makedirs(save_path, exist_ok=True)
        fig.savefig(
            os.path.join(save_path, f"{model_name}_lossplot.png"),
            dpi=300,
            bbox_inches="tight",
        )
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def save_model(model, model_name, save_path, save_full_model=True):
    """Persist ``model`` under ``save_path``, creating the directory if needed.

    Args:
        model: Object exposing ``save(path)`` and ``save_weights(path)``.
        model_name: Prefix used for the output file name.
        save_path: Destination directory.
        save_full_model: When truthy, the whole model is stored as
            ``<model_name>_full_model.h5``; otherwise only the weights are
            stored as ``<model_name>.weights.h5``.
    """
    os.makedirs(save_path, exist_ok=True)

    if not save_full_model:
        # Weights-only checkpoint
        weights_file = os.path.join(save_path, f"{model_name}.weights.h5")
        model.save_weights(weights_file)
        return

    # Full model (architecture + weights)
    full_model_file = os.path.join(save_path, f"{model_name}_full_model.h5")
    model.save(full_model_file)


def _shifted_inputs_and_targets(x_padded, y_padded):
    """Build the ``(inputs, targets)`` pair passed to ``model.fit``.

    The inputs are ``[x_padded, y_padded without its last time step]``; the
    targets are ``y_padded`` without its first time step, with a trailing axis
    of size 1 added.
    """
    # NOTE(review): presumably the second input is the decoder input and the
    # one-step shift gives next-token targets — confirm against the architectures.
    targets = y_padded.reshape(y_padded.shape[0], y_padded.shape[1], 1)[:, 1:]
    return [x_padded, y_padded[:, :-1]], targets


def train_model(
    model_instance,
    hyperparams,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    save_path,
):
    """Fit one model instance, save its weights and loss plot, return its history.

    Args:
        model_instance: Project model wrapper exposing ``change_optimizer``,
            ``add_callbacks``, ``get_callbacks``, ``get_model`` and ``name``.
        hyperparams: Mapping providing ``"optimizer_class"``,
            ``"learning_rate"``, ``"epochs"`` and ``"batch_size"``.
        x_training_padded: Training inputs (2-D array, first model input).
        y_training_padded: Training targets (2-D array); also supplies the
            second model input, see ``_shifted_inputs_and_targets``.
        x_validation_padded: Validation counterpart of ``x_training_padded``.
        y_validation_padded: Validation counterpart of ``y_training_padded``.
        save_path: Results directory; weights go to ``<save_path>/weights``
            and the loss plot to ``<save_path>/media/graphs``.

    Returns:
        The ``history`` dict of the object returned by ``model.fit``.
    """
    # NOTE(review): the model instance is built by the caller before this
    # point — confirm that clearing the Keras session here is intended.
    K.clear_session()

    # Read every setting up front so a malformed grid entry fails before any
    # state on the model instance is touched.
    optimizer_class = hyperparams["optimizer_class"]
    learning_rate = hyperparams["learning_rate"]
    epochs = hyperparams["epochs"]
    batch_size = hyperparams["batch_size"]

    model_instance.change_optimizer(optimizer_class(learning_rate=learning_rate))

    # Stop once val_loss has not improved for 3 epochs and roll back to the
    # best weights seen.
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        verbose=1,
        patience=3,
        restore_best_weights=True,
    )

    # Halve the learning rate after 2 epochs without val_loss improvement.
    reduce_lr_on_plateau = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=2,
        verbose=1,
        min_lr=1e-6,
    )

    model_instance.add_callbacks([early_stopping, reduce_lr_on_plateau])

    model = model_instance.get_model()

    train_inputs, train_targets = _shifted_inputs_and_targets(
        x_training_padded, y_training_padded
    )
    validation_data = _shifted_inputs_and_targets(
        x_validation_padded, y_validation_padded
    )

    # Train model
    history = model.fit(
        train_inputs,
        train_targets,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validation_data,
        callbacks=model_instance.get_callbacks(),
    )

    # Save results (weights only, not the full model)
    model_name = model_instance.name
    model_save_path = os.path.join(save_path, "weights")
    if TO_SAVE_MODEL:
        save_model(model, model_name, model_save_path, save_full_model=False)

    # Plot training history
    plot_training_history(
        history.history, model_name, os.path.join(save_path, "media/graphs")
    )

    return history.history


# Training loop: train every architecture with every hyperparameter combination,
# storing weights, plots, histories and generated summaries under results/<model>.
results_path = "results/"
os.makedirs(results_path, exist_ok=True)

# prepare_data() takes no arguments, so its result cannot depend on the model
# or the hyperparameters; load the data once instead of once per training run.
# NOTE(review): assumes prepare_data() is deterministic and that training does
# not mutate the returned arrays/tokenizers — confirm.
(
    x_voc,
    y_voc,
    x_tokenizer,
    y_tokenizer,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    max_text_len,
    max_summary_len,
) = prepare_data()

for model_class in model_classes:
    print("\n" + "=" * 50)
    print(f"Training: {model_class.__name__}")

    results_path = f"results/{model_class.__name__}"
    os.makedirs(results_path, exist_ok=True)

    # Create the subdirectories
    for subdir in (
        "weights",
        "media/graphs",
        "media/architectures",
        "csv",
        "histories",
    ):
        os.makedirs(f"{results_path}/{subdir}", exist_ok=True)

    for hyperparams in hyperparameter_grid:
        # Optional free-form prefix for the instance name
        additional_info = ""

        # Create the model instance
        model_instance = model_class(
            x_voc=x_voc,
            y_voc=y_voc,
            max_text_len=max_text_len,
            max_summary_len=max_summary_len,
            x_tokenizer=x_tokenizer,
            y_tokenizer=y_tokenizer,
            name_additional_info=f"{additional_info}_optimizer{hyperparams['optimizer_class'].__name__}_lr{hyperparams['learning_rate']}_ed{hyperparams['embedding_dim']}_ld{hyperparams['latent_dim']}_do{hyperparams['decoder_dropout']}_drdo{hyperparams['decoder_recurrent_dropout']}_edo{hyperparams['encoder_dropout']}_erdo{hyperparams['encoder_recurrent_dropout']}_batch_size{hyperparams['batch_size']}_epochs{hyperparams['epochs']}",
            latent_dim=hyperparams["latent_dim"],
            embedding_dim=hyperparams["embedding_dim"],
            encoder_dropout=hyperparams["encoder_dropout"],
            encoder_recurrent_dropout=hyperparams["encoder_recurrent_dropout"],
            decoder_dropout=hyperparams["decoder_dropout"],
            decoder_recurrent_dropout=hyperparams["decoder_recurrent_dropout"],
        )

        # Plot the model architecture
        if TO_SAVE_MODEL_ARCHITECTURE:
            plot_model(
                model_instance.get_model(),
                to_file=f"{results_path}/media/architectures/{model_instance.name}_architecture.png",
                show_shapes=True,
            )

        print(f"Training {model_instance.name} with hyperparameters {hyperparams}")
        history = train_model(
            model_instance,
            hyperparams,
            x_training_padded,
            y_training_padded,
            x_validation_padded,
            y_validation_padded,
            results_path,
        )

        # Save training history (appended, so re-runs accumulate in the same file)
        history_path = os.path.join(
            results_path, f"histories/{model_instance.name}_history.txt"
        )
        with open(history_path, "a", encoding="utf-8") as f:
            f.write(f"Hyperparameters: {hyperparams}\n")
            f.write(f"History: {history}\n")
            # Write last epoch loss, val_loss, accuracy, val_accuracy
            f.write(
                f"Last epoch loss: {history['loss'][-1]}, val_loss: {history['val_loss'][-1]}, accuracy: {history['accuracy'][-1]}, val_accuracy: {history['val_accuracy'][-1]}\n"
            )
            f.write("\n")

        if TO_GENERATE_SUMMARIES:
            # Generate and save summaries
            print(f"Generating summaries for {model_instance.name}")
            summaries_path = os.path.join(results_path, "csv")
            df_summaries = generate_summaries(
                model_instance,
                x_validation_padded,
                y_validation_padded,
                max_text_len,
                n_summaries=1000,
                save_path=summaries_path,
            )

2025-05-22 16:38:12.173987: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-22 16:38:12.181795: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747924692.190996  435352 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747924692.193893  435352 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-22 16:38:12.204323: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Number of hyperparameter combinations: 4

Training: Seq2Seq3BiLSTM


[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
I0000 00:00:1747924703.286990  435352 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4232 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060, pci bus id: 0000:01:00.0, compute capability: 8.9


Training Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 89ms/step - accuracy: 0.5556 - loss: 3.1084 - val_accuracy: 0.6152 - val_loss: 2.3980 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 85ms/step - accuracy: 0.6165 - loss: 2.3494 - val_accuracy: 0.6274 - val_loss: 2.2279 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 86ms/step - accuracy: 0.6292 - loss: 2.1650 - val_accuracy: 0.6372 - val_loss: 2.1338 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 86ms/step - accuracy: 0.5523 - loss: 3.1228 - val_accuracy: 0.6105 - val_loss: 2.4514 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 82ms/step - accuracy: 0.6119 - loss: 2.4141 - val_accuracy: 0.6253 - val_loss: 2.2681 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 82ms/step - accuracy: 0.6272 - loss: 2.2138 - val_accuracy: 0.6337 - val_loss: 2.1736 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 72ms/step - accuracy: 0.5457 - loss: 3.2554 - val_accuracy: 0.6091 - val_loss: 2.4981 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 68ms/step - accuracy: 0.6090 - loss: 2.4685 - val_accuracy: 0.6207 - val_loss: 2.3145 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 68ms/step - accuracy: 0.6204 - loss: 2.2806 - val_accuracy: 0.6315 - val_loss: 2.1976 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 70ms/step - accuracy: 0.5410 - loss: 3.3020 - val_accuracy: 0.6009 - val_loss: 2.5398 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 66ms/step - accuracy: 0.6024 - loss: 2.5198 - val_accuracy: 0.6174 - val_loss: 2.3516 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 66ms/step - accuracy: 0.6183 - loss: 2.3232 - val_accuracy: 0.6292 - val_loss: 2.2373 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 44ms/step - accuracy: 0.5490 - loss: 3.1758 - val_accuracy: 0.6179 - val_loss: 2.3741 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 42ms/step - accuracy: 0.6228 - loss: 2.2881 - val_accuracy: 0.6409 - val_loss: 2.1042 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 42ms/step - accuracy: 0.6429 - loss: 2.0053 - val_accuracy: 0.6505 - val_loss: 1.9968 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [3

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 39ms/step - accuracy: 0.5409 - loss: 3.2399 - val_accuracy: 0.6081 - val_loss: 2.4961 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 38ms/step - accuracy: 0.6090 - loss: 2.4513 - val_accuracy: 0.6277 - val_loss: 2.2413 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.6303 - loss: 2.1734 - val_accuracy: 0.6404 - val_loss: 2.0990 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [3

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 33ms/step - accuracy: 0.5429 - loss: 3.2856 - val_accuracy: 0.6078 - val_loss: 2.4709 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 31ms/step - accuracy: 0.6129 - loss: 2.4027 - val_accuracy: 0.6320 - val_loss: 2.1930 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.6345 - loss: 2.1230 - val_accuracy: 0.6425 - val_loss: 2.0687 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [3

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 30ms/step - accuracy: 0.5347 - loss: 3.3877 - val_accuracy: 0.6022 - val_loss: 2.5296 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.6039 - loss: 2.4965 - val_accuracy: 0.6224 - val_loss: 2.2870 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - accuracy: 0.6249 - loss: 2.2375 - val_accuracy: 0.6344 - val_loss: 2.1526 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 50ms/step - accuracy: 0.5447 - loss: 3.2688 - val_accuracy: 0.6090 - val_loss: 2.4985 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 48ms/step - accuracy: 0.6103 - loss: 2.4615 - val_accuracy: 0.6203 - val_loss: 2.3392 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 47ms/step - accuracy: 0.6203 - loss: 2.3010 - val_accuracy: 0.6308 - val_loss: 2.1930 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 45ms/step - accuracy: 0.5381 - loss: 3.3298 - val_accuracy: 0.6036 - val_loss: 2.5516 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 44ms/step - accuracy: 0.6013 - loss: 2.5327 - val_accuracy: 0.6157 - val_loss: 2.3861 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 44ms/step - accuracy: 0.6162 - loss: 2.3495 - val_accuracy: 0.6275 - val_loss: 2.2377 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 45ms/step - accuracy: 0.5289 - loss: 3.5360 - val_accuracy: 0.6027 - val_loss: 2.5749 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 43ms/step - accuracy: 0.6010 - loss: 2.5585 - val_accuracy: 0.6156 - val_loss: 2.4178 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 42ms/step - accuracy: 0.6139 - loss: 2.3886 - val_accuracy: 0.6231 - val_loss: 2.2860 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 43ms/step - accuracy: 0.5236 - loss: 3.6012 - val_accuracy: 0.5985 - val_loss: 2.6092 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 42ms/step - accuracy: 0.5953 - loss: 2.5995 - val_accuracy: 0.6101 - val_loss: 2.4645 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 42ms/step - accuracy: 0.6084 - loss: 2.4408 - val_accuracy: 0.6185 - val_loss: 2.3286 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 57ms/step - accuracy: 0.5551 - loss: 3.1526 - val_accuracy: 0.6163 - val_loss: 2.4246 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 55ms/step - accuracy: 0.6190 - loss: 2.3334 - val_accuracy: 0.6327 - val_loss: 2.1877 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 53ms/step - accuracy: 0.6375 - loss: 2.0836 - val_accuracy: 0.6428 - val_loss: 2.0825 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m━

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 256, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 53ms/step - accuracy: 0.5479 - loss: 3.2514 - val_accuracy: 0.6099 - val_loss: 2.5443 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 50ms/step - accuracy: 0.6109 - loss: 2.4236 - val_accuracy: 0.6234 - val_loss: 2.3247 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 50ms/step - accuracy: 0.6281 - loss: 2.1940 - val_accuracy: 0.6319 - val_loss: 2.2052 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m━

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 512, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 55ms/step - accuracy: 0.5397 - loss: 3.3685 - val_accuracy: 0.6049 - val_loss: 2.6345 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 52ms/step - accuracy: 0.6071 - loss: 2.4578 - val_accuracy: 0.6175 - val_loss: 2.4317 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 52ms/step - accuracy: 0.6210 - loss: 2.2753 - val_accuracy: 0.6268 - val_loss: 2.3052 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m━

[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Training Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50 with hyperparameters {'latent_dim': 128, 'embedding_dim': 256, 'encoder_dropout': 0.2, 'encoder_recurrent_dropout': 0.2, 'decoder_dropout': 0.2, 'decoder_recurrent_dropout': 0.2, 'optimizer_class': <class 'keras.src.optimizers.adam.Adam'>, 'learning_rate': 0.001, 'epochs': 50, 'batch_size': 128}
Epoch 1/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 54ms/step - accuracy: 0.5329 - loss: 3.4557 - val_accuracy: 0.6040 - val_loss: 2.6678 - learning_rate: 0.0010
Epoch 2/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 51ms/step - accuracy: 0.6048 - loss: 2.5045 - val_accuracy: 0.6109 - val_loss: 2.5323 - learning_rate: 0.0010
Epoch 3/50
[1m323/323[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 51ms/step - accuracy: 0.6174 - loss: 2.3224 - val_accuracy: 0.6209 - val_loss: 2.4055 - learning_rate: 0.0010
Epoch 4/50
[1m323/323[0m [32m━

## Evaluation

### Compute Evaluation

In [1]:
# Feature flags for the evaluation cell.
# When True, every discovered summaries CSV is scored with the evaluation metrics.
TO_EVALUATE_SUMMARIES = True
# Gates the plotting step that follows the evaluation of each file.
TO_SAVE_PLOTS = True
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
from architectures.Seq2SeqBiLSTMImproved import Seq2SeqBiLSTMImproved
# Architectures whose generated summaries are evaluated; each class name
# selects the results/<name>/csv directory that is scanned for CSV files.
# Commented-out entries are skipped.
model_classes = [
    Seq2SeqGRU,
    Seq2SeqLSTM,
    # Seq2SeqLSTMGlove,
    Seq2SeqBiLSTM,
    Seq2Seq3BiLSTM,
    # Seq2SeqLSTMTransformer,
    # Seq2SeqBiLSTMImproved,
]

import os
from keras import backend as K
from keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
import pandas as pd

from utils import (
    evaluate_rouge,
    evaluate_wer,
    evaluate_cosine_similarity,
    evaluate_myevalutation,
    evaluate_bert_score,
    plot_rouge,
    plot_wer,
    plot_cosine_similarity,
    plot_myevaluation,
    plot_bert_score,
)
import glob


# Map each model class to the sorted file names of its summaries CSVs that
# still have to be evaluated.
model_instances = {}

for model in model_classes:
    model_name = str(model.__name__)
    csv_dir = os.path.join("results", model_name, "csv")

    # Every CSV file stored for this model
    csv_files = glob.glob(os.path.join(csv_dir, "*.csv"))

    # Keep the summaries files only, leaving out the "*evaluated*" files
    # written by an earlier evaluation pass.
    summaries_files = []
    for csv_path in csv_files:
        lowered_name = os.path.basename(csv_path).lower()
        if "summaries" in lowered_name and "evaluated" not in lowered_name:
            summaries_files.append(csv_path)

    # Bare file names, de-duplicated
    file_names = list({os.path.basename(csv_path) for csv_path in summaries_files})

    # Stored in alphabetical order
    model_instances[model] = sorted(file_names)

# Report what was found for each model
for model, instances in model_instances.items():
    print(f"Model: {model.__name__}")
    print(f"CSV files found ({len(instances)}): {instances}\n")


def save_metrics_results(df_summaries, model_name, results_path):
    """Save the metrics table to ``<results_path>/csv/<model_name>_metrics_scores.csv``.

    Args:
        df_summaries: Object exposing ``to_csv`` (a pandas DataFrame).
        model_name: Name used as the prefix of the CSV file name.
        results_path: Root results directory; its ``csv`` sub-directory is
            created when it does not exist yet.
    """
    metrics_file_path = f"{results_path}/csv/{model_name}_metrics_scores.csv"
    # to_csv does not create missing parent directories, so create them first.
    os.makedirs(os.path.dirname(metrics_file_path), exist_ok=True)
    df_summaries.to_csv(metrics_file_path, index=False)
    print(f"Metrics results saved to {metrics_file_path}")


if TO_EVALUATE_SUMMARIES:
    # ROUGE variants to plot, as (metric name, title label) pairs.
    rouge_plots = (("rouge1", "ROUGE-1"), ("rouge2", "ROUGE-2"), ("rougeL", "ROUGE-L"))

    # Iterate through all models and their instances
    for model, instances in model_instances.items():
        print("=" * 50)
        print(f"Evaluating summaries for {model.__name__}")
        for csv_file in instances:
            print(f"Evaluating file: {csv_file}")

            # Load original csv
            original_path = os.path.join("results", model.__name__, "csv", csv_file)
            df_summaries = pd.read_csv(original_path)

            # Evaluate summaries. Each helper returns a pair: the first element
            # replaces df_summaries, the second is kept for the history log.
            print("Evaluating rouge")
            df_summaries, mean_scores_rouge = evaluate_rouge(df_summaries)
            # WER evaluation is currently disabled.
            # print("Evaluating wer")
            # df_summaries, mean_score_wer = evaluate_wer(df_summaries)
            print("Evaluating cosine similarity")
            df_summaries, mean_score_cosine_similarity = evaluate_cosine_similarity(
                df_summaries
            )
            print("Evaluating BERT score")
            df_summaries, mean_score_bert_score = evaluate_bert_score(df_summaries)
            print("Evaluating my evaluation")
            df_summaries, mean_score_myevaluation = evaluate_myevalutation(df_summaries)

            print("Finished evaluation")

            # Create new file name
            base_name = os.path.splitext(csv_file)[0]
            evaluated_filename = f"{base_name}_evaluated.csv"
            evaluated_path = os.path.join(
                "results", model.__name__, "csv", evaluated_filename
            )

            # Save evaluated file
            df_summaries.to_csv(evaluated_path, index=False)
            print(f"Evaluated file: {evaluated_path}")

            results_path = f"results/{model.__name__}"

            # Plotting
            if TO_SAVE_PLOTS:
                graph_dir = os.path.join(results_path, "media/graphs", base_name)
                os.makedirs(graph_dir, exist_ok=True)

                for rouge_metric, rouge_label in rouge_plots:
                    plot_rouge(
                        df_summaries,
                        graph_dir,
                        base_name,
                        metric=rouge_metric,
                        title=f"{rouge_label} - {base_name}",
                        color="blue",
                    )

                # WER plotting is currently disabled.
                # plot_wer(
                #     df_summaries,
                #     graph_dir,
                #     base_name,
                #     title=f"WER - {base_name}",
                #     color="red",
                # )

                plot_cosine_similarity(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"Cosine Similarity - {base_name}",
                    color="green",
                )

                plot_myevaluation(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"My Evaluation - {base_name}",
                    color="purple",
                )

                plot_bert_score(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"BERT Score - {base_name}",
                    color="orange",
                )

            # Update history file
            history_path = os.path.join(results_path, f"histories/{base_name}_history")
            # Append mode creates a missing file but not a missing directory.
            os.makedirs(os.path.dirname(history_path), exist_ok=True)
            with open(history_path, "a") as f:
                f.write(f"\nEvaluation for {csv_file}:\n")
                f.write(f"Mean ROUGE scores: {mean_scores_rouge}\n")
                # f.write(f"Mean WER score: {mean_score_wer}\n")
                f.write(f"Mean Cosine Similarity: {mean_score_cosine_similarity}\n")
                f.write(f"Mean My Evaluation: {mean_score_myevaluation}\n")
                # The BERT score mean was computed above but never logged.
                f.write(f"Mean BERT Score: {mean_score_bert_score}\n")

2025-05-26 21:58:14.237675: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-26 21:58:14.246153: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748289494.255911    5705 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748289494.258821    5705 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-26 21:58:14.269830: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Model: Seq2SeqGRU
CSV files found (4): ['Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv']

Model: Seq2SeqLSTM
CSV files found (4): ['Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv']

M

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/15 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.72 seconds, 1394.36 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/17 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.73 seconds, 1360.83 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.71 seconds, 1402.38 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.70 seconds, 1430.47 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating summaries for Seq2SeqLSTM
Evaluating file: Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.69 seconds, 1442.55 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqLSTM/csv/Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/17 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.74 seconds, 1346.44 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqLSTM/csv/Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.73 seconds, 1367.72 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqLSTM/csv/Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.74 seconds, 1348.00 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqLSTM/csv/Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating summaries for Seq2SeqBiLSTM
Evaluating file: Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/18 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.80 seconds, 1248.87 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqBiLSTM/csv/Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/17 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.74 seconds, 1348.79 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqBiLSTM/csv/Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/18 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.79 seconds, 1262.71 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqBiLSTM/csv/Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/18 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.78 seconds, 1274.23 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2SeqBiLSTM/csv/Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating summaries for Seq2Seq3BiLSTM
Evaluating file: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/15 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.66 seconds, 1514.01 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2Seq3BiLSTM/csv/Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/15 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.68 seconds, 1471.51 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2Seq3BiLSTM/csv/Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.69 seconds, 1444.99 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2Seq3BiLSTM/csv/Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv
Evaluating file: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries.csv
Evaluating rouge
Evaluating cosine similarity
Evaluating BERT score


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/16 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/16 [00:00<?, ?it/s]

done in 0.70 seconds, 1427.77 sentences/sec
Evaluating my evaluation


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Finished evaluation
Evaluated file: results/Seq2Seq3BiLSTM/csv/Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv


### Evaluations report

In [3]:
# Switch for this cell: when True, the per-metric text reports and the
# Markdown table report are generated at the end of the cell.
TO_GENERATE_REPORT_SUMMARIES = True
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
from architectures.Seq2SeqBiLSTMImproved import Seq2SeqBiLSTMImproved

# Architectures included in the report; each one's results/<name>/csv directory
# is scanned below for "summaries_evaluated" CSV files.
model_classes = [
    Seq2SeqGRU,
    Seq2SeqLSTM,
    Seq2SeqLSTMGlove,
    Seq2SeqBiLSTM,
    Seq2Seq3BiLSTM,
    # Seq2SeqLSTMTransformer,
    # Seq2SeqBiLSTMImproved,
]
import os
from keras import backend as K
from keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
import pandas as pd

from utils import (
    evaluate_rouge,
    evaluate_wer,
    evaluate_cosine_similarity,
    evaluate_myevalutation,
    plot_rouge,
    plot_wer,
    plot_cosine_similarity,
    plot_myevaluation,
)
import ast
import numpy as np
import glob


def generate_metric_reports(all_metrics, results_root="results"):
    """Write one ranked plain-text report per metric.

    For every configured metric a file ``evaluation_<metric>.txt`` is written
    to ``<results_root>/evaluations_metrics``. It lists all entries of
    ``all_metrics`` ranked by that metric, with the remaining metric values
    shown on the same row.

    Args:
        all_metrics: List of dicts holding the ``mean_*`` metric values plus
            the ``model`` and ``instance`` labels.
        results_root: Directory under which the report folder is created.
    """
    # Main directory for the reports
    metrics_dir = os.path.join(results_root, "evaluations_metrics")
    os.makedirs(metrics_dir, exist_ok=True)

    # (metric key, display name, sort ascending?) for every report to produce
    metric_specs = [
        ("mean_rouge1", "ROUGE-1", False),
        ("mean_rouge2", "ROUGE-2", False),
        ("mean_rougeL", "ROUGE-L", False),
        # ("mean_wer", "WER", True),
        ("mean_cosine", "Cosine Similarity", False),
        ("mean_myevaluation", "Custom Evaluation", False),
        ("mean_BERTScore", "BERT Score", False),
    ]

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    for metric, label, ascending in metric_specs:
        # Rank the entries by the current metric
        ranked = sorted(all_metrics, key=lambda x: x[metric], reverse=not ascending)
        direction = "Ascending" if ascending else "Descending"

        # Report header; the leading empty entry yields the initial blank line
        lines = [
            "",
            heavy_rule,
            f"{label} Metric Report - Sorted by {label} ({direction})",
            heavy_rule,
            f"Generated: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"Total Models/Instances: {len(ranked)}",
            "",
            heavy_rule,
            f"{'Rank':<5} | {'Model':<20} | {'Instance':<40} | {label:<10} | Other Metrics",
            light_rule,
        ]

        for position, entry in enumerate(ranked, start=1):
            # Every metric except the ranked one, labelled by the part of the
            # key after "mean_"
            extras = []
            for key, value in entry.items():
                if key not in ("model", "instance", metric):
                    extras.append(f"{key.split('_')[1].title()}: {value:.4f}")
            other_metrics = ", ".join(extras)

            lines.append(
                f"{position:<5} | {entry['model']:<20} | {entry['instance']:<40} | "
                f"{entry[metric]:<10.4f} | {other_metrics}"
            )

        report_content = "\n".join(lines) + "\n"

        # Save the file
        report_path = os.path.join(metrics_dir, f"evaluation_{metric}.txt")
        with open(report_path, "w") as f:
            f.write(report_content)

        print(f"Generated {label} report: {report_path}")


def generate_metric_reports_table(
    all_metrics, output_path="results/evaluations_metrics/table_report.md"
):
    """
    Write and print a Markdown table with the mean of every metric for each model instance.

    - Rows are sorted alphabetically (case-insensitively) by model name, then
      by instance name.
    - Columns, in order: mean_cosine, mean_myevaluation, mean_BERTScore,
      mean_rouge1, mean_rouge2, mean_rougeL.
    - Every cell shows the metric mean formatted to 4 decimals.
    - For each metric, the cell(s) holding the maximum value are in bold.
    - The table is saved to output_path; missing parent directories are created.
    - An empty all_metrics produces a table with only the header rows.

    Args:
        all_metrics: List of dicts with the metric keys above plus the
            "model" and "instance" labels.
        output_path: Destination file of the Markdown table.
    """
    # Order of the metrics to display
    metric_keys = [
        "mean_cosine",
        "mean_myevaluation",
        "mean_BERTScore",
        # "mean_wer",
        "mean_rouge1",
        "mean_rouge2",
        "mean_rougeL",
    ]

    # Sort the rows alphabetically by "model" and "instance"
    sorted_rows = sorted(
        all_metrics, key=lambda x: (x["model"].lower(), x["instance"].lower())
    )

    # Maximum value per metric; default=None keeps an empty input from raising
    max_per_metric = {
        key: max((row[key] for row in sorted_rows), default=None)
        for key in metric_keys
    }

    # Markdown table header
    header = "| Model - Instance | " + " | ".join(metric_keys) + " |"
    separator = "|---|" + "|".join(["---"] * len(metric_keys)) + "|"

    table_lines = [header, separator]

    # Add one table line per row
    for row in sorted_rows:
        row_label = f"{row['model']} - {row['instance']}"
        cell_values = []
        for key in metric_keys:
            value = row[key]
            # Format the value to 4 decimals
            formatted_value = f"{value:.4f}"
            # Bold the value when it is the maximum for that metric
            if value == max_per_metric[key]:
                formatted_value = f"**{formatted_value}**"
            cell_values.append(formatted_value)
        table_lines.append(f"| {row_label} | " + " | ".join(cell_values) + " |")

    # Join the table lines
    table_md = "\n".join(table_lines)

    # Save the table; a bare file name has no directory part to create
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w") as f:
        f.write(table_md)

    print(f"Generated Markdown table report: {output_path}")
    print(table_md)


# Maps each model class to the sorted file names of its evaluated summaries CSVs.
model_instances = {}

for model in model_classes:
    model_name = model.__name__
    csv_dir = os.path.join("results", model_name, "csv")

    # Every CSV stored for this model
    csv_files = glob.glob(os.path.join(csv_dir, "*.csv"))

    # Keep only the files produced by the evaluation step
    summaries_files = [
        path
        for path in csv_files
        if "summaries_evaluated" in os.path.basename(path).lower()
    ]

    # A set drops duplicate base names; sorting orders them by name
    file_names = sorted({os.path.basename(path) for path in summaries_files})

    model_instances[model] = file_names

# Print model instances
for model, instances in model_instances.items():
    print(f"Model: {model.__name__}")
    print(f"CSV files found ({len(instances)}): {instances}\n")


if TO_GENERATE_REPORT_SUMMARIES:
    import re

    # Matches entries such as
    #   rouge1': Score(precision=0.5, recall=0.4, fmeasure=0.44)
    # in the text of the "rouge_scores" column; the key suffix may be digits or "L".
    # Compiled once here instead of once per CSV file.
    rouge_pattern = re.compile(
        r"rouge([\dL]+)': Score\(precision=([\d\.]+), recall=([\d\.]+), fmeasure=([\d\.]+)\)"
    )

    def parse_rouge(row):
        """Return the rouge1/rouge2/rougeL F-measures parsed from ``row["rouge_scores"]``.

        Variants that are not found default to 0.0. If the cell is missing,
        is not text, or holds an unparsable number, all three scores are
        reported as 0 so that one bad row does not abort the whole report.
        """
        scores = {"rouge1": 0.0, "rouge2": 0.0, "rougeL": 0.0}
        try:
            for variant, _precision, _recall, fmeasure in rouge_pattern.findall(
                row["rouge_scores"]
            ):
                # variant is "1", "2" or "L"
                scores[f"rouge{variant}"] = float(fmeasure)
        except (KeyError, TypeError, ValueError):
            return {"rouge1": 0, "rouge2": 0, "rougeL": 0}
        return scores

    all_metrics = []

    for model, instances in model_instances.items():
        print("=" * 50)
        print(f"Processing reports for {model.__name__}")

        for csv_file in instances:
            # Build the full path
            csv_path = os.path.join("results", model.__name__, "csv", csv_file)

            # Load the CSV
            df = pd.read_csv(csv_path)

            # Extract the ROUGE F-measures of every row
            rouge_values = df.apply(parse_rouge, axis=1)

            # Compute the means
            mean_rouge1 = np.mean([v["rouge1"] for v in rouge_values])
            mean_rouge2 = np.mean([v["rouge2"] for v in rouge_values])
            mean_rougeL = np.mean([v["rougeL"] for v in rouge_values])
            # mean_wer = df["wer_scores"].mean()
            mean_cosine = df["cosine_similarity"].mean()
            mean_myevaluation = df["myevaluation_scores"].mean()
            mean_BERTScore = df["bert_score"].mean()

            # Build the metrics dictionary
            metrics_dict = {
                "mean_rouge1": mean_rouge1,
                "mean_rouge2": mean_rouge2,
                "mean_rougeL": mean_rougeL,
                # "mean_wer": mean_wer,
                "mean_cosine": mean_cosine,
                "mean_myevaluation": mean_myevaluation,
                "mean_BERTScore": mean_BERTScore,
                "model": model.__name__,
                "instance": os.path.splitext(csv_file)[0].replace("_evaluated", ""),
            }

            all_metrics.append(metrics_dict)
            print(f"Processed: {csv_file}")

    # Generate the aggregated reports, one per metric plus the summary table.
    # With nothing to report, skip them rather than fail on empty input.
    if all_metrics:
        generate_metric_reports(all_metrics)
        generate_metric_reports_table(all_metrics)
        print("All reports generated.")
    else:
        print("No evaluated summaries found; no reports generated.")

Model: Seq2SeqGRU
CSV files found (4): ['Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv']

Model: Seq2SeqLSTM
CSV files found (4): ['Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld128_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_evaluated.csv', 'Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_

In [None]:
import os
import glob

# Directory that holds the generated report images
base_dir = os.path.join("report", "media")

# Model-name prefixes whose files should be processed
prefixes = [
    "Seq2Seq3BiLSTM",
    "Seq2SeqBiLSTM",
    "Seq2SeqLSTM",
    "Seq2SeqGRU",
]

# Mapping from the trailing part of a file name to its target suffix
suffix_map = {
    "architecture.png": "best_architecture.png",
    "lossplot.png": "best_lossplot.png",
    "bert_scores.png": "best_bert_scores.png",
    "cosine_similarity_scores.png": "best_cosine_similarity_scores.png",
    "myevaluation_scores.png": "best_myevaluation_scores.png",
    "rouge1_scores.png": "best_rouge1_scores.png",
    "rouge2_scores.png": "best_rouge2_scores.png",
    "rougeL_scores.png": "best_rougeL_scores.png",
    # "wer_scores.png": "best_wer_scores.png",
}


def rename_best_artifacts(base_dir, prefixes, suffix_map):
    """Rename ``<prefix>_optimizerAdam_*<suffix>`` files to ``<prefix>_<new_suffix>``.

    Args:
        base_dir: Directory that is searched (non-recursively) and in which
            the renamed files are placed.
        prefixes: Iterable of file-name prefixes to process.
        suffix_map: Mapping ``{trailing part of the file name: new suffix}``.
            The first entry whose key the file name ends with is used.

    Returns:
        A list of ``(old_name, new_name)`` tuples, one per file actually
        renamed, in processing order.

    Files that match the glob pattern but none of the suffixes are left
    untouched. When the target name already exists the file is skipped
    instead of being renamed: ``os.rename`` would otherwise silently replace
    the existing target on POSIX (losing a file) or raise on Windows.
    """
    renamed = []
    for prefix in prefixes:
        # Escape the literal parts so glob metacharacters in the directory or
        # prefix are not interpreted as wildcards.
        pattern = os.path.join(
            glob.escape(base_dir), f"{glob.escape(prefix)}_optimizerAdam_*"
        )
        # glob order is arbitrary; sort so that, if several files compete for
        # the same target name, the winner is deterministic.
        for filepath in sorted(glob.glob(pattern)):
            filename = os.path.basename(filepath)
            for suffix, new_suffix in suffix_map.items():
                if not filename.endswith(suffix):
                    continue
                new_name = f"{prefix}_{new_suffix}"
                new_path = os.path.join(base_dir, new_name)
                if os.path.exists(new_path):
                    # Never clobber a file produced by an earlier rename.
                    print(f"Salto: {filename} → {new_name} (destinazione già esistente)")
                else:
                    print(f"Rinomino: {filename} → {new_name}")
                    os.rename(filepath, new_path)
                    renamed.append((filename, new_name))
                # Only the first matching suffix applies to a file.
                break
    return renamed


# Assign the result so a notebook does not echo the list as cell output.
renamed_files = rename_best_artifacts(base_dir, prefixes, suffix_map)


Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_wer_scores.png → Seq2Seq3BiLSTM_best_wer_scores.png
Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_rougeL_scores.png → Seq2Seq3BiLSTM_best_rougeL_scores.png
Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_rouge2_scores.png → Seq2Seq3BiLSTM_best_rouge2_scores.png
Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_architecture.png → Seq2Seq3BiLSTM_best_architecture.png
Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_bert_scores.png → Seq2Seq3BiLSTM_best_bert_scores.png
Rinomino: Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed256_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_batch_size128_epochs50_summaries_myeval