## Script assurdo

Il modello è un Seq2Seq con meccanismo di attenzione: encoder e decoder sono LSTM e l'attenzione è realizzata da un layer personalizzato (`AttentionLayer`).

In [None]:
import os
from keras import backend as K
from keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
import pandas as pd

from utils import (
    evaluate_rouge,
    evaluate_wer,
    evaluate_cosine_similarity,
    evaluate_myevalutation,
    plot_rouge,
    plot_wer,
    plot_cosine_similarity,
    generate_summaries,
    create_hyperparameter_grid,
    prepare_data,
    plot_myevaluation,
)


def save_metrics_results(df_summaries, model_name, results_path):
    """Write the scored summaries of a model to a CSV file.

    The file is stored as
    ``<results_path>/csv/<model_name>_metrics_scores.csv``; the ``csv``
    directory is created when it does not exist yet.

    Args:
        df_summaries: DataFrame-like object exposing ``to_csv``.
        model_name: Name used as prefix of the output file.
        results_path: Root results directory of the model.
    """
    metrics_file_path = f"{results_path}/csv/{model_name}_metrics_scores.csv"
    # ``to_csv`` does not create missing parent directories on its own.
    os.makedirs(os.path.dirname(metrics_file_path), exist_ok=True)
    df_summaries.to_csv(metrics_file_path, index=False)
    print(f"Metrics results saved to {metrics_file_path}")


def plot_training_history(history, model_name, save_path):
    """Plot the loss curves of a training run and save them as a PNG file.

    The image is written to ``<save_path>/<model_name>_lossplot.png``; the
    directory is created when missing.

    Args:
        history: Mapping with a ``"loss"`` sequence and, optionally, a
            ``"val_loss"`` sequence.
        model_name: Name used in the plot title and in the file name.
        save_path: Directory that receives the PNG file.
    """
    # Draw on a dedicated figure so content left on a previously open figure
    # can never end up in this plot.
    fig = plt.figure()
    try:
        plt.plot(history["loss"], label="train")
        # A run without validation data has no "val_loss" entry.
        if "val_loss" in history:
            plt.plot(history["val_loss"], label="test")
        plt.legend()
        plt.title(f"Model Loss Over Epochs - {model_name}")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.grid(True)

        # Save the plot to a file
        os.makedirs(save_path, exist_ok=True)
        plt.savefig(
            os.path.join(save_path, f"{model_name}_lossplot.png"),
            dpi=300,
            bbox_inches="tight",
        )
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)


def save_model(model, model_name, save_path, save_full_model=True):
    """Export ``model`` into ``save_path``.

    The target directory is always created. When ``save_full_model`` is true
    the model is written to ``<save_path>/<model_name>_full_model.h5`` through
    ``model.save``; no separate weights file is produced.

    Args:
        model: Object exposing a ``save(path)`` method.
        model_name: Prefix of the exported file name.
        save_path: Directory that receives the export.
        save_full_model: Whether to write the full model file.
    """
    os.makedirs(save_path, exist_ok=True)

    if not save_full_model:
        return

    full_model_file = os.path.join(save_path, f"{model_name}_full_model.h5")
    model.save(full_model_file)


def train_model(
    model_instance,
    hyperparams,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    save_path,
    save_trained_model=False,
):
    """Fit the model wrapped by ``model_instance`` and plot its loss history.

    The model is fed two inputs: the source sequences and the target
    sequences without their last token. The label is the target sequence
    without its first token, with a trailing axis of size 1 added.

    Args:
        model_instance: Wrapper exposing ``change_optimizer``,
            ``add_callbacks``, ``get_callbacks``, ``get_model`` and ``name``.
        hyperparams: Mapping providing ``optimizer_class``,
            ``learning_rate``, ``epochs`` and ``batch_size``.
        x_training_padded: Source sequences used for training.
        y_training_padded: 2-D array of target sequences used for training.
        x_validation_padded: Source sequences used for validation.
        y_validation_padded: 2-D array of target sequences used for
            validation.
        save_path: Root output directory; the loss plot goes to
            ``media/graphs`` and the optional model export to ``weights``
            below it.
        save_trained_model: When true, the fitted model is also written to
            disk via ``save_model``. Defaults to ``False``, which matches the
            previously hard-coded behaviour.

    Returns:
        The ``history.history`` mapping produced by ``model.fit``.
    """
    K.clear_session()

    # Only the optimizer/fit settings are read here; dimensions and dropout
    # rates are not used by the training step itself.
    optimizer_class = hyperparams["optimizer_class"]
    epochs = hyperparams["epochs"]
    batch_size = hyperparams["batch_size"]
    learning_rate = hyperparams["learning_rate"]

    # Create optimizer and hand it to the model wrapper
    optimizer = optimizer_class(learning_rate=learning_rate)
    model_instance.change_optimizer(optimizer)

    # Stop when the validation loss has not improved for 3 epochs and roll
    # back to the best weights seen so far.
    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        verbose=1,
        patience=3,
        restore_best_weights=True,
    )

    def lr_schedule(epoch, lr):
        """Multiply the learning rate by 0.95 every ``decay_step`` epochs."""
        decay_rate = 0.95
        decay_step = 1
        # Epoch 0 keeps the initial learning rate.
        if epoch % decay_step == 0 and epoch != 0:
            return lr * decay_rate
        return lr

    learning_rate_scheduler = LearningRateScheduler(lr_schedule, verbose=1)

    # Halve the learning rate after 2 epochs without validation improvement
    reduce_lr_on_plateau = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=2,
        verbose=1,
        min_lr=1e-6,
    )

    # Add callbacks to the model instance
    model_instance.add_callbacks(
        [early_stopping, learning_rate_scheduler, reduce_lr_on_plateau]
    )

    model = model_instance.get_model()

    # Labels: targets shifted left by one step, with a trailing unit axis.
    training_labels = y_training_padded.reshape(
        y_training_padded.shape[0], y_training_padded.shape[1], 1
    )[:, 1:]
    validation_labels = y_validation_padded.reshape(
        y_validation_padded.shape[0], y_validation_padded.shape[1], 1
    )[:, 1:]

    # Train model
    history = model.fit(
        [x_training_padded, y_training_padded[:, :-1]],
        training_labels,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(
            [x_validation_padded, y_validation_padded[:, :-1]],
            validation_labels,
        ),
        callbacks=model_instance.get_callbacks(),
    )

    # Save results
    model_name = model_instance.name
    if save_trained_model:
        save_model(model, model_name, os.path.join(save_path, "weights"))

    # Plot training history
    plot_training_history(
        history.history, model_name, os.path.join(save_path, "media/graphs")
    )

    return history.history


# Hyperparameter combinations to train with; each entry is indexed by
# hyperparameter name further below.
hyperparameter_grid = create_hyperparameter_grid()

# Architectures to train; commented-out entries are currently disabled.
model_classes = [
    # Seq2SeqGRU,
    # Seq2SeqLSTM,
    Seq2SeqLSTMGlove,
    Seq2SeqBiLSTM,
    Seq2Seq3BiLSTM,
    # Seq2SeqLSTMTransformer,
]

# Training loop
# Trains every architecture in ``model_classes`` once per hyperparameter
# combination, stores the loss history and (optionally) generates, scores and
# plots summaries for each run.

# Switches for the optional steps of a run.
TO_SAVE_MODEL_ARCHITECTURE = False
TO_GENERATE_SUMMARIES = True
TO_EVALUATE_SUMMARIES = False
TO_SAVE_METRICS_RESULTS = True
TO_SAVE_PLOTS = True

results_path = "results/"
os.makedirs(results_path, exist_ok=True)

# prepare_data() takes no arguments, so its result depends neither on the
# model nor on the hyperparameters. Compute it once instead of once per run;
# this also guarantees every run sees exactly the same data.
(
    x_voc,
    y_voc,
    x_tokenizer,
    y_tokenizer,
    x_training_padded,
    y_training_padded,
    x_validation_padded,
    y_validation_padded,
    max_text_len,
    max_summary_len,
) = prepare_data()

for model_class in model_classes:
    print("\n" + "=" * 50)
    print(f"Training: {model_class.__name__}")

    results_path = f"results/{model_class.__name__}"
    os.makedirs(results_path, exist_ok=True)

    # Create the subdirectories
    os.makedirs(f"{results_path}/weights", exist_ok=True)
    os.makedirs(f"{results_path}/media/graphs", exist_ok=True)
    os.makedirs(f"{results_path}/media/architectures", exist_ok=True)
    os.makedirs(f"{results_path}/csv", exist_ok=True)
    os.makedirs(f"{results_path}/histories", exist_ok=True)

    graphs_dir = f"{results_path}/media/graphs"

    for hyperparams in hyperparameter_grid:
        # Suffix that encodes the hyperparameters in the run name
        name_additional_info = (
            f"_optimizer{hyperparams['optimizer_class'].__name__}"
            f"_lr{hyperparams['learning_rate']}"
            f"_ed{hyperparams['embedding_dim']}"
            f"_ld{hyperparams['latent_dim']}"
            f"_do{hyperparams['decoder_dropout']}"
            f"_drdo{hyperparams['decoder_recurrent_dropout']}"
            f"_edo{hyperparams['encoder_dropout']}"
            f"_erdo{hyperparams['encoder_recurrent_dropout']}"
        )

        # Create the model instance
        model_instance = model_class(
            x_voc=x_voc,
            y_voc=y_voc,
            max_text_len=max_text_len,
            max_summary_len=max_summary_len,
            x_tokenizer=x_tokenizer,
            y_tokenizer=y_tokenizer,
            name_additional_info=name_additional_info,
            latent_dim=hyperparams["latent_dim"],
            embedding_dim=hyperparams["embedding_dim"],
            encoder_dropout=hyperparams["encoder_dropout"],
            encoder_recurrent_dropout=hyperparams["encoder_recurrent_dropout"],
            decoder_dropout=hyperparams["decoder_dropout"],
            decoder_recurrent_dropout=hyperparams["decoder_recurrent_dropout"],
        )

        # Plot the model architecture
        if TO_SAVE_MODEL_ARCHITECTURE:
            plot_model(
                model_instance.get_model(),
                to_file=f"{results_path}/media/architectures/{model_instance.name}_architecture.png",
                show_shapes=True,
            )

        print(f"Training {model_instance.name} with hyperparameters {hyperparams}")
        history = train_model(
            model_instance,
            hyperparams,
            x_training_padded,
            y_training_padded,
            x_validation_padded,
            y_validation_padded,
            results_path,
        )

        # Save training history (appended, so earlier runs are kept)
        history_path = os.path.join(
            results_path, f"histories/{model_instance.name}_history.txt"
        )
        with open(history_path, "a") as f:
            f.write(f"Hyperparameters: {hyperparams}\n")
            f.write(f"History: {history}\n\n")
            # Write the loss and val_loss of the last epoch
            f.write(
                f"Last epoch loss: {history['loss'][-1]}, val_loss: {history['val_loss'][-1]}\n"
            )

        if not TO_GENERATE_SUMMARIES:
            continue

        # Generate and save summaries
        # NOTE(review): summaries are generated from the training split, not
        # the validation split -- confirm this is intended.
        print(f"Generating summaries for {model_instance.name}")
        summaries_path = os.path.join(results_path, "csv")
        df_summaries = generate_summaries(
            model_instance,
            x_training_padded,
            y_training_padded,
            max_text_len,
            n_summaries=1000,
            save_path=summaries_path,
        )

        if not TO_EVALUATE_SUMMARIES:
            continue

        # Evaluate summaries; each call returns the updated frame and a mean
        df_summaries, mean_scores_rouge = evaluate_rouge(df_summaries)
        df_summaries, mean_score_wer = evaluate_wer(df_summaries)
        df_summaries, mean_score_cosine_similarity = evaluate_cosine_similarity(
            df_summaries
        )
        df_summaries, mean_score_myevaluation = evaluate_myevalutation(df_summaries)

        # Save evaluation results
        if TO_SAVE_METRICS_RESULTS:
            save_metrics_results(df_summaries, model_instance.name, results_path)

        # Append mean scores to the history file
        with open(history_path, "a") as f:
            f.write(f"Mean ROUGE scores: {mean_scores_rouge}\n")
            f.write(f"Mean WER score: {mean_score_wer}\n")
            f.write(f"Mean Cosine Similarity score: {mean_score_cosine_similarity}\n\n")
            f.write(f"Mean My Evaluation score: {mean_score_myevaluation}\n\n")

        # Plot evaluation results
        if TO_SAVE_PLOTS:
            rouge_variants = (
                ("rouge1", "ROUGE-1"),
                ("rouge2", "ROUGE-2"),
                ("rougeL", "ROUGE-L"),
            )
            for rouge_metric, rouge_label in rouge_variants:
                plot_rouge(
                    df_summaries,
                    graphs_dir,
                    model_instance.name,
                    metric=rouge_metric,
                    title=f"{rouge_label} Score Distribution - {model_instance.name}",
                    color="blue",
                )
            plot_wer(
                df_summaries,
                graphs_dir,
                model_instance.name,
                title=f"WER Score Distribution - {model_instance.name}",
                color="red",
            )
            plot_cosine_similarity(
                df_summaries,
                graphs_dir,
                model_instance.name,
                title=f"Cosine Similarity Distribution - {model_instance.name}",
                color="green",
            )
            plot_myevaluation(
                df_summaries,
                graphs_dir,
                model_instance.name,
                title=f"My Evaluation Distribution - {model_instance.name}",
                color="purple",
            )

Number of hyperparameter combinations: 1

Training: Seq2SeqLSTMGlove


[nltk_data] Downloading package stopwords to /home/enrico/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/enrico/Desktop/text-summarizer/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_680360/2381037303.py", line 206, in <module>
    ) = prepare_data()
        ^^^^^^^^^^^^^^
  File "/home/enrico/Desktop/text-summarizer/mega-training/utils.py", line 601, in prepare_data
  File "/home/enrico/Desktop/text-summarizer/mega-training/utils.py", line 580, in clean_text
    for word in cleaned_text.split()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/re/__init__.py", line 186, in sub
    return _compile(pattern, flags).sub(repl, string, count)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/enrico/Desktop/text-summarizer/.venv/lib/python3.12/site-packages/IPython

## Evaluation

In [None]:
import os
from keras import backend as K
from keras.utils import plot_model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from matplotlib import pyplot as plt
from architectures.Seq2SeqGRU import Seq2SeqGRU
from architectures.Seq2SeqLSTM import Seq2SeqLSTM
from architectures.Seq2SeqLSTMGlove import Seq2SeqLSTMGlove
from architectures.Seq2SeqBiLSTM import Seq2SeqBiLSTM
from architectures.Seq2Seq3BiLSTM import Seq2Seq3BiLSTM
from architectures.Seq2SeqLSTMTransformer import Seq2SeqLSTMTransformer
import pandas as pd

from utils import (
    evaluate_rouge,
    evaluate_wer,
    evaluate_cosine_similarity,
    evaluate_myevalutation,
    plot_rouge,
    plot_wer,
    plot_cosine_similarity,
    plot_myevaluation,
)
import glob

# Architectures whose generated summaries should be evaluated
model_classes = [
    Seq2SeqGRU,
    Seq2SeqLSTM,
    Seq2SeqLSTMGlove,
    Seq2SeqBiLSTM,
    Seq2Seq3BiLSTM,
    # Seq2SeqLSTMTransformer,
]

# Suffix of the files written after a summaries CSV has been scored
_EVALUATED_SUFFIX = "_evaluated.csv"

# Maps each model class to the sorted names of its raw summaries CSV files
model_instances = {}

for model in model_classes:
    model_name = str(model.__name__)
    csv_dir = os.path.join("results", model_name, "csv")

    summaries_names = set()
    for csv_path in glob.glob(os.path.join(csv_dir, "*.csv")):
        file_name = os.path.basename(csv_path)
        lowered_name = file_name.lower()
        # Keep summaries files only, and skip the outputs of a previous
        # evaluation run: their names still contain "summaries", so they
        # would otherwise be scored again into "*_evaluated_evaluated.csv".
        if "summaries" not in lowered_name:
            continue
        if lowered_name.endswith(_EVALUATED_SUFFIX):
            continue
        summaries_names.add(file_name)

    model_instances[model] = sorted(summaries_names)  # Order by name

# Print model instances
for model, instances in model_instances.items():
    print(f"Model: {model.__name__}")
    print(f"CSV files found ({len(instances)}): {instances}\n")


def save_metrics_results(df_summaries, model_name, results_path):
    """Write the scored summaries of a model to a CSV file.

    The file is stored as
    ``<results_path>/csv/<model_name>_metrics_scores.csv``; the ``csv``
    directory is created when it does not exist yet.

    Args:
        df_summaries: DataFrame-like object exposing ``to_csv``.
        model_name: Name used as prefix of the output file.
        results_path: Root results directory of the model.
    """
    metrics_file_path = f"{results_path}/csv/{model_name}_metrics_scores.csv"
    # ``to_csv`` does not create missing parent directories on its own.
    os.makedirs(os.path.dirname(metrics_file_path), exist_ok=True)
    df_summaries.to_csv(metrics_file_path, index=False)
    print(f"Metrics results saved to {metrics_file_path}")


# Scores every summaries CSV listed in ``model_instances``, stores the scored
# frame next to the input, plots the score distributions and appends the mean
# scores to the history file of the run.
TO_EVALUATE_SUMMARIES = True
TO_SAVE_PLOTS = True
if TO_EVALUATE_SUMMARIES:
    # Iterate through all models and their instances
    for model, instances in model_instances.items():
        print("=" * 50)
        print(f"Evaluating summaries for {model.__name__}")

        results_path = f"results/{model.__name__}"
        csv_dir = os.path.join("results", model.__name__, "csv")

        for csv_file in instances:
            base_name = os.path.splitext(csv_file)[0]

            # A file ending in "_evaluated" is the output of an earlier
            # evaluation; scoring it again would only produce
            # "*_evaluated_evaluated.csv" duplicates.
            if base_name.endswith("_evaluated"):
                continue

            print(f"Evaluating file: {csv_file}")

            # Load original csv
            original_path = os.path.join(csv_dir, csv_file)
            df_summaries = pd.read_csv(original_path)

            # Evaluate summaries; each call returns the updated frame and a mean
            print("Evaluating rouge")
            df_summaries, mean_scores_rouge = evaluate_rouge(df_summaries)
            print("Evaluating wer")
            df_summaries, mean_score_wer = evaluate_wer(df_summaries)
            print("Evaluating cosine similarity")
            df_summaries, mean_score_cosine_similarity = evaluate_cosine_similarity(
                df_summaries
            )
            print("Evaluating my evaluation")
            df_summaries, mean_score_myevaluation = evaluate_myevalutation(df_summaries)

            print("Finished evaluation")

            # Save evaluated file next to the input
            evaluated_path = os.path.join(csv_dir, f"{base_name}_evaluated.csv")
            df_summaries.to_csv(evaluated_path, index=False)
            print(f"Evaluated file: {evaluated_path}")

            # Plotting
            if TO_SAVE_PLOTS:
                graph_dir = os.path.join(results_path, "media/graphs", base_name)
                os.makedirs(graph_dir, exist_ok=True)

                rouge_variants = (
                    ("rouge1", "ROUGE-1"),
                    ("rouge2", "ROUGE-2"),
                    ("rougeL", "ROUGE-L"),
                )
                for rouge_metric, rouge_label in rouge_variants:
                    plot_rouge(
                        df_summaries,
                        graph_dir,
                        base_name,
                        metric=rouge_metric,
                        title=f"{rouge_label} - {base_name}",
                        color="blue",
                    )

                plot_wer(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"WER - {base_name}",
                    color="red",
                )

                plot_cosine_similarity(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"Cosine Similarity - {base_name}",
                    color="green",
                )

                plot_myevaluation(
                    df_summaries,
                    graph_dir,
                    base_name,
                    title=f"My Evaluation - {base_name}",
                    color="purple",
                )

            # Update history file. The training cell writes
            # "histories/<run name>_history.txt"; the summaries files are
            # presumably named "<run name>_summaries.csv" (verify against
            # generate_summaries), so drop that suffix to reach the same file.
            run_name = base_name.removesuffix("_summaries")
            histories_dir = os.path.join(results_path, "histories")
            os.makedirs(histories_dir, exist_ok=True)
            history_path = os.path.join(histories_dir, f"{run_name}_history.txt")
            with open(history_path, "a") as f:
                f.write(f"\nEvaluation for {csv_file}:\n")
                f.write(f"Mean ROUGE scores: {mean_scores_rouge}\n")
                f.write(f"Mean WER score: {mean_score_wer}\n")
                f.write(f"Mean Cosine Similarity: {mean_score_cosine_similarity}\n")
                f.write(f"Mean My Evaluation: {mean_score_myevaluation}\n")

2025-02-27 12:11:11.794920: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-27 12:11:11.803213: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740654671.812641  682225 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740654671.815355  682225 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-27 12:11:11.826360: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Model: Seq2SeqGRU
CSV files found (2): ['Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv', 'Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries_evaluated.csv']

Model: Seq2SeqLSTM
CSV files found (1): ['Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv']

Model: Seq2SeqLSTMGlove
CSV files found (1): ['Seq2SeqLSTMGlove_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv']

Model: Seq2SeqBiLSTM
CSV files found (1): ['Seq2SeqBiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv']

Model: Seq2Seq3BiLSTM
CSV files found (1): ['Seq2Seq3BiLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv']

Evaluating summaries for Seq2SeqGRU
Evaluating file: Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv
Evaluating rouge
Evaluating wer
Evaluating cosine similarity


  from .autonotebook import tqdm as notebook_tqdm


Evaluating my evaluation
Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries_evaluated.csv
Evaluating file: Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries_evaluated.csv
Evaluating rouge
Evaluating wer
Evaluating cosine similarity
Evaluating my evaluation
Finished evaluation
Evaluated file: results/Seq2SeqGRU/csv/Seq2SeqGRU_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries_evaluated_evaluated.csv
Evaluating summaries for Seq2SeqLSTM
Evaluating file: Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries.csv
Evaluating rouge
Evaluating wer
Evaluating cosine similarity
Evaluating my evaluation
Finished evaluation
Evaluated file: results/Seq2SeqLSTM/csv/Seq2SeqLSTM_optimizerAdam_lr0.001_ed512_ld256_do0.2_drdo0.2_edo0.2_erdo0.2_summaries_evaluated.csv
Evaluating summaries for Seq2SeqLSTMGlove
Evaluating file: Seq