### Load Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Map each Evaluation File to its Model Name

In [None]:
# Predictions CSV of each evaluated model -> display name of that model.
# Entries are kept in this order: base model first, then by number of epochs,
# with the augmented-dataset variant directly after its plain counterpart.
file_model_mapping = {
    "../predictions_csv/outputmodel_base_model.csv": "Base Model",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_1_epochs.csv": "1 Epoch",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_augmented_dataset_1_epochs.csv": "1 Epoch - Augmented",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_3_epochs.csv": "3 Epochs",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_augmented_dataset_3_epochs.csv": "3 Epochs - Augmented",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_5_epochs.csv": "5 Epochs",
    "../predictions_csv/outputmodel_Meta-Llama-3-8B_augmented_dataset_5_epochs.csv": "5 Epochs - Augmented",
}

### Define the `calculate_metrics` function

In [None]:
def calculate_metrics(df, true_col='Memorable', pred_col='predictions'):
    """Score one model's predictions against the true labels.

    Args:
        df: Table containing both the true labels and the predictions.
        true_col: Name of the column holding the true labels.
        pred_col: Name of the column holding the predicted labels.

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``, each mapped to the corresponding scikit-learn score.
    """
    y_true = df[true_col]
    y_pred = df[pred_col]

    # zero_division=0 makes an undefined precision/recall/F1 count as 0.
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1_score': f1_score(y_true, y_pred, zero_division=0),
    }

### Load each evaluation df and calculate its corresponding metrics

In [None]:
# Read every predictions file and score it; keys are the model display names.
results = {
    model_name: calculate_metrics(
        pd.read_csv(file_path), true_col='Memorable', pred_col='predictions'
    )
    for file_path, model_name in file_model_mapping.items()
}

# Transpose so that each row is a model and each column is a metric.
results_df = pd.DataFrame(results).T

### Define function to make and save plots

In [None]:
def save_plot_metric(df, metric, color):
    """Draw a bar chart of one metric across all models and save it as a PNG.

    The metric is displayed in percent, with the value printed above each
    bar. The figure is written to ``../figures/<metric>.png``.

    Args:
        df: DataFrame with one row per model and a column named ``metric``.
            Values are presumably fractions in [0, 1], since the y-axis is
            fixed to 0-100 after scaling by 100 -- confirm with the caller.
        metric: Name of the column to plot; also used for the title, the
            y-axis label and the output file name.
        color: Bar color, passed through to the plotting call.
    """
    # Scale into a local Series instead of writing back into ``df``: the
    # caller's frame must keep its raw values, otherwise a second call for
    # the same metric would multiply it by 100 again.
    percentages = df[metric] * 100

    plt.figure(figsize=(12, 6))
    percentages.plot(kind='bar', color=color)
    plt.title(f'{metric.capitalize()} - comparison across Models')
    plt.ylabel(f'{metric.capitalize()} (%)')
    plt.xticks(rotation=45, ha='right', fontsize=10)
    plt.ylim(0, 100)

    # Bars are drawn at x = 0, 1, 2, ..., so the enumeration index is the
    # horizontal position of each value label.
    for position, value in enumerate(percentages):
        plt.text(position, value + 1, f"{value:.2f}%", ha='center', va='bottom')

    plt.tight_layout()
    plt.savefig(f'../figures/{metric}.png')

# One figure per metric, each drawn in a color chosen to be easy on the eye.
for metric_name, bar_color in (
    ('accuracy', 'darkred'),
    ('precision', 'sienna'),
    ('recall', 'forestgreen'),
    ('f1_score', 'crimson'),
):
    save_plot_metric(results_df, metric_name, bar_color)