In [71]:
import os

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [72]:
# Prompt-template selector: only prediction files whose file name contains
# this string are picked up by the loading cell. Swap the active line with
# one of the commented alternatives to evaluate a different template.
template_name = "amazon"
# template_name = "NS-prompts"
# template_name = "regard"

In [73]:
def find_csv_filenames(path_to_dir, template_name, suffix=".tsv"):
    """Recursively collect files under ``path_to_dir`` that match a template.

    Despite the function name, the default ``suffix`` selects ``.tsv`` files.

    Args:
        path_to_dir: Root directory to walk; all subdirectories are searched.
        template_name: Substring that must occur in the file name (not the
            directory part of the path).
        suffix: Required file-name ending. Defaults to ``".tsv"``.

    Returns:
        A sorted list of matching paths, each joined with the directory it
        was found in. The list is empty when nothing matches or when
        ``path_to_dir`` does not exist.
    """
    matches = [
        os.path.join(root, name)
        for root, _, names in os.walk(path_to_dir)
        for name in names
        if name.endswith(suffix) and template_name in name
    ]
    # os.walk yields entries in an arbitrary, filesystem-dependent order;
    # sorting makes the result (and anything built from it) reproducible.
    return sorted(matches)

predictions_dir = "predictions/"
prediction_tsv_paths = find_csv_filenames(predictions_dir, template_name)

# Fail early with an actionable message; otherwise the column selection at
# the end of this cell would fail with an unrelated-looking KeyError.
if not prediction_tsv_paths:
    raise FileNotFoundError(
        f"No prediction files matching {template_name!r} found under {predictions_dir!r}"
    )

# Read every prediction file and collect the frames in a list so they can be
# concatenated once. Growing a DataFrame with pd.concat inside the loop copies
# all accumulated rows on every iteration, and seeding it with an empty
# DataFrame triggers a FutureWarning in recent pandas versions.
prediction_frames = []
for prediction_tsv_path in tqdm(prediction_tsv_paths, ncols=80):
    dataframe = pd.read_csv(prediction_tsv_path, delimiter="\t")
    # The "text" column is not used downstream; drop it before accumulating.
    prediction_frames.append(dataframe.drop(columns="text"))

# Row indices are kept as read from each file (no ignore_index), so the
# combined index may contain duplicates.
output_table = pd.concat(prediction_frames)

# Keep only the columns the evaluation cells rely on.
output_table = output_table[["y_true", "y_pred", "model", "run_id"]]


  0%|                                                    | 0/34 [00:00<?, ?it/s]

In [74]:
# Ordered (old, new) substring rewrites. Order matters: each rule is applied
# to the output of the previous one.
_MODEL_NAME_REWRITES = (
    # Upper-case the parameter-count suffix.
    ("125m", "125M"),
    ("350m", "350M"),
    ("1.3b", "1.3B"),
    ("7b", "7B"),
    ("8b", "8B"),
    ("13b", "13B"),
    # Replace the "base"/"large" tags with a size label.
    ("base", "125M"),
    ("large", "355M"),
    # Strip path/organisation prefixes and training-variant markers.
    ("/data/llama-farnaz/", ""),
    (" fine-tuned", ""),
    ("facebook/", ""),
    ("-lora-lr1e4", ""),
    # Apply display casing to the model family name.
    ("opt", "OPT"),
    ("roberta", "RoBERTa"),
    ("llama", "Llama"),
    ("mistral", "Mistral"),
)


def modify_model_name(model_name: str) -> str:
    """Turn a raw model identifier into a short display name.

    Applies a fixed sequence of case-sensitive substring replacements, in
    order, to ``model_name`` and returns the result. Names that contain none
    of the known substrings are returned unchanged.

    Args:
        model_name: Model identifier as stored in the prediction files.

    Returns:
        The rewritten name.
    """
    for old, new in _MODEL_NAME_REWRITES:
        model_name = model_name.replace(old, new)
    return model_name

# Overwrite the "model" column with the display names produced by
# modify_model_name (applied element-wise to every row).
output_table["model"] = output_table["model"].apply(modify_model_name)


In [75]:
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``.

    Args:
        y_true: Ground-truth labels (pandas Series, NumPy array, list or tuple).
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        The mean of the element-wise equality comparison, as a float in
        ``[0, 1]``.
    """
    # Plain Python sequences compare as whole objects (``[1, 0] == [1, 1]`` is
    # a single ``False``), which would silently yield 0.0 or 1.0. Convert them
    # to arrays so the comparison is element-wise. Series/ndarray inputs are
    # left untouched and keep their native comparison semantics.
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred)

# Per-run accuracy: one value for each (model, run_id) pair.
# The value columns are selected explicitly so that apply() does not operate
# on the grouping columns; doing so is deprecated in recent pandas versions
# and emits a DeprecationWarning.
grouped = (
    output_table
    .groupby(['model', 'run_id'])[['y_true', 'y_pred']]
    .apply(lambda x: calculate_accuracy(x['y_true'], x['y_pred']))
)

# Summarise across runs: mean and standard deviation of the per-run accuracy
# for each model. pandas' 'std' is the sample standard deviation (ddof=1), so
# a model with a single run gets NaN.
results = grouped.groupby('model').agg(['mean', 'std'])
results = results.rename(columns={'mean': 'Mean Accuracy', 'std': 'STD of Accuracy'})