In [None]:
import pandas as pd
import sys
import os
import re
import plotly.express as px

In [2]:
# Put the parent of the current working directory on the import path so that
# the `evaluation` package imported in the next cell can be resolved.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append: re-running this cell must not pile up duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [3]:
from evaluation.utils.metrics import hard_accuracy, soft_accuracy

### Auxiliary methods

In [4]:
def extract_model_name(filename):
    """Derive a display name for a model from a result file name.

    The name is the leading run of letters, digits, dots and hyphens, i.e.
    everything before the first underscore (or any other character outside
    that set). An optional ``en_`` prefix is dropped from the name and
    reported as an `` (English)`` suffix instead.

    Args:
        filename: Base name of a result file, e.g. ``"en_gpt-4o_2024.csv"``.

    Returns:
        The model name (with `` (English)`` appended when the ``en_`` prefix
        is present), or ``None`` if the file name does not start with a
        valid model-name character.
    """
    # "." is a legal model-name character (think "llama-3.1"), so without this
    # step a file such as "gpt-4o.csv" would yield "gpt-4o.csv" as the name.
    # Only the ".csv" suffix is removed; os.path.splitext would mangle "llama-3.1".
    stem = re.sub(r"\.csv$", "", filename, flags=re.IGNORECASE)

    match = re.match(r"(en_)?([a-zA-Z0-9.-]+)", stem)
    if match is None:
        return None

    prefix, model_name = match.groups()
    if prefix:
        return f"{model_name} (English)"
    return model_name

In [24]:
def plot_model_scores(csv_path:str, score_col:str):
    """Render a grouped Plotly bar chart of one score column, split by language.

    Reads ``csv_path``, labels each row "English" when its ``model_name``
    contains "(English)" and "Ukrainian" otherwise, strips that suffix to get
    the base model name, and shows one bar per (base model, language) pair.

    Args:
        csv_path: CSV file with a ``model_name`` column and the score column.
        score_col: Name of the column plotted on the y axis.
    """
    scores = pd.read_csv(csv_path)

    def language_of(name):
        # Rows carrying the "(English)" marker are English; the rest Ukrainian.
        if "(English)" in name:
            return "English"
        return "Ukrainian"

    scores["Language"] = scores["model_name"].apply(language_of)
    # Same base name for both language variants so their bars sit side by side.
    scores["Base Model"] = scores["model_name"].str.replace(r" \(English\)", "", regex=True)

    language_colors = {"English": "#82c4ff", "Ukrainian": "#ffaf71"}
    chart_title = score_col.replace('_', ' ').title()

    fig = px.bar(
        scores,
        x="Base Model",
        y=score_col,
        color="Language",
        barmode="group",
        title=chart_title,
        labels={score_col: "Score", "Base Model": "Model"},
        color_discrete_map=language_colors,
        hover_data={score_col: True, "Language": True},
    )

    # Layout: white background, centred title, fixed size, y axis pinned to [0, 1.1].
    layout_options = {
        "xaxis": {"title": "Model Name", "tickfont": {"size": 12}},
        "yaxis": {"title": "Score", "gridcolor": "rgba(0, 0, 0, 0.1)", "range": [0, 1.1]},
        "legend": {"title": "Language"},
        "plot_bgcolor": "white",
        "paper_bgcolor": "white",
        "font": {"color": "black"},
        "title": {"font": {"size": 18}, "x": 0.5},
        "margin": {"l": 40, "r": 40, "t": 50, "b": 80},
        "width": 800,
        "height": 500,
    }
    fig.update_layout(**layout_options)

    fig.show()

In [13]:
def process_csv_files(folder_path: str, output_csv: str):
    """Compute accuracy metrics for every CSV in a folder and save a summary CSV.

    Each ``*.csv`` file directly inside ``folder_path`` is loaded into a
    DataFrame and passed to ``hard_accuracy`` and ``soft_accuracy``. One row
    per file is collected, with the model name taken from the file name via
    ``extract_model_name``. The rows are sorted by ``model_name`` and written
    to ``output_csv`` without the index. Nothing is plotted here.

    Args:
        folder_path: Directory containing the per-model result CSV files.
        output_csv: Destination path of the summary CSV.

    Returns:
        None. The summary is written to ``output_csv``; when the folder has no
        CSV files the output contains only the header row.
    """
    # Declared up front so that an empty result set still yields a frame with
    # these columns; otherwise sort_values(by="model_name") raises KeyError.
    columns = ["model_name", "hard_accuracy", "soft_accuracy"]
    results = []

    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".csv"):
            continue

        df = pd.read_csv(os.path.join(folder_path, file))
        results.append({
            "model_name": extract_model_name(file),
            "hard_accuracy": hard_accuracy(df),
            "soft_accuracy": soft_accuracy(df),
        })

    # Stable sort keeps files that map to the same model name in listing order.
    results_df = pd.DataFrame(results, columns=columns).sort_values(
        by="model_name", kind="mergesort"
    )
    results_df.to_csv(output_csv, index=False)

### Results extraction and plotting

In [8]:
# Build the result locations from `parent_dir` (defined in the sys.path cell)
# rather than a machine-specific absolute path, so the notebook is portable.
# NOTE(review): assumes `parent_dir` is the project root that contains the
# `evaluation` package, which the import of `evaluation.utils.metrics` also relies on.
results_root = os.path.join(parent_dir, "evaluation", "results")
folder_path = os.path.join(results_root, "sc_101_care_harm")
output_csv = os.path.join(results_root, "sc_101_care_harm_results.csv")
process_csv_files(folder_path, output_csv)

In [25]:
# Grouped bar chart of the "hard_accuracy" column from the summary CSV written above.
plot_model_scores(output_csv, "hard_accuracy")

In [26]:
# Grouped bar chart of the "soft_accuracy" column from the summary CSV written above.
plot_model_scores(output_csv, "soft_accuracy")