# ETHICS-sc101-PVA results analysis

In [25]:
import pandas as pd
import os
import sys
import re
import plotly.express as px

In [26]:
# Make the directory two levels above the current working directory importable.
two_levels_up = os.path.join(os.getcwd(), "..", "..")
parent_dir = os.path.abspath(two_levels_up)
sys.path.append(parent_dir)

### Auxiliary methods

In [27]:
def extract_model_name(filename) -> tuple[str, bool]:
    match = re.match(r"(en_)?([a-zA-Z0-9.-]+)", filename)
    if match:
        prefix, model_name = match.groups()
        return model_name, prefix is not None
    return None

In [28]:
# Model identifier -> human-readable display name used for chart labels.
models_name_map = {
    "Llama-3.2-3B-Instruct": "Llama 3.2 3b",
    "Qwen2.5-7B-Instruct": "Qwen 2.5 7b",
    "aya-101": "Aya 101 13b",
    "aya-expanse-8b": "Aya Expanse 8b",
    "gemma-2-9b-it": "Gemma 2 9b",
    "gemma-3-4b-it": "Gemma 3 4b",
    "gpt-4o": "GPT 4o",
}

In [29]:
def plot_model_scores(
    df: pd.DataFrame,
    score_col: str = "score",
    model_col: str = "model_name",
    color_col: str = "Language",
    title: str | None = None,
) -> None:
    """Show a grouped Plotly bar chart of scores per model.

    Bars are grouped by model and coloured by the distinct non-null values of
    ``color_col``; colours are taken from the T10 qualitative palette and
    wrap around if there are more categories than palette entries. Model
    identifiers are replaced by their entry in ``models_name_map`` when one
    exists and shown unchanged otherwise.

    Args:
        df: Frame containing ``score_col``, ``model_col`` and ``color_col``.
            It is not modified.
        score_col: Column plotted on the y axis.
        model_col: Column holding the model identifiers.
        color_col: Column whose values define the bar colours and legend.
        title: Chart title; defaults to ``score_col`` in title case with
            underscores replaced by spaces.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    palette = px.colors.qualitative.T10
    categories = df[color_col].dropna().unique()
    color_map = {
        category: palette[i % len(palette)] for i, category in enumerate(categories)
    }

    # Build the display column on a copy so the caller's frame is not mutated.
    plot_df = df.assign(
        display_model_name=df[model_col].apply(
            lambda name: models_name_map.get(name, name)
        )
    )

    fig = px.bar(
        plot_df,
        x="display_model_name",
        y=score_col,
        color=color_col,
        barmode="group",
        title=title if title else score_col.replace("_", " ").title(),
        labels={
            score_col: "Score",
            model_col: "Model",
            # The x axis uses the derived column, so label it too for hover text.
            "display_model_name": "Model",
        },
        color_discrete_map=color_map,
        hover_data={score_col: True, color_col: True},
    )

    fig.update_layout(
        xaxis=dict(title="Model Name", tickfont=dict(size=12)),
        # Fixed y range: presumably scores lie in [0, 1] -- confirm for new metrics.
        yaxis=dict(title="Score", gridcolor="rgba(0, 0, 0, 0.1)", range=[0, 1.1]),
        legend=dict(title=color_col.title()),
        plot_bgcolor="white",
        paper_bgcolor="white",
        font=dict(color="black"),
        title=dict(font=dict(size=18), x=0.5),
        margin=dict(l=40, r=40, t=50, b=80),
        width=800,
        height=500,
    )

    fig.show()

In [30]:
def process_csv_files(folder_paths: list, output_csv: str) -> pd.DataFrame:
    """Aggregate per-model mean scores from the CSV files in several folders.

    For every ``*.csv`` file directly inside each folder, the mean of its
    ``llm_eval`` column is computed and paired with the model name and
    language parsed from the file name by ``extract_model_name`` (``en_``
    prefix means English, otherwise Ukrainian). The combined table is sorted
    by model name and language and written to ``output_csv``.

    Args:
        folder_paths: Directories to scan (non-recursively) for CSV files.
        output_csv: Path of the CSV file the aggregated table is written to.

    Returns:
        DataFrame with columns ``model_name``, ``score`` and ``Language``;
        empty (but with those columns) when no CSV file was found.

    Raises:
        KeyError: If a CSV file has no ``llm_eval`` column.
        TypeError: If ``extract_model_name`` returns ``None`` for a file name.
    """
    results = []

    for folder_path in folder_paths:
        for file in os.listdir(folder_path):
            if not file.endswith(".csv"):
                continue

            df = pd.read_csv(os.path.join(folder_path, file))

            # Strip only the final extension: splitting on the first "." would
            # truncate dotted model names such as "Llama-3.2-3B-Instruct".
            stem = os.path.splitext(file)[0]
            model_name, is_english = extract_model_name(stem)

            results.append(
                {
                    "model_name": model_name,
                    "score": df["llm_eval"].mean(),
                    "Language": "English" if is_english else "Ukrainian",
                }
            )

    # Explicit columns keep the sort below valid when no files were found.
    results_df = pd.DataFrame(results, columns=["model_name", "score", "Language"])
    results_df.sort_values(by=["model_name", "Language"], inplace=True)
    results_df.to_csv(output_csv, index=False)

    return results_df

### Result extraction and plotting

In [31]:
# Folders to scan and the destination of the aggregated score table.
folder_paths = ["../results/mixed_bad"]
output_csv = "../results/ethics_sc101_pva_results.csv"
results_df = process_csv_files(folder_paths=folder_paths, output_csv=output_csv)

## Translation comparison

In [32]:
# Grouped bar chart of the aggregated scores, one colour per language.
plot_model_scores(
    results_df,
    score_col="score",
    color_col="Language",
    title="Score by Model and Language",
)