In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Directory that holds the benchmark CSV read below and receives the saved figures.
figs_path = Path("figs-benchmark-simple")

In [None]:
# Load the experiment results; later cells read the "model", "cot", "fewshot",
# "f1" and "fpr" columns from this frame.
df = pd.read_csv(figs_path / "experiment_results.csv")

In [None]:
# Derive the plotting columns (Parameters, CoT, Model, Family, Fewshot) from the raw ones.
llama = df["model"].str.startswith("Llama")
uses_cot = df["cot"].isin(["Y", "I"])

# Last "-"-separated token of the model name minus its final character
# (presumably the "B" of a size suffix such as "8B" -- confirm against the CSV).
params = df["model"].str.split("-").str[-1].str[:-1]
# Only Llama rows keep that token; every other row gets the "unknown" filler.
df["Parameters"] = params.where(llama, "unknown")

df["CoT"] = np.where(uses_cot, "yes", "no")

# Llama rows drop the "-<digits>B" part so that sizes of one release share a Model label.
size_free = df["model"].str.replace("-[0-9]+B", "", regex=True)
df["Model"] = df["model"].where(~llama, size_free)

# Family: Llama vs. everything else (labelled OpenAI), with the "Random" model split out,
# then suffixed with "-CoT" for rows whose cot flag is "Y" or "I".
family = pd.Series("OpenAI", index=df.index)
family[llama] = "Llama"
family[df["model"] == "Random"] = "Random"
df["Family"] = family + np.where(uses_cot, "-CoT", "")

df["Fewshot"] = df["fewshot"].copy()

In [None]:
# Facet order for the plots. The "" entry reserves a grid cell: the relplot cell uses
# this list as col_order and hides the second facet.
fewshot_order = ["0-shot", "", "1-shot (+)", "1-shot (-)", "2-shot (+/-)", "2-shot (-/+)"]

# Label-based slice: rows from index label 160 through the end of the frame.
# NOTE(review): presumably these are the "Random" baseline rows -- confirm against the CSV.
temp = df.loc[160:]

# Append one copy of those rows per facet label with a single concat instead of
# re-concatenating the growing frame on every loop iteration (same rows, order and index).
# NOTE: df is reassigned here, so re-running this cell appends the copies again.
df = pd.concat([df, *(temp.assign(Fewshot=label) for label in fewshot_order)])

In [None]:
# Show the frame after the per-facet copies were appended.
df

In [None]:
# Six-colour "RdYlBu" palette; indexed 0-5 by the colour map for "Random" and the Llama models.
cmap = sns.color_palette(palette="RdYlBu", n_colors=6)
cmap

In [None]:
# Palette indexed 0-5 by the colour map for the GPT-4 ... o1 entries.
# NOTE(review): built with the same palette name and n_colors as `cmap`, so both hold
# the same six colours and those entries reuse the Random/Llama colours -- confirm
# this overlap is intended.
cmap2 = sns.color_palette(palette="RdYlBu", n_colors=6)
cmap2

In [None]:
# "tab10" palette; only index 4 is used by the colour map (for "GPT-3.5-turbo").
cmap3 = sns.color_palette(palette="tab10")
cmap3

In [None]:
# Model -> colour lookup for the hue channel. Insertion order matters: it doubles as
# the hue order derived at the bottom of this cell.
colors = {
    # "Random" model
    "Random": cmap[0],
    # Llama models
    "Llama-2": cmap[1],
    "Llama-3": cmap[2],
    "Llama-3.1": cmap[3],
    "Llama-3.2": cmap[4],
    "Llama-3.3": cmap[5],
    # remaining (non-Llama) models
    "GPT-3.5-turbo": cmap3[4],
    "GPT-4": cmap2[0],
    "GPT-4-turbo": cmap2[1],
    "GPT-4o-mini": cmap2[2],
    "GPT-4o": cmap2[3],
    "o1-mini": cmap2[4],
    "o1": cmap2[5],
}

# The hue order is exactly the key sequence of `colors`, so derive it from the dict
# instead of spelling the thirteen names out a second time.
hue_order = list(colors)

In [None]:
# Marker per Family level and point size per Parameters level; key order is the level order.
family_markers = {"Llama": "o", "Llama-CoT": "s", "OpenAI": "X", "OpenAI-CoT": "P", "Random": "*"}
parameter_sizes = {"1": 20, "3": 40, "7": 70, "8": 80, "13": 130, "70": 250, "unknown": 100}

# One facet per Fewshot value (two per row): f1 on x, fpr on y,
# colour = Model, marker = Family, point size = Parameters.
grid = sns.relplot(
    data=df,
    x="f1",
    y="fpr",
    col="Fewshot",
    col_order=fewshot_order,
    col_wrap=2,
    hue="Model",
    hue_order=hue_order,
    palette=colors,
    style="Family",
    style_order=list(family_markers),
    markers=list(family_markers.values()),
    size="Parameters",
    size_order=list(parameter_sizes),
    sizes=list(parameter_sizes.values()),
    edgecolor="black",
)

first_facet, placeholder_facet = grid.axes[:2]
first_facet.set_xlim([-0.1, 1.1])
first_facet.set_ylim([-0.1, 1.1])
# The second facet belongs to the "" entry of fewshot_order; hide it.
placeholder_facet.set_visible(False)

In [None]:
# Write the faceted figure into figs_path as a 300-dpi PNG.
grid.figure.savefig(figs_path / "results.png", dpi=300)

In [None]:
# Keep only the rows of the model to highlight; isin() takes a list, so more model
# names can be added here.
best = df[df["model"].isin(["GPT-4o"])]

In [None]:
# Add the `temp` rows to the selection, tagged with Fewshot "Random" so they form
# their own hue level in the plot below.
# NOTE: `best` is rebuilt from itself, so re-running this cell appends the rows again.
best = pd.concat([best, temp.assign(Fewshot="Random")])

In [None]:
# Create the figure explicitly so the scatter never lands on whatever Axes pyplot
# currently considers active (e.g. a facet of the grid drawn earlier).
fig, ax = plt.subplots()

# f1 vs. fpr for the selected rows: colour = Fewshot setting, marker = Family.
# `data` is passed by keyword, matching the relplot call and older seaborn signatures.
sns.scatterplot(
    data=best,
    x="f1",
    y="fpr",
    style="Family",
    hue="Fewshot",
    s=100,
    markers=["X", "P", "*"],
    ax=ax,
)
ax.set_title("GPT-4o")
fig.savefig(figs_path / "gpt-4o.png", dpi=300)