In [53]:
import pandas as pd
import numpy as np
import os
import glob
import plotly.express as px
from datasets import load_from_disk

# Show which experiment folders exist, then collect the metrics CSV files of
# the experiment that the rest of the notebook analyses.
print(os.listdir("Experiments"))
experiment = "Experiments/2_llm"
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the order in which the runs are processed reproducible.
metrics_files = sorted(glob.glob(os.path.join(experiment, "metrics_*.csv")))
metrics_files

['1_anonymize_parties', '2_llm']


['Experiments/2_llm/metrics_2a_mixtral8x7b.csv',
 'Experiments/2_llm/metrics_2a_chatgpt35turbo.csv']

In [54]:
# Find the answers where the RAG chain refused to answer, i.e. the answer
# contains the fixed refusal sentence below, and report their share per run.
REFUSAL_MARKER = "keine passende Antwort in den verfügbaren Daten gefunden"

for file_name in metrics_files:
    # "metrics_<run>.csv" -> "<run>"; the dataset of a run is stored next to
    # its metrics file as "dataset_<run>".
    experiment_run = "_".join(os.path.basename(
        file_name).split("_")[1:]).split(".")[0]
    dataset = load_from_disk(os.path.join(
        experiment, "dataset_" + experiment_run))

    exclude_list = []  # [question, party] pairs whose answer is a refusal
    num_answers = 0    # number of answers actually inspected
    parties = list(dataset.keys())
    for party in parties:
        party_rows = dataset[party]
        # Walk over every row of this party instead of a hard-coded count, so
        # numerator and denominator always refer to the same set of answers.
        for q_nr in range(len(party_rows)):
            row = party_rows[q_nr]
            num_answers += 1
            if REFUSAL_MARKER in row["answer"]:
                exclude_list.append([row["question"], party])

    # Avoid a ZeroDivisionError when a run contains no answers at all.
    fraction = len(exclude_list) / num_answers if num_answers else float("nan")

    print(experiment_run)
    print(
        f"Fraction of None answers={fraction:.3f}"
    )

2a_mixtral8x7b
Fraction of None answers=0.000
2a_chatgpt35turbo
Fraction of None answers=0.000


In [55]:
# Bar chart of one metric: for every experiment run, the mean over all rows of
# the metrics file is shown per party.
metric_id = 0  # position of the metric in the sorted list of metric names
df_plot = pd.DataFrame()
for file_name in metrics_files:
    df = pd.read_csv(file_name, header=[0, 1], index_col=[0, 1])
    # Sort the metric names: a bare set of strings has no stable iteration
    # order across kernel restarts, so metric_id could otherwise select a
    # different metric on every run.
    metrics = sorted(set(df.columns.get_level_values(0)))
    # "metrics_<run>.csv" -> "<run>"
    experiment_run = "_".join(os.path.basename(file_name).split("_")[1:]).split(".")[0]
    # Selecting the first header level leaves the second level as columns;
    # the column-wise mean therefore yields one value per second-level label.
    df_plot[experiment_run] = df[metrics[metric_id]].mean(axis=0)


# Name the index "party" before turning it into a column, so this works
# whether or not the index already carries a name.
df = df_plot.rename_axis("party").reset_index()
df_melted = df.melt(id_vars=["party"], var_name="category", value_name="value")
print(df_melted.groupby("category")["value"].mean())
px.bar(
    df_melted,
    x="party",
    y="value",
    color="category",
    barmode="group",
    labels={"value": metrics[metric_id], "party": "Party"},
    title=f"{experiment}",
)

category
2a_chatgpt35turbo    0.793841
2a_mixtral8x7b       0.675956
Name: value, dtype: float64


In [56]:
# Bar chart of one metric: for every experiment run, the mean over all rows of
# the metrics file is shown per party.
metric_id = 1  # position of the metric in the sorted list of metric names
df_plot = pd.DataFrame()
for file_name in metrics_files:
    df = pd.read_csv(file_name, header=[0, 1], index_col=[0, 1])
    # Sort the metric names: a bare set of strings has no stable iteration
    # order across kernel restarts, so metric_id could otherwise select a
    # different metric on every run.
    metrics = sorted(set(df.columns.get_level_values(0)))
    # "metrics_<run>.csv" -> "<run>"
    experiment_run = "_".join(os.path.basename(file_name).split("_")[1:]).split(".")[0]
    # Selecting the first header level leaves the second level as columns;
    # the column-wise mean therefore yields one value per second-level label.
    df_plot[experiment_run] = df[metrics[metric_id]].mean(axis=0)


# Name the index "party" before turning it into a column, so this works
# whether or not the index already carries a name.
df = df_plot.rename_axis("party").reset_index()
df_melted = df.melt(id_vars=["party"], var_name="category", value_name="value")
print(df_melted.groupby("category")["value"].mean())
px.bar(
    df_melted,
    x="party",
    y="value",
    color="category",
    barmode="group",
    labels={"value": metrics[metric_id], "party": "Party"},
    title=f"{experiment}",
)

category
2a_chatgpt35turbo    0.809482
2a_mixtral8x7b       0.622741
Name: value, dtype: float64
