In [16]:
import numpy as np 
import seaborn as sns 
from matplotlib import pyplot as plt 
import pandas as pd 
import numpy as np 
from collections import defaultdict

from metrics import accuracy_report
import re 
import warnings
warnings.filterwarnings("ignore")

def get_variance(csv_groups, level, filtered = False):
    """Per group and per model, compute the variance of accuracy across one report level.

    Every CSV is loaded with ``pd.read_csv`` and passed to ``accuracy_report``;
    the sub-report stored under ``level`` is then reduced to a single number:
    the sample variance (pandas default, ``ddof=1``) of the accuracies of all
    entries in that sub-report.

    Args:
        csv_groups: Mapping of group name -> iterable of CSV paths. Each CSV
            needs a ``model`` column; the value in its first row names the model.
        level: Key into the dict returned by ``accuracy_report`` (this notebook
            uses e.g. ``"acc_by_swap"``). The value under that key must be a
            mapping whose values are indexable sequences.
        filtered: If True, element 2 of each entry is used as the accuracy,
            otherwise element 0.
            NOTE(review): presumably these are the filtered / unfiltered
            accuracies produced by ``metrics.accuracy_report`` -- confirm there.

    Returns:
        dict mapping group name -> ``defaultdict(int)`` mapping the cleaned
        model name (``_`` replaced by ``-``, ``-qa`` removed) to the variance.
        The variance is NaN when a sub-report has fewer than two entries.
        If two CSVs in a group yield the same cleaned model name, the later
        one overwrites the earlier one.
    """
    # Position of the accuracy inside each report entry.
    acc_index = 2 if filtered else 0

    to_ret = {group: defaultdict(int) for group in csv_groups.keys()}
    for group, csvs in csv_groups.items():
        for csv in csvs:
            df = pd.read_csv(csv)
            level_report = accuracy_report(df)[level]

            # Positional access: the first row names the model for the whole file.
            model_name = df["model"].iloc[0]
            model_name = model_name.replace("_", "-").replace("-qa", "")

            # Collect the values in a plain list instead of growing a DataFrame
            # row by row: DataFrame.append no longer exists in pandas >= 2.0,
            # and DataFrame.var() over the string columns raises there.
            accs = [float(entry[acc_index]) for entry in level_report.values()]
            to_ret[group][model_name] = pd.Series(accs, dtype=float).var()

    return to_ret

In [18]:
# Result CSVs grouped by construction. The three lists follow the same file
# order (as named in the paths): gpt, gpt_neo 1.3B, gpt_neo 2.7b, gpt_j,
# jurassic, jurassic_jumbo, t5, t0.
csv_data = {
    "Object control": [
        "../results_to_plot/gpt_object_control_swap_names.csv",
        "../results_to_plot/gpt_neo_1.3B_object_control.csv",
        "../results_to_plot/gpt_neo_2.7b_object_control.csv",
        "../results_to_plot/gpt_j_object_control.csv",
        "../results_to_plot/jurassic_object_control_swap_names.csv",
        "../results_to_plot/jurassic_jumbo_object_control_swap_names.csv",
        "../results_to_plot/t5_object_control.csv",
        "../results_to_plot/t0_object_control.csv",
    ],
    "Passive object control": [
        "../results_to_plot/gpt_passive_object_control_swap_names.csv",
        "../results_to_plot/gpt_neo_1.3B_passive_object_control.csv",
        "../results_to_plot/gpt_neo_2.7b_passive_object_control.csv",
        "../results_to_plot/gpt_j_passive_object_control.csv",
        "../results_to_plot/jurassic_passive_object_control_swap_names.csv",
        "../results_to_plot/jurassic_jumbo_passive_object_control_swap_names.csv",
        "../results_to_plot/t5_passive_object_control.csv",
        "../results_to_plot/t0_passive_object_control.csv",
    ],
    "Subject control": [
        "../results_to_plot/gpt_subject_control_swap_names.csv",
        "../results_to_plot/gpt_neo_1.3B_subject_control.csv",
        "../results_to_plot/gpt_neo_2.7b_subject_control.csv",
        "../results_to_plot/gpt_j_subject_control.csv",
        "../results_to_plot/jurassic_subject_control_swap_names.csv",
        "../results_to_plot/jurassic_jumbo_subject_control_swap_names.csv",
        "../results_to_plot/t5_subject_control.csv",
        "../results_to_plot/t0_subject_control.csv",
    ],
}

# Short aliases onto the same list objects held by csv_data.
oc_csvs = csv_data["Object control"]
sc_csvs = csv_data["Subject control"]
poc_csvs = csv_data["Passive object control"]

# For each report level, average the per-model variances within every group
# (unfiltered accuracy) and print one summary line.
for level in ("acc_by_swap", "acc_by_name", "acc_by_action", "acc_by_verb"):
    variance_data = get_variance(csv_data, level=level, filtered=False)
    variance_data_mean = {
        group_name: np.mean(list(per_model.values()))
        for group_name, per_model in variance_data.items()
    }
    print(f"{level}: {variance_data_mean}")



acc_by_swap: {'Object control': 0.06685474999999999, 'Passive object control': 0.0536631388888889, 'Subject control': 0.06750555555555554}
acc_by_name: {'Object control': 0.002499047619047619, 'Passive object control': 0.0067037500000000005, 'Subject control': 0.010133928571428572}
acc_by_action: {'Object control': 0.0011700694444444445, 'Passive object control': 0.010889965277777776, 'Subject control': 0.005052083333333334}
acc_by_verb: {'Object control': 0.002184429012345679, 'Passive object control': 0.0030200154320987648, 'Subject control': nan}


In [20]:
# Mean per-group variance at each report level, computed with filtered=True.
for level in ("acc_by_swap", "acc_by_name", "acc_by_action", "acc_by_verb"):
    variance_data = get_variance(csv_data, level=level, filtered=True)
    # Collapse each group's {model: variance} mapping to its mean.
    variance_data_mean = dict(
        (group_name, np.mean(list(per_model.values())))
        for group_name, per_model in variance_data.items()
    )
    print(f"{level}: {variance_data_mean}")

acc_by_swap: {'Object control': 0.07073724462778007, 'Passive object control': 0.061759194170686885, 'Subject control': 0.07132172003035728}
acc_by_name: {'Object control': 0.002513778679560443, 'Passive object control': 0.011377208968828286, 'Subject control': 0.010127446780419654}
acc_by_action: {'Object control': 0.0011487795916267233, 'Passive object control': 0.0035076140595954957, 'Subject control': 0.004831895679918914}
acc_by_verb: {'Object control': 0.002193370506556329, 'Passive object control': 0.002259124793038023, 'Subject control': nan}


In [24]:

# Print a LaTeX tabular with one row per report level; each cell shows the
# mean variance as "unfiltered/filtered" for OC, SC and P-OC.
levels = ["acc_by_swap", "acc_by_name", "acc_by_action", "acc_by_verb"]
# Row labels used in the table for each report level.
level_lookup = dict(
    acc_by_swap="swap names",
    acc_by_name="names",
    acc_by_action="action",
    acc_by_verb="verb",
)

# Written line by line so the embedded whitespace (including the space after
# \toprule and the trailing indent) is explicit.
header = (
    "\\begin{tabular}{llll}\n"
    "            \\toprule \n"
    "            Level & OC & SC & P-OC \\\\\n"
    "            \\midrule\n"
    "        "
)
print(header)

for level in levels:
    # Unfiltered accuracies first, then filtered; each reduced to a per-group mean.
    variance_data_no_filter = get_variance(csv_data, level=level, filtered=False)
    variance_data_no_filter_mean = {
        group_name: np.mean(list(per_model.values()))
        for group_name, per_model in variance_data_no_filter.items()
    }
    variance_data_filter = get_variance(csv_data, level=level, filtered=True)
    variance_data_filter_mean = {
        group_name: np.mean(list(per_model.values()))
        for group_name, per_model in variance_data_filter.items()
    }

    # Column order matches the header: OC, SC, P-OC.
    row = (
        f"{level_lookup[level]} & {variance_data_no_filter_mean['Object control']:.3f}/{variance_data_filter_mean['Object control']:.3f} "
        f" & {variance_data_no_filter_mean['Subject control']:.3f}/{variance_data_filter_mean['Subject control']:.3f}"
        f" & {variance_data_no_filter_mean['Passive object control']:.3f}/{variance_data_filter_mean['Passive object control']:.3f} \\\\"
    )
    print(row)

print("\\bottomrule\n        \\end{tabular}")
        


\begin{tabular}{llll}
            \toprule 
            Level & OC & SC & P-OC \\
            \midrule
        
swap names & 0.067/0.071  & 0.068/0.071 & 0.054/0.062 \\
names & 0.002/0.003  & 0.010/0.010 & 0.007/0.011 \\
action & 0.001/0.001  & 0.005/0.005 & 0.011/0.004 \\
verb & 0.002/0.002  & nan/nan & 0.003/0.002 \\
\bottomrule
        \end{tabular}
