In [1]:
from vis import recompute_agent_patient, do_mcnemar 
import pathlib 
import os 
import pandas as pd 
import numpy as np 

from agent_patient_experiment import AgentPatientExperiment
np.random.seed(12)
path_to_file = pathlib.Path("").absolute()
parent = path_to_file.parent


def get_df(condition, is_two = False, thresh=30):
    cos_df = pd.DataFrame(columns=["model", "num_prompts", "accuracy", "num_valid"], dtype=object)
    if is_two:
        two_affix = "_2"
    else:
        two_affix = "_1"
    for prefix in range(0,4):
        
        change_of_state_csvs = [parent.joinpath(f'agent_patient_results{two_affix}/gpt_{condition}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_neo_1.3b_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_neo_2.7b_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_j_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/jurassic_{condition}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/jurassic_jumbo_{condition}_prefix_{prefix}.csv'),
                                # parent.joinpath(f'agent_patient_results{two_affix}/t5_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/t0_{condition}{two_affix}_prefix_{prefix}.csv') ]
        names = ["gpt", "gpt-neo-1.3b", "gpt-neo-2.7b", "gpt-j", "jurassic-large", "jurassic-jumbo", "t0"]
        prompt_files = [parent.joinpath(f"data/agent_patient/{condition}{two_affix}_prefix_{prefix}.json") for i in range(len(change_of_state_csvs))]
        cos_acc = recompute_agent_patient(change_of_state_csvs, prompt_files, names)

        for model in names:
            try:
                __, __, acc, num = cos_acc[model]
            except KeyError:
                acc, num = -1.0, 0
            if model in cos_df['model'].values:
                curr_acc = cos_df[cos_df['model'] == model]['accuracy'].values[0]
                curr_num = cos_df[cos_df['model'] == model]['num_valid'].values[0]

                curr_weighted = curr_acc * curr_num
                if num > thresh:
                    # if acc > curr_acc:
                    weighted = acc * num
                    if weighted > curr_weighted:
                        cos_df.loc[cos_df['model'] == model, 'num_prompts'] = prefix
                        cos_df.loc[cos_df['model'] == model, 'accuracy'] = acc
                        cos_df.loc[cos_df['model'] == model, 'num_valid'] = num 
            else:
                if num > thresh: 
                    cos_df = cos_df.append({"model": model, "num_prompts": prefix, "accuracy": acc, "num_valid": num}, ignore_index=True)
                else:
                    cos_df = cos_df.append({"model": model, "num_prompts": prefix, "accuracy": -1.0, "num_valid": 0}, ignore_index=True)
        # print(prefix, cos_acc)
    # print(cos_df)
    return cos_df

In [4]:
# if False: 
# print(f"change of state")
# cos_df1 = get_df("change_of_state")
# print(cos_df1)
# cos_df2 = get_df("change_of_state", is_two=True)
# print(cos_df2)


# models_levels_prefixes = [("gpt", 3, 2), 
#                           ("gpt_neo_1.3b", 2, 2), 
#                           ("gpt_neo_2.7b", 0, 2), 
#                           ("gpt_j", 0, 2), 
#                           ("jurassic", 1, 2), 
#                           ("jurassic_jumbo", 1, 1), 
#                           ("t0", 1, 2),
#                           ("random", 0,0)]

models_levels_prefixes = []
for model in ["gpt", "gpt_neo_1.3b", "gpt_neo_2.7b", "gpt_j", "jurassic", "jurassic_jumbo", "t0"]: 
    for level in range(4): 
        for aff in [1,2]: 
            models_levels_prefixes.append((model, level, aff))

models_levels_prefixes.append(("random", 0, 0))

results = []
done = []
for model1, level1, two_affix1 in models_levels_prefixes: 
    for model2, level2, two_affix2 in models_levels_prefixes: 
        if model1 == model2 or ((model1, model2, level1, level2, two_affix1, two_affix2) in done) or ((model2,model1, level2, level1, two_affix2, two_affix1) in done): 
            continue
        pval, stat, table = do_mcnemar(model1, model2, level1, level2, f"_{two_affix1}", f"_{two_affix2}", condition="change_of_state")
        model1_acc = len(table[model1]['correct'])/(len(table[model1]['correct']) + len(table[model1]['incorrect']))
        model2_acc = len(table[model2]['correct'])/(len(table[model2]['correct']) + len(table[model2]['incorrect']))
        results.append((model1, level1, two_affix1, model2, level2, two_affix2, model1_acc, model2_acc, pval, stat))  
        done.append((model1, model2, level1, level2, two_affix1, two_affix2))

for x in results:
    if x[-2] < 0.05 and x[-1] > 5:
        print(x)
# [print(x) for x in results]
# for level in range(3):
#     for model in ["gpt", "gpt-neo-1.3b", "gpt-neo-2.7b", "gpt-j", "jurassic-large", "jurassic-jumbo", "t0"]


# do_mcnemar("gpt_neo_1.3b", "gpt_neo_1.3b", "0", "0", "_1", "_1", "change_of_state")


('gpt', 1, 1, 'gpt_neo_2.7b', 0, 2, 0.6101694915254238, 0.45454545454545453, 0.011351591436778108, 14.0)
('gpt', 1, 1, 't0', 0, 1, 0.6101694915254238, 0.4661016949152542, 0.03961701489849968, 22.0)
('gpt', 1, 1, 't0', 3, 1, 0.6101694915254238, 0.4576271186440678, 0.0327657590988232, 23.0)
('gpt', 1, 2, 'gpt_neo_2.7b', 0, 2, 0.6101694915254238, 0.45454545454545453, 0.011351591436778108, 14.0)
('gpt', 1, 2, 't0', 0, 1, 0.6101694915254238, 0.4661016949152542, 0.03961701489849968, 22.0)
('gpt', 1, 2, 't0', 3, 1, 0.6101694915254238, 0.4576271186440678, 0.0327657590988232, 23.0)
('gpt', 2, 2, 'random', 0, 0, 0.5726495726495726, 0.3983050847457627, 0.011928139715763179, 19.0)
('gpt', 3, 1, 'gpt_neo_1.3b', 2, 2, 0.5862068965517241, 0.49382716049382713, 0.03355243967962451, 12.0)
('gpt', 3, 1, 'gpt_neo_1.3b', 3, 2, 0.5862068965517241, 0.4307692307692308, 0.0241195447742939, 8.0)
('gpt', 3, 1, 'jurassic_jumbo', 1, 1, 0.5862068965517241, 0.4835164835164835, 0.010673840530216694, 8.0)
('gpt', 3, 1

In [6]:
# if False:
# print(f"volition")
# vol_df1 = get_df("volition")
# print(vol_df1)
# vol_df2 = get_df("volition", is_two=True)
# print(vol_df2)

# models_levels_prefixes = [("gpt", 3, 2), 
#                         #   ("gpt_neo_1.3b", 2, 2), 
#                           ("gpt_neo_2.7b", 1, 1), 
#                           ("gpt_j", 0, 1), 
#                           ("jurassic", 2, 1), 
#                           ("jurassic_jumbo", 2, 1), 
#                           ("t0", 0, 1),
#                           ("random", 0, 0)]


models_levels_prefixes = []
for model in ["gpt", "gpt_neo_1.3b", "gpt_neo_2.7b", "gpt_j", "jurassic", "jurassic_jumbo", "t0"]: 
    for level in range(4): 
        for aff in [1,2]: 
            models_levels_prefixes.append((model, level, aff))

models_levels_prefixes.append(("random", 0, 0))

results = []
done = []
for model1, level1, two_affix1 in models_levels_prefixes: 
    for model2, level2, two_affix2 in models_levels_prefixes: 
        if model1 == model2 or ((model1, model2, level1, level2, two_affix1, two_affix2) in done) or ((model2,model1, level2, level1, two_affix2, two_affix1) in done): 
            continue
        pval, stat, table = do_mcnemar(model1, model2, level1, level2, f"_{two_affix1}", f"_{two_affix2}", condition="volition")
        model1_acc = len(table[model1]['correct'])/(len(table[model1]['correct']) + len(table[model1]['incorrect']))

        model2_acc = len(table[model2]['correct'])/(len(table[model2]['correct']) + len(table[model2]['incorrect']))
        results.append((model1, level1, two_affix1, model2, level2, two_affix2, model1_acc, model2_acc, pval, stat))  
        done.append((model1, model2, level1, level2, two_affix1, two_affix2))


for x in results:
    if x[-2] < 0.05 and x[-1] > 5:
        print(x)




ZeroDivisionError: division by zero

In [None]:
pval, stat, table = do_mcnemar("gpt", model2, level1, level2, f"_{two_affix1}", f"_{two_affix2}", condition="volition")


In [43]:
from vis import recompute_agent_patient 
import pathlib 
import os 
import pandas as pd 


path_to_file = pathlib.Path("").absolute()
parent = path_to_file.parent

change_of_state_csvs = [parent.joinpath(f'agent_patient_results_1/gpt_neo_1.3b_volition_1_prefix_1.csv')]
cos_acc = recompute_agent_patient(change_of_state_csvs, [parent.joinpath(f"data/agent_patient/volition_1_prefix_1.json")], ['gpt-neo-1.3b'])
print(cos_acc)

{'gpt-neo-1.3b': (0.07, 100, 0.6363636363636364, 11)}
