In [81]:
from vis import recompute_agent_patient 
import pathlib 
import os 
import pandas as pd 
import numpy as np 
from statsmodels.stats.contingency_tables import mcnemar
from agent_patient_experiment import AgentPatientExperiment
# Seed NumPy's global RNG so the np.random draws made later (the "random"
# baseline in do_mcnemar) are repeatable for a given execution order.
np.random.seed(12)
# Absolute form of the current working directory.
path_to_file = pathlib.Path("").absolute()
# One directory up from the working directory; result and data paths used
# by the functions below are built relative to this.
parent = path_to_file.parent



def do_mcnemar(model1, model2, prefix1, prefix2, two_affix1, two_affix2, condition): 
    """Compare two models' per-item correctness with McNemar's test.

    Results for each model are recovered from
    ``<parent>/agent_patient_results_to_plot/{model}_{condition}{affix}_prefix_{prefix}.csv``.
    For ``"gpt"`` and any model whose name starts with ``"jurassic"`` the
    affix is dropped from the file name.  When ``model2`` is ``"random"`` no
    file is read; instead 100 items with independently drawn "Yes"/"No"
    predictions and labels are generated from NumPy's global RNG.

    Items are paired by position (``zip``, so the shorter result list decides
    how many are compared).  An item whose prediction is ``'other'`` is left
    out for that model, and therefore contributes to no cell of the 2x2 table.

    Args:
        model1, model2: model names used in the file names and as keys of the
            returned table.
        prefix1, prefix2: value placed after ``_prefix_`` in each file name.
        two_affix1, two_affix2: string inserted after the condition in each
            file name (ignored for gpt / jurassic models).
        condition: condition name used in the file names.

    Returns:
        ``(pvalue, statistic, table)`` where ``table[model]`` holds the lists
        of item indices under ``'correct'`` and ``'incorrect'``.
    """
    results_dir = parent.joinpath(f"agent_patient_results_to_plot")

    def _results_csv(model, affix, prefix):
        # gpt and jurassic result files carry no affix in their names.
        if model == "gpt" or model.startswith("jurassic"):
            affix = ""
        return results_dir.joinpath(f"{model}_{condition}{affix}_prefix_{prefix}.csv")

    # Resolve both file locations up front, before any experiment is built.
    first_csv = _results_csv(model1, two_affix1, prefix1)
    against_random = model2 == "random"
    second_csv = None if against_random else _results_csv(model2, two_affix2, prefix2)

    first = AgentPatientExperiment(model1, "", None, None, 1, None)
    first.recover(first_csv)

    if against_random:
        second = AgentPatientExperiment("random", "", None, None, 1, None)
        yes_no = ["Yes", "No"]
        synthetic = []
        for _ in range(100):
            # Draw order matters for reproducibility: prediction first, then label.
            guess = yes_no[np.random.choice([0, 1])]
            label = yes_no[np.random.choice([0,1])]
            synthetic.append({"pred": guess, "true": label})
        second.results = synthetic
    else:
        second = AgentPatientExperiment(model2, "", None, None, 1, None)
        second.recover(second_csv)

    table = {model1: {"correct": [], "incorrect": []},
             model2: {"correct": [], "incorrect": []}}

    def _file_item(model, idx, item):
        # 'other' predictions are dropped; everything else is right or wrong.
        pred, gold = item['pred'], item['true']
        if pred == 'other':
            return
        outcome = 'correct' if pred == gold else 'incorrect'
        table[model][outcome].append(idx)

    for idx, (item1, item2) in enumerate(zip(first.results, second.results)):
        _file_item(model1, idx, item1)
        _file_item(model2, idx, item2)

    # Contingency table as filled in below:
    #   rows    -> model1 (row 0 correct, row 1 incorrect)
    #   columns -> model2 (col 0 correct, col 1 incorrect)
    right1, wrong1 = set(table[model1]['correct']), set(table[model1]['incorrect'])
    right2, wrong2 = set(table[model2]['correct']), set(table[model2]['incorrect'])
    table_arr = np.array(
        [[len(right1 & right2), len(right1 & wrong2)],
         [len(wrong1 & right2), len(wrong1 & wrong2)]],
        dtype=float,
    )

    outcome = mcnemar(table_arr)
    return outcome.pvalue, outcome.statistic, table

def get_df(condition, is_two = False, thresh=30):
    """Pick, per model, the prompt prefix with the most correct valid answers.

    For each prefix in 0..3 the per-model result CSVs under
    ``<parent>/agent_patient_results{_1|_2}/`` are scored with
    ``recompute_agent_patient`` against the matching prompt file in
    ``<parent>/data/agent_patient/``.  The third and fourth entries of each
    model's score tuple are used as accuracy and number of valid answers
    (presumably accuracy over valid answers only -- confirm against
    ``recompute_agent_patient``).

    A prefix only counts when its number of valid answers is strictly greater
    than ``thresh``.  Among counting prefixes the one with the largest
    ``accuracy * num_valid`` wins; ties keep the earlier prefix.  A model with
    no counting prefix keeps the placeholder row created for the first prefix
    (accuracy ``-1.0``, ``num_valid`` ``0``).

    Args:
        condition: condition name used in the result and prompt file names.
        is_two: use the ``_2`` result directory / file affix instead of ``_1``.
        thresh: minimum (exclusive) number of valid answers for a prefix to
            be considered.

    Returns:
        An object-dtype DataFrame with columns ``model``, ``num_prompts``
        (the chosen prefix), ``accuracy`` and ``num_valid``, one row per
        model in first-seen order.
    """
    columns = ["model", "num_prompts", "accuracy", "num_valid"]
    two_affix = "_2" if is_two else "_1"
    names = ["gpt", "gpt-neo-1.3b", "gpt-neo-2.7b", "gpt-j", "jurassic-large", "jurassic-jumbo", "t0"]

    # Best row seen so far for each model.  Rows are collected in a dict and
    # turned into a frame once at the end: DataFrame.append no longer exists
    # in pandas >= 2.0.
    best = {}
    for prefix in range(0,4):
        # One CSV per entry of `names`, in the same order.  gpt and the
        # jurassic files have no affix in the file name; t5 is left out.
        change_of_state_csvs = [parent.joinpath(f'agent_patient_results{two_affix}/gpt_{condition}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_neo_1.3b_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_neo_2.7b_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/gpt_j_{condition}{two_affix}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/jurassic_{condition}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/jurassic_jumbo_{condition}_prefix_{prefix}.csv'),
                                parent.joinpath(f'agent_patient_results{two_affix}/t0_{condition}{two_affix}_prefix_{prefix}.csv') ]
        # Every model is scored against the same prompt file for this prefix.
        prompt_file = parent.joinpath(f"data/agent_patient/{condition}{two_affix}_prefix_{prefix}.json")
        prompt_files = [prompt_file] * len(change_of_state_csvs)
        cos_acc = recompute_agent_patient(change_of_state_csvs, prompt_files, names)

        for model in names:
            try:
                __, __, acc, num = cos_acc[model]
            except KeyError:
                # No score for this model at this prefix.
                acc, num = -1.0, 0

            enough_valid = num > thresh
            current = best.get(model)
            if current is None:
                # First time this model is seen: record it, or a placeholder
                # when too few answers were valid.
                if enough_valid:
                    best[model] = {"model": model, "num_prompts": prefix, "accuracy": acc, "num_valid": num}
                else:
                    best[model] = {"model": model, "num_prompts": prefix, "accuracy": -1.0, "num_valid": 0}
            elif enough_valid and acc * num > current["accuracy"] * current["num_valid"]:
                # Compare by accuracy weighted with the number of valid
                # answers rather than by raw accuracy.
                current["num_prompts"] = prefix
                current["accuracy"] = acc
                current["num_valid"] = num

    return pd.DataFrame(list(best.values()), columns=columns, dtype=object)

In [82]:
# Disabled helper: flip the guard to print the best-prefix tables that the
# hand-entered settings below were taken from.
if False: 
    print(f"change of state")
    cos_df1 = get_df("change_of_state")
    print(cos_df1)
    cos_df2 = get_df("change_of_state", is_two=True)
    print(cos_df2)


# One entry per model: (model name, prefix level, affix number).
# "random" is the synthetic baseline generated inside do_mcnemar.
models_levels_prefixes = [
    ("gpt", 3, 2),
    ("gpt_neo_1.3b", 2, 2),
    ("gpt_neo_2.7b", 0, 2),
    ("gpt_j", 0, 2),
    ("jurassic", 1, 2),
    ("jurassic_jumbo", 1, 1),
    ("t0", 1, 2),
    ("random", 0,0),
]

# Run McNemar's test once for every unordered pair of distinct models,
# pairing each model with the ones listed after it.
results = []
done = []
for position, (model1, level1, two_affix1) in enumerate(models_levels_prefixes):
    for model2, level2, two_affix2 in models_levels_prefixes[position + 1:]:
        pval, stat, table = do_mcnemar(
            model1,
            model2,
            level1,
            level2,
            f"_{two_affix1}",
            f"_{two_affix2}",
            condition="change_of_state",
        )
        # Accuracy over the items that were not dropped as 'other'.
        right1, wrong1 = table[model1]['correct'], table[model1]['incorrect']
        right2, wrong2 = table[model2]['correct'], table[model2]['incorrect']
        model1_acc = len(right1) / (len(right1) + len(wrong1))
        model2_acc = len(right2) / (len(right2) + len(wrong2))
        results.append((model1, model2, model1_acc, model2_acc, pval, stat))
        done.append((model1, model2))

# Report only the pairs whose p-value is below 0.05.
for row in results:
    p_value = row[4]
    if p_value < 0.05:
        print(row)
# [print(x) for x in results]
# for level in range(3):
#     for model in ["gpt", "gpt-neo-1.3b", "gpt-neo-2.7b", "gpt-j", "jurassic-large", "jurassic-jumbo", "t0"]


# do_mcnemar("gpt_neo_1.3b", "gpt_neo_1.3b", "0", "0", "_1", "_1", "change_of_state")


('gpt', 'gpt_neo_2.7b', 0.61, 0.39759036144578314, 0.03153949735315109, 14.0)
('gpt_neo_1.3b', 'gpt_neo_2.7b', 0.5645161290322581, 0.39759036144578314, 0.012725830078125, 3.0)
('gpt_j', 'random', 0.4117647058823529, 0.53, 0.043285250663757324, 7.0)


In [84]:
# Disabled helper: flip the guard to print the best-prefix tables that the
# hand-entered settings below were taken from.
if False:
    print(f"volition")
    vol_df1 = get_df("volition")
    print(vol_df1)
    vol_df2 = get_df("volition", is_two=True)
    print(vol_df2)

# One entry per model: (model name, prefix level, affix number).
# gpt_neo_1.3b is deliberately left out of the volition comparison;
# "random" is the synthetic baseline generated inside do_mcnemar.
models_levels_prefixes = [
    ("gpt", 3, 2),
    #   ("gpt_neo_1.3b", 2, 2),
    ("gpt_neo_2.7b", 1, 1),
    ("gpt_j", 0, 1),
    ("jurassic", 2, 1),
    ("jurassic_jumbo", 2, 1),
    ("t0", 0, 1),
    ("random", 0, 0),
]

# Run McNemar's test once for every unordered pair of distinct models,
# pairing each model with the ones listed after it.
results = []
done = []
for position, (model1, level1, two_affix1) in enumerate(models_levels_prefixes):
    for model2, level2, two_affix2 in models_levels_prefixes[position + 1:]:
        pval, stat, table = do_mcnemar(
            model1,
            model2,
            level1,
            level2,
            f"_{two_affix1}",
            f"_{two_affix2}",
            condition="volition",
        )
        # Accuracy over the items that were not dropped as 'other'.
        right1, wrong1 = table[model1]['correct'], table[model1]['incorrect']
        right2, wrong2 = table[model2]['correct'], table[model2]['incorrect']
        model1_acc = len(right1) / (len(right1) + len(wrong1))
        model2_acc = len(right2) / (len(right2) + len(wrong2))
        results.append((model1, model2, model1_acc, model2_acc, pval, stat))
        done.append((model1, model2))

# Report only the pairs whose p-value is below 0.05.
for row in results:
    p_value = row[4]
    if p_value < 0.05:
        print(row)

('gpt', 'jurassic', 0.73, 0.49, 0.001841554788492544, 16.0)
('gpt', 'random', 0.73, 0.48, 0.00017015517460094998, 9.0)
('gpt_j', 'jurassic', 0.6727272727272727, 0.49, 0.040959591511636986, 11.0)
('jurassic', 't0', 0.49, 0.64, 0.035697803555194696, 15.0)
('t0', 'random', 0.64, 0.46, 0.01328328156224501, 15.0)


In [43]:
from vis import recompute_agent_patient 
import pathlib 
import os 
import pandas as pd 


# Anchor paths at the directory above the current working directory.
path_to_file = pathlib.Path("").absolute()
parent = path_to_file.parent

# Score a single result file (gpt-neo-1.3b, volition, affix 1, prefix 1)
# against its prompt file and print the raw score tuple.
change_of_state_csvs = [parent / 'agent_patient_results_1/gpt_neo_1.3b_volition_1_prefix_1.csv']
volition_prompts = [parent / "data/agent_patient/volition_1_prefix_1.json"]
cos_acc = recompute_agent_patient(change_of_state_csvs, volition_prompts, ['gpt-neo-1.3b'])
print(cos_acc)

{'gpt-neo-1.3b': (0.07, 100, 0.6363636363636364, 11)}
