In [18]:
import glob
import json
import os
import pandas as pd 

In [7]:
trials = glob.glob('../tasks/quote_prediction/results/*')

In [46]:
trials[0]

'../tasks/quote_prediction/results/big-bird-base__ablated-top__source-and-text'

In [144]:
cols_to_keep = [
    'Other_accuracy',
    'Safety_accuracy',
    'Elections_accuracy',
    'Labor_accuracy',
    'Disaster_accuracy',
    'Environment_accuracy',
    'gold-label-train_accuracy',
    'gold-label-test_accuracy'
]

In [145]:
def get_results_callbacks(callback_folder):
    runs = glob.glob(os.path.join(callback_folder, 'call*'))
    all_runs = []
    for r in runs:
        runs_res = pd.Series(json.load(open(r)))
        all_runs.append(runs_res)
    all_runs_df = pd.concat(all_runs, axis=1).T[cols_to_keep]
    return all_runs_df.loc[lambda df: df['Other_accuracy'].astype(float).idxmax()]

In [146]:
all_results_dict = {}
for t in trials:
    all_results_dict[os.path.basename(t)] = get_results_callbacks(t)

In [147]:
bigbird_res_df = pd.DataFrame(all_results_dict).T

In [148]:
gpt3_prediction_files = glob.glob('cache/*ablated*results*')

In [149]:
one_file_df = pd.read_csv(gpt3_prediction_files[0])

In [150]:
gpt_trial_files = [
 ## gpt top with source
    ('gpt top with source',
     ('cache/2023-01-17__annotated-ablated-top-source-results.csv',
      'cache/2023-01-15__ablated-top-source-results.csv',),),
 ## top without source
    ('gpt top without source',
     ('cache/2023-01-17__annotated-ablated-top__sans-source__results.csv',
      'cache/2023-01-17__ablated-top__sans-source__results.csv',),),
 ## high perc with source
    ('gpt high perc with source',
     ('cache/2023-01-15__ablated-high-perc-source-results.csv',
      'cache/2023-01-17__annotated-ablated-high-perc-source-results.csv',),),
 ## high perc without source 
    ('gpt high perc without source',
     ('cache/2023-01-17__ablated-high-perc__sans-source__results.csv',
      'cache/2023-01-17__annotated-ablated-high-perc__sans-source__results.csv',) ,),
 ## any with source
    ('gpt any with source',
     ('cache/2023-01-15__ablated-any-source-results.csv',
      'cache/2023-01-17__annotated-ablated-any-source-results.csv',),),
 ## any without source
    ('gpt any without source',
     ('cache/2023-01-17__annotated-ablated-any__sans-source__results.csv',
      'cache/2023-01-17__ablated-any__sans-source__results.csv',),    )
]

In [151]:
def get_results_gpt(silver_file, gold_file):
    if 'annotated' in silver_file:
        gold_file, silver_file = silver_file, gold_file

    results_dict = {}
    gold_df = pd.read_csv(gold_file)
    gold_df['match'] = (gold_df['completion'].str.lower() == gold_df['preds'].str.lower())
    results_dict['gold-label-test_accuracy'] = gold_df.loc[lambda df: df['preds'].notnull()]['match'].mean()

    silver_df = pd.read_csv(silver_file)
    silver_df['match'] = (silver_df['completion'].str.lower() == silver_df['preds'].str.lower())

    results_dict['Other_accuracy'] =  silver_df.loc[lambda df: df['preds'].notnull()]['match'].mean()

    res = (silver_df
     .loc[lambda df: df['preds'].notnull()]
     .groupby('category')['match']
     .mean()
     .rename(lambda x: x + '_accuracy')
     .to_dict()
    )

    results_dict.update(res)
    return results_dict

In [152]:
gpt_trials = {}
for name, (s, g) in gpt_trial_files:
    gpt_trials[name] = get_results_gpt(s, g)

In [153]:
all_gpt3_res_df = pd.DataFrame(gpt_trials).T

In [160]:
combined_all_results_df = pd.concat([
    all_gpt3_res_df,
    bigbird_res_df
]).drop('Environment_accuracy', axis=1)
combined_all_results_df = (
    combined_all_results_df
    .drop(['gold-label-test_accuracy', 'gold-label-train_accuracy'], axis=1)
)

In [169]:
index_order = [
 'big-bird-base__ablated-top__text',
 'big-bird-base__ablated-top__source-and-text',
 'gpt top with source',
 'gpt top without source',
 #
 'big-bird-base__ablated-high-perc__text',
 'big-bird-base__ablated-high-perc__source-and-text',
 'gpt high perc with source',
 'gpt high perc without source',
 # 
 'big-bird-base__ablated-any__text',
 'big-bird-base__ablated-any__source-and-text',
 'gpt any without source',
 'gpt any with source',
]

index_mapper = {
 'big-bird-base__ablated-top__text': 'BigBird, Main Source',
 'big-bird-base__ablated-top__source-and-text': '+source',
 'gpt top with source': 'GPT3 1.3B',
 'gpt top without source': '+source',
 #
 'big-bird-base__ablated-high-perc__text': 'BigBird, HighPerc Source',
 'big-bird-base__ablated-high-perc__source-and-text': '+source',
 'gpt high perc with source': 'GPT3 1.3B',
 'gpt high perc without source': '+source',
 # 
 'big-bird-base__ablated-any__text': 'BigBird, Any Source',
 'big-bird-base__ablated-any__source-and-text': '+source',
 'gpt any without source': 'GPT3 1.3B',
 'gpt any with source': '+source',
}

In [180]:
final_res_df = (combined_all_results_df
 .loc[index_order]
 .rename(index=index_mapper)
 .pipe(lambda df: df*100)
 .astype(float)
 .round(1)
 .rename(columns=lambda x: x.replace('_accuracy', ''))
)

In [181]:
final_res_df

Unnamed: 0,Other,Disaster,Elections,Labor,Safety
"BigBird, Main Source",74.2,68.4,78.3,74.0,78.1
+source,73.9,69.7,74.9,73.4,73.4
GPT3 1.3B,78.3,75.5,81.5,72.7,80.0
+source,74.9,69.5,78.0,70.9,65.1
"BigBird, HighPerc Source",63.8,61.8,63.1,64.3,61.7
+source,65.1,69.7,65.7,64.9,62.5
GPT3 1.3B,67.1,67.9,72.9,58.8,65.6
+source,65.4,65.1,68.0,65.9,66.7
"BigBird, Any Source",57.5,53.9,55.5,55.8,57.8
+source,59.4,55.3,60.6,60.4,56.2


In [184]:
import pyperclip
pyperclip.copy(final_res_df.to_latex())

  pyperclip.copy(final_res_df.to_latex())
