In [1]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import json

In [2]:
# Read the best zero-shot dev-set results CSV into a DataFrame and preview
# the first five rows.
results_csv = '../best_zero_shot_dev_results.csv'
df = pd.read_csv(results_csv)
df.head(5)

Unnamed: 0,model,prompt_type_index,GLEU,split,F0.5
0,facebook/opt-iml-max-30b,0-shot_10,0.501,jfleg-dev,
1,gpt-4-0613,0-shot_8,0.582,jfleg-dev,
2,google/flan-t5-xxl,0-shot_5,0.459,jfleg-dev,
3,gpt-3.5-turbo-0613,0-shot_10,0.574,jfleg-dev,
4,tiiuae/falcon-40b-instruct,0-shot_6,0.541,jfleg-dev,


In [3]:
# Distinct values of the `model` column, in order of first appearance.
all_models = df['model'].unique()
all_models

array(['facebook/opt-iml-max-30b', 'gpt-4-0613', 'google/flan-t5-xxl',
       'gpt-3.5-turbo-0613', 'tiiuae/falcon-40b-instruct',
       'stabilityai/StableBeluga2', 'command',
       'meta-llama/Llama-2-70b-chat-hf', 'Writer/InstructPalmyra-20b',
       'bigscience/bloomz-7b1'], dtype=object)

In [4]:
# Distinct values of the `split` column. Bracket access is used because
# attribute access can be shadowed by a DataFrame attribute of the same name.
splits = df['split'].unique()
splits

array(['jfleg-dev', 'wibea-dev', 'fce-dev'], dtype=object)

In [5]:
# Format a single LaTeX table cell: a model's score with the prompt that
# produced it as a subscript. The caller joins these cells into table rows.
def write_latex_row_subcript(df, model, metric):
    """Return the LaTeX cell ``<score>$_{<prompt tag>}$`` for ``model``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to look the model up in. Must contain the columns ``model``,
        ``prompt_type_index`` and the column named by ``metric``. The best
        score is taken over every row of this frame, so pass a frame that is
        already restricted to the rows being compared.
    model : str
        Value of the ``model`` column to format. If it matches several rows,
        the first one is used.
    metric : str
        Name of the score column to read.

    Returns
    -------
    str
        The score formatted with three decimals (wrapped in LaTeX bold when
        it equals the maximum of ``df[metric]``), followed by the prompt tag
        as a math-mode subscript.

    Raises
    ------
    IndexError
        If ``model`` has no row in ``df``.
    """
    # Filter once and reuse the result for both the score and the prompt.
    model_rows = df[df['model'] == model]
    if model_rows.empty:
        raise IndexError(
            f'model {model!r} has no row in the given dataframe; '
            f'cannot format its {metric!r} cell'
        )

    score = model_rows[metric].iloc[0]
    formatted_score = '{:.3f}'.format(score)
    # Bold the best score in the frame (ties are all bolded).
    if score == df[metric].max():
        formatted_score = '\\textbf{' + formatted_score + '}'

    # prompt_type_index looks like '<shot setting>_<prompt number>';
    # keep the part after the first underscore.
    prompt_type_index = model_rows['prompt_type_index'].iloc[0]
    prompt_index = prompt_type_index.split('_')[1]

    # Prompt numbers with a known tag are replaced by that tag; any other
    # prompt number is emitted unchanged.
    prompt_tags = {
        '5': '\\textsc{elt}',
        '6': '\\textsc{tool}',
        '7': '\\textsc{tool}',
        '8': '\\textsc{dn}',
        '10': '\\textsc{cyn}',
    }
    prompt_index = prompt_tags.get(prompt_index, prompt_index)

    return f'{formatted_score}$_{{{prompt_index}}}$'

In [6]:
# Show only the rows whose split is 'fce-dev'.
df.loc[df['split'].eq('fce-dev')]

Unnamed: 0,model,prompt_type_index,GLEU,split,F0.5
20,gpt-4-0613,0-shot_7,,fce-dev,0.474
21,google/flan-t5-xxl,0-shot_6,,fce-dev,0.424
22,gpt-3.5-turbo-0613,0-shot_5,,fce-dev,0.416
23,tiiuae/falcon-40b-instruct,0-shot_7,,fce-dev,0.406
24,stabilityai/StableBeluga2,0-shot_7,,fce-dev,0.403
25,facebook/opt-iml-max-30b,0-shot_7,,fce-dev,0.395
26,command,0-shot_6,,fce-dev,0.353
27,meta-llama/Llama-2-70b-chat-hf,0-shot_6,,fce-dev,0.323
28,Writer/InstructPalmyra-20b,0-shot_5,,fce-dev,0.309
29,bigscience/bloomz-7b1,0-shot_10,,fce-dev,0.282


In [7]:

# Print the body of the LaTeX results table: one row per model, one column
# per split (splits in sorted order), each row terminated by `\\`.

# Rows are printed in this fixed order rather than in CSV order.
model_order = [
    'bigscience/bloomz-7b1',
    'google/flan-t5-xxl',
    'Writer/InstructPalmyra-20b',
    'facebook/opt-iml-max-30b',
    'tiiuae/falcon-40b-instruct',
    'meta-llama/Llama-2-70b-chat-hf',
    'stabilityai/StableBeluga2',
    'command',
    'gpt-3.5-turbo-0613',
    'gpt-4-0613',
    ]

# Loop-invariant lookups, computed once instead of once per model.
available_models = set(df['model'])
split_frames = {split: df[df['split'] == split] for split in sorted(splits)}
# Printed in place of a cell when a model has no row for a split.
missing_cell = '--'

for model in model_order:
    # Models listed above but absent from the results are skipped entirely.
    if model not in available_models:
        continue
    # Keep only the part of the identifier after the last '/'.
    model_name = model.split('/')[-1]
    row = [model_name]
    for split, split_df in split_frames.items():
        # Splits whose name contains 'jfleg' use GLEU; all others use F0.5.
        metric = 'GLEU' if 'jfleg' in split else 'F0.5'
        if (split_df['model'] == model).any():
            row.append(write_latex_row_subcript(split_df, model, metric))
        else:
            row.append(missing_cell)

    print(' & '.join(row) + ' \\\\')

bloomz-7b1 & 0.282$_{\textsc{cyn}}$ & 0.402$_{\textsc{cyn}}$ & 0.226$_{\textsc{cyn}}$ \\
flan-t5-xxl & 0.424$_{\textsc{tool}}$ & 0.459$_{\textsc{elt}}$ & 0.408$_{\textsc{elt}}$ \\
InstructPalmyra-20b & 0.309$_{\textsc{elt}}$ & 0.517$_{\textsc{tool}}$ & 0.352$_{\textsc{elt}}$ \\
opt-iml-max-30b & 0.395$_{\textsc{tool}}$ & 0.501$_{\textsc{cyn}}$ & 0.372$_{\textsc{cyn}}$ \\
falcon-40b-instruct & 0.406$_{\textsc{tool}}$ & 0.541$_{\textsc{tool}}$ & 0.450$_{\textsc{tool}}$ \\
Llama-2-70b-chat-hf & 0.323$_{\textsc{tool}}$ & 0.500$_{\textsc{tool}}$ & 0.359$_{\textsc{tool}}$ \\
StableBeluga2 & 0.403$_{\textsc{tool}}$ & 0.563$_{\textsc{cyn}}$ & 0.447$_{\textsc{tool}}$ \\
command & 0.353$_{\textsc{tool}}$ & 0.535$_{\textsc{cyn}}$ & 0.391$_{\textsc{tool}}$ \\
gpt-3.5-turbo-0613 & 0.416$_{\textsc{elt}}$ & 0.574$_{\textsc{cyn}}$ & 0.434$_{\textsc{elt}}$ \\
gpt-4-0613 & \textbf{0.474}$_{\textsc{tool}}$ & \textbf{0.582}$_{\textsc{dn}}$ & \textbf{0.510}$_{\textsc{tool}}$ \\
