In [2]:
import os
from typing import defaultdict
import pandas as pd
from dotenv import load_dotenv

In [15]:
# Load environment variables from the .env file three directories up.
# The path is relative, so it resolves against the current working directory.
env_path = '../../../.env'
load_dotenv(dotenv_path=env_path)

True

In [2]:
# count lines in each .m2 cefr file
# split model hyp file into cefr-level files
# re-evaluate using appropriate .m2 cefr-level files?

In [None]:
# The corpora root comes from the CORPORA environment variable. Fail with a
# clear message when it is missing instead of handing None to os.path.join,
# which would only raise an opaque TypeError.
corpora_root = os.environ.get('CORPORA')
if corpora_root is None:
    raise KeyError('CORPORA environment variable is not set')
corpora_path = os.path.join(corpora_root, 'wi+locness/m2')
corpora_path

In [18]:
# reference .m2 file for each cefr level (A, B, C) plus N
a_level, b_level, c_level, n_level = (
    os.path.join(corpora_path, f'{level}.dev.auto.m2') for level in 'ABCN'
)

In [19]:
def count_sentences(m2_file):
    """Count the sentence blocks in an .m2 file.

    A sentence block is a run of consecutive non-blank lines; blocks are
    separated by blank (empty or whitespace-only) lines.

    Args:
        m2_file: path to the .m2 file to read (decoded as UTF-8).

    Returns:
        The number of sentence blocks, as an int. A final block that is not
        followed by a blank line is still counted, and repeated blank lines
        do not inflate the count.
    """
    num_sentences = 0
    in_block = False
    # explicit encoding so the result does not depend on the platform default
    with open(m2_file, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                in_block = True
            elif in_block:
                # first blank line after some content closes the block
                num_sentences += 1
                in_block = False
    # a block still open at EOF means the trailing blank line was missing
    return num_sentences + int(in_block)

In [20]:
# sentence count of each reference .m2 file, keyed by cefr level letter
cefr_sentence_count = {
    'A': count_sentences(a_level),
    'B': count_sentences(b_level),
    'C': count_sentences(c_level),
    'N': count_sentences(n_level),
}

In [21]:
# total number of sentences over all cefr levels
total_sentences = sum(cefr_sentence_count.values())
total_sentences

4384

In [22]:
# map every global sentence index to its cefr level: the levels occupy
# consecutive index ranges, in the insertion order of cefr_sentence_count,
# and each range is as long as that level's sentence count
sentence_to_cefr_level = {}
sentence_index = 0
for cefr_level, level_size in cefr_sentence_count.items():
    level_end = sentence_index + level_size
    sentence_to_cefr_level.update(
        dict.fromkeys(range(sentence_index, level_end), cefr_level)
    )
    sentence_index = level_end

In [23]:
# sanity check: level A has 1037 sentences, so index 1036 is its last one
sentence_to_cefr_level[1036]

'A'

In [24]:
# A (1037) + B (1290) sentence counts = index of the first level-C sentence
1037 + 1290

2327

In [25]:
# load the best dev-set results and keep only the rows of the
# 'wibea-dev' split
best_results = pd.read_csv('../best_dev_set_results.csv')
df = best_results.loc[best_results['split'] == 'wibea-dev']

In [26]:
# preview the first rows kept for the 'wibea-dev' split
df.head()

Unnamed: 0,model,prompt_type_index,GLEU,Prec,Rec,split,F0.5
10,gpt-4-0613,0-shot_6,,0.5107,0.5083,wibea-dev,0.51
11,stabilityai/StableBeluga2,0-shot_7,,0.4416,0.4723,wibea-dev,0.447
12,command,0-shot_6,,0.4029,0.3497,wibea-dev,0.391
13,meta-llama/Llama-2-70b-chat-hf,0-shot_6,,0.3394,0.469,wibea-dev,0.359
14,tiiuae/falcon-40b-instruct,4-shot_2,,0.4669,0.4075,wibea-dev,0.454


In [27]:
# distinct prompt_type_index values present in the selected rows
df['prompt_type_index'].unique()

array(['0-shot_6', '0-shot_7', '4-shot_2', '1-shot_2', '3-shot_2',
       '3-shot_3', '2-shot_1', '3-shot_1'], dtype=object)

In [28]:
# run-directory name templates: every model family shares the same prefix
# and differs only in the decoding-parameter suffix
_run_name_prefix = 'wibea_dataset_dev_{model}_{prompt_type}_{prompt_index}'
open_source_model_template = _run_name_prefix + '_beams=1_temp=0.1_topk=50_topp=1.0'
gpt_model_template = _run_name_prefix + '_temp=0.1_topp=1.0'
cohere_model_template = _run_name_prefix + '_temp=0.1_topk=50_topp=1.0'

In [None]:
# show what the relative paper_output directory resolves to from the
# current working directory
os.path.abspath('../../../paper_output')

In [30]:
few_shot_base_path = os.path.abspath('../../../paper_output/output_few_shot_dev')
zero_shot_base_path = os.path.abspath('../../../paper_output/output_zero_shot_dev')


def _run_dir(model_name, prompt_type_index):
    """Return the ``run_1`` output directory for one row of the results table.

    Args:
        model_name: value of the ``model`` column; only the part after the
            last ``/`` is used in the directory name.
        prompt_type_index: value of the ``prompt_type_index`` column, in the
            form ``<k>-shot_<index>``.

    Returns:
        The path ``<base>/<run name>/run_1``, where the base is the zero-shot
        output directory for ``0-shot`` prompts and the few-shot output
        directory otherwise.
    """
    model = model_name.split('/')[-1]

    # the run-name template depends on the model family
    if 'gpt' in model:
        template = gpt_model_template
    elif 'command' in model:
        template = cohere_model_template
    else:
        template = open_source_model_template

    prompt_type, raw_index = prompt_type_index.split('_')
    prompt_type = prompt_type.replace('-', '_')

    if prompt_type == '0_shot':
        prompt_type = 'zero_shot'
        base_path = zero_shot_base_path
    else:
        base_path = few_shot_base_path

    run_name = template.format(
        model=model,
        prompt_type=prompt_type,
        # the index has to be reduced by one when mapping to file paths
        prompt_index=int(raw_index) - 1,
    )
    return os.path.join(base_path, run_name, "run_1")


model_paths = [
    _run_dir(model_name, prompt_type_index)
    for model_name, prompt_type_index in zip(df['model'], df['prompt_type_index'])
]

In [None]:
# inspect the resolved run directories
model_paths

In [None]:
# every resolved run directory must exist on disk; each path is printed
# before it is checked, so the last printed path is the one that failed
for run_dir in model_paths:
    print(run_dir)
    assert os.path.exists(run_dir)

In [33]:
# hypothesis .m2 file of the first run directory in model_paths
hyp_m2_file = os.path.join(model_paths[0], 'hyp_post_errantv2-3-3.m2')

In [34]:
# read in hyp file and group its sentence blocks by cefr level:
# the i-th block belongs to sentence_to_cefr_level[i]
hyp_cefr_sentences = defaultdict(list)
sentence_i = 0
sentence_block = []
# explicit encoding so the result does not depend on the platform default
with open(hyp_m2_file, 'r', encoding='utf-8') as f:
    for line in f:
        if line.strip():
            sentence_block.append(line)
        elif sentence_block:
            # a blank line closes the current block; blank lines that follow
            # no content are skipped so they cannot shift the alignment
            cefr_level_i = sentence_to_cefr_level[sentence_i]
            hyp_cefr_sentences[cefr_level_i].append(sentence_block)
            sentence_block = []
            sentence_i += 1

# the file may end without a trailing blank line; keep that last block too
if sentence_block:
    if not sentence_block[-1].endswith('\n'):
        sentence_block[-1] += '\n'
    cefr_level_i = sentence_to_cefr_level[sentence_i]
    hyp_cefr_sentences[cefr_level_i].append(sentence_block)
    sentence_block = []
    sentence_i += 1

In [35]:
# report how many hyp sentence blocks were assigned to each cefr level
for cefr_level, level_blocks in hyp_cefr_sentences.items():
    print(f'{cefr_level}: {len(level_blocks)}')

A: 1037
B: 1290
C: 1069
N: 988


In [36]:
# report the reference sentence count of each cefr level
for cefr_level, level_count in cefr_sentence_count.items():
    print(f'{cefr_level}: {level_count}')

A: 1037
B: 1290
C: 1069
N: 988


In [37]:
# share of each cefr level, as a percentage of all sentences
for cefr_level, level_count in cefr_sentence_count.items():
    print(f'{cefr_level}: {level_count / total_sentences * 100}')

A: 23.654197080291972
B: 29.425182481751825
C: 24.384124087591243
N: 22.536496350364963


In [38]:

# write one hypothesis .m2 file per cefr level into the first run directory
for cefr_level, level_blocks in hyp_cefr_sentences.items():
    cefr_level_file = os.path.join(model_paths[0], f'hyp_post_{cefr_level}.m2')
    # explicit encoding so the output does not depend on the platform default
    with open(cefr_level_file, 'w', encoding='utf-8') as f:
        for sentence_block in level_blocks:
            f.writelines(sentence_block)
            # blank line terminates each sentence block
            f.write('\n')

In [39]:
def split_hyp_into_cefr_hyp(model_path):
    """Split a run's hypothesis .m2 file into one .m2 file per CEFR level.

    Reads ``hyp_post_errantv2-3-3.m2`` from ``model_path``, assigns its i-th
    sentence block to the level ``sentence_to_cefr_level[i]`` (module-level
    mapping) and writes the blocks of each level, in their original order, to
    ``hyp_post_<level>.m2`` in the same directory. Every written block is
    followed by one blank line. Files are read and written as UTF-8.

    Args:
        model_path: directory that contains the hypothesis .m2 file and that
            receives the per-level files.

    Raises:
        ValueError: if the number of sentence blocks in the hypothesis file
            differs from the number of entries in ``sentence_to_cefr_level``.
            Nothing is written in that case, because the level assignment
            would be misaligned.
        OSError: if the hypothesis file cannot be read or an output file
            cannot be written.
    """
    m2_file = os.path.join(model_path, 'hyp_post_errantv2-3-3.m2')

    # collect the sentence blocks: runs of non-blank lines separated by
    # blank lines
    sentence_blocks = []
    sentence_block = []
    with open(m2_file, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                sentence_block.append(line)
            elif sentence_block:
                sentence_blocks.append(sentence_block)
                sentence_block = []
    # the file may end without a trailing blank line; keep that block too
    if sentence_block:
        if not sentence_block[-1].endswith('\n'):
            sentence_block[-1] += '\n'
        sentence_blocks.append(sentence_block)

    expected = len(sentence_to_cefr_level)
    if len(sentence_blocks) != expected:
        raise ValueError(
            f'{m2_file} has {len(sentence_blocks)} sentence blocks, '
            f'expected {expected}'
        )

    # group the blocks by level; a plain dict keeps first-seen level order
    hyp_cefr_sentences = {}
    for sentence_i, block in enumerate(sentence_blocks):
        cefr_level_i = sentence_to_cefr_level[sentence_i]
        hyp_cefr_sentences.setdefault(cefr_level_i, []).append(block)

    for cefr_level, level_blocks in hyp_cefr_sentences.items():
        cefr_level_file = os.path.join(model_path, f'hyp_post_{cefr_level}.m2')
        # write sentences to cefr level file
        with open(cefr_level_file, 'w', encoding='utf-8') as f:
            for block in level_blocks:
                f.writelines(block)
                # blank line terminates each sentence block
                f.write('\n')

In [40]:
# split every run's hypothesis file, printing the name of the run
# directory's parent as progress
for run_dir in model_paths:
    run_name = os.path.basename(os.path.dirname(run_dir))
    print(run_name)
    split_hyp_into_cefr_hyp(run_dir)

wibea_dataset_dev_gpt-4-0613_zero_shot_5_temp=0.1_topp=1.0
wibea_dataset_dev_StableBeluga2_zero_shot_6_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_command_zero_shot_5_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_Llama-2-70b-chat-hf_zero_shot_5_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_falcon-40b-instruct_4_shot_1_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_gpt-3.5-turbo-0613_1_shot_1_temp=0.1_topp=1.0
wibea_dataset_dev_flan-t5-xxl_3_shot_1_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_opt-iml-max-30b_3_shot_2_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_InstructPalmyra-20b_2_shot_0_beams=1_temp=0.1_topk=50_topp=1.0
wibea_dataset_dev_bloomz-7b1_3_shot_0_beams=1_temp=0.1_topk=50_topp=1.0
