In [32]:
# hide the loading messages
import logging
import transformers

import pandas as pd

# Raise the log threshold to ERROR on the loggers of these three transformers
# submodules so their loading messages are not shown in the notebook output.
for _submodule in ("tokenization_utils", "configuration_utils", "modeling_utils"):
    getattr(transformers, _submodule).logger.setLevel(logging.ERROR)

from bert_score import BERTScorer

In [33]:
# Read the texts to evaluate. index_col=False keeps pandas from using the
# first CSV column as the row index.
record = pd.read_csv(
    'record.csv',
    index_col=False,
)

# A single scorer instance is built here and reused by the scoring loop.
# NOTE(review): lang="en" is fixed -- confirm the texts in record.csv are English.
scorer = BERTScorer(
    lang="en",
    rescale_with_baseline=True,
)

In [34]:
# Display the column names; the scoring cell iterates over every one of them.
record.columns

Index(['prompt', 'requirement', 'GPT4, FULL INSTRUCTION, K=2, MANUAL',
       'translation', 'GPT4, FULL INSTRUCTION, K=0',
       'GPT4, FULL INSTRUCTION, K=2, RANDOM', 'GPT4, MINIMAL INSTRUCTION, K=0',
       'conversation', 'GPT4, NO INSTRUCTION, K=2, MANUAL',
       'GPT4, NO INSTRUCTION, K=4, MANUAL',
       'GPT4, NO INSTRUCTION, K=6, MANUAL', 'KoAlpaca'],
      dtype='object')

In [36]:
# Mean BERTScore F1 of every column of `record`, scored against 'prompt'.
# Maps column name -> mean F1 as a plain Python float.
result_dict = {}

# The reference texts are identical for every column, so build the list once
# instead of on every loop iteration.
refs = list(record['prompt'])

for col in record.columns:
    # 'prompt' itself is included, so it is compared with its own text and its
    # score is expected to be ~1.0.
    cands = list(record[col])

    # score() returns precision, recall and F1; only F1 is reported here.
    precision, recall, f1 = scorer.score(cands, refs)

    # Average over all rows and convert to a Python float once, so the stored
    # value and the printed value are the same object.
    mean_f1 = f1.mean().item()

    result_dict[col] = mean_f1
    print(f'{col}: {mean_f1}')

prompt: 0.9999998807907104
requirement: -0.6917164921760559
GPT4, FULL INSTRUCTION, K=2, MANUAL: 0.36643362045288086
translation: 0.09029065072536469
GPT4, FULL INSTRUCTION, K=0: 0.37376314401626587
GPT4, FULL INSTRUCTION, K=2, RANDOM: 0.3415583670139313
GPT4, MINIMAL INSTRUCTION, K=0: 0.33272504806518555
conversation: -0.20070981979370117
GPT4, NO INSTRUCTION, K=2, MANUAL: 0.3579593896865845
GPT4, NO INSTRUCTION, K=4, MANUAL: 0.33854368329048157
GPT4, NO INSTRUCTION, K=6, MANUAL: 0.34931299090385437
KoAlpaca: -0.1780482828617096


In [37]:
import csv

# Save the per-column mean F1 scores as a CSV with one header row (the column
# names) and one data row (the scores).
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write end up with a blank line after each row.
with open('bertscore.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=result_dict.keys())
    writer.writeheader()
    writer.writerow(result_dict)
