In [1]:
# This notebook recomputes micro- and macro-averaged precision, recall and F1 from the per-run results files.

In [2]:
# import dependencies
import os
import pandas as pd

In [3]:
# calculate F1-score
def calc_f1(p, r):
    """Return the F1-score (harmonic mean) of precision ``p`` and recall ``r``.

    Parameters
    ----------
    p : float
        Precision.
    r : float
        Recall.

    Returns
    -------
    float
        ``2 * p * r / (p + r)``, or ``0.0`` when ``p + r`` is zero.
    """
    # With no true positives both precision and recall are 0 and the harmonic
    # mean is undefined; report 0.0 instead of raising ZeroDivisionError.
    if p + r == 0:
        return 0.0
    return 2 * ((p*r)/(p+r))

In [13]:
# calculate macro and micro scores
def calc_macro(p, r, f):
    """Return the macro-averaged (precision, recall, F1) rounded to 4 decimals.

    Each argument is a sequence of per-class scores; the macro score is the
    plain arithmetic mean of that sequence.
    """
    # Same averaging for all three metrics, evaluated in p, r, f order.
    return tuple(round(sum(scores)/len(scores), 4) for scores in (p, r, f))
    
def calc_micro(tp, fp, fn):
    """Return the micro-averaged (precision, recall, F1) rounded to 4 decimals.

    Parameters
    ----------
    tp, fp, fn : sequence of numbers
        Per-class true-positive, false-positive and false-negative counts.
        The counts are summed over all classes before the scores are computed.

    Returns
    -------
    tuple
        ``(precision, recall, f1)``. A score whose denominator is zero is
        reported as ``0.0`` instead of raising ZeroDivisionError.
    """
    total_tp = sum(tp)
    predicted = total_tp + sum(fp)   # everything the model labelled positive
    actual = total_tp + sum(fn)      # everything that is positive in the gold data

    m_p = total_tp / predicted if predicted else 0.0
    m_r = total_tp / actual if actual else 0.0
    # Guard here as well so this function never divides by zero on its own.
    m_f1 = calc_f1(m_p, m_r) if (m_p + m_r) else 0.0
    return round(m_p,4), round(m_r,4), round(m_f1,4)

In [29]:
# select model + params
# Model whose results are analysed; used as a directory name under
# ./data/results/sentence_sizes/.
t = 'bert-elmo-flair'
# Sentence sizes to evaluate, kept as strings because they are concatenated
# into the results path.
sentence_sizes = [str(size) for size in (25, 50, 75, 100, 150, 200)]

In [30]:
# loop through the results files
# Column names of the per-sentence-size summary CSV (loop-invariant).
headers = ['Type', 'Run #', 'P (micro)', 'R (micro)', 'F1 (micro)', 'P (macro)', 'R (macro)', 'F1 (macro)']

for sentence_size in sentence_sizes:
    # Directory holding results_<run>.txt for the selected model and sentence size.
    run_dir = './data/results/sentence_sizes/' + t + '/' + sentence_size
    temp = []

    # Runs are numbered 1..5.
    for i in range(1,6):
        path = run_dir + '/results_' + str(i) + '.txt'
        print("Run #",  i)

        precisions = []
        recalls = []
        f1 = []
        tps = []
        fps = []
        fns = []
        # Reset per run: a file without a micro/macro summary line now prints
        # None instead of raising NameError or reusing the previous run's value.
        microf1 = None
        macrof1 = None

        # The context manager closes the file even if a line fails to parse.
        with open(path, "r") as f:
            for line in f:
                if 'micro' in line:
                    # F1 reported in the file itself; only printed for comparison.
                    microf1 = line.split(" ")[3].strip()
                elif 'macro' in line:
                    macrof1 = line.split(" ")[3].strip()
                elif 'tp' in line:
                    # NOTE(review): per-class lines are assumed to be colon-separated
                    # with tp, fp, fn, precision, recall and f1 values in fields 1-6
                    # (inferred from the indexing below) - confirm against the files.
                    split = line.split(":")

                    # macro: per-class scores, averaged later
                    precision = split[4].split("-")[0].strip()
                    recall = split[5].split("-")[0].strip()
                    precisions.append(float(precision))
                    recalls.append(float(recall))
                    f1.append(float(split[6].strip()))

                    # micro: per-class counts, summed later
                    tp = split[1].split("-")[0].strip()
                    fp = split[2].split("-")[0].strip()
                    fn = split[3].split("-")[0].strip()

                    tps.append(float(tp))
                    fps.append(float(fp))
                    fns.append(float(fn))

        micro_p, micro_r, micro_f1 = calc_micro(tps, fps, fns)
        print('Micro: p=',micro_p, ' r=', micro_r, ' f1=', micro_f1)

        macro_p, macro_r, macro_f1 = calc_macro(precisions, recalls, f1)
        print('Macro: p=',macro_p, ' r=', macro_r, ' f1=', macro_f1)

        temp.append([t, i, micro_p, micro_r, micro_f1, macro_p, macro_r, macro_f1])

        # F1 values as written in the results file, to compare with the recomputed ones.
        print('f1', microf1, macrof1)

    # One summary CSV per sentence size, one row per run.
    df = pd.DataFrame(data=temp, columns=headers)
    df.to_csv(run_dir + '/results.csv', index=False)

Run # 1
Micro: p= 0.8087  r= 0.9029  f1= 0.8532
Macro: p= 0.8161  r= 0.8895  f1= 0.8481
f1 0.8532 0.8481
Run # 2
Micro: p= 0.8529  r= 0.9255  f1= 0.8878
Macro: p= 0.7292  r= 0.8167  f1= 0.7527
f1 0.8878 0.7527
Run # 3
Micro: p= 0.819  r= 0.9348  f1= 0.8731
Macro: p= 0.7984  r= 0.795  f1= 0.788
f1 0.8731 0.7880
Run # 4
Micro: p= 0.8778  r= 0.8876  f1= 0.8827
Macro: p= 0.9084  r= 0.9177  f1= 0.9116
f1 0.8827 0.9116
Run # 5
Micro: p= 0.7885  r= 0.8039  f1= 0.7961
Macro: p= 0.7171  r= 0.7447  f1= 0.7262
f1 0.7961 0.7262
Run # 1
Micro: p= 0.7982  r= 0.91  f1= 0.8505
Macro: p= 0.8125  r= 0.9364  f1= 0.8623
f1 0.8505 0.8623
Run # 2
Micro: p= 0.8526  r= 0.8804  f1= 0.8663
Macro: p= 0.7255  r= 0.7896  f1= 0.7377
f1 0.8663 0.7377
Run # 3
Micro: p= 0.8269  r= 0.9451  f1= 0.8821
Macro: p= 0.9023  r= 0.8595  f1= 0.8603
f1 0.8821 0.8603
Run # 4
Micro: p= 0.8462  r= 0.8851  f1= 0.8652
Macro: p= 0.8837  r= 0.9152  f1= 0.898
f1 0.8652 0.8980
Run # 5
Micro: p= 0.8105  r= 0.7778  f1= 0.7938
Macro: p= 0.7