In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

from src.reader import read_lusa, read_timebank

# Figure typography: use the sans-serif family and make Arial its only candidate face.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
})

# Allow pandas to display up to 500 rows of a frame.
pd.set_option('display.max_rows', 500)

# Two directory levels above the current working directory;
# the result paths used below are built from it.
ROOT = Path().resolve().parent.parent

In [2]:
# Model identifiers, in display order: every LLaMA 2 size followed by its
# "-chat" variant, then the GPT models.
# The list order matters: it is used as the category order of the ordered
# categorical `model` column.
models = [
    f'llama2-{size}{variant}'
    for size in ('7b', '13b', '70b')
    for variant in ('', '-chat')
]
models += ['gpt3', 'chatgpt', 'gpt4']

## Prompt Selection

In [3]:
# Directory holding the prompt-selection results (one sub-folder per language).
results_path = ROOT.joinpath("results", "prompt_selection")

In [4]:
# Read each language's result table and tag every row with its language.
df_pt = pd.read_csv(results_path / "portuguese" / "results.csv").assign(language="Portuguese")
df_en = pd.read_csv(results_path / "english" / "results.csv").assign(language="English")

# Stack both tables under a fresh 0..n-1 index.
df = pd.concat([df_pt, df_en], ignore_index=True)

# Ordered categorical so that grouped tables follow the order of `models`.
df["model"] = pd.Categorical(df["model"], categories=models, ordered=True)

In [5]:
# Distinct values of the `language` and `entity` columns, in order of first appearance.
languages, entities = (
    df[column].unique().tolist() for column in ("language", "entity")
)
# Prompt template names paired with their short labels. In each label the
# letters C / D / E appear when the template name contains cls / def / exp,
# and "_" marks an absent component.
templates, labels = map(list, zip(
    ("ext",         "_ _ _"),
    ("cls",         "C _ _"),
    ("ext_def",     "_ D _"),
    ("ext_exp",     "_ _ E"),
    ("cls_def",     "C D _"),
    ("ext_def_exp", "_ D E"),
    ("cls_exp",     "C _ E"),
    ("cls_def_exp", "C D E"),
))

In [6]:
# Mean F1 per (language, entity, model), with one column per prompt template.
#
# * `numeric_only=True` replaces the former `.mean("f1")`: the first positional
#   parameter of GroupBy.mean is `numeric_only`, so the string "f1" was only
#   being interpreted as a truthy flag, not as a column selector.
# * `observed=False` is spelled out because `model` is categorical: it keeps
#   every category in the index, so models without results appear as NaN rows
#   rather than being dropped when the pandas default changes.
data = (
    df[["language", "entity", "model", "template", "f1"]]
    .groupby(["language", "entity", "model", "template"], observed=False)
    .mean(numeric_only=True)
    .unstack("template")
)

# Arrange the template columns in the order given by `templates`.
data = data[[("f1", template) for template in templates]]

# Shown scaled by 100 and rounded to two decimals.
(data * 100).round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,f1,f1,f1,f1,f1,f1,f1,f1
Unnamed: 0_level_1,Unnamed: 1_level_1,template,ext,cls,ext_def,ext_exp,cls_def,ext_def_exp,cls_exp,cls_def_exp
language,entity,model,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
English,event triggers,llama2-7b,0.0,1.34,0.0,10.3,0.0,15.4,11.06,7.75
English,event triggers,llama2-7b-chat,6.41,0.0,10.9,28.8,0.0,31.15,28.3,29.67
English,event triggers,llama2-13b,0.0,1.91,2.93,8.85,0.0,10.53,6.28,2.1
English,event triggers,llama2-13b-chat,2.28,0.31,2.43,20.9,0.3,16.79,18.68,18.15
English,event triggers,llama2-70b,8.34,3.31,14.96,26.44,3.1,25.45,13.77,26.29
English,event triggers,llama2-70b-chat,1.17,3.3,2.29,41.22,4.95,42.13,18.12,18.74
English,event triggers,gpt3,0.3,3.92,3.04,41.12,4.85,42.12,24.59,38.01
English,event triggers,chatgpt,8.29,33.93,35.96,55.08,40.6,56.32,57.3,59.28
English,event triggers,gpt4,20.45,57.82,21.36,72.81,33.33,72.93,72.6,74.68
English,participants,llama2-7b,,,,,,,,


## Test

In [7]:
# Directory holding the test results (one sub-folder per language).
results_path = ROOT.joinpath("results", "test")

# Model identifiers for the test tables, in display order. The first six
# entries precede the LLaMA 2 / GPT models; the remainder is every LLaMA 2
# size followed by its "-chat" variant, then the GPT models.
# The list order matters: it is used as the category order of the ordered
# categorical `model` column.
models = [
    "tei2go", "heideltime", "tefe", "srl", "ner", "tieval_baseline",
    *(
        f"llama2-{size}{variant}"
        for size in ("7b", "13b", "70b")
        for variant in ("", "-chat")
    ),
    "gpt3", "chatgpt", "gpt4",
]

In [8]:
# Read each language's test result table and tag every row with its language.
df_pt = pd.read_csv(results_path / "portuguese" / "results.csv").assign(language="Portuguese")
df_en = pd.read_csv(results_path / "english" / "results.csv").assign(language="English")

# Stack both tables under a fresh 0..n-1 index.
df = pd.concat([df_pt, df_en], ignore_index=True)

# Ordered categorical so that grouped tables follow the order of `models`.
df["model"] = pd.Categorical(df["model"], categories=models, ordered=True)

In [9]:
# Mean precision / recall / f1 / f1_r per (language, entity, model).
#
# `observed=False` is spelled out because `model` is categorical: it keeps
# every category in the index, so models without results for a given
# language/entity appear as NaN rows rather than being dropped when the
# pandas default changes.
data = (
    df[["language", "entity", "model", "precision", "recall", "f1", "f1_r"]]
    .groupby(["language", "entity", "model"], observed=False)
    .mean()
)

# Shown scaled by 100 and rounded to two decimals.
(data * 100).round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,precision,recall,f1,f1_r
language,entity,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
English,event triggers,tei2go,,,,
English,event triggers,heideltime,,,,
English,event triggers,tefe,,,,
English,event triggers,srl,74.52,73.83,74.17,73.61
English,event triggers,ner,,,,
English,event triggers,tieval_baseline,65.37,65.48,65.43,63.94
English,event triggers,llama2-7b,21.74,12.5,15.87,11.63
English,event triggers,llama2-7b-chat,31.2,27.46,29.21,36.78
English,event triggers,llama2-13b,22.16,11.07,14.77,7.83
English,event triggers,llama2-13b-chat,20.3,22.92,21.53,22.18
