In [24]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Global plotting defaults: seaborn "whitegrid" style with the "Set2" palette,
# the "paper" scaling context, and 300 dpi for any figure written with savefig.
sns.set_theme(palette="Set2", style="whitegrid")
sns.set_context(context="paper")
plt.rcParams.update({'savefig.dpi': 300})

# Location of the exported classification-report CSV, relative to the
# notebook's working directory.
CSV_PATH = Path("data") / "wandb_export_classification_report_uir8bnu0_cross_corpus_1.csv"

# Load the export and keep only the F1 rows of the contrastive runs; every
# later cell works on this filtered frame.
df = pd.read_csv(CSV_PATH).loc[
    lambda report: report['run_type'].eq('contrastive')
    & report['metric'].eq('f1-score')
]

In [48]:
# Show the rows of the restaurant corpus, hiding every column that is
# entirely NaN for those rows.
(
    df.loc[df["run_corpus"].eq("NER_ENGLISH_RESTAURANT")]
    .dropna(axis="columns", how="all")
)

Unnamed: 0.1,Unnamed: 0,metric,Location,Restaurant_Name,Cuisine,Amenity,Dish,Hours,Rating,Price,micro avg,macro avg,weighted avg,run_name,run_corpus,run_type
2,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,iconic-sweep-60,NER_ENGLISH_RESTAURANT,contrastive
6,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,serene-sweep-59,NER_ENGLISH_RESTAURANT,contrastive
10,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,dainty-sweep-58,NER_ENGLISH_RESTAURANT,contrastive
14,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,true-sweep-57,NER_ENGLISH_RESTAURANT,contrastive
18,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,fancy-sweep-56,NER_ENGLISH_RESTAURANT,contrastive
22,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,lemon-sweep-55,NER_ENGLISH_RESTAURANT,contrastive
26,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,winter-sweep-54,NER_ENGLISH_RESTAURANT,contrastive
30,2,f1-score,0.438048,0.429887,0.618449,0.344444,0.494475,0.393258,0.533333,0.567164,0.468098,0.477382,0.466861,charmed-sweep-53,NER_ENGLISH_RESTAURANT,contrastive
34,2,f1-score,0.452878,0.324701,0.58658,0.289044,0.426554,0.533333,0.492693,0.603774,0.446065,0.463695,0.445122,golden-sweep-52,NER_ENGLISH_RESTAURANT,contrastive
38,2,f1-score,0.437912,0.244641,0.561886,0.256716,0.431034,0.539095,0.501961,0.639344,0.429022,0.451574,0.424732,worthy-sweep-51,NER_ENGLISH_RESTAURANT,contrastive


In [109]:
# Mean F1 per CONLL03 entity type over all selected runs, as a two-column
# (NE, F1) frame ordered from best to worst score.
df_means_conll = (
    df.loc[df["run_corpus"] == "CONLL03", ["PER", "LOC", "ORG", "MISC"]]
    .mean(axis=0, numeric_only=True)
    .rename_axis("NE")        # entity labels become the "NE" column ...
    .reset_index(name="F1")   # ... and the averaged scores the "F1" column
    .sort_values(by="F1", ascending=False)
    .reset_index(drop=True)
)
df_means_conll

Unnamed: 0,NE,F1
0,PER,0.72389
1,LOC,0.639023
2,MISC,0.455215
3,ORG,0.419205


In [110]:
# Mean F1 per WNUT17 entity type over all selected runs, as a two-column
# (NE, F1) frame ordered from best to worst score.
df_means_wnut = (
    df.loc[
        df["run_corpus"] == "WNUT17",
        ["person", "creative-work", "location", "group", "product", "corporation"],
    ]
    .mean(axis=0, numeric_only=True)
    .rename_axis("NE")        # entity labels become the "NE" column ...
    .reset_index(name="F1")   # ... and the averaged scores the "F1" column
    .sort_values(by="F1", ascending=False)
    .reset_index(drop=True)
)
df_means_wnut

Unnamed: 0,NE,F1
0,person,0.390635
1,location,0.352417
2,group,0.129644
3,creative-work,0.120204
4,corporation,0.099967
5,product,0.090891


In [111]:
# Mean F1 per restaurant-corpus entity type over all selected runs, as a
# two-column (NE, F1) frame ordered from best to worst score.
df_means_mit = (
    df.loc[
        df["run_corpus"] == "NER_ENGLISH_RESTAURANT",
        ["Location", "Restaurant_Name", "Cuisine", "Amenity", "Dish", "Hours", "Rating", "Price"],
    ]
    .mean(axis=0, numeric_only=True)
    .rename_axis("NE")        # entity labels become the "NE" column ...
    .reset_index(name="F1")   # ... and the averaged scores the "F1" column
    .sort_values(by="F1", ascending=False)
    .reset_index(drop=True)
)
df_means_mit

Unnamed: 0,NE,F1
0,Price,0.571725
1,Cuisine,0.568328
2,Rating,0.5212
3,Location,0.468647
4,Dish,0.464814
5,Hours,0.435663
6,Restaurant_Name,0.390586
7,Amenity,0.30724


In [112]:
# Place the three per-corpus rankings side by side (aligned on their 0..n-1
# row index). The rankings have different lengths, so the shorter ones are
# padded with NaN by concat; those gaps are shown as '-'.
out = pd.concat(
    objs=[df_means_conll, df_means_wnut, df_means_mit],
    axis="columns",
).fillna(value='-')
out

Unnamed: 0,NE,F1,NE.1,F1.1,NE.2,F1.2
0,PER,0.72389,person,0.390635,Price,0.571725
1,LOC,0.639023,location,0.352417,Cuisine,0.568328
2,MISC,0.455215,group,0.129644,Rating,0.5212
3,ORG,0.419205,creative-work,0.120204,Location,0.468647
4,-,-,corporation,0.099967,Dish,0.464814
5,-,-,product,0.090891,Hours,0.435663
6,-,-,-,-,Restaurant_Name,0.390586
7,-,-,-,-,Amenity,0.30724


In [113]:
# Emit the side-by-side ranking table as LaTeX source (booktabs rules, no row
# index, scores rounded to 4 decimals). print() is intentional here: the raw
# text is meant to be pasted into a .tex file.
#
# `out` consists of alternating (NE, F1) column pairs. The '-' placeholders
# turn the shorter F1 columns into non-numeric columns, so the automatically
# inferred column format was "lllllr": only the last F1 column was
# right-aligned. Passing the format explicitly aligns every pair the same way.
print(
    out.style
    .format(precision=4)
    .hide(axis=0)
    .to_latex(hrules=True, column_format="lr" * (out.shape[1] // 2))
)

\begin{tabular}{lllllr}
\toprule
NE & F1 & NE & F1 & NE & F1 \\
\midrule
PER & 0.7239 & person & 0.3906 & Price & 0.5717 \\
LOC & 0.6390 & location & 0.3524 & Cuisine & 0.5683 \\
MISC & 0.4552 & group & 0.1296 & Rating & 0.5212 \\
ORG & 0.4192 & creative-work & 0.1202 & Location & 0.4686 \\
- & - & corporation & 0.1000 & Dish & 0.4648 \\
- & - & product & 0.0909 & Hours & 0.4357 \\
- & - & - & - & Restaurant_Name & 0.3906 \\
- & - & - & - & Amenity & 0.3072 \\
\bottomrule
\end{tabular}

