In [15]:
from pathlib import Path

import conllu
import plotly.express as px
import pandas as pd
import numpy as np


# Gold-standard Porttinari test split, parsed into a list of sentences.
# Path.read_text opens, reads and closes the file, so no handle is left open.
porttinari_gold_sents = conllu.parse(
    Path("../data/UD_Portuguese-Porttinari/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8")
)

# Maps each model name (the stem of its entry under the outputs directory) to
# the list of parsed prediction files found inside that entry.
# iterdir() and glob() yield entries in arbitrary, filesystem-dependent order;
# sorting keeps the model order and the position of each run stable across
# re-runs of the notebook.
models_preds = {}
for results_path in sorted(Path("../porttinari/outputs/").iterdir()):
    model_name = results_path.stem
    models_preds[model_name] = []
    for pred_filename in sorted(results_path.glob("*")):
        models_preds[model_name].append(
            conllu.parse(pred_filename.read_text(encoding="utf-8"))
        )

In [16]:
# Sanity check: every model must contribute exactly 10 parsed prediction files.
for model_name, model_runs in models_preds.items():
    assert len(model_runs) == 10, f"Error on {model_name} with {len(model_runs)} files."

In [24]:
def _count_upos_errors(gold_sent, pred_sent):
    """Count scored tokens and UPOS mismatches for one gold/predicted sentence pair.

    Tokens are paired positionally with ``zip``, so any surplus tokens on the
    longer side are ignored. Only pairs whose gold ``id`` is an ``int`` are
    scored. In this notebook's printed examples the skipped rows are the
    contraction rows (e.g. "dos", "ao") tagged ``_`` -- presumably
    multiword-token ranges; confirm against the conllu parser.

    Returns:
        A ``(n_tokens, n_errors)`` tuple.
    """
    n_tokens = 0
    n_errors = 0
    for gold_token, pred_token in zip(gold_sent, pred_sent):
        if not isinstance(gold_token["id"], int):
            continue
        n_tokens += 1
        if gold_token["upos"] != pred_token["upos"]:
            n_errors += 1
    return n_tokens, n_errors


def get_sent_errors(gold_sents, pred_sents) -> pd.DataFrame:
    """Build a per-sentence table of UPOS error counts.

    Args:
        gold_sents: gold sentences; each must be iterable over token mappings
            with "id" and "upos" keys and expose ``metadata["sent_id"]``.
        pred_sents: predicted sentences, aligned positionally with gold_sents.

    Returns:
        DataFrame with one row per paired sentence and the columns
        "sent_id" (taken from the gold sentence) and "errors".
    """
    sent_ids = []
    error_counts = []
    for gold_sent, pred_sent in zip(gold_sents, pred_sents):
        _, n_errors = _count_upos_errors(gold_sent, pred_sent)
        sent_ids.append(gold_sent.metadata["sent_id"])
        error_counts.append(n_errors)
    return pd.DataFrame({"sent_id": sent_ids, "errors": error_counts})


def get_sent_level_acc(gold_sents, pred_sents) -> float:
    """Return the fraction of gold sentences tagged with zero UPOS errors.

    The denominator is ``len(gold_sents)``, so gold sentences that have no
    counterpart in pred_sents count as not matched.

    Raises:
        ZeroDivisionError: if gold_sents is empty.
    """
    total = len(gold_sents)
    total_match = sum(
        1
        for gold_sent, pred_sent in zip(gold_sents, pred_sents)
        if _count_upos_errors(gold_sent, pred_sent)[1] == 0
    )
    return total_match / total


def get_average_sent_level_acc(gold_sents, pred_sents):
    """Return the mean over sentences of per-sentence UPOS token accuracy.

    Sentence pairs with no scorable token are left out of the mean; they used
    to abort the whole computation with a ZeroDivisionError. When nothing can
    be scored at all the result is NaN.
    """
    accs = []
    for gold_sent, pred_sent in zip(gold_sents, pred_sents):
        n_tokens, n_errors = _count_upos_errors(gold_sent, pred_sent)
        if n_tokens == 0:
            # Nothing to score in this pair, so it has no defined accuracy.
            continue
        accs.append((n_tokens - n_errors) / n_tokens)
    if not accs:
        return np.float64("nan")
    return np.mean(accs)

# Flatten {model: [run, ...]} into (model, run index, parsed predictions) triples,
# then score each run against the Porttinari gold sentences.
# get_average_sent_level_acc can be swapped in for the "acc" column to report
# mean per-sentence token accuracy instead.
scored_runs = [
    (model_name, run_idx, experiment)
    for model_name, experiments in models_preds.items()
    for run_idx, experiment in enumerate(experiments)
]
results = {
    "model_name": [model_name for model_name, _, _ in scored_runs],
    "id": [run_idx for _, run_idx, _ in scored_runs],
    "acc": [
        get_sent_level_acc(porttinari_gold_sents, experiment)
        for _, _, experiment in scored_runs
    ],
}

results_df = pd.DataFrame(results)

In [26]:
# Per-model mean and standard deviation of the accuracy across runs.
# Both statistics come from one groupby, so each row's model_name, mean and
# stddev are guaranteed to belong together. Pairing `unique()` (order of first
# appearance) with `groupby(...).values` (sorted by key) positionally would
# attach the numbers to the wrong model whenever the two orders differ.
final_df = (
    results_df.groupby("model_name")["acc"]
    .agg(["mean", "std"])
    .rename(columns={"std": "stddev"})
    .reset_index()
)

In [27]:
# Display the per-model summary table (columns: model_name, mean, stddev).
final_df

Unnamed: 0,model_name,mean,stddev
0,BERTimbau,0.838249,0.006829
1,CNCSR,0.709053,0.006598
2,DeBERTa-v3,0.832554,0.007161
3,Meta-BiLSTM,0.751079,0.010007
4,Stanza,0.714748,0.006968
5,UDPipe,0.698261,0.005064
6,XLM-R,0.829736,0.007054


In [28]:
# Standard deviation of the accuracy across runs, one value per model.
results_df.groupby("model_name")["acc"].agg("std")

model_name
BERTimbau      0.006829
CNCSR          0.006598
DeBERTa-v3     0.007161
Meta-BiLSTM    0.010007
Stanza         0.006968
UDPipe         0.005064
XLM-R          0.007054
Name: acc, dtype: float64

In [14]:
# Persist the per-run accuracies (one row per model/run) without the index column.
results_df.to_csv("accuracy_level_porttinari.csv", index=False)

In [3]:
# Gold Porttinari test set and the predictions of four taggers on it.
# Judging by the directory names, the taggers were trained on Porttinari,
# DANTE, PetroGold and all three corpora respectively -- confirm.
# Path.read_text closes each file after reading it.
porttinari_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8")
)
porttinari_pred_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
dante_pred_sents = conllu.parse(
    Path("../tmp/dante/model-1b7kowrx-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
petrogold_pred_sents = conllu.parse(
    Path("../tmp/petrogold/model-2v9qux4u-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
multi_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)

# Per-sentence UPOS error counts of each tagger against the same gold sentences.
porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(porttinari_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(porttinari_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(porttinari_gold_sents, multi_pred_sents)

# Label each frame so the combined histogram can be coloured by tagger.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"

# Relative frequency of each error count, per tagger.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts() / df.shape[0])

# pd.concat replaces the chained DataFrame.append calls: append is deprecated
# (see the warning recorded below this cell) and absent from newer pandas.
# NOTE(review): this rebinds `final_df`, which an earlier cell uses for the
# per-model summary table.
final_df = pd.concat(
    [porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True
)

fig = px.histogram(final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("porttinari_errors.png", scale=2)
fig.show()

'Porttinari'

0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

'DANTE'

0     0.583333
1     0.281175
2     0.094125
3     0.027578
4     0.006595
5     0.004796
6     0.001199
8     0.000600
13    0.000600
Name: errors, dtype: float64

'PetroGold'

0    0.552158
1    0.314748
2    0.099520
3    0.022782
4    0.007194
5    0.003597
Name: errors, dtype: float64

'Multigênero'

0    0.832734
1    0.142686
2    0.023381
3    0.001199
Name: errors, dtype: float64

  final_df = porttinari_df.append(dante_df, ignore_index=True).append(petrogold_df, ignore_index=True).append(multi_df, ignore_index=True)


In [4]:
# For every sentence on which the tagger behind `dante_df` made more than six
# UPOS errors, print the sentence id followed by one row per token:
# form, gold UPOS, predicted UPOS (tab-separated).
for sent_id in dante_df.loc[dante_df["errors"] > 6, "sent_id"]:
    for true_sent in (s for s in porttinari_gold_sents if s.metadata["sent_id"] == sent_id):
        for sent in (s for s in dante_pred_sents if s.metadata["sent_id"] == sent_id):
            print(sent.metadata["sent_id"])
            for pred_token, gold_token in zip(sent, true_sent):
                row = "{}\t{}\t{}".format(pred_token["form"], gold_token["upos"], pred_token["upos"])
                print(row)
    # Blank line after each sentence id, whether or not a match was printed.
    print()

FOLHA_DOC003071_SENT011
Em	ADP	ADP
um	NUM	DET
dos	_	_
de	ADP	ADP
os	DET	DET
momentos	NOUN	NOUN
dignos	ADJ	ADJ
de	ADP	ADP
menção	NOUN	NOUN
,	PUNCT	PUNCT
a	DET	DET
backing	X	NOUN
vocal	X	X
Whitney	PROPN	PROPN
se	PRON	PRON
esgoelou	VERB	VERB
ao	_	_
a	ADP	ADP
o	DET	DET
cantar	VERB	VERB
"	PUNCT	PUNCT
How	PROPN	INTJ
Come	PROPN	VERB
You	PROPN	PRON
Dont	PROPN	X
Call	PROPN	X
Me	PROPN	PRON
"	PUNCT	PUNCT
e	CCONJ	CCONJ
foi	AUX	AUX
ovacionada	VERB	VERB
.	PUNCT	PUNCT

FOLHA_DOC003084_SENT009
Abriu	VERB	VERB
com	ADP	ADP
"	PUNCT	PUNCT
I've	PROPN	SYM
Got	PROPN	X
You	PROPN	PRON
Under	PROPN	ADP
My	PROPN	DET
Skin	PROPN	PROPN
"	PUNCT	PUNCT
e	CCONJ	CCONJ
teve	VERB	VERB
ótimos	ADJ	ADJ
momentos	NOUN	NOUN
,	PUNCT	PUNCT
como	ADP	ADP
"	PUNCT	PUNCT
The	PROPN	DET
Lady	PROPN	NOUN
is	X	AUX
a	X	DET
Tramp	PROPN	NOUN
"	PUNCT	PUNCT
e	CCONJ	CCONJ
"	PUNCT	PUNCT
They	PROPN	X
Can't	PROPN	X
Take	PROPN	VERB
"	PUNCT	PUNCT
.	PUNCT	PUNCT



In [6]:
# Gold DANTE test set and the predictions of the four taggers on it.
# Path.read_text closes each file after reading it.
dante_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_dante-ud-test.conllu").read_text(encoding="utf-8")
)
porttinari_pred_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8")
)
dante_pred_sents = conllu.parse(
    Path("../tmp/dante/model-1b7kowrx-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8")
)
petrogold_pred_sents = conllu.parse(
    Path("../tmp/petrogold/model-2v9qux4u-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8")
)
multi_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8")
)

# Per-sentence UPOS error counts of each tagger against the DANTE gold sentences.
porttinari_df = get_sent_errors(dante_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(dante_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(dante_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(dante_gold_sents, multi_pred_sents)

# Label each frame so the combined histogram can be coloured by tagger.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"

# pd.concat replaces the chained DataFrame.append calls: append is deprecated
# (see the warning recorded below this cell) and absent from newer pandas.
dante_final_df = pd.concat(
    [porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True
)

# Relative frequency of each error count, per tagger.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts() / df.shape[0])

fig = px.histogram(dante_final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("dante_errors.png", scale=2)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

1     0.273067
2     0.238155
3     0.167082
0     0.107232
4     0.098504
5     0.063591
6     0.019950
7     0.013716
8     0.008728
9     0.004988
10    0.002494
11    0.001247
12    0.001247
Name: errors, dtype: float64

'DANTE'

0    0.734414
1    0.189526
2    0.058603
3    0.012469
4    0.002494
5    0.001247
7    0.001247
Name: errors, dtype: float64

'PetroGold'

2     0.240648
1     0.185786
3     0.158354
4     0.120948
5     0.115960
0     0.062344
6     0.059850
7     0.031172
8     0.009975
9     0.008728
10    0.006234
Name: errors, dtype: float64

'Multigênero'

0    0.724439
1    0.194514
2    0.067332
4    0.006234
3    0.006234
8    0.001247
Name: errors, dtype: float64

In [9]:
# Gold PetroGold test set and the predictions of the four taggers on it.
# Path.read_text closes each file after reading it.
petrogold_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_petrogold-ud-test.conllu").read_text(encoding="utf-8")
)
porttinari_pred_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8")
)
dante_pred_sents = conllu.parse(
    Path("../tmp/dante/model-1b7kowrx-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8")
)
petrogold_pred_sents = conllu.parse(
    Path("../tmp/petrogold/model-2v9qux4u-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8")
)
multi_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8")
)

# Per-sentence UPOS error counts of each tagger against the PetroGold gold sentences.
porttinari_df = get_sent_errors(petrogold_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(petrogold_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(petrogold_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(petrogold_gold_sents, multi_pred_sents)

# Label each frame so the combined histogram can be coloured by tagger.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"

# pd.concat replaces the chained DataFrame.append calls: append is deprecated
# (see the warning recorded below this cell) and absent from newer pandas.
petrogold_final_df = pd.concat(
    [porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True
)

# Relative frequency of each error count, per tagger.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts() / df.shape[0])

fig = px.histogram(petrogold_final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("petrogold_errors.png", scale=2)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

0     0.483146
1     0.269663
2     0.152809
3     0.056180
4     0.024719
5     0.011236
17    0.002247
Name: errors, dtype: float64

'DANTE'

0     0.366292
1     0.289888
2     0.188764
3     0.078652
4     0.035955
5     0.022472
7     0.011236
6     0.004494
17    0.002247
Name: errors, dtype: float64

'PetroGold'

0     0.802247
1     0.141573
2     0.042697
3     0.006742
4     0.004494
14    0.002247
Name: errors, dtype: float64

'Multigênero'

0     0.782022
1     0.150562
2     0.044944
3     0.015730
4     0.004494
13    0.002247
Name: errors, dtype: float64

In [36]:
# Same comparison on the Porttinari test set, but with a different multi-corpus
# tagger (loaded from the "dante_petrogold" directory -- presumably trained
# without Porttinari; confirm).
# The gold file read here is the Porttinari test set, so it is bound to
# `porttinari_gold_sents`. Binding it to `petrogold_gold_sents` would silently
# replace the PetroGold gold sentences that a later cell scores PetroGold
# predictions against. For the same reason the combined frame is named
# `porttinari_final_df` rather than `petrogold_final_df`.
porttinari_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8")
)
porttinari_pred_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
dante_pred_sents = conllu.parse(
    Path("../tmp/dante/model-1b7kowrx-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
petrogold_pred_sents = conllu.parse(
    Path("../tmp/petrogold/model-2v9qux4u-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
multi_pred_sents = conllu.parse(
    Path("../tmp/dante_petrogold/model-3h6mbbp2-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)

# Per-sentence UPOS error counts of each tagger against the Porttinari gold sentences.
porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(porttinari_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(porttinari_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(porttinari_gold_sents, multi_pred_sents)

# Label each frame so the combined histogram can be coloured by tagger.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"

# pd.concat replaces the chained DataFrame.append calls: append is deprecated
# (see the warning recorded below this cell) and absent from newer pandas.
porttinari_final_df = pd.concat(
    [porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True
)

# Relative frequency of each error count, per tagger.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts() / df.shape[0])

px.histogram(porttinari_final_df, x="errors", barmode="group", color="Modelo")


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

'DANTE'

0     0.583333
1     0.281175
2     0.094125
3     0.027578
4     0.006595
5     0.004796
6     0.001199
8     0.000600
13    0.000600
Name: errors, dtype: float64

'PetroGold'

0    0.552158
1    0.314748
2    0.099520
3    0.022782
4    0.007194
5    0.003597
Name: errors, dtype: float64

'Multigênero'

0    0.684652
1    0.233213
2    0.065947
3    0.013789
4    0.002398
Name: errors, dtype: float64

### Porttinari

In [3]:
# Share of sentences in `porttinari_df` with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
porttinari_total_correct = int((porttinari_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.10f}%".format(100*porttinari_total_correct/len(porttinari_df)))
display(porttinari_df["errors"].value_counts() / len(porttinari_df))
px.histogram(porttinari_df, x="errors")

Total percentage of totally correct sentences: 84.5323741007%


0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

### DANTE

In [36]:
# Share of sentences in `dante_df` with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
# NOTE(review): the output recorded for this cell (73.4414%) equals the figure
# printed for the DANTE tagger on the DANTE test set, whereas the comparison
# cell placed directly above this section binds `dante_df` to results on the
# Porttinari test set. The value shown depends on execution order -- confirm
# which frame is intended.
dante_total_correct = int((dante_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*dante_total_correct/len(dante_df)))
display(dante_df["errors"].value_counts() / len(dante_df))
px.histogram(dante_df, x="errors")

Total percentage of totally correct sentences: 73.4414%


0    0.734414
1    0.189526
2    0.058603
3    0.012469
4    0.002494
5    0.001247
7    0.001247
Name: errors, dtype: float64

### PetroGold

In [37]:
# Share of sentences in `petrogold_df` with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
# NOTE(review): the output recorded for this cell (80.2247%) equals the figure
# printed for the PetroGold tagger on the PetroGold test set, whereas the
# comparison cell placed directly above this section binds `petrogold_df` to
# results on the Porttinari test set. The value shown depends on execution
# order -- confirm which frame is intended.
petrogold_total_correct = int((petrogold_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*petrogold_total_correct/len(petrogold_df)))
display(petrogold_df["errors"].value_counts() / len(petrogold_df))
px.histogram(petrogold_df, x="errors")

Total percentage of totally correct sentences: 80.2247%


0     0.802247
1     0.141573
2     0.042697
3     0.006742
4     0.004494
14    0.002247
Name: errors, dtype: float64

## Repetindo análise com melhor modelo multigênero

In [38]:
# Score one multi-corpus tagger (model-3nr9mytc-v0) on each corpus's own test set.
# The three gold files are loaded here instead of reusing whatever earlier
# cells left in `*_gold_sents`: an earlier cell binds `petrogold_gold_sents`
# to the Porttinari gold file, which on a top-to-bottom run would score the
# PetroGold predictions against the wrong sentences.
# Path.read_text closes each file after reading it.
porttinari_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8")
)
dante_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_dante-ud-test.conllu").read_text(encoding="utf-8")
)
petrogold_gold_sents = conllu.parse(
    Path("../tmp/porttinari/model-31361v0k-v0/pt_petrogold-ud-test.conllu").read_text(encoding="utf-8")
)

porttinari_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8")
)
dante_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8")
)
petrogold_pred_sents = conllu.parse(
    Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8")
)

# Per-sentence UPOS error counts, one frame per test corpus.
porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(dante_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(petrogold_gold_sents, petrogold_pred_sents)

### Porttinari

In [39]:
# Share of Porttinari test sentences with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
porttinari_total_correct = int((porttinari_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.10f}%".format(100*porttinari_total_correct/len(porttinari_df)))
display(porttinari_df["errors"].value_counts() / len(porttinari_df))
px.histogram(porttinari_df, x="errors")

Total percentage of totally correct sentences: 83.2733812950%


0    0.832734
1    0.142686
2    0.023381
3    0.001199
Name: errors, dtype: float64

### DANTE

In [40]:
# Share of DANTE test sentences with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
dante_total_correct = int((dante_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*dante_total_correct/len(dante_df)))
display(dante_df["errors"].value_counts() / len(dante_df))
px.histogram(dante_df, x="errors")

Total percentage of totally correct sentences: 72.4439%


0    0.724439
1    0.194514
2    0.067332
4    0.006234
3    0.006234
8    0.001247
Name: errors, dtype: float64

### PetroGold

In [41]:
# Share of PetroGold test sentences with zero UPOS errors, followed by the
# relative frequency of every error count and its histogram.
petrogold_total_correct = int((petrogold_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*petrogold_total_correct/len(petrogold_df)))
display(petrogold_df["errors"].value_counts() / len(petrogold_df))
px.histogram(petrogold_df, x="errors")

Total percentage of totally correct sentences: 78.2022%


0     0.782022
1     0.150562
2     0.044944
3     0.015730
4     0.004494
13    0.002247
Name: errors, dtype: float64