In [10]:
from pathlib import Path

import conllu
import plotly.express as px
import pandas as pd
import numpy as np


# Gold-standard Porttinari test and train splits, parsed into conllu sentence lists.
# Path.read_text opens, reads and closes the file, so no file handle is left open
# (the previous open(...).read() calls never closed theirs).
porttinari_gold_sents = conllu.parse(
    Path("../data/UD_Portuguese-Porttinari/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8")
)
porttinari_gold_train_sents = conllu.parse(
    Path("../data/UD_Portuguese-Porttinari/pt_porttinari-ud-train.conllu").read_text(encoding="utf-8")
)

# Word forms seen in the training split. Only tokens whose id is an int are
# counted; tokens with any other id type are skipped.
train_vocab = {
    token["form"]
    for sent in porttinari_gold_train_sents
    for token in sent
    if isinstance(token["id"], int)
}

# models_preds maps each sub-directory of ../porttinari/outputs/ (keyed by its stem)
# to the list of parsed prediction files found inside it.
# Both listings are sorted so the model order and the run order are the same on
# every machine; iterdir()/glob() alone return entries in filesystem order.
models_preds = {}
for results_path in sorted(Path("../porttinari/outputs/").iterdir()):
    if not results_path.is_dir():
        # Stray files next to the model directories are not models.
        continue
    model_name = results_path.stem
    models_preds[model_name] = [
        conllu.parse(pred_filename.read_text(encoding="utf-8"))
        for pred_filename in sorted(results_path.glob("*"))
    ]

In [11]:
# Sanity check: every model must contribute exactly 10 prediction files.
for model_name, runs in models_preds.items():
    assert len(runs) == 10, f"Error on {model_name} with {len(runs)} files."

In [16]:
from sklearn.metrics import accuracy_score


def get_tags(sents, min_tokens = 0, max_tokens = 60, get_n_sents = False):
    """Collect the UPOS tags of all sentences whose length lies in a given range.

    Args:
        sents: iterable of sentences; each sentence is an iterable of tokens
            that can be indexed with "id" and "upos".
        min_tokens: exclusive lower bound on the sentence length.
        max_tokens: inclusive upper bound on the sentence length.
        get_n_sents: when true, also return how many sentences were kept.

    Returns:
        A flat list of tags in sentence order, or a ``(tags, n_sents)`` tuple
        when ``get_n_sents`` is true. Only tokens whose id is an int are
        counted and collected.
    """
    collected = []
    kept = 0
    for sentence in sents:
        sentence_tags = [tok["upos"] for tok in sentence if isinstance(tok["id"], int)]
        # Sentence length is measured in int-id tokens only.
        if not (min_tokens < len(sentence_tags) <= max_tokens):
            continue
        collected.extend(sentence_tags)
        kept += 1
    return (collected, kept) if get_n_sents else collected

def get_acc_oovs_divided(gold_sents, pred_sents, train_vocab):
    """Compute overall, OOV-only and in-vocabulary-only tagging accuracy.

    Args:
        gold_sents: gold sentences (tokens indexable with "id", "form", "upos").
        pred_sents: predicted sentences, paired position by position with
            ``gold_sents``.
        train_vocab: container of word forms seen in training; a gold token
            whose form is not in it counts as out-of-vocabulary (OOV).

    Returns:
        ``(acc, acc_oov, acc_noov)``. The overall accuracy is computed from
        ``get_tags`` with its default length bounds, while the OOV / non-OOV
        split walks every zipped sentence pair without a length filter.
    """
    overall_acc = accuracy_score(get_tags(gold_sents), get_tags(pred_sents))

    # Tags keyed by "is the gold form out of vocabulary?"
    gold_by_group = {True: [], False: []}
    pred_by_group = {True: [], False: []}
    for gold_sent, pred_sent in zip(gold_sents, pred_sents):
        for gold_tok, pred_tok in zip(gold_sent, pred_sent):
            if not isinstance(gold_tok["id"], int):
                continue
            is_oov = gold_tok["form"] not in train_vocab
            gold_by_group[is_oov].append(gold_tok["upos"])
            pred_by_group[is_oov].append(pred_tok["upos"])

    oov_acc = accuracy_score(gold_by_group[True], pred_by_group[True])
    in_vocab_acc = accuracy_score(gold_by_group[False], pred_by_group[False])
    return overall_acc, oov_acc, in_vocab_acc

# Per-model summary: mean and standard deviation (in percent) over all runs of
# the three accuracies returned by get_acc_oovs_divided, in that order.
metric_columns = ("Acc total", "Acc OOV", "Acc NOOV")
results = {"model_name": [], **{column: [] for column in metric_columns}}

for model_name, experiments in models_preds.items():
    # One (acc, acc_oov, acc_noov) triple per experiment.
    per_run = [
        get_acc_oovs_divided(porttinari_gold_sents, experiment, train_vocab)
        for experiment in experiments
    ]

    results["model_name"].append(model_name)
    for position, column in enumerate(metric_columns):
        scores = [run[position] for run in per_run]
        mean, std = np.mean(scores, axis=0), np.std(scores, axis=0)
        results[column].append(f"{100*mean:.4f}  {100*std:.4f}")

results_df = pd.DataFrame(results)

In [17]:
# Display the per-model accuracy table (one row per model; each cell holds "mean  std" in percent).
results_df

Unnamed: 0,model_name,Acc total,Acc OOV,Acc NOOV
0,BERTimbau,99.0709 0.0294,96.4264 0.1555,99.2990 0.0269
1,CNCSR,98.0953 0.0639,92.5113 0.2254,98.5769 0.0652
2,DeBERTa-v3,99.0157 0.0429,96.3626 0.2603,99.2446 0.0409
3,Meta-BiLSTM,98.4726 0.0578,94.5308 0.1965,98.8127 0.0721
4,Stanza,98.2172 0.0510,94.7973 0.2057,98.5122 0.0492
5,UDPipe,98.0112 0.0326,91.2462 0.1287,98.5947 0.0291
6,XLM-R,98.9979 0.0404,96.2575 0.1211,99.2342 0.0421


In [35]:
# Training vocabulary: the set of word forms in the Porttinari train split.
# Path.read_text closes the file after reading (open(...).read() left it open).
train_sents = conllu.parse(
    Path("../data/UD_Portuguese-Porttinari/pt_porttinari-ud-train.conllu").read_text(encoding="utf-8")
)
# Only tokens whose id is an int contribute a form.
vocab = {
    token["form"]
    for sent in train_sents
    for token in sent
    if isinstance(token["id"], int)
}
len(vocab)

17117

In [46]:
# OOV rate among the sentences of the first BERTimbau run whose length
# (counted in int-id tokens) is greater than min_tokens and at most max_tokens.
sents = models_preds["BERTimbau"][0]
min_tokens = 50
max_tokens = 60
# get_n_sents, true_tags, n_sents, sent_ids and preds are filled in below but
# do not contribute to the value displayed at the end of this cell.
get_n_sents = False
true_tags = []
n_sents = 0
sent_ids = []
preds = []
# Running totals over the selected sentences only.
oov_cnt_total = 0
total_tokens = 0
for sent in sents:
    tags = []
    n_tokens = 0
    oov_cnt = 0
    for token in sent:
        # Tokens whose id is not an int are ignored for both counts.
        if isinstance(token["id"], int):
            tags.append(token["upos"])
            # A form missing from the training vocabulary counts as OOV.
            if token["form"] not in vocab:
                oov_cnt += 1
            n_tokens += 1
            
    # Lower bound is exclusive, upper bound is inclusive.
    if n_tokens > min_tokens and n_tokens <= max_tokens:
        true_tags += tags
        n_sents += 1
        sent_ids.append(sent.metadata["sent_id"])
        preds.append(tags)
        total_tokens += n_tokens
        oov_cnt_total += oov_cnt
# Fraction of tokens in the selected sentences that are OOV.
# NOTE(review): raises ZeroDivisionError if no sentence falls in the length range.
oov_cnt_total / total_tokens

0.07647058823529412

In [75]:
# Tagging accuracy on OOV tokens only, broken down by sentence length.
# Sentences are grouped into 6 buckets of 10 tokens each (1-10, 11-20, ..., 51-60);
# sentence length is counted in int-id tokens, and a token is OOV when its form
# is not in `vocab`. Each table cell is "mean  std" (in percent) over a model's runs.
#
# The previous version of this cell called preds.append(tags), where `tags` was
# never defined here: it only worked when another cell had left `tags` in the
# kernel. That bookkeeping, and the other counters that were never read, are gone.
n_buckets = 6
bucket_labels = [f"{(k) * 10 + 1} a {(k+1) * 10}" for k in range(n_buckets)]
results = {"model_name": [], **{label: [] for label in bucket_labels}}

for model_name in models_preds.keys():
    results["model_name"].append(model_name)
    total_accs = []
    for sents in models_preds[model_name]:  # one entry per experiment
        # Gold / predicted tags of OOV tokens, one list per length bucket.
        bucket_true_tags = [[] for _ in range(n_buckets)]
        bucket_pred_tags = [[] for _ in range(n_buckets)]
        for i, sent in enumerate(sents):
            pred_tags = []
            true_tags = []
            n_tokens = 0
            for j, token in enumerate(sent):
                if not isinstance(token["id"], int):
                    continue
                n_tokens += 1
                if token["form"] not in vocab:
                    pred_tags.append(token["upos"])
                    # Gold tag is looked up at the same sentence and token position.
                    true_tags.append(porttinari_gold_sents[i][j]["upos"])

            # Bucket k holds lengths k*10 < n_tokens <= (k+1)*10; empty sentences
            # (k == -1) and sentences longer than 60 tokens fall outside every bucket.
            k = (n_tokens - 1) // 10
            if 0 <= k < n_buckets:
                bucket_pred_tags[k] += pred_tags
                bucket_true_tags[k] += true_tags

        total_accs.append([
            accuracy_score(bucket_true_tags[k], bucket_pred_tags[k])
            for k in range(n_buckets)
        ])

    # Mean / std across experiments, one value per bucket.
    mean, std = np.mean(total_accs, axis=0), np.std(total_accs, axis=0)
    for k, label in enumerate(bucket_labels):
        results[label].append(f"{100*mean[k]:.4f}  {100*std[k]:.4f}")
pd.DataFrame(results)

Unnamed: 0,model_name,1 a 10,11 a 20,21 a 30,31 a 40,41 a 50,51 a 60
0,BERTimbau,91.5315 0.8257,96.4548 0.2452,96.9740 0.3130,96.4717 0.3642,97.0000 3.7859,100.0000 0.0000
1,CNCSR,88.0180 0.7036,92.9340 0.4553,92.4286 0.7114,92.5341 0.3268,90.6667 2.4944,99.2308 2.3077
2,DeBERTa-v3,92.7928 0.9009,96.3733 0.2452,97.0390 0.3664,95.9649 0.3268,97.0000 2.7689,100.0000 0.0000
3,Meta-BiLSTM,88.6486 0.9187,94.5314 0.3166,94.8701 0.3863,95.0097 0.3285,97.0000 1.7951,100.0000 0.0000
4,Stanza,90.7207 0.9054,94.8492 0.2023,94.9351 0.4928,94.9318 0.4613,99.6667 1.0000,100.0000 0.0000
5,UDPipe,84.6847 1.0660,91.2795 0.4250,92.0130 0.5072,91.2281 0.5008,93.0000 1.0000,95.3846 3.7684
6,XLM-R,91.3514 1.6216,96.1369 0.1901,97.0000 0.2987,96.4133 0.2917,99.0000 2.1344,93.0769 2.3077


In [28]:
# Standard deviation of the "acc" column for each model.
# NOTE(review): the results_df built earlier in this notebook has the columns
# "model_name", "Acc total", "Acc OOV" and "Acc NOOV" but no "acc", so this line
# presumably ran against an older, per-run version of results_df - confirm
# before relying on it in a fresh run.
results_df.groupby("model_name")["acc"].std()

model_name
BERTimbau      0.006829
CNCSR          0.006598
DeBERTa-v3     0.007161
Meta-BiLSTM    0.010007
Stanza         0.006968
UDPipe         0.005064
XLM-R          0.007054
Name: acc, dtype: float64

In [14]:
# Write results_df to accuracy_level_porttinari.csv in the current working directory, without the index column.
results_df.to_csv("accuracy_level_porttinari.csv", index=False)

In [3]:
# Porttinari test set: errors per sentence for four models (the ones stored under
# the porttinari, dante, petrogold and porttinari_dante_petrogold directories).
# Path.read_text closes each file after reading; open(...).read() left them open.
porttinari_gold_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8"))
porttinari_pred_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
dante_pred_sents = conllu.parse(Path("../tmp/dante/model-1b7kowrx-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
petrogold_pred_sents = conllu.parse(Path("../tmp/petrogold/model-2v9qux4u-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
multi_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))

# get_sent_errors is defined outside this part of the notebook; its result is
# used below as a DataFrame with an "errors" column.
porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(porttinari_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(porttinari_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(porttinari_gold_sents, multi_pred_sents)

# Tag every frame with its model so the combined histogram can colour by it.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"

# Share of sentences for each error count, per model.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts()/df.shape[0])

# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat with
# ignore_index=True builds the same stacked frame in one call.
final_df = pd.concat([porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True)

fig = px.histogram(final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("porttinari_errors.png", scale=2)
fig.show()

'Porttinari'

0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

'DANTE'

0     0.583333
1     0.281175
2     0.094125
3     0.027578
4     0.006595
5     0.004796
6     0.001199
8     0.000600
13    0.000600
Name: errors, dtype: float64

'PetroGold'

0    0.552158
1    0.314748
2    0.099520
3    0.022782
4    0.007194
5    0.003597
Name: errors, dtype: float64

'Multigênero'

0    0.832734
1    0.142686
2    0.023381
3    0.001199
Name: errors, dtype: float64

  final_df = porttinari_df.append(dante_df, ignore_index=True).append(petrogold_df, ignore_index=True).append(multi_df, ignore_index=True)


In [4]:
# For every sentence on which the DANTE model made more than 6 errors, print
# one "form <TAB> gold UPOS <TAB> predicted UPOS" row per token.
row_template = "{}\t{}\t{}"
worst_sent_ids = dante_df.loc[dante_df["errors"] > 6, "sent_id"]
for sent_id in worst_sent_ids:
    gold_matches = [s for s in porttinari_gold_sents if s.metadata["sent_id"] == sent_id]
    pred_matches = [s for s in dante_pred_sents if s.metadata["sent_id"] == sent_id]
    for true_sent in gold_matches:
        for sent in pred_matches:
            print(sent.metadata["sent_id"])
            for pred_token, gold_token in zip(sent, true_sent):
                print(row_template.format(pred_token["form"], gold_token["upos"], pred_token["upos"]))
    # Blank line after each sentence id, whether or not a match was printed.
    print()

FOLHA_DOC003071_SENT011
Em	ADP	ADP
um	NUM	DET
dos	_	_
de	ADP	ADP
os	DET	DET
momentos	NOUN	NOUN
dignos	ADJ	ADJ
de	ADP	ADP
menção	NOUN	NOUN
,	PUNCT	PUNCT
a	DET	DET
backing	X	NOUN
vocal	X	X
Whitney	PROPN	PROPN
se	PRON	PRON
esgoelou	VERB	VERB
ao	_	_
a	ADP	ADP
o	DET	DET
cantar	VERB	VERB
"	PUNCT	PUNCT
How	PROPN	INTJ
Come	PROPN	VERB
You	PROPN	PRON
Dont	PROPN	X
Call	PROPN	X
Me	PROPN	PRON
"	PUNCT	PUNCT
e	CCONJ	CCONJ
foi	AUX	AUX
ovacionada	VERB	VERB
.	PUNCT	PUNCT

FOLHA_DOC003084_SENT009
Abriu	VERB	VERB
com	ADP	ADP
"	PUNCT	PUNCT
I've	PROPN	SYM
Got	PROPN	X
You	PROPN	PRON
Under	PROPN	ADP
My	PROPN	DET
Skin	PROPN	PROPN
"	PUNCT	PUNCT
e	CCONJ	CCONJ
teve	VERB	VERB
ótimos	ADJ	ADJ
momentos	NOUN	NOUN
,	PUNCT	PUNCT
como	ADP	ADP
"	PUNCT	PUNCT
The	PROPN	DET
Lady	PROPN	NOUN
is	X	AUX
a	X	DET
Tramp	PROPN	NOUN
"	PUNCT	PUNCT
e	CCONJ	CCONJ
"	PUNCT	PUNCT
They	PROPN	X
Can't	PROPN	X
Take	PROPN	VERB
"	PUNCT	PUNCT
.	PUNCT	PUNCT



In [6]:
# DANTE test set: errors per sentence for the same four models.
# The gold file is read from the porttinari model directory.
# Path.read_text closes each file after reading; open(...).read() left them open.
dante_gold_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_dante-ud-test.conllu").read_text(encoding="utf-8"))
porttinari_pred_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8"))
dante_pred_sents = conllu.parse(Path("../tmp/dante/model-1b7kowrx-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8"))
petrogold_pred_sents = conllu.parse(Path("../tmp/petrogold/model-2v9qux4u-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8"))
multi_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8"))

# get_sent_errors is defined outside this part of the notebook; its result is
# used below as a DataFrame with an "errors" column.
porttinari_df = get_sent_errors(dante_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(dante_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(dante_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(dante_gold_sents, multi_pred_sents)

# Tag every frame with its model so the combined histogram can colour by it.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat with
# ignore_index=True builds the same stacked frame in one call.
dante_final_df = pd.concat([porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True)

# Share of sentences for each error count, per model.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts()/df.shape[0])


fig = px.histogram(dante_final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("dante_errors.png", scale=2)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

1     0.273067
2     0.238155
3     0.167082
0     0.107232
4     0.098504
5     0.063591
6     0.019950
7     0.013716
8     0.008728
9     0.004988
10    0.002494
11    0.001247
12    0.001247
Name: errors, dtype: float64

'DANTE'

0    0.734414
1    0.189526
2    0.058603
3    0.012469
4    0.002494
5    0.001247
7    0.001247
Name: errors, dtype: float64

'PetroGold'

2     0.240648
1     0.185786
3     0.158354
4     0.120948
5     0.115960
0     0.062344
6     0.059850
7     0.031172
8     0.009975
9     0.008728
10    0.006234
Name: errors, dtype: float64

'Multigênero'

0    0.724439
1    0.194514
2    0.067332
4    0.006234
3    0.006234
8    0.001247
Name: errors, dtype: float64

In [9]:
# PetroGold test set: errors per sentence for the same four models.
# The gold file is read from the porttinari model directory.
# Path.read_text closes each file after reading; open(...).read() left them open.
petrogold_gold_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_petrogold-ud-test.conllu").read_text(encoding="utf-8"))
porttinari_pred_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8"))
dante_pred_sents = conllu.parse(Path("../tmp/dante/model-1b7kowrx-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8"))
petrogold_pred_sents = conllu.parse(Path("../tmp/petrogold/model-2v9qux4u-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8"))
multi_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8"))

# get_sent_errors is defined outside this part of the notebook; its result is
# used below as a DataFrame with an "errors" column.
porttinari_df = get_sent_errors(petrogold_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(petrogold_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(petrogold_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(petrogold_gold_sents, multi_pred_sents)

# Tag every frame with its model so the combined histogram can colour by it.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat with
# ignore_index=True builds the same stacked frame in one call.
petrogold_final_df = pd.concat([porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True)

# Share of sentences for each error count, per model.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts()/df.shape[0])


fig = px.histogram(petrogold_final_df, x="errors", barmode="group", color="Modelo", width=800, height=400)
fig.write_image("petrogold_errors.png", scale=2)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

0     0.483146
1     0.269663
2     0.152809
3     0.056180
4     0.024719
5     0.011236
17    0.002247
Name: errors, dtype: float64

'DANTE'

0     0.366292
1     0.289888
2     0.188764
3     0.078652
4     0.035955
5     0.022472
7     0.011236
6     0.004494
17    0.002247
Name: errors, dtype: float64

'PetroGold'

0     0.802247
1     0.141573
2     0.042697
3     0.006742
4     0.004494
14    0.002247
Name: errors, dtype: float64

'Multigênero'

0     0.782022
1     0.150562
2     0.044944
3     0.015730
4     0.004494
13    0.002247
Name: errors, dtype: float64

In [36]:
# Porttinari test set once more, this time with the "Multigênero" slot filled by
# the model stored under ../tmp/dante_petrogold.
#
# The gold sentences read here are the Porttinari test file, so they are stored
# in porttinari_gold_sents. Storing them in petrogold_gold_sents (as this cell
# used to) overwrote the real PetroGold gold data, and any later cell comparing
# PetroGold predictions against petrogold_gold_sents then used the wrong corpus.
# For the same reason the combined frame is no longer called petrogold_final_df.
# Path.read_text closes each file after reading; open(...).read() left them open.
porttinari_gold_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test.conllu").read_text(encoding="utf-8"))
porttinari_pred_sents = conllu.parse(Path("../tmp/porttinari/model-31361v0k-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
dante_pred_sents = conllu.parse(Path("../tmp/dante/model-1b7kowrx-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
petrogold_pred_sents = conllu.parse(Path("../tmp/petrogold/model-2v9qux4u-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
multi_pred_sents = conllu.parse(Path("../tmp/dante_petrogold/model-3h6mbbp2-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))

# get_sent_errors is defined outside this part of the notebook; its result is
# used below as a DataFrame with an "errors" column.
porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(porttinari_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(porttinari_gold_sents, petrogold_pred_sents)
multi_df = get_sent_errors(porttinari_gold_sents, multi_pred_sents)

# Tag every frame with its model so the combined histogram can colour by it.
porttinari_df["Modelo"] = "porttinari"
dante_df["Modelo"] = "dante"
petrogold_df["Modelo"] = "petrogold"
multi_df["Modelo"] = "Multigênero"
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat with
# ignore_index=True builds the same stacked frame in one call.
porttinari_final_df = pd.concat([porttinari_df, dante_df, petrogold_df, multi_df], ignore_index=True)

# Share of sentences for each error count, per model.
for name, df in zip(
    ["Porttinari", "DANTE", "PetroGold", "Multigênero"],
    [porttinari_df, dante_df, petrogold_df, multi_df],
):
    display(name)
    display(df["errors"].value_counts()/df.shape[0])


px.histogram(porttinari_final_df, x="errors", barmode="group", color="Modelo")


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



'Porttinari'

0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

'DANTE'

0     0.583333
1     0.281175
2     0.094125
3     0.027578
4     0.006595
5     0.004796
6     0.001199
8     0.000600
13    0.000600
Name: errors, dtype: float64

'PetroGold'

0    0.552158
1    0.314748
2    0.099520
3    0.022782
4    0.007194
5    0.003597
Name: errors, dtype: float64

'Multigênero'

0    0.684652
1    0.233213
2    0.065947
3    0.013789
4    0.002398
Name: errors, dtype: float64

### Porttinari

In [3]:
# Share of sentences in porttinari_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = porttinari_df.shape[0]
porttinari_total_correct = int((porttinari_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.10f}%".format(100*porttinari_total_correct/n_sentences))
display(porttinari_df["errors"].value_counts() / n_sentences)
px.histogram(porttinari_df, x="errors")

Total percentage of totally correct sentences: 84.5323741007%


0    0.845324
1    0.132494
2    0.020384
3    0.001799
Name: errors, dtype: float64

### DANTE

In [36]:
# Share of sentences in dante_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = dante_df.shape[0]
dante_total_correct = int((dante_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*dante_total_correct/n_sentences))
display(dante_df["errors"].value_counts() / n_sentences)
px.histogram(dante_df, x="errors")

Total percentage of totally correct sentences: 73.4414%


0    0.734414
1    0.189526
2    0.058603
3    0.012469
4    0.002494
5    0.001247
7    0.001247
Name: errors, dtype: float64

### PetroGold

In [37]:
# Share of sentences in petrogold_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = petrogold_df.shape[0]
petrogold_total_correct = int((petrogold_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*petrogold_total_correct/n_sentences))
display(petrogold_df["errors"].value_counts() / n_sentences)
px.histogram(petrogold_df, x="errors")

Total percentage of totally correct sentences: 80.2247%


0     0.802247
1     0.141573
2     0.042697
3     0.006742
4     0.004494
14    0.002247
Name: errors, dtype: float64

## Repetindo análise com melhor modelo multigênero

In [38]:
# Predictions of the model stored under ../tmp/porttinari_dante_petrogold on each
# of the three test sets, scored against that corpus's own gold sentences.
# The *_gold_sents variables must already be loaded by earlier cells.
# Path.read_text closes each file after reading; open(...).read() left them open.
porttinari_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_porttinari-ud-test_pred.conllu").read_text(encoding="utf-8"))
dante_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_dante-ud-test_pred.conllu").read_text(encoding="utf-8"))
petrogold_pred_sents = conllu.parse(Path("../tmp/porttinari_dante_petrogold/model-3nr9mytc-v0/pt_petrogold-ud-test_pred.conllu").read_text(encoding="utf-8"))

porttinari_df = get_sent_errors(porttinari_gold_sents, porttinari_pred_sents)
dante_df = get_sent_errors(dante_gold_sents, dante_pred_sents)
petrogold_df = get_sent_errors(petrogold_gold_sents, petrogold_pred_sents)

### Porttinari

In [39]:
# Share of sentences in porttinari_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = porttinari_df.shape[0]
porttinari_total_correct = int((porttinari_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.10f}%".format(100*porttinari_total_correct/n_sentences))
display(porttinari_df["errors"].value_counts() / n_sentences)
px.histogram(porttinari_df, x="errors")

Total percentage of totally correct sentences: 83.2733812950%


0    0.832734
1    0.142686
2    0.023381
3    0.001199
Name: errors, dtype: float64

### DANTE

In [40]:
# Share of sentences in dante_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = dante_df.shape[0]
dante_total_correct = int((dante_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*dante_total_correct/n_sentences))
display(dante_df["errors"].value_counts() / n_sentences)
px.histogram(dante_df, x="errors")

Total percentage of totally correct sentences: 72.4439%


0    0.724439
1    0.194514
2    0.067332
4    0.006234
3    0.006234
8    0.001247
Name: errors, dtype: float64

### PetroGold

In [41]:
# Share of sentences in petrogold_df with zero errors, followed by the full
# distribution of errors per sentence and its histogram.
n_sentences = petrogold_df.shape[0]
petrogold_total_correct = int((petrogold_df["errors"] == 0).sum())
print("Total percentage of totally correct sentences: {:.4f}%".format(100*petrogold_total_correct/n_sentences))
display(petrogold_df["errors"].value_counts() / n_sentences)
px.histogram(petrogold_df, x="errors")

Total percentage of totally correct sentences: 78.2022%


0     0.782022
1     0.150562
2     0.044944
3     0.015730
4     0.004494
13    0.002247
Name: errors, dtype: float64