## Inspecting result CSV files

In [1]:
import os, os.path
import re

from pandas import DataFrame as df
from pandas import read_csv
import pandas

In [2]:
input_dir = 'edt_2.6'   # Experiments done on Estonian Dependency Treebank version 2.6

# Print the names of all result CSV files found in input_dir.
# os.listdir() returns entries in arbitrary order, so the names are sorted
# (case-insensitively) to keep the listing stable across platforms and re-runs.
for fname in sorted(os.listdir(input_dir), key=str.lower):
    if fname.startswith('results_') and fname.endswith('.csv'):
        print(fname)

results_crossvalidation.csv
results_ensemble_majority_voting.csv
results_full_data_malt_udpipe1.csv
results_gap_experiments.csv
results_gold_and_auto_ud_morph.csv
results_half_data.csv
results_smaller_data.csv
results_stanza_basic.csv
results_stanza_ME_error_types.csv
results_stanza_ME_on_clauses.csv
results_stanza_ME_on_clauses_error_types.csv
results_stanza_ME_sketches_5groups_knockout.csv
results_stanza_ME_sketches_5groups_knockout_matrix.csv
results_stanza_ME_sketches_5randomgroups_knockout.csv
results_stanza_ME_sketches_5randomgroups_knockout_matrix.csv


In [3]:
# Do not truncate cell contents when displaying tables
# (the experiment names in the result files are long).
pandas.options.display.max_colwidth = None

### MaltParser and UDPipe-1

In [4]:
# Load and display the MaltParser and UDPipe-1 results
pandas.read_csv('edt_2.6/results_full_data_malt_udpipe1.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_malt_morph_extended_full,0.7186,0.8446,0.126,0.7681,0.8715
1,eval_udpipe1_default_morph_extended,0.756,0.8501,0.0941,0.8009,0.8811
2,eval_udpipe1_embeddings_morph_extended,0.7713,0.9147,0.1434,0.8135,0.933


### Basic Stanza (morph_analysis, morph_extended)

In [5]:
# Load and display the results of the basic Stanza parser
pandas.read_csv('edt_2.6/results_stanza_basic.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train,test_words,train_words
0,eval_stanza_morph_analysis_full_data,0.8507,0.9231,0.0724,0.8802,0.9422,48491,389278
1,eval_stanza_morph_extended_full_data,0.8486,0.9176,0.0689,0.8782,0.9378,48491,389278


In [6]:
# Load and display the Stanza parser's error types on the test set
pandas.read_csv('edt_2.6/results_stanza_ME_error_types.csv')

Unnamed: 0,experiment,E1,E2,E3,E1_impact,E2_impact,E3_impact,E1_rel_error,E2_rel_error,E3_rel_error,total_no_punct,correct,gold_in_clause,gold_out_of_clause,total_words,punct,unequal_length
0,stanza_ME_error_types_on_test,4821,398,1162,0.7555,0.0624,0.1821,0.1413,0.0117,0.1733,40815,34434,34108,6707,48491,7676,0


### Stanza with crossvalidation

In [7]:
# Load and display the Stanza parser's cross-validation results
pandas.read_csv('edt_2.6/results_crossvalidation.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_crossvalidation_01,0.8485,0.9292,0.0807,0.8778,0.9476
1,eval_stanza_morph_extended_crossvalidation_02,0.848,0.9217,0.0737,0.8767,0.9412
2,eval_stanza_morph_extended_crossvalidation_03,0.846,0.9274,0.0813,0.876,0.9463
3,eval_stanza_morph_extended_crossvalidation_04,0.8468,0.9263,0.0795,0.8763,0.9452
4,eval_stanza_morph_extended_crossvalidation_05,0.8514,0.9308,0.0793,0.8797,0.9484
5,eval_stanza_morph_extended_crossvalidation_06,0.8473,0.9203,0.073,0.8772,0.9405
6,eval_stanza_morph_extended_crossvalidation_07,0.8465,0.9201,0.0735,0.8759,0.9402
7,eval_stanza_morph_extended_crossvalidation_08,0.8482,0.9297,0.0815,0.8779,0.9479
8,eval_stanza_morph_extended_crossvalidation_09,0.8482,0.9241,0.0758,0.8782,0.943
9,eval_stanza_morph_extended_crossvalidation_10,0.8503,0.9322,0.0819,0.88,0.9499


### Stanza ablation experiments

In [8]:
# Load and display the results of the Stanza parser gap experiments
# (models trained on full data)
pandas.read_csv('edt_2.6/results_gap_experiments.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_gap_experiments_01_no_wordforms,0.8396,0.9236,0.084,0.8734,0.9461
1,eval_stanza_morph_extended_gap_experiments_02_no_lemmas,0.8483,0.9308,0.0825,0.8789,0.9496
2,eval_stanza_morph_extended_gap_experiments_02_no_pos,0.8492,0.9284,0.0792,0.8781,0.9471
3,eval_stanza_morph_extended_gap_experiments_03_no_wordforms_adj_noun_lemmas,0.82,0.9109,0.0909,0.861,0.9396
4,eval_stanza_morph_extended_gap_experiments_04_no_wordforms_verb_adpos_lemmas,0.7822,0.9106,0.1285,0.8181,0.937
5,eval_stanza_morph_extended_gap_experiments_05_only_cg_list_wordforms_lemmas,0.8158,0.9153,0.0995,0.8572,0.9433
6,eval_stanza_morph_extended_gap_experiments_06_no_wordform_lemma_pos_keep_conj,0.6869,0.81,0.123,0.7572,0.8701
7,eval_stanza_morph_extended_gap_experiments_07_no_wordform_lemma_pos,0.6723,0.7648,0.0925,0.7532,0.8351
8,eval_stanza_morph_extended_gap_experiments_08_only_wordforms,0.8407,0.9202,0.0795,0.8742,0.9391
9,eval_stanza_morph_extended_gap_experiments_09_only_pos_feats,0.7018,0.8213,0.1195,0.7709,0.8799


### Stanza half training data experiments

In [9]:
# Load and display the results of the Stanza parser trained on half-data
# (i.e. with the training data halved)
pandas.read_csv('edt_2.6/results_half_data.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_half_data_001,0.8295,0.9448,0.1153,0.8625,0.9564
1,eval_stanza_morph_extended_half_data_002,0.8278,0.9203,0.0925,0.8607,0.9392
2,eval_stanza_morph_extended_half_data_003,0.8276,0.9453,0.1177,0.8602,0.9568
3,eval_stanza_morph_extended_half_data_004,0.8265,0.9384,0.1119,0.858,0.9521
4,eval_stanza_morph_extended_half_data_005,0.8264,0.9289,0.1024,0.8601,0.9443
5,eval_stanza_morph_extended_half_data_006,0.8248,0.9255,0.1007,0.8586,0.9425
6,eval_stanza_morph_extended_half_data_007,0.8275,0.9271,0.0996,0.8602,0.9438
7,eval_stanza_morph_extended_half_data_008,0.8287,0.9353,0.1065,0.8617,0.9495
8,eval_stanza_morph_extended_half_data_009,0.8304,0.9368,0.1065,0.8627,0.9504
9,eval_stanza_morph_extended_half_data_010,0.8252,0.9275,0.1023,0.8579,0.9445


### Stanza increasing training data experiments

In [10]:
# Load and display the results of the Stanza parser trained on incrementally
# increasing training set sizes. Experiment groups:
#   01_08_only_wordforms                -- trained on form (text); deleted: lemma, upos, xpos, feats;
#   02_keep_all                         -- trained on all fields: form (text), lemma, upos, xpos, feats;
#   03_only_pos_feats_09_only_pos_feats -- trained on upos, xpos, feats; deleted: form (text), lemma;
pandas.read_csv('edt_2.6/results_smaller_data.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train,test_words,train_words
0,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_010,0.6864,0.9491,0.2628,0.7605,0.9591,48491,39886
1,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_020,0.749,0.9636,0.2146,0.8004,0.97,48491,77759
2,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_030,0.7819,0.9365,0.1546,0.8271,0.9489,48491,117642
3,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_040,0.7989,0.9366,0.1377,0.8388,0.9502,48491,157491
4,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_050,0.8152,0.9234,0.1082,0.8543,0.9406,48491,195477
5,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_060,0.825,0.9299,0.1049,0.8595,0.9458,48491,235468
6,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_070,0.8241,0.9179,0.0938,0.86,0.937,48491,273422
7,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_080,0.8327,0.9194,0.0867,0.8674,0.9383,48491,311383
8,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_090,0.8343,0.9151,0.0808,0.868,0.9362,48491,349322
9,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_100,0.8377,0.9104,0.0728,0.8723,0.932,48491,389278


### Ensemble of Stanza's models

In [11]:
# Ensemble results with aggregation_algorithm = 'las_coherence' (default aggregation method).
# Read the cross-validation and half-data result files, keep only the rows whose
# experiment name contains 'ensemble', and display the selected rows together.
crossvalidation = pandas.read_csv('edt_2.6/results_crossvalidation.csv')
half_data = pandas.read_csv('edt_2.6/results_half_data.csv')

cv_ensemble = crossvalidation[crossvalidation['experiment'].str.contains('ensemble')]
half_data_ensemble = half_data[half_data['experiment'].str.contains('ensemble')]

pandas.concat([cv_ensemble, half_data_ensemble])

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
11,eval_stanza_ensemble_tagger_morph_extended_default_full_data,0.8568,0.9337,0.0769,0.8851,0.9515
11,eval_stanza_morph_extended_half_data_ensemble,0.8446,0.9151,0.0705,0.876,0.9344


In [12]:
# Load and display the ensemble results obtained with
# aggregation_algorithm = 'majority_voting'
pandas.read_csv('edt_2.6/results_ensemble_majority_voting.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_full_data_ensemble_majority_voting,0.8571,0.9338,0.0767,0.8854,0.9517
1,eval_stanza_morph_extended_half_data_ensemble_majority_voting,0.846,0.9131,0.0671,0.8768,0.9326


### Stanza on UD morph analysis (auto, gold)

In [13]:
# Load and display the results of the Stanza parser trained on UD annotations:
#   ud_auto -- UD annotations automatically converted from the morph_analysis layer;
#   ud_gold -- original gold standard UD annotations from the corpus;
pandas.read_csv('edt_2.6/results_gold_and_auto_ud_morph.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_ud_auto_morph_full_data,0.8523,0.9333,0.081,0.8808,0.9508
1,eval_stanza_ud_gold_morph_full_data,0.881,0.938,0.057,0.8987,0.9502


### Stanza basic on the latest version of the corpus (2.11)

In [14]:
# Load and display the results of Stanza trained and evaluated on the
# latest version of the corpus (EDT 2.11)
pandas.read_csv('edt_2.11/results_stanza_basic.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_full_data,0.8484,0.9247,0.0764,0.8775,0.943
