## Inspecting result CSV files

In [1]:
import os, os.path
import re

from pandas import DataFrame as df
from pandas import read_csv
import pandas

In [2]:
input_dir = 'edt_2.6'   # Experiments done on Estonian Dependency Treebank version 2.6

# List every result table (results_*.csv) available in the experiment directory.
# os.listdir() returns entries in arbitrary, platform-dependent order, so sort
# case-insensitively to keep the listing reproducible across machines.
for fname in sorted(os.listdir(input_dir), key=str.lower):
    if fname.startswith('results_') and fname.endswith('.csv'):
        print(fname)

results_crossvalidation.csv
results_full_data_malt_udpipe1.csv
results_gap_experiments.csv
results_gold_and_auto_ud_morph.csv
results_half_data.csv
results_smaller_data.csv
results_stanza_basic.csv
results_stanza_ME_error_types.csv
results_stanza_ME_on_clauses.csv
results_stanza_ME_on_clauses_error_types.csv
results_stanza_ME_sketches_5groups_knockout.csv
results_stanza_ME_sketches_5groups_knockout_matrix.csv
results_stanza_ME_sketches_5randomgroups_knockout.csv
results_stanza_ME_sketches_5randomgroups_knockout_matrix.csv


In [3]:
# Show full cell contents (no column-width limit) when DataFrames are displayed
pandas.options.display.max_colwidth = None

In [4]:
# MaltParser and UDPipe-1 results.
# The path is built from `input_dir` (defined in the file-listing cell) instead of
# repeating the directory literal, so changing the directory there is enough.
read_csv(os.path.join(input_dir, 'results_full_data_malt_udpipe1.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_malt_morph_extended_full,0.7136,0.8423,0.1288,0.7722,0.8744
1,eval_udpipe1_default_morph_extended,0.7477,0.842,0.0943,0.8009,0.879
2,eval_udpipe1_embeddings_morph_extended,0.7627,0.9112,0.1484,0.8127,0.9331


In [5]:
# Basic Stanza parser results.
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_stanza_basic.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_analysis_full_data,0.8458,0.9178,0.072,0.8807,0.9406
1,eval_stanza_morph_extended_full_data,0.8437,0.912,0.0684,0.8786,0.9362


In [6]:
# Stanza parser error types on test set.
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_stanza_ME_error_types.csv'))

Unnamed: 0,experiment,E1,E2,E3,E1_impact,E2_impact,E3_impact,E1_rel_error,E2_rel_error,E3_rel_error,total_no_punct,correct,gold_in_clause,gold_out_of_clause,total_words,punct,unequal_length
0,stanza_ME_error_types_on_test,4821,398,1162,0.7555,0.0624,0.1821,0.1413,0.0117,0.1733,40815,34434,34108,6707,48491,7676,0


In [7]:
# Stanza parser cross-validation results.
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_crossvalidation.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_crossvalidation_01,0.8435,0.924,0.0805,0.8781,0.946
1,eval_stanza_morph_extended_crossvalidation_02,0.8431,0.9161,0.0731,0.877,0.9395
2,eval_stanza_morph_extended_crossvalidation_03,0.8404,0.9221,0.0818,0.8757,0.9447
3,eval_stanza_morph_extended_crossvalidation_04,0.8414,0.921,0.0796,0.8762,0.9436
4,eval_stanza_morph_extended_crossvalidation_05,0.8466,0.9258,0.0792,0.88,0.9469
5,eval_stanza_morph_extended_crossvalidation_06,0.8421,0.9147,0.0727,0.8774,0.9388
6,eval_stanza_morph_extended_crossvalidation_07,0.8416,0.9144,0.0728,0.8762,0.9384
7,eval_stanza_morph_extended_crossvalidation_08,0.8428,0.9244,0.0816,0.878,0.9461
8,eval_stanza_morph_extended_crossvalidation_09,0.8433,0.919,0.0757,0.8787,0.9416
9,eval_stanza_morph_extended_crossvalidation_10,0.8452,0.9273,0.082,0.8803,0.9484


In [8]:
# Stanza parser gap experiments, trained on full data, results.
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_gap_experiments.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_gap_experiments_01_no_wordforms,0.8338,0.9171,0.0833,0.8737,0.9439
1,eval_stanza_morph_extended_gap_experiments_02_no_lemmas,0.8431,0.9251,0.082,0.8793,0.9476
2,eval_stanza_morph_extended_gap_experiments_02_no_pos,0.8438,0.9232,0.0794,0.878,0.9455
3,eval_stanza_morph_extended_gap_experiments_03_no_wordforms_adj_noun_lemmas,0.8111,0.902,0.0909,0.8597,0.9364
4,eval_stanza_morph_extended_gap_experiments_04_no_wordforms_verb_adpos_lemmas,0.7777,0.903,0.1253,0.8201,0.9344
5,eval_stanza_morph_extended_gap_experiments_05_only_cg_list_wordforms_lemmas,0.8109,0.9083,0.0974,0.86,0.9417
6,eval_stanza_morph_extended_gap_experiments_06_no_wordform_lemma_pos_keep_conj,0.6869,0.81,0.123,0.7572,0.8701
7,eval_stanza_morph_extended_gap_experiments_07_no_wordform_lemma_pos,0.6723,0.7648,0.0925,0.7532,0.8351
8,eval_stanza_morph_extended_gap_experiments_08_only_wordforms,0.8341,0.9144,0.0804,0.8737,0.937
9,eval_stanza_morph_extended_gap_experiments_09_only_pos_feats,0.6923,0.8068,0.1145,0.7742,0.8769


In [9]:
# Stanza parser trained on half-data (i.e. training data halved).
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_half_data.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_half_data_001,0.8239,0.9412,0.1173,0.863,0.9551
1,eval_stanza_morph_extended_half_data_002,0.8216,0.9144,0.0928,0.8606,0.9371
2,eval_stanza_morph_extended_half_data_003,0.8218,0.9415,0.1197,0.8603,0.9553
3,eval_stanza_morph_extended_half_data_004,0.8208,0.9338,0.113,0.858,0.9501
4,eval_stanza_morph_extended_half_data_005,0.8211,0.9245,0.1034,0.8609,0.943
5,eval_stanza_morph_extended_half_data_006,0.8178,0.9205,0.1027,0.858,0.9408
6,eval_stanza_morph_extended_half_data_007,0.8214,0.9221,0.1007,0.8601,0.9421
7,eval_stanza_morph_extended_half_data_008,0.823,0.9307,0.1078,0.862,0.9477
8,eval_stanza_morph_extended_half_data_009,0.8248,0.933,0.1083,0.863,0.9493
9,eval_stanza_morph_extended_half_data_010,0.8189,0.922,0.1031,0.8577,0.9423


In [10]:
# Stanza parser trained while incrementally increasing training set sizes
# 01_08_only_wordforms                -- trained on form (text); deleted: lemma, upos, xpos, feats;
# 02_keep_all                         -- trained on all fields: form (text), lemma, upos, xpos, feats;
# 03_only_pos_feats_09_only_pos_feats -- trained on upos, xpos, feats; deleted: form (text), lemma;
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_smaller_data.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train,test_words,train_words
0,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_010,0.6699,0.9456,0.2757,0.7577,0.9574,40815,33549
1,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_020,0.7386,0.961,0.2224,0.7994,0.9686,40815,65394
2,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_030,0.7723,0.9314,0.1592,0.8258,0.9462,40815,98696
3,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_040,0.7903,0.9321,0.1418,0.8376,0.9483,40815,132067
4,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_050,0.8075,0.917,0.1095,0.8537,0.9376,40815,163644
5,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_060,0.8177,0.9241,0.1064,0.8586,0.9432,40815,197109
6,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_070,0.8162,0.9115,0.0954,0.8588,0.9343,40815,229138
7,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_080,0.825,0.9131,0.0881,0.8661,0.9356,40815,260607
8,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_090,0.8274,0.9084,0.081,0.8674,0.9336,40815,292517
9,eval_stanza_morph_extended_smaller_data_01_08_only_wordforms_100,0.8313,0.904,0.0727,0.8724,0.9297,40815,325531


In [11]:
# Stanza parser trained on UD annotations:
# ud_auto -- UD annotations automatically converted from morph_analysis layer;
# ud_gold -- original gold standard UD annotations from the corpus;
# Read from `input_dir` (defined in the file-listing cell) rather than a repeated literal.
read_csv(os.path.join(input_dir, 'results_gold_and_auto_ud_morph.csv'))

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_ud_auto_morph_full_data,0.8472,0.9284,0.0812,0.8809,0.9493
1,eval_stanza_ud_gold_morph_full_data,0.877,0.9347,0.0578,0.898,0.9493


In [12]:
# Basic Stanza results for the latest corpus version (EDT 2.11): the parser was
# both trained and evaluated on that version. Note the separate 'edt_2.11' directory.
pandas.read_csv('edt_2.11/results_stanza_basic.csv')

Unnamed: 0,experiment,LAS_test,LAS_train,LAS_gap,UAS_test,UAS_train
0,eval_stanza_morph_extended_full_data,0.8442,0.92,0.0758,0.8786,0.9418
