In [16]:
try:
    from sklearn.metrics import cohen_kappa_score
except Exception as e:
    print(str(e))
    ! pip3 install sklearn
import os
# Download the estrutura_ud helper module into the working directory when it is not already there.
# NOTE(review): the URL points at the moving `master` branch, so the code that gets imported
# below is unpinned — consider pinning to a specific commit.
if not os.path.isfile("estrutura_ud.py"):
    ! wget https://raw.githubusercontent.com/alvelvis/ACDC-UD/master/estrutura_ud.py
import estrutura_ud
import itertools
import json
import pprint

In [61]:
# Annotators whose "<file_name>_<annotator>.conllu" files are looked up by the loading cell.
annotators = [
    "Aline",
    "Elvis",
    "Tati",
    "Wogue",
    "Maria_Clara",
]

# Token fields that are compared between two annotations of the same token.
attributes = [
    "lemma",
    "feats",
    "dephead",
    "deprel",
    "upos",
]

# Common prefix of every .conllu file used in this run.
file_name = "50R"

In [62]:
# Load files
def _load_corpus(path):
    """Build an estrutura_ud.Corpus and load the file at `path` into it."""
    loaded = estrutura_ud.Corpus()
    loaded.load(path)
    return loaded


# Only annotators whose file exists on disk end up in this mapping.
annotators_files = {}
for annotator in annotators:
    annotator_path = "{}_{}.conllu".format(file_name, annotator)
    if os.path.isfile(annotator_path):
        annotators_files[annotator] = _load_corpus(annotator_path)

# Golden and system corpora are optional; 0 is the "not loaded" sentinel.
golden_path = "{}_golden.conllu".format(file_name)
golden_file = _load_corpus(golden_path) if os.path.isfile(golden_path) else 0

system_path = "{}_system.conllu".format(file_name)
system_file = _load_corpus(system_path) if os.path.isfile(system_path) else 0

# Truthy only when both optional corpora were loaded.
has_system_and_golden = system_file and golden_file

build: 0.16484498977661133build: 0.03490161895751953build: 0.27196502685546875build: 0.020577430725097656

In [63]:
# Remove sentences with tokenization issues
# A sentence is kept for an annotator only when it exists in both the golden and the
# system corpus and all three versions (golden, system, annotator) have the same number
# of tokens. The cells below pair tokens by position, so any length mismatch would
# either raise IndexError or silently compare unrelated tokens.
if has_system_and_golden:
    golden_sentences = set(golden_file.sentences)
    for annotator, corpus in annotators_files.items():
        # Snapshot the ids first: entries are deleted while walking the corpus.
        for sent_id in list(corpus.sentences.keys()):
            if sent_id not in golden_sentences:
                reason = "Not exist anymore"
            elif sent_id not in system_file.sentences:
                reason = "Not in golden or system"
            else:
                n_golden = len(golden_file.sentences[sent_id].tokens)
                n_system = len(system_file.sentences[sent_id].tokens)
                n_annotator = len(corpus.sentences[sent_id].tokens)
                if n_golden == n_system == n_annotator:
                    continue  # aligned everywhere: keep the sentence
                reason = "Different n. of tokens"
            del corpus.sentences[sent_id]
            print("{}: {} Annotator {} sent_id {}".format(reason, file_name, annotator, sent_id))

In [64]:
# Experiment
# Corpus-wide totals; the loop below fills them from the first annotator's file only.
statistics = dict(tokens=0, sentences=0, tokens_changed=0)

# One independent set of counters per loaded annotator.
people = {
    name: dict(tokens_changed=0, correct_changes=0, tokens_correct=0)
    for name in annotators_files
}

# Per-annotator counter initialised to zero.
correctedness = dict.fromkeys(annotators_files, 0)

def have_same_attributes(token_1, token_2, attributes=attributes):
    """Tell whether two tokens hold equal values for every name in `attributes`.

    Values are read straight from each token's instance ``__dict__``, so a name that
    is missing there raises KeyError. `attributes` defaults to the notebook-level list
    as it was when this function was defined.
    """
    first_values = tuple(token_1.__dict__[name] for name in attributes)
    second_values = tuple(token_2.__dict__[name] for name in attributes)
    return first_values == second_values

for i, annotator in enumerate(annotators_files):
    annotated_sentences = annotators_files[annotator].sentences

    # Corpus-wide totals are counted once, using the first annotator's sentences.
    if i == 0:
        for sent_id, sentence in annotated_sentences.items():
            statistics['sentences'] += 1
            for t, token in enumerate(sentence.tokens):
                if '-' in token.id:
                    continue  # ids containing '-' are not counted
                statistics['tokens'] += 1
                if has_system_and_golden and not have_same_attributes(
                    golden_file.sentences[sent_id].tokens[t],
                    system_file.sentences[sent_id].tokens[t],
                ):
                    # golden differs from system at this position
                    statistics['tokens_changed'] += 1

    # Per-annotator scores need both reference corpora.
    if not has_system_and_golden:
        continue

    scores = people[annotator]
    for sent_id, sentence in annotated_sentences.items():
        golden_tokens = golden_file.sentences[sent_id].tokens
        system_tokens = system_file.sentences[sent_id].tokens
        for t, token in enumerate(sentence.tokens):
            if '-' in token.id:
                continue
            agrees_with_golden = have_same_attributes(token, golden_tokens[t])
            if agrees_with_golden:
                scores['tokens_correct'] += 1
            # The annotator altered what the system produced for this token.
            if not have_same_attributes(system_tokens[t], token):
                scores['tokens_changed'] += 1
                if agrees_with_golden:
                    scores['correct_changes'] += 1

def _agreement_ratio(first_values, second_values):
    """Share of positions in `first_values` whose value equals the one at the same index in `second_values`."""
    matches = sum(1 for i, x in enumerate(first_values) if second_values[i] == x)
    return matches / len(first_values)


# Results, keyed by (annotator, annotator) pair and then by attribute (or golden upos).
kappa = {}
kappa_changed = {}
concordance_by_col = {}
observed_concordance = {}

# NOTE(review): values are paired purely by position, which presumes both annotators
# hold the same sentences in the same order with the same token counts — TODO confirm.
for combination in itertools.combinations(list(annotators_files.keys()), r=2):
    first, second = combination
    kappa[combination] = {}
    kappa_changed[combination] = {}
    concordance_by_col[combination] = {}
    observed_concordance[combination] = {}

    tags = {}          # annotator -> attribute -> values of every counted token
    tags_changed = {}  # same, restricted to tokens where golden differs from system
    tags_by_col = {}   # annotator -> golden upos -> the annotator's deprel values
    for annotator in combination:
        tags[annotator] = {attribute: [] for attribute in attributes}
        tags_changed[annotator] = {attribute: [] for attribute in attributes}
        tags_by_col[annotator] = {}
        for sent_id, sentence in annotators_files[annotator].sentences.items():
            for t, token in enumerate(sentence.tokens):
                if '-' in token.id:
                    continue
                # The golden-vs-system comparison does not depend on the attribute,
                # so it is evaluated once per token.
                if has_system_and_golden:
                    golden_token = golden_file.sentences[sent_id].tokens[t]
                    system_token = system_file.sentences[sent_id].tokens[t]
                    changed_in_golden = not have_same_attributes(golden_token, system_token)
                else:
                    changed_in_golden = False
                for attribute in attributes:
                    value = token.__dict__[attribute]
                    tags[annotator][attribute].append(value)
                    if changed_in_golden:
                        tags_changed[annotator][attribute].append(value)
                if has_system_and_golden:
                    tags_by_col[annotator].setdefault(golden_token.upos, []).append(token.deprel)

    for attribute in attributes:
        first_tags = tags[first][attribute]
        second_tags = tags[second][attribute]
        kappa[combination][attribute] = cohen_kappa_score(first_tags, second_tags)
        if has_system_and_golden:
            kappa_changed[combination][attribute] = cohen_kappa_score(
                tags_changed[first][attribute], tags_changed[second][attribute]
            )
        observed_concordance[combination][attribute] = _agreement_ratio(first_tags, second_tags)

    for tag in tags_by_col[first]:
        concordance_by_col[combination][tag] = _agreement_ratio(
            tags_by_col[first][tag], tags_by_col[second][tag]
        )

In [65]:
print(file_name)
print(json.dumps(statistics, ensure_ascii=False, indent=4))
if has_system_and_golden:
    # Number of tokens where golden differs from system: the denominator of recall.
    needed_changes = statistics['tokens_changed']
    for annotator in annotators_files:
        scores = people[annotator]
        made_changes = scores['tokens_changed']
        correct_changes = scores['correct_changes']

        # precision: share of the annotator's changes that match golden.
        precision = correct_changes / made_changes if made_changes else 0
        # recall: share of the needed changes the annotator got right. The guard now
        # also covers the denominator actually used, so it cannot divide by zero.
        recall = correct_changes / needed_changes if made_changes and needed_changes else 0
        # F1 is undefined when precision and recall are both zero (changes were made
        # but none was correct); report 0 instead of raising ZeroDivisionError.
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall else 0

        scores['_precision'] = precision
        scores['_recall'] = recall
        scores['_F1'] = f1
    print(json.dumps(people, ensure_ascii=False, indent=4))

50R
{
    "tokens": 762,
    "sentences": 20,
    "tokens_changed": 0
}


In [66]:
pprint.pprint(observed_concordance)

{('Aline', 'Elvis'): {'dephead': 0.931758530183727,
                      'deprel': 0.9514435695538058,
                      'feats': 0.9868766404199475,
                      'lemma': 0.9908136482939632,
                      'upos': 0.979002624671916},
 ('Aline', 'Tati'): {'dephead': 0.9448818897637795,
                     'deprel': 0.958005249343832,
                     'feats': 0.9934383202099738,
                     'lemma': 0.994750656167979,
                     'upos': 0.9881889763779528},
 ('Aline', 'Wogue'): {'dephead': 0.9461942257217848,
                      'deprel': 0.952755905511811,
                      'feats': 0.989501312335958,
                      'lemma': 0.9908136482939632,
                      'upos': 0.9803149606299213},
 ('Elvis', 'Tati'): {'dephead': 0.9278215223097113,
                     'deprel': 0.9448818897637795,
                     'feats': 0.9855643044619422,
                     'lemma': 0.9855643044619422,
                     'upos': 0.975

In [67]:
pprint.pprint(concordance_by_col)

{('Aline', 'Elvis'): {},
 ('Aline', 'Tati'): {},
 ('Aline', 'Wogue'): {},
 ('Elvis', 'Tati'): {},
 ('Elvis', 'Wogue'): {},
 ('Tati', 'Wogue'): {}}


In [68]:
pprint.pprint(kappa)

{('Aline', 'Elvis'): {'dephead': 0.9301881839308084,
                      'deprel': 0.9455555577011832,
                      'feats': 0.9841041035194182,
                      'lemma': 0.9904510765363939,
                      'upos': 0.9744296910883531},
 ('Aline', 'Tati'): {'dephead': 0.9436154206644797,
                     'deprel': 0.9529821909250076,
                     'feats': 0.9920504763454725,
                     'lemma': 0.9945434625382208,
                     'upos': 0.9856432285778883},
 ('Aline', 'Wogue'): {'dephead': 0.9449616923372343,
                      'deprel': 0.9471267823894716,
                      'feats': 0.9872786121667053,
                      'lemma': 0.99045109363084,
                      'upos': 0.976070294295602},
 ('Elvis', 'Tati'): {'dephead': 0.9261551147846587,
                     'deprel': 0.9381683504025317,
                     'feats': 0.9825089677536482,
                     'lemma': 0.9849947637418212,
                     'upos': 0.

In [69]:
pprint.pprint(kappa_changed)

{('Aline', 'Elvis'): {},
 ('Aline', 'Tati'): {},
 ('Aline', 'Wogue'): {},
 ('Elvis', 'Tati'): {},
 ('Elvis', 'Wogue'): {},
 ('Tati', 'Wogue'): {}}
