In [13]:
# Matplotlib
%matplotlib inline
import matplotlib as mpl
from matplotlib import pylab
import matplotlib.pyplot as plt
pylab.rcParams['figure.figsize'] = (10.0, 7.0)
mpl.style.use('ggplot')
from matplotlib.backends.backend_pgf import FigureCanvasPgf
mpl.backend_bases.register_backend('pgf', FigureCanvasPgf)

# Remove warnings
import warnings
#warnings.filterwarnings('ignore')

import sys, os
from os.path import join
sys.path.append("../tools/")
from collections import defaultdict

# Data management libraries
import pandas as pd
#import seaborn as sns
import scipy as sp
import scipy.stats

# Treebank utils, found in "../tools"
import udeval, udtree, lang_utils, dependency_classes

# Run-wide settings. None of these names is read by the functions or cells
# visible in this part of the notebook (the metric functions take their own
# `fine_grained_deprels` parameter, which shadows the global of the same name).
file_format = "conllu"
train_type = "train"
fine_grained_deprels = False
# NOTE(review): absolute path tied to one developer's home directory; the other
# data locations used in this notebook ("../tools/", "../data/", "../resources/")
# are relative. Confirm whether this can be made relative or configurable.
gold_output_base = "/Users/jimmy/dev/edu/nlp-rod/udeval/resources/universaldependencies1-2/universal-dependencies-1.2/"



def attachment_score_per_tree(system_output_path, gold_path, labeled=True, fine_grained_deprels=False, include_punct=False):
    """Compute an attachment score for every tree of a system/gold file pair.

    Trees are paired positionally with ``zip``; surplus trees in the longer
    file are ignored.

    Args:
        system_output_path: path given to ``udtree.from_files`` for the parser output.
        gold_path: path given to ``udtree.from_files`` for the gold standard.
        labeled: forwarded as the third positional argument of
            ``udeval.match_tree_attachments``.
        fine_grained_deprels: forwarded to ``udeval.match_tree_attachments``.
        include_punct: when False, ``'punct'`` plus whatever
            ``udeval.is_only_punctuation(gold_path)`` returns is passed as
            ``ignore_deprels``. When True, ``ignore_deprels`` is not passed at
            all, so the matcher's own default applies.

    Returns:
        A list with one float per tree pair: ``correct / (correct + incorrect)``,
        or NaN when the matcher reported no attachments for that pair.
    """
    # Build the matcher keyword arguments once. Previously `puncts` was only
    # bound when include_punct was False, so include_punct=True crashed.
    match_kwargs = {'fine_grained_deprels': fine_grained_deprels}
    if not include_punct:
        match_kwargs['ignore_deprels'] = set.union(udeval.is_only_punctuation(gold_path), {'punct'})

    system = udtree.from_files(system_output_path)
    gold = udtree.from_files(gold_path)

    res = []
    for system_tree, gold_tree in zip(system, gold):
        tree_correct, tree_incorrect = udeval.match_tree_attachments(
            system_tree, gold_tree, labeled, **match_kwargs)
        correct = len(tree_correct)
        total = correct + len(tree_incorrect)
        if total == 0:
            # Exactly one entry per tree: NaN instead of falling through to a
            # division by zero.
            res.append(float("NaN"))
        else:
            res.append(correct / total)
    return res
    

def weighted_las(system_output_path, gold_path, weights, include_punct=False):
    """Compute a label-weighted labeled attachment score for every tree pair.

    Each attachment reported by ``udeval.match_tree_attachments`` (called with
    labeled matching and ``fine_grained_deprels=False``) contributes
    ``weights[gold_label]``, where ``gold_label`` is the fourth element of the
    attachment tuple.

    Args:
        system_output_path: path given to ``udtree.from_files`` for the parser output.
        gold_path: path given to ``udtree.from_files`` for the gold standard.
        weights: mapping indexed by gold label (anything supporting
            ``weights[label]``); a missing label raises whatever that lookup raises.
        include_punct: when False, ``'punct'`` plus whatever
            ``udeval.is_only_punctuation(gold_path)`` returns is passed as
            ``ignore_deprels``. When True, ``ignore_deprels`` is not passed.

    Returns:
        A list with one float per tree pair: weighted correct mass divided by
        total weighted mass, or NaN when the total is zero.
    """
    # `puncts` used to be unbound when include_punct was True; only pass
    # ignore_deprels when there is something to ignore.
    match_kwargs = {'fine_grained_deprels': False}
    if not include_punct:
        match_kwargs['ignore_deprels'] = set.union(udeval.is_only_punctuation(gold_path), {'punct'})

    system = udtree.from_files(system_output_path)
    gold = udtree.from_files(gold_path)

    res = []
    for system_tree, gold_tree in zip(system, gold):
        tree_correct, tree_incorrect = udeval.match_tree_attachments(
            system_tree, gold_tree, True, **match_kwargs)
        correct = sum(weights[gold_label] for _, _, _, gold_label in tree_correct)
        incorrect = sum(weights[gold_label] for _, _, _, gold_label in tree_incorrect)
        total = correct + incorrect
        if total == 0:
            # One entry per tree; previously NaN was appended and the division
            # below still ran, raising ZeroDivisionError.
            res.append(float("NaN"))
        else:
            res.append(correct / total)
    return res


    
def labels_precision_per_tree(system_output_path,
                            gold_path,
                            labels=["nsubj", "nsubjpass"],
                            fine_grained_deprels=True):
    """Per-tree precision of the system on a set of dependency labels.

    For each tree pair, attachments come from
    ``udeval.match_tree_attachments`` (labeled matching) as 4-tuples whose
    second element is the system label. Precision is the number of correct
    attachments whose system label is in ``labels`` divided by the number of
    all attachments (correct or incorrect) whose system label is in ``labels``.

    Args:
        system_output_path: path given to ``udtree.from_files`` for the parser output.
        gold_path: path given to ``udtree.from_files`` for the gold standard.
        labels: container tested with ``in``; it is never mutated here.
        fine_grained_deprels: forwarded to ``udeval.match_tree_attachments``.

    Returns:
        A list with one float per tree pair; NaN when the system produced no
        attachment with a label in ``labels`` for that tree.
    """
    system = udtree.from_files(system_output_path)
    gold = udtree.from_files(gold_path)
    results = []
    for system_tree, gold_tree in zip(system, gold):
        tree_correct, tree_incorrect = udeval.match_tree_attachments(
            system_tree, gold_tree, True, fine_grained_deprels=fine_grained_deprels)
        # Only system-side labels matter for precision; the gold-side counter
        # the previous version maintained was never read.
        hits = sum(1 for _, system_label, _, _ in tree_correct if system_label in labels)
        misses = sum(1 for _, system_label, _, _ in tree_incorrect if system_label in labels)
        predicted = hits + misses
        if predicted == 0:
            results.append(float("NaN"))
        else:
            results.append(hits / predicted)
    return results

def labels_recall_per_tree(system_output_path,
                            gold_path,
                            labels=["nsubj", "nsubjpass"],
                            fine_grained_deprels=True):
    """Per-tree recall of the system on a set of dependency labels.

    For each tree pair, attachments come from
    ``udeval.match_tree_attachments`` (labeled matching) as 4-tuples whose
    second element is the system label and fourth element is the gold label.
    The numerator counts correct attachments whose system label is in
    ``labels``; the denominator adds to that the incorrect attachments whose
    gold label is in ``labels``.

    Args:
        system_output_path: path given to ``udtree.from_files`` for the parser output.
        gold_path: path given to ``udtree.from_files`` for the gold standard.
        labels: container tested with ``in``; it is never mutated here.
        fine_grained_deprels: forwarded to ``udeval.match_tree_attachments``.

    Returns:
        A list with one float per tree pair; NaN when the denominator is zero.
    """
    system = udtree.from_files(system_output_path)
    gold = udtree.from_files(gold_path)
    results = []
    for system_tree, gold_tree in zip(system, gold):
        tree_correct, tree_incorrect = udeval.match_tree_attachments(
            system_tree, gold_tree, True, fine_grained_deprels=fine_grained_deprels)
        # The count of incorrect system-side predictions the previous version
        # maintained was never read; recall only needs these two counts.
        hits = sum(1 for _, system_label, _, _ in tree_correct if system_label in labels)
        gold_missed = sum(1 for _, _, _, gold_label in tree_incorrect if gold_label in labels)
        gold_count = hits + gold_missed
        if gold_count == 0:
            results.append(float("NaN"))
        else:
            results.append(hits / gold_count)
    return results

def root_distance(tree, index):
    """Count the tokens on the head chain from ``index`` up to the root.

    ``index`` is 1-based: the head of token ``index`` is read from
    ``tree.heads[index - 1]``. A token whose head is 0 has distance 1; any
    other token has the distance of its head plus one.
    """
    parent = tree.heads[index - 1]
    return 1 if parent == 0 else 1 + root_distance(tree, parent)

def root_las(system_output_path, gold_path, include_punct=False):
    """Compute a root-distance-weighted labeled attachment score per tree pair.

    Attachments come from ``udeval.match_tree_attachments`` (labeled matching,
    ``fine_grained_deprels=False``). Each attachment is weighted by
    ``1 / root_distance(gold_tree, index) ** 2``, where ``index`` is the
    attachment's 1-based position within the list it was returned in.

    NOTE(review): the correct and incorrect lists are each enumerated from 1
    independently, so ``index`` is a list position, not necessarily the token's
    position in ``gold_tree``. Confirm against ``match_tree_attachments``
    whether the attachment tuple carries the token index that should be used
    here instead.

    Args:
        system_output_path: path given to ``udtree.from_files`` for the parser output.
        gold_path: path given to ``udtree.from_files`` for the gold standard.
        include_punct: when False, ``'punct'`` plus whatever
            ``udeval.is_only_punctuation(gold_path)`` returns is passed as
            ``ignore_deprels``. When True, ``ignore_deprels`` is not passed.

    Returns:
        A list with one float per tree pair: weighted correct mass divided by
        total weighted mass, or NaN when no attachment was reported.
    """
    # `puncts` used to be unbound when include_punct was True; only pass
    # ignore_deprels when there is something to ignore.
    match_kwargs = {'fine_grained_deprels': False}
    if not include_punct:
        match_kwargs['ignore_deprels'] = set.union(udeval.is_only_punctuation(gold_path), {'punct'})

    system = udtree.from_files(system_output_path)
    gold = udtree.from_files(gold_path)

    res = []
    for system_tree, gold_tree in zip(system, gold):
        correct, incorrect = 0, 0
        tree_correct, tree_incorrect = udeval.match_tree_attachments(
            system_tree, gold_tree, True, **match_kwargs)
        for index, _attachment in enumerate(tree_correct, 1):
            correct += 1 / root_distance(gold_tree, index)**2

        for index, _attachment in enumerate(tree_incorrect, 1):
            incorrect += 1 / root_distance(gold_tree, index)**2

        total = correct + incorrect
        if total == 0:
            # One entry per tree; previously NaN was appended and the division
            # below still ran, raising ZeroDivisionError.
            res.append(float("NaN"))
        else:
            res.append(correct / total)
    return res

In [17]:
# Per-label weights consumed by `weighted_las` (looked up as weights[gold_label]).
# NOTE(review): `pd.Series.from_csv` is deprecated and was removed in pandas 1.0;
# on a newer pandas these two reads need a `pd.read_csv(...)` based replacement.
# Left unchanged here because the exact equivalent depends on the pandas version.
avg_weights = pd.Series.from_csv('../data/function_content_degree.wde.csv')
#ind_weights = pd.DataFrame.from_csv('../data/function_content_degree.individual_languages.wde.csv')
langs = [('en', 'English'), ('es', 'Spanish'), ('de', 'German')]

# Human preference per sentence, recoded from the 'blue'/'red' strings in the
# prediction file to 0/1.
annotators = {}
for langcode, lang in langs:
    annotators[lang] = pd.Series.from_csv('../resources/plank_conll2015/datapackage/{}/prediction'.format(langcode), header=None, index_col=None).map({'blue': 0, 'red': 1})

# Every metric is wrapped to the common signature metric(system_path, gold_path).
las = lambda system, gold: attachment_score_per_tree(system, gold)
avg_wlas = lambda system, gold: weighted_las(system, gold, weights=avg_weights)
#ind_wlas = lambda system, gold, lang: weighted_las(system, gold, weights=ind_weights[lang])
content_precision = lambda system, gold: labels_precision_per_tree(system, gold, labels=dependency_classes.content_dependents)
content_recall = lambda system, gold: labels_recall_per_tree(system, gold, labels=dependency_classes.content_dependents)
metrics = [('Root', root_las), ('LAS', las), ('WLAS', avg_wlas), ('Content precision', content_precision), ('Content recall', content_recall)]

corrs = defaultdict(dict)
for langcode, lang in langs:
    # The gold file depends only on the language, so build its path once per language.
    gold = '../resources/plank_conll2015/datapackage/{}/gold.conll'.format(langcode)
    for mname, metric in metrics:
        parser1 = metric('../resources/plank_conll2015/datapackage/{}/parser1.conll'.format(langcode), gold)
        parser2 = metric('../resources/plank_conll2015/datapackage/{}/parser2.conll'.format(langcode), gold)
        scores = pd.DataFrame({"Parser 1": parser1, "Parser 2": parser2})
        # 0 when parser 2 scores strictly lower than parser 1, otherwise 1.
        # Ties and NaN differences also map to 1 because `x < 0` is False for them.
        parser2_preferred = scores['Parser 2'].subtract(scores['Parser 1']).map(lambda x: 0 if x < 0 else 1)
        # `diffs` is kept as a top-level name: the following cell displays the
        # frame left over from the last iteration.
        diffs = pd.concat([parser2_preferred, annotators[lang]], axis=1)
        # Off-diagonal entry of the 2x2 correlation matrix. `.iloc` replaces the
        # deprecated `.ix` (removed in pandas 1.0) and selects the same element.
        corrs[mname][lang] = diffs.corr('pearson').iloc[0, 1]



corrs = pd.DataFrame(corrs)
#corrs.to_csv('../data/human_judgment_corr.csv')
corrs

Unnamed: 0,Content precision,Content recall,LAS,Root,WLAS
English,0.508442,0.581423,0.545157,0.545157,0.546863
German,0.371537,0.437564,0.463736,0.463736,0.463736
Spanish,0.448713,0.463425,0.490216,0.490216,0.47896


In [18]:
# Displays the `diffs` frame left bound by the final iteration of the
# language/metric loop (last language 'German', last metric 'Content recall'),
# assuming the cells were run in order.
diffs

Unnamed: 0,0,1
0,1,0
1,0,0
2,0,0
3,0,0
4,1,1
5,0,0
6,1,1
7,1,0
8,1,0
9,1,1


In [19]:
# NOTE(review): scratch cell. The recorded output is a bare function repr, which
# is what evaluating `udeval.function_correct_head` without calling it would
# show. Confirm whether the call with '' is intended, or remove the cell.
udeval.function_correct_head('')

<function udeval.function_correct_head>