In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 22 days


### Read in gold data and submissions

In [2]:
def read_dimsum_data(filename):
    """Read a headerless, tab-separated DiMSUM annotation file.

    Parameters
    ----------
    filename : str or file-like
        Path (or open text buffer) handed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per input line with the columns ``token_offset``, ``form``,
        ``lemma``, ``pos``, ``mwe``, ``parent_offset``, ``strength``,
        ``supersense`` and ``sent_id``. Empty ``supersense`` cells are
        replaced by the label ``'O'``.
    """
    columns = ['token_offset', 'form', 'lemma', 'pos', 'mwe',
               'parent_offset', 'strength', 'supersense', 'sent_id']
    # quoting=3 is csv.QUOTE_NONE, so quote characters inside tokens are kept
    # as ordinary text. na_filter=False keeps empty fields as '' instead of NaN.
    results = pd.read_csv(filename, sep="\t", names=columns,
                          quoting=3, na_filter=False)

    # Compare against the empty string directly rather than calling
    # str.replace('^$', 'O'): that call only works when the pattern is
    # interpreted as a regex, which is no longer the default in pandas >= 2.0
    # (there '^$' is matched literally and nothing gets replaced).
    supersense = results['supersense']
    results['supersense'] = supersense.where(supersense != '', 'O')

    return results

# Gold annotations for the test set.
gold = read_dimsum_data("dimsum16.test")

# One boolean row mask per source domain, keyed on the sentence-id prefix.
tweet_mask, ted_mask, trustpilot_mask = (
    gold.sent_id.str.startswith(prefix)
    for prefix in ("twe", "ted", "trustpilot")
)

# Every supersense label seen in the gold data except the 'O' label, sorted.
supersense_labels = sorted(label for label in gold.supersense.unique()
                           if label != 'O')
# The two MWE labels that are scored, in sorted order: ['I', 'i'].
mwe_labels = sorted(('i', 'I'))

# Each submission is stored under "S<id>" and read from "<id>.test.pred".
submission_ids = "106 108 211 214 227 248 249 254 255".split()
submissions = dict(
    ("S" + submission_id, read_dimsum_data(submission_id + ".test.pred"))
    for submission_id in submission_ids
)

Masks over the gold data marking the tokens that are relevant for scoring: tokens with a supersense label, and tokens with one of the scored MWE labels.

In [3]:
# Gold rows whose supersense is one of the scored supersense labels.
has_supersense = gold['supersense'].isin(supersense_labels)
# Gold rows whose MWE tag is one of the scored MWE labels.
has_mwe = gold['mwe'].isin(mwe_labels)

### (Extra) Reproduce scoring from results table

In [5]:
# Use one submission (S214) as the worked example for the scoring cells below.
pred1 = submissions['S214']

In [6]:
from sklearn.metrics import precision_score, recall_score, f1_score
def score_entry(y_gold, y_pred, labels):
    """Micro-averaged precision, recall and F1 restricted to ``labels``.

    Parameters
    ----------
    y_gold : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_gold``.
    labels : list
        Labels included in the micro average; passed as ``labels=`` to each
        scikit-learn metric.

    Returns
    -------
    dict
        Keys ``'precision'``, ``'recall'`` and ``'f1'`` mapped to the
        corresponding score.
    """
    metrics = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    # All three metrics are called with identical arguments.
    return {
        name: metric(y_gold, y_pred, average='micro', labels=labels)
        for name, metric in metrics
    }

In [7]:
# Supersense-only scores for the selected submission.
score_entry(gold.supersense, pred1.supersense, labels=supersense_labels)

{'f1': 0.57551440329218106,
 'precision': 0.56221105527638193,
 'recall': 0.58946259220231823}

In [8]:
# Combined score: supersense and MWE columns are stacked end to end so that
# both label sets contribute to a single micro average.
score_entry(
    y_gold=np.concatenate((gold.supersense, gold.mwe)),
    y_pred=np.concatenate((pred1.supersense, pred1.mwe)),
    labels=supersense_labels + mwe_labels,
)

{'f1': 0.57496109285837804,
 'precision': 0.58271994391868209,
 'recall': 0.56740614334470985}

In [9]:
# MWE-only scores, printed one per line: precision, recall, then F1.
for mwe_metric in (precision_score, recall_score, f1_score):
    print(mwe_metric(gold.mwe, pred1.mwe, average='micro', labels=mwe_labels))

0.722298221614
0.473542600897
0.572047670639
