In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn_crfsuite import CRF
from sklearn_crfsuite.metrics import flat_f1_score
from sklearn_crfsuite.metrics import flat_classification_report
from nltk.tokenize.treebank import TreebankWordDetokenizer

from scripts import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Basic Reports

### Fixed (I)

In [2]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=1, num_blur=3)

In [13]:
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val
basic_report(X_comb, y_comb, X_test, y_test, 'combined')

combined 0.7043154134361833
              precision    recall  f1-score   support

       B-geo       0.86      0.87      0.86      7555
       B-gpe       0.97      0.91      0.94      3095
       B-org       0.81      0.66      0.73      3968
       B-per       0.87      0.73      0.79      3358
       B-tim       0.93      0.79      0.85      4018
       I-geo       0.77      0.80      0.79      1484
       I-gpe       0.28      0.60      0.38        35
       I-org       0.34      0.78      0.47      3306
       I-per       0.29      0.82      0.43      3388
       I-tim       0.43      0.72      0.54      1259
           O       0.99      0.94      0.96    177703

    accuracy                           0.92    209169
   macro avg       0.69      0.78      0.70    209169
weighted avg       0.95      0.92      0.93    209169



In [37]:
basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
basic_report(X_val, y_val, X_test, y_test, 'gold_only')
pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

clean (upper bound) 0.8410254697782122
              precision    recall  f1-score   support

       B-geo       0.85      0.91      0.88      7431
       B-gpe       0.97      0.94      0.95      3220
       B-org       0.81      0.73      0.77      4181
       B-per       0.85      0.83      0.84      3360
       B-tim       0.92      0.89      0.90      4086
       I-geo       0.81      0.79      0.80      1472
       I-gpe       0.89      0.51      0.65        47
       I-org       0.80      0.80      0.80      3375
       I-per       0.84      0.90      0.87      3340
       I-tim       0.83      0.75      0.79      1322
           O       0.99      0.99      0.99    177875

    accuracy                           0.97    209709
   macro avg       0.87      0.82      0.84    209709
weighted avg       0.97      0.97      0.97    209709

corrupted 0.6922881171268749
              precision    recall  f1-score   support

       B-geo       0.84      0.87      0.86      7431
       B-g

In [40]:
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

ecn_y 0.8003621965158175
              precision    recall  f1-score   support

       B-geo       0.84      0.89      0.87      7503
       B-gpe       0.96      0.94      0.95      3146
       B-org       0.76      0.67      0.72      4002
       B-per       0.83      0.78      0.81      3380
       B-tim       0.91      0.84      0.87      4160
       I-geo       0.79      0.77      0.78      1514
       I-gpe       1.00      0.37      0.54        27
       I-org       0.71      0.74      0.73      3458
       I-per       0.82      0.86      0.84      3467
       I-tim       0.84      0.62      0.71      1426
           O       0.99      0.99      0.99    178239

    accuracy                           0.97    210322
   macro avg       0.86      0.77      0.80    210322
weighted avg       0.96      0.97      0.96    210322

ecn_none 0.8016729373421243
              precision    recall  f1-score   support

       B-geo       0.84      0.90      0.87      7503
       B-gpe       0.96  

### Stochastic (I)

In [14]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=0.5, num_blur=3)

In [15]:
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val
basic_report(X_comb, y_comb, X_test, y_test, 'combined')

combined 0.7786478563046152
              precision    recall  f1-score   support

       B-geo       0.86      0.90      0.88      7733
       B-gpe       0.97      0.93      0.95      3259
       B-org       0.80      0.69      0.74      4024
       B-per       0.87      0.77      0.82      3462
       B-tim       0.93      0.85      0.89      4052
       I-geo       0.80      0.80      0.80      1479
       I-gpe       0.53      0.49      0.51        41
       I-org       0.55      0.79      0.65      3398
       I-per       0.56      0.85      0.67      3494
       I-tim       0.63      0.73      0.68      1238
           O       0.99      0.98      0.98    177862

    accuracy                           0.95    210042
   macro avg       0.77      0.80      0.78    210042
weighted avg       0.96      0.95      0.96    210042



In [41]:
basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
basic_report(X_val, y_val, X_test, y_test, 'gold_only')
pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

clean (upper bound) 0.8445887884260053
              precision    recall  f1-score   support

       B-geo       0.86      0.91      0.89      7562
       B-gpe       0.97      0.94      0.95      3151
       B-org       0.81      0.75      0.78      4096
       B-per       0.86      0.83      0.84      3337
       B-tim       0.92      0.89      0.90      4000
       I-geo       0.82      0.81      0.82      1479
       I-gpe       0.80      0.53      0.64        30
       I-org       0.82      0.81      0.81      3411
       I-per       0.85      0.90      0.88      3359
       I-tim       0.82      0.76      0.79      1267
           O       0.99      0.99      0.99    178711

    accuracy                           0.97    210403
   macro avg       0.87      0.83      0.84    210403
weighted avg       0.97      0.97      0.97    210403

corrupted 0.7791512820120624
              precision    recall  f1-score   support

       B-geo       0.86      0.90      0.88      7562
       B-g

### Variable (I)

In [16]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=1, num_blur=None)

In [17]:
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val
basic_report(X_comb, y_comb, X_test, y_test, 'combined')

combined 0.6922431005894679
              precision    recall  f1-score   support

       B-geo       0.86      0.87      0.86      7591
       B-gpe       0.97      0.90      0.94      3187
       B-org       0.80      0.68      0.73      3963
       B-per       0.86      0.71      0.77      3320
       B-tim       0.93      0.80      0.86      4070
       I-geo       0.78      0.76      0.77      1508
       I-gpe       0.24      0.50      0.33        40
       I-org       0.32      0.78      0.45      3269
       I-per       0.29      0.81      0.42      3348
       I-tim       0.40      0.70      0.51      1237
           O       0.99      0.94      0.96    178506

    accuracy                           0.92    210039
   macro avg       0.68      0.77      0.69    210039
weighted avg       0.95      0.92      0.93    210039



In [43]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=1, num_blur=None)
basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
basic_report(X_val, y_val, X_test, y_test, 'gold_only')
pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

clean (upper bound) 0.856469030515941
              precision    recall  f1-score   support

       B-geo       0.87      0.91      0.89      7632
       B-gpe       0.96      0.95      0.95      3152
       B-org       0.81      0.73      0.77      3927
       B-per       0.84      0.83      0.83      3330
       B-tim       0.93      0.88      0.90      3977
       I-geo       0.82      0.81      0.82      1443
       I-gpe       0.90      0.68      0.77        40
       I-org       0.83      0.80      0.81      3298
       I-per       0.85      0.89      0.87      3437
       I-tim       0.86      0.76      0.81      1269
           O       0.99      0.99      0.99    178632

    accuracy                           0.97    210137
   macro avg       0.88      0.84      0.86    210137
weighted avg       0.97      0.97      0.97    210137

corrupted 0.7010656954202155
              precision    recall  f1-score   support

       B-geo       0.86      0.87      0.87      7632
       B-gp

### Stochastic Variable (I)

In [20]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=0.75, num_blur=None)

In [21]:
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val
basic_report(X_comb, y_comb, X_test, y_test, 'combined')

combined 0.7657306732377491
              precision    recall  f1-score   support

       B-geo       0.85      0.91      0.88      7511
       B-gpe       0.97      0.93      0.95      3114
       B-org       0.81      0.71      0.76      4032
       B-per       0.84      0.80      0.82      3369
       B-tim       0.93      0.85      0.89      4126
       I-geo       0.79      0.79      0.79      1480
       I-gpe       0.50      0.44      0.47        45
       I-org       0.53      0.77      0.63      3170
       I-per       0.52      0.87      0.65      3471
       I-tim       0.52      0.74      0.61      1304
           O       0.99      0.97      0.98    177929

    accuracy                           0.95    209551
   macro avg       0.75      0.80      0.77    209551
weighted avg       0.96      0.95      0.95    209551



In [44]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train, frac=0.75, num_blur=None)
basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
basic_report(X_val, y_val, X_test, y_test, 'gold_only')
pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

clean (upper bound) 0.8470840821878785
              precision    recall  f1-score   support

       B-geo       0.87      0.91      0.89      7566
       B-gpe       0.97      0.94      0.96      3250
       B-org       0.81      0.73      0.77      4085
       B-per       0.85      0.84      0.84      3393
       B-tim       0.93      0.88      0.90      4112
       I-geo       0.82      0.80      0.81      1504
       I-gpe       0.95      0.50      0.65        36
       I-org       0.83      0.78      0.81      3391
       I-per       0.84      0.90      0.87      3388
       I-tim       0.87      0.77      0.82      1360
           O       0.99      0.99      0.99    178278

    accuracy                           0.97    210363
   macro avg       0.88      0.82      0.85    210363
weighted avg       0.97      0.97      0.97    210363

corrupted 0.7335626031643556
              precision    recall  f1-score   support

       B-geo       0.85      0.88      0.87      7566
       B-g

# Previous Runs (Ignore below)

In [7]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = blur_labels(y_train)
basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
basic_report(X_val, y_val, X_test, y_test, 'gold_only')
pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')

report 0.8421686827344661
              precision    recall  f1-score   support

       B-geo       0.86      0.91      0.89      7543
       B-gpe       0.97      0.94      0.96      3179
       B-org       0.80      0.74      0.77      4066
       B-per       0.85      0.84      0.85      3447
       B-tim       0.93      0.88      0.90      4066
       I-geo       0.82      0.81      0.81      1519
       I-gpe       0.83      0.49      0.62        41
       I-org       0.82      0.78      0.80      3397
       I-per       0.86      0.90      0.88      3545
       I-tim       0.83      0.77      0.80      1335
           O       0.99      0.99      0.99    177467

    accuracy                           0.97    209605
   macro avg       0.87      0.82      0.84    209605
weighted avg       0.97      0.97      0.97    209605



In [14]:
pseudolabel_report(X_train, X_val, y_val, X_test, y_test)

pseudolabel 0.7143000176250942
              precision    recall  f1-score   support

       B-geo       0.79      0.86      0.82      7543
       B-gpe       0.91      0.88      0.90      3179
       B-org       0.71      0.61      0.66      4066
       B-per       0.76      0.76      0.76      3447
       B-tim       0.88      0.80      0.84      4066
       I-geo       0.76      0.64      0.69      1519
       I-gpe       0.03      0.02      0.03        41
       I-org       0.66      0.69      0.68      3397
       I-per       0.78      0.87      0.82      3545
       I-tim       0.78      0.60      0.68      1335
           O       0.99      0.99      0.99    177467

    accuracy                           0.96    209605
   macro avg       0.73      0.70      0.71    209605
weighted avg       0.96      0.96      0.96    209605



# Missing Labels

In [31]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = missing_labels(y_train, frac=0.3)
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val

In [32]:
# basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
# basic_report(X_val, y_val, X_test, y_test, 'gold_only')
basic_report(X_comb, y_comb, X_test, y_test, 'combined')
# pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')

corrupted 0.7097044883510626
              precision    recall  f1-score   support

       B-geo       0.85      0.91      0.88      7664
       B-gpe       0.97      0.95      0.96      3186
       B-org       0.87      0.49      0.63      4074
       B-per       0.91      0.52      0.66      3472
       B-tim       0.96      0.68      0.80      3948
       I-geo       0.78      0.78      0.78      1533
       I-gpe       0.71      0.51      0.60        39
       I-org       0.87      0.29      0.44      3520
       I-per       0.89      0.42      0.57      3517
       I-tim       0.90      0.37      0.52      1240
           O       0.95      1.00      0.97    178256

    accuracy                           0.94    210449
   macro avg       0.88      0.63      0.71    210449
weighted avg       0.94      0.94      0.93    210449

combined 0.7156647315235071
              precision    recall  f1-score   support

       B-geo       0.85      0.91      0.88      7664
       B-gpe       0.

In [33]:
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

ecn_x 0.7899309948574171
              precision    recall  f1-score   support

       B-geo       0.85      0.89      0.87      7664
       B-gpe       0.96      0.94      0.95      3186
       B-org       0.75      0.65      0.69      4074
       B-per       0.79      0.74      0.76      3472
       B-tim       0.93      0.77      0.84      3948
       I-geo       0.78      0.76      0.77      1533
       I-gpe       0.90      0.46      0.61        39
       I-org       0.74      0.68      0.71      3520
       I-per       0.78      0.85      0.81      3517
       I-tim       0.83      0.57      0.68      1240
           O       0.99      0.99      0.99    178256

    accuracy                           0.96    210449
   macro avg       0.84      0.76      0.79    210449
weighted avg       0.96      0.96      0.96    210449

ecn_y 0.787589076377822
              precision    recall  f1-score   support

       B-geo       0.85      0.89      0.87      7664
       B-gpe       0.96      

In [2]:
X_train, X_val, X_test, y_train, y_val, y_test = load_data()
y_train_corrupted, error_train_array = missing_systematic_labels(X_train, y_train)
X_comb = X_train + X_val
y_comb = y_train_corrupted + y_val

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000


In [3]:
# basic_report(X_train, y_train, X_test, y_test, 'clean (upper bound)')
# basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')
# basic_report(X_val, y_val, X_test, y_test, 'gold_only')
# basic_report(X_comb, y_comb, X_test, y_test, 'combined')
# pseudolabel_report(X_train, X_val, y_val, X_test, y_test, 'pseudo')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=True, name='ecn_x_y')

ecn_x_y 0.8105328488052863
              precision    recall  f1-score   support

       B-geo       0.83      0.88      0.85      7407
       B-gpe       0.96      0.94      0.95      3115
       B-org       0.74      0.68      0.71      4041
       B-per       0.83      0.75      0.79      3379
       B-tim       0.92      0.84      0.88      4086
       I-geo       0.81      0.69      0.75      1468
       I-gpe       0.94      0.55      0.69        31
       I-org       0.73      0.72      0.72      3365
       I-per       0.85      0.85      0.85      3437
       I-tim       0.84      0.65      0.73      1314
           O       0.99      0.99      0.99    176549

    accuracy                           0.96    208192
   macro avg       0.86      0.78      0.81    208192
weighted avg       0.96      0.96      0.96    208192



In [4]:
basic_report(X_train, y_train_corrupted, X_test, y_test, 'corrupted')

corrupted 0.7784316308565725
              precision    recall  f1-score   support

       B-geo       0.87      0.84      0.85      7407
       B-gpe       0.96      0.94      0.95      3115
       B-org       0.84      0.55      0.66      4041
       B-per       0.81      0.36      0.50      3379
       B-tim       0.94      0.84      0.89      4086
       I-geo       0.84      0.70      0.76      1468
       I-gpe       0.90      0.61      0.73        31
       I-org       0.87      0.50      0.64      3365
       I-per       0.88      0.82      0.85      3437
       I-tim       0.90      0.63      0.74      1314
           O       0.96      1.00      0.98    176549

    accuracy                           0.95    208192
   macro avg       0.89      0.71      0.78    208192
weighted avg       0.95      0.95      0.95    208192



In [5]:
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=True, expand_y=False, name='ecn_x')
ecn_report(X_train, y_train_corrupted, X_val, y_val, X_test, y_test, expand_x=False, expand_y=True, name='ecn_y')

ecn_x 0.8122405933716279
              precision    recall  f1-score   support

       B-geo       0.83      0.88      0.85      7407
       B-gpe       0.95      0.94      0.95      3115
       B-org       0.74      0.68      0.71      4041
       B-per       0.83      0.76      0.79      3379
       B-tim       0.93      0.85      0.88      4086
       I-geo       0.81      0.69      0.74      1468
       I-gpe       1.00      0.55      0.71        31
       I-org       0.73      0.72      0.72      3365
       I-per       0.84      0.85      0.85      3437
       I-tim       0.84      0.66      0.74      1314
           O       0.99      0.99      0.99    176549

    accuracy                           0.96    208192
   macro avg       0.86      0.78      0.81    208192
weighted avg       0.96      0.96      0.96    208192

ecn_y 0.8105328488052863
              precision    recall  f1-score   support

       B-geo       0.83      0.88      0.85      7407
       B-gpe       0.96     

In [6]:
basic_report(X_comb, y_comb, X_test, y_test, 'combined')

combined 0.7807173888165572
              precision    recall  f1-score   support

       B-geo       0.87      0.84      0.86      7407
       B-gpe       0.96      0.94      0.95      3115
       B-org       0.84      0.56      0.67      4041
       B-per       0.81      0.36      0.50      3379
       B-tim       0.93      0.85      0.89      4086
       I-geo       0.84      0.71      0.77      1468
       I-gpe       0.86      0.61      0.72        31
       I-org       0.87      0.52      0.65      3365
       I-per       0.88      0.83      0.85      3437
       I-tim       0.89      0.65      0.75      1314
           O       0.96      1.00      0.98    176549

    accuracy                           0.95    208192
   macro avg       0.88      0.72      0.78    208192
weighted avg       0.95      0.95      0.95    208192

