In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn_crfsuite import CRF
from sklearn_crfsuite.metrics import flat_f1_score
from sklearn_crfsuite.metrics import flat_classification_report

from scripts import missing_labels

from nltk.tokenize.treebank import TreebankWordDetokenizer
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Downloading and Preprocessing the Data

In [7]:
df = pd.read_csv('../data/ner_dataset.csv', encoding = "ISO-8859-1")
df.describe()

Unnamed: 0,Sentence #,Word,POS,Tag
count,47959,1048575,1048575,1048575
unique,47959,35178,42,17
top,Sentence: 4020,the,NN,O
freq,1,52573,145807,887908


In [8]:
df.head(10)

Unnamed: 0,Sentence #,Word,POS,Tag
0,Sentence: 1,Thousands,NNS,O
1,,of,IN,O
2,,demonstrators,NNS,O
3,,have,VBP,O
4,,marched,VBN,O
5,,through,IN,O
6,,London,NNP,B-geo
7,,to,TO,O
8,,protest,VB,O
9,,the,DT,O


In [9]:
print(df['Tag'].unique())

['O' 'B-geo' 'B-gpe' 'B-per' 'I-geo' 'B-org' 'I-org' 'B-tim' 'B-art'
 'I-art' 'I-per' 'I-gpe' 'I-tim' 'B-nat' 'B-eve' 'I-eve' 'I-nat']


In [10]:
df = df.fillna(method = 'ffill')

In [11]:
# This is a class te get sentence. The each sentence will be list of tuples with its tag and pos.
class sentence(object):
    def __init__(self, df):
        self.n_sent = 1
        self.df = df
        self.empty = False
        agg = lambda s : [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),
                                                       s['POS'].values.tolist(),
                                                       s['Tag'].values.tolist())]
        self.grouped = self.df.groupby("Sentence #").apply(agg)
        self.sentences = [s for s in self.grouped]
        
    def get_text(self):
        try:
            s = self.grouped['Sentence: {}'.format(self.n_sent)]
            self.n_sent +=1
            return s
        except:
            return None

In [12]:
#Displaying one full sentence
getter = sentence(df)
sentences = [" ".join([s[0] for s in sent]) for sent in getter.sentences]
sentences[0]

'Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .'

In [13]:
sent = getter.get_text()
print(sent)

[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]


In [14]:
sentences = getter.sentences

In [15]:
def word2features(sent, i):
    word = sent[i][0]
    postag = sent[i][1]

    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = sent[i-1][0]
        postag1 = sent[i-1][1]
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        word1 = sent[i+1][0]
        postag1 = sent[i+1][1]
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features


def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, postag, label in sent]

def sent2tokens(sent):
    return [token for token, postag, label in sent]

In [16]:
reduced_tag_set = ['B-geo', 'B-gpe', 'B-org', 'B-per', 'B-tim', 'I-geo',
                   'I-gpe', 'I-org', 'I-per', 'I-tim', 'O']

In [17]:
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]
y = [[label if label in reduced_tag_set else 'O' for label in y_i] for y_i in y]  # reduce tag set

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.025)

In [14]:
crf = CRF(algorithm = 'lbfgs',
         c1 = 0.1,
         c2 = 0.1,
         max_iterations = 100,
         all_possible_transitions = False)

In [34]:
y_train_new, error_train_array = missing_labels(y_train, frac=0.4)

In [35]:
t = sum([len(a) for a in error_train_array]); print('Num tags:', t)
e = sum([sum(a) for a in error_train_array]); print('Num errs:', e)
e/t

Num tags: 819062
Num errs: 62398


0.07618226703228816

As compared to 0.04 for RV

In [36]:
print(len(X_train))

37407


In [18]:
crf.fit(X_train, y_train)
y_pred = crf.predict(X_test)
f1_score = flat_f1_score(y_test, y_pred, average = 'macro')
print(f1_score)

0.8526267386625075


In [19]:
report = flat_classification_report(y_test, y_pred, output_dict=True)
clean_precision, clean_recall, clean_f1 = report['B-geo']['precision'], report['B-geo']['recall'], report['B-geo']['f1-score'] 
print(flat_classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       B-geo       0.86      0.91      0.89      7475
       B-gpe       0.97      0.95      0.96      3015
       B-org       0.80      0.74      0.77      3989
       B-per       0.85      0.83      0.84      3348
       B-tim       0.93      0.87      0.90      3996
       I-geo       0.82      0.82      0.82      1489
       I-gpe       0.89      0.66      0.76        47
       I-org       0.82      0.80      0.81      3257
       I-per       0.85      0.89      0.87      3405
       I-tim       0.83      0.73      0.78      1301
           O       0.99      0.99      0.99    177299

    accuracy                           0.97    208621
   macro avg       0.87      0.83      0.85    208621
weighted avg       0.97      0.97      0.97    208621



In [37]:
crf.fit(X_train, y_train_new)
y_pred = crf.predict(X_test)
f1_score = flat_f1_score(y_test, y_pred, average = 'macro')
print(f1_score)

0.49415149246335804


In [38]:
report = flat_classification_report(y_test, y_pred, output_dict=True)
dirty_precision, dirty_recall, dirty_f1 = report['B-geo']['precision'], report['B-geo']['recall'], report['B-geo']['f1-score'] 
print(flat_classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       B-geo       0.91      0.56      0.69      7475
       B-gpe       0.99      0.67      0.80      3015
       B-org       0.88      0.31      0.46      3989
       B-per       0.92      0.30      0.46      3348
       B-tim       0.97      0.51      0.67      3996
       I-geo       0.91      0.17      0.29      1489
       I-gpe       1.00      0.19      0.32        47
       I-org       0.85      0.10      0.19      3257
       I-per       0.88      0.18      0.30      3405
       I-tim       0.93      0.19      0.31      1301
           O       0.90      1.00      0.95    177299

    accuracy                           0.91    208621
   macro avg       0.92      0.38      0.49    208621
weighted avg       0.91      0.91      0.88    208621



# Understanding the Mistakes

### Seeing the training data

In [39]:
for i in range(10):
    print("-", end="")
    for w, word in enumerate(X_train[i]):
        word = word['word.lower()']
        if error_train_array[i][w]:
            print('*', end='')            
        if y_train_new[i][w] == 'O':
            print(word.lower(), end=' ')
        else:
            print(word.upper(), end=' ')            
    print()

-on SATURDAY , witnesses said at least five people were killed . 
-the crimes took place in suburban *maryland , outside of WASHINGTON , *d.c. 
-its session was mostly ceremonial , as talks are still continuing on power-sharing in the new government . 
-*today 's attacks took place despite an intensified security clampdown in BAGHDAD by U.S. and *iraqi forces . 
-in order for the merger to overcome anti-trust concerns , rival SOUTHWEST *airlines had to be given take-off and landing rights at the international airport in *newark , NEW JERSEY - just outside NEW YORK CITY . 
-BRITAIN 's international development *secretary *hilary *benn promised to push ahead with the plan , saying a solution will be found by 2006 with or without the UNITED STATES . 
-at least one man was killed in the violence . 
-the website says other campaign workers had no information about the detainees for several hours , but then learned they had been taken to a *minsk prison . 
-ISRAELI forces have been operating

### Seeing the predictions on the test data

In [40]:
for i in range(10):
    print("-", end="")
    for w, word in enumerate(X_test[i]):
        word = word['word.lower()']
        if y_pred[i][w] != y_test[i][w]:
            print('*', end='')
            
        if y_pred[i][w] == 'O':
            print(word.lower(), end=' ')
        else:
            print(word.upper(), end=' ')            
    print()

-it funds its operations mainly through drug trafficking . 
-NATO officials say alliance troops *friday raided the home of *mladjen *kenjic , a suspected backer of fugitive wartime *bosnian *serb military commander *general *ratko *mladic . 
-family members carry coffin of police officer *mohamed *badr for burial in *baghdad insurgents in *iraq launched a third straight day of stepped-up attacks SUNDAY , killing at least nine *iraqis . 
-defense *minister *carme *chacon said the first 200 troops will leave for the *gulf *of *aden on a frigate and supply vessel this FRIDAY . 
-the policy allows *cuban refugees who reach U.S. soil to stay in the *united *states , but those intercepted at sea are sent back home . 
-IRAN 's top nuclear official says his country will not abandon its uranium enrichment program , and that the MIDDLE EAST could become even more unstable if *tehran is referred to the *united *nations *security *council for its nuclear activities . 
-in particular , the rate of 

# Let's Try to Fix the Mistakes

For reference, the clean F1 is: **0.84** 

In [38]:
def measure_method(error_pred, error_array, X, y_corrected):
    # measure what percent of errors are fixed
    np = 0; nn=0; tp = 0; fp = 0;
    for i in range(len(error_pred)):
        for j in range(len(error_pred[i])):
            if error_pred[i][j] and error_pred[i][j] == error_array[i][j]:
                tp += 1
            elif error_pred[i][j]:
                fp += 1
            if error_array[i][j]:
                np += 1
            else:
                nn += 1

    
    print("TP errors detected: {}".format(tp/np))
    print("FP errors detected: {}".format(fp/nn)) 

    # measure accuracy
    crf.fit(X, y_corrected)
    y_pred = crf.predict(X_test)
    f1_score = flat_f1_score(y_test, y_pred, average = 'macro')
    print("F1 score on trained model: {}".format(f1_score))
    
    report = flat_classification_report(y_test, y_pred, output_dict=True)
    precision, recall, f1 = report['B-geo']['precision'], report['B-geo']['recall'], report['B-geo']['f1-score'] 
    print(flat_classification_report(y_test, y_pred))

    return precision, recall, f1

## Pseudolabeled from Validation

In [39]:
def pseudolabeled_on_validation(X_val, y_val):
    
    crf.fit(X_val, y_val)
    y_pred = crf.predict(X_train)

    error_pred = []
    y_corrected = []

    for i in range(len(y_pred)):    
        error_pred.append([])
        y_corrected.append([])

        for j in range(len(y_pred[i])):
            if not(y_pred[i][j]==y_train_new[i][j]):
                error_pred[i].append(True)
            else:
                error_pred[i].append(False)

    return measure_method(error_pred, error_train_array, X_train, y_pred)

### GTC with X, y, neighboring y

In [52]:
def gtc_with_x_y_neighboring_y(X_val, y_val, num_x_keys=1):
    keys_prioritized = ['word.isupper()', 'word.istitle()', 'word.isdigit()', '+1:word.istitle()', 
                        '+1:word.isupper()', 'BOS', 'bias',  'word.lower()', 'word[-3:]', 'word[-2:]',
                        '+1:word.lower()', 'postag', 'postag[:2]', '+1:postag', '+1:postag[:2]', 
                        '-1:postag', '-1:postag[:2]', '-1:word.istitle()', '-1:word.lower()',
                        '-1:word.isupper()', 'EOS']
    
    crf.fit(X_train, y_train_new)
    y_val_pred = crf.predict(X_val)

    correction_network_input = []

    for i in range(len(y_val_pred)):
        correction_network_input.append([])
        for j in range(len(y_val_pred[i])):
            if num_x_keys is None:
                X_val_sub = X_val[i][j].copy()
            else:
                X_val_sub = {k: X_val[i][j][k] for k in keys_prioritized[:num_x_keys] if k in X_val[i][j]}
            correction_network_input[i].append(X_val_sub)
            correction_network_input[i][j]['y'] = y_val_pred[i][j]     
            if j >= 1:
                correction_network_input[i][j]['y-1'] = y_val_pred[i][j-1]
            else:
                correction_network_input[i][j]['y-1'] = 'N'
            if j >= 2:
                correction_network_input[i][j]['y-2'] = y_val_pred[i][j-2]
            else:
                correction_network_input[i][j]['y-2'] = 'N'
            if j < len(y_val_pred[i]) - 1:
                correction_network_input[i][j]['y+1'] = y_val_pred[i][j+1]
            else:
                correction_network_input[i][j]['y+1'] = 'N'
            if j < len(y_val_pred[i]) - 2:
                correction_network_input[i][j]['y+2'] = y_val_pred[i][j+2]
            else:
                correction_network_input[i][j]['y+2'] = 'N'

    crf.fit(correction_network_input, y_val)

    X_expanded = []

    for i in range(len(y_train_new)):
        X_expanded.append([])
        for j in range(len(y_train_new[i])):
            if num_x_keys is None:
                X_train_sub = X_train[i][j].copy()
            else:
                X_train_sub = {k: X_train[i][j][k] for k in keys_prioritized[:num_x_keys] if k in X_train[i][j]}
            X_expanded[i].append(X_train_sub)
            X_expanded[i][j]['y'] = y_train_new[i][j]     
            if j >= 1:
                X_expanded[i][j]['y-1'] = y_train_new[i][j-1]
            else:
                X_expanded[i][j]['y-1'] = 'N'
            if j >= 2:
                X_expanded[i][j]['y-2'] = y_train_new[i][j-2]
            else:
                X_expanded[i][j]['y-2'] = 'N'
            if j < len(y_train_new[i]) - 1:
                X_expanded[i][j]['y+1'] = y_train_new[i][j+1]
            else:
                X_expanded[i][j]['y+1'] = 'N'
            if j < len(y_train_new[i]) - 2:
                X_expanded[i][j]['y+2'] = y_train_new[i][j+2]
            else:
                X_expanded[i][j]['y+2'] = 'N'

    # Go from X_expanded to X_corrected
    y_corrected = crf.predict(X_expanded)

    error_pred = []

    for i in range(len(y_corrected)):    
        error_pred.append([])    
        for j in range(len(y_corrected[i])):
            if not(y_corrected[i][j]==y_train_new[i][j]):
                error_pred[i].append(True)
            else:
                error_pred[i].append(False)

    return measure_method(error_pred, error_train_array, X_train, y_corrected)

### GTC with y, neighboring y only

In [51]:
def gtc_with_y_neighboring_y(X_val, y_val, num_ys=1):
    
    crf.fit(X_train, y_train_new)
    y_val_pred = crf.predict(X_val)

    correction_network_input = []

    for i in range(len(y_val_pred)):
        correction_network_input.append([])
        for j in range(len(y_val_pred[i])):
            correction_network_input[i].append(dict())
            correction_network_input[i][j]['y'] = y_val_pred[i][j] 
            for k in range(1, 1+num_ys):
                if j >= k:
                    correction_network_input[i][j]['y-{}'.format(k)] = y_val_pred[i][j-k]
                else:
                    correction_network_input[i][j]['y-{}'.format(k)] = 'N'
                if j < len(y_val_pred[i]) - k:
                    correction_network_input[i][j]['y+{}'.format(k)] = y_val_pred[i][j+1]
                else:
                    correction_network_input[i][j]['y+{}'.format(k)] = 'N'
    
    crf.fit(correction_network_input, y_val)

    X_expanded = []

    for i in range(len(y_train_new)):
        X_expanded.append([])
        for j in range(len(y_train_new[i])):
            X_expanded[i].append(dict())
            X_expanded[i][j]['y'] = y_train_new[i][j]     
            for k in range(1, 1+num_ys):
                if j >= k:
                    X_expanded[i][j]['y-{}'.format(k)] = y_train_new[i][j-k]
                else:
                    X_expanded[i][j]['y-{}'.format(k)] = 'N'
                if j < len(y_train_new[i]) - k:
                    X_expanded[i][j]['y+{}'.format(k)] = y_train_new[i][j+k]
                else:
                    X_expanded[i][j]['y+{}'.format(k)] = 'N'

    # Go from X_expanded to X_corrected
    y_corrected = crf.predict(X_expanded)

    error_pred = []

    for i in range(len(y_corrected)):    
        error_pred.append([])    
        for j in range(len(y_corrected[i])):
            if not(y_corrected[i][j]==y_train_new[i][j]):
                error_pred[i].append(True)
            else:
                error_pred[i].append(False)

    return measure_method(error_pred, error_train_array, X_train, y_corrected)

### How do results change based on markov blanket size?

In [42]:
n = 100
X_val_ = X_val[:n]
y_val_ = y_val[:n]

In [43]:
ppr, pre, pf1 = pseudolabeled_on_validation(X_val_, y_val_)

TP errors detected: 0.9823627097291416
FP errors detected: 0.067320234743342
F1 score on trained model: 0.5965127383566702
              precision    recall  f1-score   support

       B-geo       0.66      0.80      0.72      7631
       B-gpe       0.88      0.77      0.83      3217
       B-org       0.54      0.41      0.47      4060
       B-per       0.69      0.62      0.65      3419
       B-tim       0.89      0.58      0.70      4075
       I-geo       0.63      0.24      0.35      1443
       I-gpe       0.41      0.31      0.35        39
       I-org       0.50      0.60      0.54      3393
       I-per       0.67      0.74      0.70      3385
       I-tim       0.80      0.16      0.26      1293
           O       0.98      0.99      0.98    177371

   micro avg       0.93      0.93      0.93    209326
   macro avg       0.70      0.56      0.60    209326
weighted avg       0.93      0.93      0.93    209326



In [44]:
pr, re, f1 = gtc_with_y_neighboring_y(X_val_, y_val_, num_ys=3)

TP errors detected: 0.6293878485941705
FP errors detected: 0.010975195725812834
F1 score on trained model: 0.7898895885893769
              precision    recall  f1-score   support

       B-geo       0.86      0.90      0.88      7631
       B-gpe       0.98      0.92      0.95      3217
       B-org       0.82      0.66      0.73      4060
       B-per       0.84      0.78      0.81      3419
       B-tim       0.92      0.82      0.87      4075
       I-geo       0.75      0.71      0.73      1443
       I-gpe       0.92      0.56      0.70        39
       I-org       0.51      0.75      0.61      3393
       I-per       0.79      0.77      0.78      3385
       I-tim       0.83      0.54      0.66      1293
           O       0.98      0.99      0.98    177371

   micro avg       0.96      0.96      0.96    209326
   macro avg       0.84      0.76      0.79    209326
weighted avg       0.96      0.96      0.96    209326



In [45]:
pr, re, f1 = gtc_with_x_y_neighboring_y(X_val_, y_val_, num_x_keys=None)

TP errors detected: 0.9255855236786348
FP errors detected: 0.011924788192379266
F1 score on trained model: 0.7954438499635597
              precision    recall  f1-score   support

       B-geo       0.84      0.88      0.86      7631
       B-gpe       0.97      0.92      0.95      3217
       B-org       0.79      0.68      0.73      4060
       B-per       0.81      0.77      0.79      3419
       B-tim       0.92      0.83      0.88      4075
       I-geo       0.85      0.63      0.72      1443
       I-gpe       0.58      0.64      0.61        39
       I-org       0.71      0.78      0.75      3393
       I-per       0.78      0.89      0.83      3385
       I-tim       0.85      0.53      0.65      1293
           O       0.99      0.99      0.99    177371

   micro avg       0.96      0.96      0.96    209326
   macro avg       0.83      0.78      0.80    209326
weighted avg       0.96      0.96      0.96    209326



# Missing Systematic

In [19]:
X_train[0]

[{'+1:postag': 'NNS',
  '+1:postag[:2]': 'NN',
  '+1:word.istitle()': False,
  '+1:word.isupper()': False,
  '+1:word.lower()': 'huntsmen',
  'BOS': True,
  'bias': 1.0,
  'postag': 'DT',
  'postag[:2]': 'DT',
  'word.isdigit()': False,
  'word.istitle()': True,
  'word.isupper()': False,
  'word.lower()': 'the',
  'word[-2:]': 'he',
  'word[-3:]': 'The'},
 {'+1:postag': ',',
  '+1:postag[:2]': ',',
  '+1:word.istitle()': False,
  '+1:word.isupper()': False,
  '+1:word.lower()': ',',
  '-1:postag': 'DT',
  '-1:postag[:2]': 'DT',
  '-1:word.istitle()': True,
  '-1:word.isupper()': False,
  '-1:word.lower()': 'the',
  'bias': 1.0,
  'postag': 'NNS',
  'postag[:2]': 'NN',
  'word.isdigit()': False,
  'word.istitle()': False,
  'word.isupper()': False,
  'word.lower()': 'huntsmen',
  'word[-2:]': 'en',
  'word[-3:]': 'men'},
 {'+1:postag': 'IN',
  '+1:postag[:2]': 'IN',
  '+1:word.istitle()': False,
  '+1:word.isupper()': False,
  '+1:word.lower()': 'in',
  '-1:postag': 'NNS',
  '-1:postag

In [26]:
import spacy
from nltk.tokenize.treebank import TreebankWordDetokenizer

nlp = spacy.load("en_core_web_sm")

In [53]:
def missing_systematic_labels(X, y):
    y_new = []
    error_array = []
    
    counter = 0
    erasing = False
    
    for i in range(len(y)):
        if i%1000 == 0:
            print(i)
        error_array.append(list())
        y_new.append(list())
        sentence = TreebankWordDetokenizer().detokenize([t['word.lower()'] for t in X[i]])
        doc = nlp(sentence)
        ents = list(e.text for e in doc.ents)
        word_ents = list()
        for e in ents:
            word_ents.extend(e.split(' '))
        
        for j in range(len(y[i])):            
            current_tag = y[i][j]
            tagset = ['B-org', 'B-per', 'B-tim', 'B-geo', 
                   'I-org', 'I-per', 'I-tim', 'I-geo'] 
            if current_tag in tagset and not(X[i][j]['word.lower()'] in word_ents):
                y_new[i].append('O')
                error_array[i].append(True)                
                erasing = True
                
            else:
                error_array[i].append(False)
                y_new[i].append(current_tag)
                erasing = False
        
    return y_new, error_array

In [54]:
y_train_new, error_train_array = missing_systematic_labels(X_train, y_train)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000


In [57]:
t = sum([len(a) for a in error_train_array]); print('Num tags:', t)
e = sum([sum(a) for a in error_train_array]); print('Num errs:', e)
e/t

Num tags: 817737
Num errs: 23702


0.028984869218342816

In [58]:
crf = CRF(algorithm = 'lbfgs',
         c1 = 0.1,
         c2 = 0.1,
         max_iterations = 100,
         all_possible_transitions = False)

In [59]:
crf.fit(X_train, y_train_new)
y_pred = crf.predict(X_test)
f1_score = flat_f1_score(y_test, y_pred, average = 'macro')
print(f1_score)

0.7705203787037477


In [60]:
report = flat_classification_report(y_test, y_pred, output_dict=True)
print(flat_classification_report(y_test, y_pred))

[autoreload of scripts failed: Traceback (most recent call last):
  File "/usr/lib/python3/dist-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
  File "/home/a12d/dataopt-private/sequential/scripts.py", line 186
    from nltk.tokenize.treebank import TreebankWordDetokenizer
    ^
IndentationError: unexpected indent
]


              precision    recall  f1-score   support

       B-geo       0.87      0.84      0.86      7563
       B-gpe       0.97      0.94      0.96      3236
       B-org       0.84      0.56      0.67      4035
       B-per       0.80      0.34      0.47      3436
       B-tim       0.93      0.85      0.89      4061
       I-geo       0.83      0.70      0.76      1470
       I-gpe       0.90      0.49      0.63        53
       I-org       0.88      0.53      0.66      3405
       I-per       0.88      0.83      0.85      3473
       I-tim       0.88      0.63      0.74      1259
           O       0.96      1.00      0.98    178231

    accuracy                           0.95    210222
   macro avg       0.89      0.70      0.77    210222
weighted avg       0.95      0.95      0.95    210222



In [61]:
pr, re, f1 = gtc_with_x_y_neighboring_y(X_val, y_val, num_x_keys=None)

NameError: name 'measure_method' is not defined