In [72]:
import numpy as np
import time
import os
import random
import sys
sys.path.insert(1, os.path.realpath(os.path.pardir))

from optparse import OptionParser
from pathlib import Path
from model.hmm import HiddenMarkov, load_dataset

def _group_into_documents(samples):
    """Group consecutive samples into documents, splitting on '-DOCSTART-' marker samples.

    The marker samples themselves are dropped. Samples that appear before the
    first marker are collected into an implicit first document instead of
    raising IndexError.
    """
    documents = []
    for sample in samples:
        if sample[0][0][0] == '-DOCSTART-':
            documents.append([])
        else:
            if not documents:
                documents.append([])
            documents[-1].append(sample)
    return documents


def _split_into_folds(items, n_folds):
    """Cut `items` into `n_folds` contiguous slices; the last slice takes the remainder.

    Returns the list of slices and the nominal fold size (len(items) // n_folds).
    """
    fold_size = len(items) // n_folds
    folds = []
    for i in range(n_folds):
        start = i * fold_size
        # The last fold absorbs the leftover items of the integer division.
        end = start + fold_size if i < n_folds - 1 else len(items)
        folds.append(items[start:end])
    return folds, fold_size


def test_hmm(timesteps, use_features, which_features, self_train, dataset, entire_documents,
             n_folds=5, seed=None):
    """Cross-validate a HiddenMarkov tagger and write one prediction file per fold.

    The train, valid and test splits found under `dataset` are merged, shuffled
    and cut into `n_folds` folds. For every fold a fresh model is fitted on the
    remaining folds and its predictions for the held-out fold are written to
    '../results/score2/fold_<i>.preds.txt' as "word gold predicted" lines, with
    an empty line before each sentence.

    Parameters:
        timesteps: forwarded to HiddenMarkov; 0 selects the naive-Bayes mode
            (the model is then built with timesteps=1 and naive_bayes=True).
        use_features: forwarded to HiddenMarkov.
        which_features: forwarded to HiddenMarkov.fit.
        self_train: forwarded to HiddenMarkov.
        dataset: directory containing the 'train', 'valid' and 'test' splits.
        entire_documents: if true, whole documents (delimited by '-DOCSTART-'
            samples) are kept together in one fold; otherwise individual
            sentences are shuffled.
        n_folds: number of cross-validation folds (default 5, as before).
        seed: optional seed for a private RNG to make the fold assignment
            reproducible; None keeps using the global `random` state.

    Raises:
        ValueError: if n_folds < 2 or there is too little data to give every
            fold at least one sentence.
    """
    if n_folds < 2:
        raise ValueError('n_folds must be at least 2, got {}'.format(n_folds))

    start_time = time.time()
    naive_bayes = timesteps == 0
    if naive_bayes:
        timesteps = 1

    # A private generator keeps a seeded run from disturbing the global RNG state.
    rng = random.Random(seed) if seed is not None else random

    print('Fitting...')
    X1, Y1, T1 = load_dataset(dataset + '/train')
    X2, Y2, T2 = load_dataset(dataset + '/valid')
    X3, Y3, T3 = load_dataset(dataset + '/test')
    # NOTE(review): assumes load_dataset returns three parallel lists — confirm.
    samples = list(zip(X1 + X2 + X3, Y1 + Y2 + Y3, T1 + T2 + T3))

    if entire_documents:
        documents = _group_into_documents(samples)
        rng.shuffle(documents)
        document_folds, fold_size = _split_into_folds(documents, n_folds)
        # Flatten every fold of documents into a flat list of sentences.
        folds = [[sample for document in fold for sample in document]
                 for fold in document_folds]
    else:
        sentences = [sample for sample in samples if sample[0][0][0] != '-DOCSTART-']
        rng.shuffle(sentences)
        folds, fold_size = _split_into_folds(sentences, n_folds)
    print('Fold size:', fold_size)

    # Fail early with a clear message instead of an unpacking error mid-run.
    if any(len(fold) == 0 for fold in folds):
        raise ValueError('not enough data to build {} non-empty folds'.format(n_folds))

    labels = ['O', 'B-PER', 'I-PER']
    output_dir = Path('../results/score2')
    output_dir.mkdir(parents=True, exist_ok=True)

    for i, test in enumerate(folds):
        train = [sample for j, fold in enumerate(folds) if j != i for sample in fold]

        train_X, train_Y, _ = [list(column) for column in zip(*train)]
        test_X, test_Y, test_T = [list(column) for column in zip(*test)]

        hmm = HiddenMarkov(timesteps, naive_bayes=naive_bayes, use_features=use_features, self_train=self_train)
        hmm.fit(train_X, train_Y, which_features)
        predictions = hmm.predict(test_X)

        # Gold labels and predictions are indices into `labels`.
        gold_tags = [[labels[index] for index in sentence] for sentence in test_Y]
        pred_tags = [[labels[index] for index in sentence] for sentence in predictions]

        name = 'fold_' + str(i)
        print('Writing', name)
        with (output_dir / '{}.preds.txt'.format(name)).open('wb') as f:
            for words, preds, tags in zip(test_T, pred_tags, gold_tags):
                f.write(b'\n')
                for word, pred, tag in zip(words, preds, tags):
                    f.write(' '.join([word, tag, pred]).encode() + b'\n')

    print('Elapsed time: %.4f' % (time.time() - start_time))

In [73]:
# Feature mask for the model: one 0/1 switch per feature, in this fixed order.
# Only the lower-cased token is enabled for this run.
which_features = [
    enabled
    for _feature, enabled in (
        ('Token', 0),
        ('Lower case token', 1),
        ('Exact match', 0),
        ('Partial match', 0),
        ('Log frequency name', 0),
        ('Log frequency word', 0),
        ('Email', 0),
        ('Number', 0),
        ('Honorific', 0),
        ('URL', 0),
        ('Capitalization', 0),
        ('Punctuation', 0),
        ('HTML', 0),
        ('CSS', 0),
    )
]

# Cross-validate on whole documents with 3 timesteps, no extra features, no self-training.
test_hmm(
    timesteps=3,
    use_features=False,
    which_features=which_features,
    self_train=False,
    dataset='../data/ner_on_html',
    entire_documents=True,
)

Fitting...
Fold size: 29
Writing fold_0
Writing fold_1
Writing fold_2
Writing fold_3
Writing fold_4
Elapsed time: 83.1888


In [74]:
!../conlleval < ../results/score2/fold_0.preds.txt
!../conlleval < ../results/score2/fold_1.preds.txt
!../conlleval < ../results/score2/fold_2.preds.txt
!../conlleval < ../results/score2/fold_3.preds.txt
!../conlleval < ../results/score2/fold_4.preds.txt

result = !../partial_eval.sh
result = [str(x[:5]) for x in str(result[0]).split('\t')]
result = [float(x) for x in result[:-1]]
avg_f1 = sum([result[i+2] for i in range(0, 15, 3)])/5.0 
print('F1:', avg_f1)

processed 36761 tokens with 2178 phrases; found: 2073 phrases; correct: 1519.
accuracy:  93.73%; precision:  73.28%; recall:  69.74%; FB1:  71.47
              PER: precision:  73.28%; recall:  69.74%; FB1:  71.47  2073
processed 32964 tokens with 1628 phrases; found: 1771 phrases; correct: 1020.
accuracy:  90.20%; precision:  57.59%; recall:  62.65%; FB1:  60.02
              PER: precision:  57.59%; recall:  62.65%; FB1:  60.02  1771
processed 28023 tokens with 1436 phrases; found: 1245 phrases; correct: 662.
accuracy:  90.45%; precision:  53.17%; recall:  46.10%; FB1:  49.38
              PER: precision:  53.17%; recall:  46.10%; FB1:  49.38  1245
processed 51034 tokens with 2973 phrases; found: 2795 phrases; correct: 1439.
accuracy:  92.77%; precision:  51.48%; recall:  48.40%; FB1:  49.90
              PER: precision:  51.48%; recall:  48.40%; FB1:  49.90  2795
processed 42977 tokens with 2119 phrases; found: 2587 phrases; correct: 1675.
accuracy:  93.82%; precision:  64.75%; reca

In [75]:
# Run the evaluation shell script located one directory above the notebook.
# Its captured output is a single line of tab-separated percentages.
# NOTE(review): presumably precision/recall/F1 triples per fold — confirm against the script.
!../eval_model_2.sh

73.28%	69.74%	71.47%	57.59%	62.65%	60.02%	53.17%	46.10%	49.38%	51.48%	48.40%	49.90%	64.75%	79.05%	71.19%	77.69%	69.70%	73.48%	62.39%	63.39%	62.89%	56.79%	46.31%	51.02%	53.39%	49.21%	51.22%	71.42%	81.26%	76.03%	

Fold size: 29


KeyboardInterrupt: 