In [1]:
%load_ext autoreload
%autoreload 2 

import os
import numpy as np
import pickle

from tqdm import tqdm

import torch
import torch.utils.data
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

from collections import OrderedDict

import nltk
from nltk.tokenize import word_tokenize

from embeddings import load_embeddings, load_vocab
from load_conll import load_conll03
from loader import prepare_sentence, tag_mapping, cap_feature, CoNLLDataset, pad_list
from model import Tagger, cuda
from torch_utils import prepare_sequence, prepare_sequence_float, tensor
from utils import sent2seq, sent2chars, word_index, char_index, add_unknown_last, zero_digits
from eval import eval, micro_precision_recall_f1_accuracy, eval_metrics, eval_metrics_crf, save_plot

Using TensorFlow backend.


In [2]:
#Parameters
torch.manual_seed(0)

parameters = OrderedDict()

parameters["lr"] = 0.1
parameters["optimizer"] = "SGD"
parameters["hidden_size"] = 200
parameters["pre_emb"] = "glove"

parameters["w_embed_size"] = 300
# parameters["dim_cap"] = 10

parameters["batch_size"] = 20
parameters["load_embeds"] = True
parameters["dropout"] = 0.5
parameters["gradient_clipping"] = 0
parameters["crf"] = True

epochs = 1000
zero_digit = True

assert parameters["pre_emb"] in ["glove", "google"]
assert not parameters["pre_emb"] == "google" or parameters["w_embed_size"] == 300

param_str = "-".join(["%s:%s" % (str(k), str(v)) for (k,v) in parameters.items()]).lower()
print(param_str)

lr:0.1-optimizer:sgd-hidden_size:200-pre_emb:glove-w_embed_size:300-batch_size:20-load_embeds:true-dropout:0.5-gradient_clipping:0-crf:true


# 1. Data Preprocessing

### Load pretrained embeddings

In [3]:
if parameters["pre_emb"] == "glove":
    embeddings_path = "word_embeddings/glove.6B/glove.6B.%sd_w2vformat.txt" % parameters["w_embed_size"]
    binary = False
else:
    embeddings_path = "word_embeddings/google/GoogleNews-vectors-negative300.bin"
    binary = True
    
if parameters["load_embeds"]:
    loaded_embeddings, (w2idx, idx2w) = load_embeddings(embeddings_path, binary=binary)
else:
    parameters["freeze"] = 0

Loading from saved embeddings
Loading vocab


### Load CoNLL

In [4]:
# CoNLL03
sents_train_03, pos_train_03, chunk_train_03, ner_train_03 = load_conll03(["cleaned_eng.train"])
sents_dev_03, pos_dev_03, chunk_train_03, ner_dev_03 = load_conll03(["cleaned_eng.testa"])
sents_test_03, pos_test_03, chunk_train_03, ner_test_03 = load_conll03(["cleaned_eng.testb"])

print("Train %s, Dev %s, Test %s" % (len(sents_train_03), len(sents_dev_03), len(sents_test_03)))

Loaded CoNLL03 in 1.8718538284301758 seconds
Loaded CoNLL03 in 0.43241214752197266 seconds
Loaded CoNLL03 in 0.4268953800201416 seconds
Train 14041, Dev 3250, Test 3453


In [5]:
if zero_digit:
    sents_train_03 = [[zero_digits(w) for w in s] for s in sents_train_03]
    sents_test_03 = [[zero_digits(w) for w in s] for s in sents_test_03]    
    sents_dev_03 = [[zero_digits(w) for w in s] for s in sents_dev_03]    

In [6]:
sents_train = np.concatenate([sents_train_03, sents_dev_03, sents_test_03])

In [7]:
w2idx_train, idx2w_train = word_index(sents_train)
w2idx_train, idx2w_train = add_unknown_last(w2idx_train, idx2w_train)

X_train_03 = sent2seq(sents_train_03, w2idx_train)
X_dev_03 = sent2seq(sents_dev_03, w2idx_train)
X_test_03 = sent2seq(sents_test_03, w2idx_train)

idner_train, ner2idx, idx2ner = tag_mapping(ner_train_03)
idner_dev = tag_mapping(ner_dev_03, ner2idx)
idner_test = tag_mapping(ner_test_03, ner2idx)

num_ner_classes = len(ner2idx)

Found 9 unique named entity tags


### Data Pipeline

In [8]:
class CoNLLDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, lens):
        self.data = X
        self.labels = y
        self.lens = lens
        
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx], self.lens[idx]

    def __len__(self):
        return len(self.data)

In [9]:
def pad_list(x, padding_idx=0):
    lens = [len(s) for s in x]
    maxlen = max(lens)    
    sorted_indices = sorted(range(len(lens)), key=lambda k: lens[k], reverse=True)
    
    batch = padding_idx * torch.ones(len(x), maxlen).long()
    
    for i, idx in enumerate(sorted_indices):
        batch[i, :lens[idx]] = torch.LongTensor(x[idx])
    
    return batch, sorted(lens, reverse=True), sorted_indices

In [10]:
data_train, lens_train, sorted_train = pad_list(X_train_03)
labels_train, _, _ = pad_list(idner_train)

data_dev, lens_dev, sorted_dev = pad_list(X_dev_03)
labels_dev, _, _ = pad_list(idner_dev)

data_test, lens_test, sorted_test = pad_list(X_test_03)
labels_test, _, _ = pad_list(idner_test)

In [11]:
dataset_train = CoNLLDataset(data_train, labels_train, lens_train)
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=parameters["batch_size"], num_workers=4)

dataset_dev = CoNLLDataset(data_dev, labels_dev, lens_dev)
loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=parameters["batch_size"], num_workers=4)

dataset_test = CoNLLDataset(data_test, labels_test, lens_test)
loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=parameters["batch_size"], num_workers=4)

In [12]:
idner_dev = [idner_dev[i] for i in sorted_dev]
idner_test = [idner_test[i] for i in sorted_test]

sents_dev_03 = [sents_dev_03[i] for i in sorted_dev]
sents_test_03 = [sents_test_03[i] for i in sorted_test]

In [13]:
# for x, y, l in loader:
#     packed = pack_padded_sequence(autograd.Variable(x), l.numpy(), batch_first=True)
#     batch, lens = pad_packed_sequence(packed, batch_first=True)

### Fit word embeddings to vocabulary

In [14]:
embeddings = np.random.normal(scale=0.001, size=(len(w2idx_train), parameters["w_embed_size"]))

if parameters["load_embeds"]:
    for w, i in w2idx_train.items():
        idx = w2idx.get(w)
        if idx is not None:
            embeddings[i] = loaded_embeddings[idx][:parameters["w_embed_size"]]

### Capitalization features

In [15]:
# cap_train_03 = [[cap_feature(w) for w in s] for s in sents_train_03]
# cap_test_03 = [[cap_feature(w) for w in s] for s in sents_test_03]
# cap_dev_03 = [[cap_feature(w) for w in s] for s in sents_dev_03]    

In [16]:
# if parameters["dim_cap"]:
#     n_cap = 4
#     cap_embeddings = np.random.normal(scale=0.001, size=(n_cap, parameters["dim_cap"]))

# 2. Training 

### Defining model

In [17]:
if not "<START>" in idx2ner.values():
    idx2ner[len(idx2ner)] = "<START>"
    idx2ner[len(idx2ner)] = "<STOP>"
    ner2idx = {v:k for (k,v) in idx2ner.items()}

idx2ner

{0: 'O',
 1: 'B-LOC',
 2: 'B-PER',
 3: 'B-ORG',
 4: 'I-PER',
 5: 'I-ORG',
 6: 'B-MISC',
 7: 'I-LOC',
 8: 'I-MISC',
 9: '<START>',
 10: '<STOP>'}

In [18]:
model = Tagger(tensor(embeddings),  parameters["hidden_size"], idx2ner, dropout=parameters["dropout"],
               crf=parameters["crf"])

trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())

if parameters["optimizer"].lower() == "adam":
    optimizer = optim.Adam(trainable_parameters, lr= parameters["lr"])
elif parameters["optimizer"].lower() == "sgd":
    optimizer = optim.SGD(trainable_parameters, lr= parameters["lr"])

if torch.cuda.is_available():
    model.cuda()

In [20]:
reload = 1
model_path = "models/ner_crf_batch/%s/" % param_str

if not os.path.exists(model_path):
    os.makedirs(model_path)
    
if reload and os.path.exists(model_path + "last_state_dict"):
    model.load_state_dict(torch.load(model_path + "last_state_dict"))
    model = model.cuda()
    with open(model_path + "metrics.p", "rb") as file:
        metrics = pickle.load(file)
    best_ner = np.max(metrics["ner"]["val_loss_dev"])
    
else:
    metrics = {"ner":{"precision":[], "recall":[], "f1":[], "accuracy":[], "ent_f1":[], "loss": [], "val_loss_dev": [],
                      "precision_test":[], "recall_test":[], "f1_test":[], "accuracy_test":[], "ent_f1_test":[], 
                      "val_loss_test": []}}
    best_ner = np.inf

### Training 

In [None]:
for epoch in range(epochs):
    print("Epoch %s/%s :" % (epoch+1, epochs))

    losses = []
    val_loss_epoch = []
    loss_epoch = []
    
    test_lim = 3500 
    for i, (sentences, tags, lens) in enumerate(tqdm(loader_train)):
#     test_lim = 10
#     for i in tqdm(range(10)):
                      
        sentences_in = autograd.Variable(cuda(sentences[:,:lens.numpy()[0]]))
        targets = autograd.Variable(cuda(tags[:,:lens.numpy()[0]]))
        
        packed_targets = pack_padded_sequence(targets, lens.numpy(), batch_first=True) 
        
        if parameters["crf"]:        
            loss = model.neg_log_likelihood(sentences_in, lens, targets,
                                            gradient_clipping=parameters["gradient_clipping"])
        else:
            scores = model(sentences_in, lens)
            padded_scores = pad_packed_sequence(scores, batch_first=True) 
            
#             print(padded_scores[0].size())
#             print(targets.size())
            
            loss = nn.CrossEntropyLoss()(padded_scores[0].contiguous().view(-1,len(ner2idx)),
                                                          targets.contiguous().view(-1))

        loss.backward()        
        optimizer.step()
        
        losses.append(loss.cpu().data.numpy())
    
    preds_dev, ner_loss_dev = model.test(loader_dev)  
    
    eval_metrics_crf(preds_dev, metrics, idner_dev[:test_lim], sents_dev_03[:test_lim], 
                     ner2idx, idx2ner, model_path, dev=True)
    
    val_loss_epoch = ner_loss_dev
    
    preds_test, ner_loss_test = model.test(loader_test)     
    
    eval_metrics_crf(preds_test, metrics, idner_test[:test_lim], sents_test_03[:test_lim], 
                         ner2idx, idx2ner, model_path)
    
    loss_epoch = np.mean(losses)
    
    print("Loss :  NER %s" % (loss_epoch))
    print("Dev loss : NER %s" % (val_loss_epoch))
    print("Test loss : NER %s" % (ner_loss_test))
    
    torch.save(model.state_dict(), model_path + "last_state_dict")
    if ner_loss_dev < best_ner: 
        print("New best score on dev.")
        print("Saving model...")
        torch.save(model.state_dict(), model_path + "best_state_dict")
        
        best_ner = ner_loss_dev
    
    metrics["ner"]["val_loss_dev"].append(val_loss_epoch)
    metrics["ner"]["val_loss_test"].append(ner_loss_test)    
    metrics["ner"]["loss"].append(loss_epoch)    

    # Save learning curve
    save_plot(metrics, model_path)
    with open(model_path + "metrics.p", "wb") as file:
        pickle.dump(metrics, file)

print("Done")

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 1/1000 :


100%|██████████| 703/703 [02:57<00:00,  3.96it/s]  
163it [00:18, 38.06it/s]


ner : p 0.976183486957, r 0.977771679473, f 0.976976937766, acc 0.9711849227055022
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42609     30     41     26      5     22     12      4     10      0      0   99.649
 1  B-LOC   1837     73   1692     27     30      2      4      6      3      0      0      0   92.107
 2  B-PER   1842    103     13   1666     43     13      3      1      0      0      0      0   90.445
 3  B-ORG   1341    105     40     64   1093      4     19     15      1      0      0      0   81.506
 4  I-PER   1307     50      0      7      0   1221     28      0      0      1      0      0   93.420
 5  I-ORG    751     59      5      3     21     16    622      3     12     10      0      0   82.823
 6 B-MISC    922     91     13     26     27      1      4    758      0      2      0      0   82.213
 7  I-LOC    257     10      2      0      0      6     17      0    221      1      0      0

173it [00:17,  9.87it/s]


ner : p 0.959013734184, r 0.95934572362, f 0.959179700175, acc 0.9548831700226122
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37915     41     91     79     25     70     47     16     39      0      0   98.935
 1  B-LOC   1668     82   1479     26     48      4      3     19      4      3      0      0   88.669
 2  B-PER   1617    140     21   1311    110     30      1      1      2      1      0      0   81.076
 3  B-ORG   1661    199     88     97   1216      1     29     31      0      0      0      0   73.209
 4  I-PER   1156     53      0      3      4   1041     49      4      2      0      0      0   90.052
 5  I-ORG    835     68      5      0     20     33    658      5     29     17      0      0   78.802
 6 B-MISC    702    117     24     16     22      0      3    513      0      7      0      0   73.077
 7  I-LOC    257     19      2      0      0      9     20      0    207      0      0      0 

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 2/1000 :


100%|██████████| 703/703 [00:58<00:00, 12.12it/s]
163it [00:18,  8.89it/s]


ner : p 0.975654624447, r 0.977222832053, f 0.976438098596, acc 0.9706397725945252
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42630     35     22     23      3     22     11      4      9      0      0   99.698
 1  B-LOC   1837     69   1701     13     37      2      4      8      3      0      0      0   92.597
 2  B-PER   1842    124     25   1627     49     12      3      2      0      0      0      0   88.328
 3  B-ORG   1341    119     45     55   1090      4     15     12      1      0      0      0   81.283
 4  I-PER   1307     55      0      8      0   1219     22      0      1      2      0      0   93.267
 5  I-ORG    751     71      6      3     24     18    605      2     12     10      0      0   80.559
 6 B-MISC    922    101     16     15     22      1      3    762      0      2      0      0   82.646
 7  I-LOC    257      9      3      0      0     10     13      0    220      2      0      0

173it [00:16, 10.24it/s]


ner : p 0.95806479519, r 0.95845864255, f 0.958261678402, acc 0.9540002153547971
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37941     58     65     75     19     60     46     19     40      0      0   99.003
 1  B-LOC   1668     79   1497     18     47      3      3     19      1      1      0      0   89.748
 2  B-PER   1617    177     29   1249    127     29      1      2      2      1      0      0   77.242
 3  B-ORG   1661    217    103     66   1220      1     28     25      1      0      0      0   73.450
 4  I-PER   1156     68      0      3      3   1033     43      4      2      0      0      0   89.360
 5  I-ORG    835     74      4      0     28     33    646      5     29     16      0      0   77.365
 6 B-MISC    702    123     24     14     19      0      3    512      0      7      0      0   72.934
 7  I-LOC    257     21      3      0      0     10     22      0    201      0      0      0  

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 3/1000 :


100%|██████████| 703/703 [00:58<00:00, 12.12it/s]
163it [00:17, 37.22it/s]


ner : p 0.975478972191, r 0.977066018504, f 0.976271850365, acc 0.9704840154199603
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42618     26     36     27      1     19     18      4     10      0      0   99.670
 1  B-LOC   1837     82   1687     19     33      2      3      8      3      0      0      0   91.835
 2  B-PER   1842    110     16   1652     49     10      3      2      0      0      0      0   89.685
 3  B-ORG   1341    129     38     57   1085      4     13     14      1      0      0      0   80.910
 4  I-PER   1307     52      0      7      1   1225     20      0      0      2      0      0   93.726
 5  I-ORG    751     72      5      3     25     20    598      2     15     11      0      0   79.627
 6 B-MISC    922     96     14     22     20      1      3    765      0      1      0      0   82.972
 7  I-LOC    257     12      0      0      0     11     16      0    216      2      0      0

173it [00:16, 10.59it/s]


ner : p 0.958126960095, r 0.95845864255, f 0.958292772622, acc 0.9540002153547971
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37921     44     89     80     39     44     49     15     42      0      0   98.951
 1  B-LOC   1668     87   1469     27     55      6      1     21      1      1      0      0   88.070
 2  B-PER   1617    167     20   1282    113     30      0      2      2      1      0      0   79.283
 3  B-ORG   1661    214     88     83   1214      1     24     36      1      0      0      0   73.089
 4  I-PER   1156     60      0      4      4   1054     30      3      1      0      0      0   91.176
 5  I-ORG    835     76      5      0     25     37    642      5     27     18      0      0   76.886
 6 B-MISC    702    117     20     19     20      0      3    518      0      5      0      0   73.789
 7  I-LOC    257     19      3      2      0     10     24      0    199      0      0      0 

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 4/1000 :


100%|██████████| 703/703 [00:56<00:00, 12.46it/s]
163it [00:17,  9.11it/s]


ner : p 0.975785929609, r 0.977124823585, f 0.976454917631, acc 0.9705424243604222
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42628     28     31     23      4     14     16      4     11      0      0   99.694
 1  B-LOC   1837     81   1695     15     34      1      0      8      3      0      0      0   92.270
 2  B-PER   1842    122     15   1644     48      7      4      2      0      0      0      0   89.251
 3  B-ORG   1341    120     37     54   1094      2     13     19      2      0      0      0   81.581
 4  I-PER   1307     70      0      8      2   1215     10      1      0      1      0      0   92.961
 5  I-ORG    751     77      6      3     26     22    584      3     15     15      0      0   77.763
 6 B-MISC    922     95     12     15     21      1      2    775      0      1      0      0   84.056
 7  I-LOC    257     15      4      0      0      9     14      0    214      1      0      0

173it [00:16, 10.51it/s]


ner : p 0.958260343, r 0.958675003786, f 0.958467628545, acc 0.9542155701518251
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37982     39     74     65     22     34     49     20     38      0      0   99.110
 1  B-LOC   1668     85   1476     23     47      4      1     30      1      1      0      0   88.489
 2  B-PER   1617    175     21   1254    135     23      1      4      2      2      0      0   77.551
 3  B-ORG   1661    219     81     78   1221      2     20     40      0      0      0      0   73.510
 4  I-PER   1156     89      0      5      3   1030     25      3      1      0      0      0   89.100
 5  I-ORG    835     80      6      0     40     36    616      5     34     18      0      0   73.772
 6 B-MISC    702    117     16     15     20      0      2    527      0      5      0      0   75.071
 7  I-LOC    257     22      3      2      0     11     15      0    203      1      0      0   

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 5/1000 :


100%|██████████| 703/703 [00:57<00:00, 12.30it/s]
163it [00:19, 34.19it/s]


ner : p 0.97585714006, r 0.976909204955, f 0.976382889104, acc 0.9703282582453955
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42631     28     16     26      2     23     16      3     14      0      0   99.701
 1  B-LOC   1837     85   1682     15     38      1      4      9      3      0      0      0   91.562
 2  B-PER   1842    141     18   1608     61      8      4      2      0      0      0      0   87.296
 3  B-ORG   1341    123     35     44   1097      2     16     22      0      2      0      0   81.805
 4  I-PER   1307     43      0      7      1   1222     32      0      0      2      0      0   93.497
 5  I-ORG    751     61      5      3     22     20    610      2      9     19      0      0   81.225
 6 B-MISC    922     97     10     10     26      1      4    774      0      0      0      0   83.948
 7  I-LOC    257     10      3      0      0      7     20      0    214      3      0      0 

173it [00:16, 10.41it/s]


ner : p 0.958004112999, r 0.957506653108, f 0.957755318458, acc 0.9530526542478733
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37933     37     53     90     30     62     51     15     52      0      0   98.982
 1  B-LOC   1668     81   1465     26     52      4      5     28      3      4      0      0   87.830
 2  B-PER   1617    205     24   1199    152     28      1      3      2      3      0      0   74.150
 3  B-ORG   1661    216     73     60   1232      2     29     48      0      1      0      0   74.172
 4  I-PER   1156     58      0      4      3   1038     49      2      1      1      0      0   89.792
 5  I-ORG    835     64      4      0     23     28    670      5     22     19      0      0   80.240
 6 B-MISC    702    112     19     10     22      0      3    525      0     11      0      0   74.786
 7  I-LOC    257     17      4      1      0      8     30      0    193      4      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 6/1000 :


100%|██████████| 703/703 [00:58<00:00, 11.94it/s]
163it [00:18,  8.94it/s]


ner : p 0.97546312615, r 0.976419162616, f 0.975940910249, acc 0.9698415170748803
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42619     31     23     24      1     23     18      4     16      0      0   99.673
 1  B-LOC   1837     82   1687     12     37      2      5      9      3      0      0      0   91.835
 2  B-PER   1842    142     26   1605     52     10      5      2      0      0      0      0   87.134
 3  B-ORG   1341    128     33     39   1094      1     18     26      0      2      0      0   81.581
 4  I-PER   1307     57      0      6      1   1212     27      0      2      2      0      0   92.731
 5  I-ORG    751     64      5      2     23     18    607      2     12     18      0      0   80.826
 6 B-MISC    922     96     14      9     26      1      2    773      0      1      0      0   83.839
 7  I-LOC    257      9      2      0      0      8     20      0    216      2      0      0 

173it [00:17, 35.62it/s]


ner : p 0.957361153197, r 0.957009022264, f 0.957185055345, acc 0.9525573382147088
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37955     43     57     77     19     57     49     16     50      0      0   99.040
 1  B-LOC   1668     92   1476     13     47      3      4     30      2      1      0      0   88.489
 2  B-PER   1617    211     25   1191    143     34      1      6      3      3      0      0   73.655
 3  B-ORG   1661    221     84     66   1216      1     26     46      0      1      0      0   73.209
 4  I-PER   1156     74      0      1      3   1024     46      3      3      2      0      0   88.581
 5  I-ORG    835     73      6      0     25     27    649      5     29     21      0      0   77.725
 6 B-MISC    702    121     18      7     20      0      3    523      0     10      0      0   74.501
 7  I-LOC    257     17      4      0      0     10     23      0    198      5      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 7/1000 :


100%|██████████| 703/703 [00:58<00:00, 11.93it/s]
163it [00:18,  9.04it/s]


ner : p 0.976856203008, r 0.977928493022, f 0.977392053914, acc 0.971340679880067
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42623     27     32     23      2     18     17      3     14      0      0   99.682
 1  B-LOC   1837     68   1700     15     40      2      1      8      3      0      0      0   92.542
 2  B-PER   1842     99     23   1644     58     11      4      2      0      1      0      0   89.251
 3  B-ORG   1341    107     38     47   1102      3     16     24      1      3      0      0   82.177
 4  I-PER   1307     45      0      8      0   1228     25      0      0      1      0      0   93.956
 5  I-ORG    751     60      7      3     23     18    610      2     10     18      0      0   81.225
 6 B-MISC    922     91     14     16     26      2      1    771      0      1      0      0   83.623
 7  I-LOC    257      9      2      0      0      7     24      0    212      3      0      0 

173it [00:18,  9.21it/s]


ner : p 0.959379328255, r 0.959150998507, f 0.959265149794, acc 0.9546893507052869
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37951     37     70     80     20     51     47     20     47      0      0   99.029
 1  B-LOC   1668     77   1476     20     57      4      4     27      2      1      0      0   88.489
 2  B-PER   1617    158     24   1252    145     28      0      5      2      3      0      0   77.427
 3  B-ORG   1661    199     88     71   1234      3     25     39      0      2      0      0   74.293
 4  I-PER   1156     53      0      4      3   1046     42      4      2      2      0      0   90.484
 5  I-ORG    835     73      5      0     27     27    657      5     23     18      0      0   78.683
 6 B-MISC    702    115     21     11     24      0      3    521      0      7      0      0   74.217
 7  I-LOC    257     17      3      2      0      8     29      0    194      4      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 8/1000 :


100%|██████████| 703/703 [01:00<00:00, 11.56it/s]
163it [00:18,  9.00it/s]


ner : p 0.976275300474, r 0.977614865924, f 0.976944624003, acc 0.9710291655309373
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42627     27     33     19      1     22     17      2     11      0      0   99.691
 1  B-LOC   1837     77   1692     13     39      2      3      8      3      0      0      0   92.107
 2  B-PER   1842    118     15   1636     56     11      4      2      0      0      0      0   88.817
 3  B-ORG   1341    116     29     51   1107      4     13     20      0      1      0      0   82.550
 4  I-PER   1307     51      0      7      2   1227     19      0      0      1      0      0   93.879
 5  I-ORG    751     66      6      3     26     21    602      2     11     14      0      0   80.160
 6 B-MISC    922     96     10     14     27      2      1    771      0      1      0      0   83.623
 7  I-LOC    257     14      1      0      0     10     19      0    212      1      0      0

173it [00:16, 10.38it/s]


ner : p 0.958594730238, r 0.958739912157, f 0.958667315701, acc 0.9542801765909336
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37963     38     71     79     24     44     46     17     41      0      0   99.061
 1  B-LOC   1668     78   1463     27     56      6      5     28      3      2      0      0   87.710
 2  B-PER   1617    174     18   1241    145     30      1      4      1      3      0      0   76.747
 3  B-ORG   1661    199     71     76   1242      3     26     44      0      0      0      0   74.774
 4  I-PER   1156     65      0      2      2   1043     38      4      1      1      0      0   90.225
 5  I-ORG    835     75      5      0     29     34    645      5     25     17      0      0   77.246
 6 B-MISC    702    119     17     11     22      0      3    523      0      7      0      0   74.501
 7  I-LOC    257     17      4      0      0     11     30      0    192      3      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 9/1000 :


100%|██████████| 703/703 [01:00<00:00, 11.68it/s]
163it [00:18,  8.98it/s]


ner : p 0.976992813926, r 0.978046103183, f 0.977519174821, acc 0.9714574977609907
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42608     32     36     24      4     25     16      2     12      0      0   99.647
 1  B-LOC   1837     63   1702     15     39      2      5      8      3      0      0      0   92.651
 2  B-PER   1842     92     17   1660     59      9      4      1      0      0      0      0   90.119
 3  B-ORG   1341     95     39     59   1099      3     18     25      1      2      0      0   81.954
 4  I-PER   1307     43      0      9      2   1228     24      0      0      1      0      0   93.956
 5  I-ORG    751     63      6      3     21     18    609      3     10     18      0      0   81.092
 6 B-MISC    922     81     16     18     28      1      3    774      0      1      0      0   83.948
 7  I-LOC    257     11      3      0      0      6     19      0    216      2      0      0

173it [00:16, 10.36it/s]


ner : p 0.959668520241, r 0.959626993228, f 0.959647756285, acc 0.9551631312587487
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37915     48     84     90     25     56     47     15     43      0      0   98.935
 1  B-LOC   1668     71   1490     22     49      5      4     25      0      2      0      0   89.329
 2  B-PER   1617    139     24   1289    130     24      0      6      2      3      0      0   79.716
 3  B-ORG   1661    165     94     94   1234      3     25     46      0      0      0      0   74.293
 4  I-PER   1156     57      0      4      2   1046     40      4      2      1      0      0   90.484
 5  I-ORG    835     67      6      0     25     36    651      5     27     18      0      0   77.964
 6 B-MISC    702    101     19     17     26      0      3    527      0      9      0      0   75.071
 7  I-LOC    257     19      4      0      0      7     23      0    201      3      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 10/1000 :


100%|██████████| 703/703 [01:01<00:00, 11.46it/s]
163it [00:18,  8.99it/s]


ner : p 0.976130800862, r 0.977164026972, f 0.976647140646, acc 0.9705813636540633
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42619     27     35     21      2     17     19      3     16      0      0   99.673
 1  B-LOC   1837     72   1701     13     37      2      1      8      3      0      0      0   92.597
 2  B-PER   1842    120     23   1638     44      8      4      4      0      1      0      0   88.925
 3  B-ORG   1341    117     44     55   1083      2     15     21      1      3      0      0   80.761
 4  I-PER   1307     57      0      8      0   1220     20      0      0      2      0      0   93.344
 5  I-ORG    751     70      7      3     21     18    598      2     15     17      0      0   79.627
 6 B-MISC    922     94     15     15     20      1      2    775      0      0      0      0   84.056
 7  I-LOC    257     13      1      0      0     10     14      0    217      2      0      0

173it [00:16, 38.38it/s]


ner : p 0.958820219856, r 0.958675003786, f 0.958747606323, acc 0.9542155701518251
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37940     55     75     76     16     47     49     18     47      0      0   99.001
 1  B-LOC   1668     77   1491     23     45      4      2     23      2      1      0      0   89.388
 2  B-PER   1617    182     24   1268    101     30      0      7      2      3      0      0   78.417
 3  B-ORG   1661    199     99     84   1207      3     25     44      0      0      0      0   72.667
 4  I-PER   1156     68      0      2      4   1042     32      3      3      2      0      0   90.138
 5  I-ORG    835     75      9      0     29     29    638      5     31     19      0      0   76.407
 6 B-MISC    702    115     24      9     22      0      3    521      0      8      0      0   74.217
 7  I-LOC    257     21      5      0      0      7     19      0    202      3      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 11/1000 :


100%|██████████| 703/703 [01:01<00:00, 11.38it/s]
163it [00:19,  8.41it/s]


ner : p 0.976740543504, r 0.977889289635, f 0.977314579007, acc 0.9713017405864257
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42606     38     32     30      1     23     17      2     10      0      0   99.642
 1  B-LOC   1837     59   1704     16     39      2      6      8      3      0      0      0   92.760
 2  B-PER   1842    107     20   1628     67     14      4      2      0      0      0      0   88.382
 3  B-ORG   1341     84     43     52   1120      2     19     18      1      2      0      0   83.520
 4  I-PER   1307     48      0      8      2   1217     30      0      0      2      0      0   93.114
 5  I-ORG    751     53      5      3     22     17    623      2     11     15      0      0   82.956
 6 B-MISC    922     80     13     15     33      1      2    775      0      3      0      0   84.056
 7  I-LOC    257     10      2      0      0      8     20      0    215      2      0      0

173it [00:17,  9.77it/s]


ner : p 0.95860740869, r 0.958545187044, f 0.958576296857, acc 0.9540863572736082
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37920     59     57    100     17     59     52     15     44      0      0   98.948
 1  B-LOC   1668     69   1489     22     57      3      5     20      1      2      0      0   89.269
 2  B-PER   1617    171     28   1227    146     33      3      4      2      3      0      0   75.881
 3  B-ORG   1661    158    104     77   1255      1     30     35      0      1      0      0   75.557
 4  I-PER   1156     65      0      3      3   1030     48      4      2      1      0      0   89.100
 5  I-ORG    835     67      6      0     26     27    662      4     24     19      0      0   79.281
 6 B-MISC    702    104     22     13     28      0      3    524      0      8      0      0   74.644
 7  I-LOC    257     20      2      0      0      9     28      0    196      2      0      0 

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 12/1000 :


100%|██████████| 703/703 [01:02<00:00, 11.28it/s]
163it [00:18,  8.93it/s]


ner : p 0.977056048237, r 0.9783009252, f 0.977678090443, acc 0.9717106031696585
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42597     32     31     35      3     27     18      4     12      0      0   99.621
 1  B-LOC   1837     61   1707     12     38      2      6      8      3      0      0      0   92.923
 2  B-PER   1842     96     20   1646     58     15      4      3      0      0      0      0   89.359
 3  B-ORG   1341     87     38     52   1120      4     21     18      0      1      0      0   83.520
 4  I-PER   1307     41      0      8      0   1230     26      0      0      2      0      0   94.109
 5  I-ORG    751     51      5      3     21     19    625      2     11     14      0      0   83.222
 6 B-MISC    922     83     14     18     35      1      2    768      0      1      0      0   83.297
 7  I-LOC    257     11      1      0      0      8     20      0    216      1      0      0  

173it [00:16, 10.49it/s]


ner : p 0.958501914797, r 0.958501914797, f 0.958501914797, acc 0.9540432863142027
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37872     67     71    100     30     68     49     19     47      0      0   98.823
 1  B-LOC   1668     72   1487     26     52      5      3     21      0      2      0      0   89.149
 2  B-PER   1617    147     26   1261    140     32      2      4      2      3      0      0   77.984
 3  B-ORG   1661    171     90     85   1248      2     31     33      0      1      0      0   75.135
 4  I-PER   1156     44      0      3      3   1055     46      3      2      0      0      0   91.263
 5  I-ORG    835     63      4      0     24     29    667      5     25     18      0      0   79.880
 6 B-MISC    702    108     23     13     31      0      3    517      0      7      0      0   73.647
 7  I-LOC    257     16      2      1      0     11     30      0    194      3      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 13/1000 :


 37%|███▋      | 263/703 [00:37<00:34, 12.81it/s]

# Debug Tests

In [None]:
for x,y,l in loader_train:
    X = autograd.Variable(x)
    Y = autograd.Variable(y)
    L = l
    break

In [None]:
word_embeddings = nn.Embedding(len(w2idx_train), 300)

In [None]:
packed = pack_padded_sequence(X, l.numpy(), batch_first=True)

In [None]:
w = word_embeddings(X)

In [None]:
w.size()

In [None]:
p = pack_padded_sequence(w, l.numpy(), batch_first=True)

In [None]:
p

In [None]:
np.sum(p.batch_sizes)

In [None]:
np.sum(l.numpy())

In [None]:
print(l.numpy())
print(p.batch_sizes)