In [1]:
%load_ext autoreload
%autoreload 2 

import os
import numpy as np
import pickle

from tqdm import tqdm

import torch
import torch.utils.data
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

from collections import OrderedDict

import nltk
from nltk.tokenize import word_tokenize

from embeddings import load_embeddings, load_vocab
from load_conll import load_conll03
from loader import prepare_sentence, tag_mapping, cap_feature, CoNLLDataset, pad_list
from model_char import Tagger, cuda
from torch_utils import prepare_sequence, prepare_sequence_float, tensor
from utils import sent2seq, sent2chars, word_index, char_index, add_unknown_last, zero_digits
from eval import eval, micro_precision_recall_f1_accuracy, eval_metrics, eval_metrics_crf, save_plot

Using TensorFlow backend.


In [2]:
#Parameters
torch.manual_seed(0)

parameters = OrderedDict()

parameters["lr"] = 0.001
parameters["optimizer"] = "Adam"
parameters["hidden_size"] = 300
parameters["pre_emb"] = "glove"

parameters["w_embed_size"] = 300
# parameters["dim_cap"] = 10

parameters["batch_size"] = 20

parameters["c_embed_size"] = 100
parameters["char_hidden_size"] = 100

parameters["load_embeds"] = True
parameters["dropout"] = 0.5
parameters["gradient_clipping"] = 0
parameters["crf"] = True

epochs = 1000
zero_digit = True

assert parameters["pre_emb"] in ["glove", "google"]
assert not parameters["pre_emb"] == "google" or parameters["w_embed_size"] == 300

param_str = "-".join(["%s:%s" % (str(k), str(v)) for (k,v) in parameters.items()]).lower()
print(param_str)

lr:0.001-optimizer:adam-hidden_size:300-pre_emb:glove-w_embed_size:300-batch_size:20-c_embed_size:100-char_hidden_size:100-load_embeds:true-dropout:0.5-gradient_clipping:0-crf:true


# 1. Data Preprocessing

### Load pretrained embeddings

In [3]:
if parameters["pre_emb"] == "glove":
    embeddings_path = "word_embeddings/glove.6B/glove.6B.%sd_w2vformat.txt" % parameters["w_embed_size"]
    binary = False
else:
    embeddings_path = "word_embeddings/google/GoogleNews-vectors-negative300.bin"
    binary = True
    
if parameters["load_embeds"]:
    loaded_embeddings, (w2idx, idx2w) = load_embeddings(embeddings_path, binary=binary)
else:
    parameters["freeze"] = 0

Loading from saved embeddings
Loading vocab


### Load CoNLL

In [4]:
# CoNLL03
sents_train_03, pos_train_03, chunk_train_03, ner_train_03 = load_conll03(["cleaned_eng.train"])
sents_dev_03, pos_dev_03, chunk_train_03, ner_dev_03 = load_conll03(["cleaned_eng.testa"])
sents_test_03, pos_test_03, chunk_train_03, ner_test_03 = load_conll03(["cleaned_eng.testb"])

print("Train %s, Dev %s, Test %s" % (len(sents_train_03), len(sents_dev_03), len(sents_test_03)))

Loaded CoNLL03 in 1.8940796852111816 seconds
Loaded CoNLL03 in 0.4967625141143799 seconds
Loaded CoNLL03 in 0.4694633483886719 seconds
Train 14041, Dev 3250, Test 3453


In [5]:
if zero_digit:
    sents_train_03 = [[zero_digits(w) for w in s] for s in sents_train_03]
    sents_test_03 = [[zero_digits(w) for w in s] for s in sents_test_03]    
    sents_dev_03 = [[zero_digits(w) for w in s] for s in sents_dev_03]    

In [6]:
sents_train = np.concatenate([sents_train_03, sents_dev_03, sents_test_03])

In [7]:
w2idx_train, idx2w_train = word_index(sents_train)
w2idx_train, idx2w_train = add_unknown_last(w2idx_train, idx2w_train)

X_train_03 = sent2seq(sents_train_03, w2idx_train)
X_dev_03 = sent2seq(sents_dev_03, w2idx_train)
X_test_03 = sent2seq(sents_test_03, w2idx_train)

idner_train, ner2idx, idx2ner = tag_mapping(ner_train_03)
idner_dev = tag_mapping(ner_dev_03, ner2idx)
idner_test = tag_mapping(ner_test_03, ner2idx)

num_ner_classes = len(ner2idx)

Found 9 unique named entity tags


## Character embeddings

In [8]:
c2idx, idx2c = char_index(sents_train)
c2idx, idx2c = add_unknown_last(c2idx, idx2c)
char_embeddings = np.random.normal(scale=0.001, size=(len(c2idx), parameters["c_embed_size"]))

In [9]:
chars_train_03 = sent2chars(sents_train_03, c2idx)
chars_dev_03 = sent2chars(sents_dev_03, c2idx)
chars_test_03 = sent2chars(sents_test_03, c2idx)

### Data Pipeline

In [10]:
class CoNLLDataset_chars(torch.utils.data.Dataset):
    def __init__(self, X, chars, y, lens, wlens, wsorted):
        self.words = X
        self.chars = chars
        self.labels = y
        self.lens = lens
        self.wlens = wlens
        self.wsorted = wsorted
        
    def __getitem__(self, idx):
        return self.words[idx], self.chars[idx], self.labels[idx], self.lens[idx], self.wlens[idx], self.wsorted[idx]

    def __len__(self):
        return len(self.words)

In [11]:
def pad_chars(chars, pad_index=0):
    lens_sents = [len(s) for s in chars]
    lens_words = [[len(w) for w in s] for s in chars]
    sorted_sents = sorted(range(len(lens_sents)), key=lambda k: lens_sents[k], reverse=True)
        
    maxlen_sent = max(lens_sents)
    maxlen = max(np.concatenate(lens_words))
    
    
    unrolled = []
    for s in chars:
        for w in s:
            unrolled.append(w)
    
    batch = pad_index * torch.ones(len(chars), int(maxlen_sent), int(maxlen)).long()
    sorted_indices = pad_index * torch.ones(len(chars), int(maxlen_sent)).long()
    wlens = pad_index * torch.ones(len(chars), int(maxlen_sent)).long()
    
    for i, s in enumerate(sorted_sents):
        ordered, _, sorted_ids = pad_list(chars[s], pad_index)
        for j, w in enumerate(ordered):
            batch[i, j, :lens_words[s][sorted_ids[j]]] = torch.LongTensor(w[:lens_words[s][sorted_ids[j]]])
            sorted_indices[i, :lens_sents[s]] = torch.LongTensor(sorted_ids)
            wlens[i, :lens_sents[s]] = torch.LongTensor(lens_words[s])
            
    return batch, wlens, sorted_indices

In [12]:
words_train, lens_train, sorted_train = pad_list(X_train_03)
chars_train, wlens_train, wsorted_train = pad_chars(chars_train_03)
labels_train, _, _ = pad_list(idner_train)

words_dev, lens_dev, sorted_dev = pad_list(X_dev_03)
chars_dev, wlens_dev, wsorted_dev = pad_chars(chars_dev_03)
labels_dev, _, _ = pad_list(idner_dev)

words_test, lens_test, sorted_test = pad_list(X_test_03)
chars_test, wlens_test, wsorted_test = pad_chars(chars_test_03)
labels_test, _, _ = pad_list(idner_test)

In [13]:
dataset_train = CoNLLDataset_chars(words_train, chars_train, labels_train, lens_train, wlens_train, wsorted_train)
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=parameters["batch_size"], num_workers=0,
                                           pin_memory=True)

dataset_dev = CoNLLDataset_chars(words_dev, chars_dev,labels_dev, lens_dev, wlens_dev, wsorted_dev)
loader_dev = torch.utils.data.DataLoader(dataset_dev, batch_size=parameters["batch_size"], num_workers=0,
                                         pin_memory=True)

dataset_test = CoNLLDataset_chars(words_test, chars_test, labels_test, lens_test, wlens_test, wsorted_test)
loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=parameters["batch_size"], num_workers=0,
                                          pin_memory=True)

In [14]:
idner_dev = [idner_dev[i] for i in sorted_dev]
idner_test = [idner_test[i] for i in sorted_test]

sents_dev_03 = [sents_dev_03[i] for i in sorted_dev]
sents_test_03 = [sents_test_03[i] for i in sorted_test]

### Fit word embeddings to vocabulary

In [15]:
embeddings = np.random.normal(scale=0.001, size=(len(w2idx_train), parameters["w_embed_size"]))

if parameters["load_embeds"]:
    for w, i in w2idx_train.items():
        idx = w2idx.get(w)
        if idx is not None:
            embeddings[i] = loaded_embeddings[idx][:parameters["w_embed_size"]]

### Capitalization features

In [16]:
# cap_train_03 = [[cap_feature(w) for w in s] for s in sents_train_03]
# cap_test_03 = [[cap_feature(w) for w in s] for s in sents_test_03]
# cap_dev_03 = [[cap_feature(w) for w in s] for s in sents_dev_03]    

In [17]:
# if parameters["dim_cap"]:
#     n_cap = 4
#     cap_embeddings = np.random.normal(scale=0.001, size=(n_cap, parameters["dim_cap"]))

# 2. Training 

### Defining model

In [18]:
if not "<START>" in idx2ner.values():
    idx2ner[len(idx2ner)] = "<START>"
    idx2ner[len(idx2ner)] = "<STOP>"
    ner2idx = {v:k for (k,v) in idx2ner.items()}

idx2ner

{0: 'O',
 1: 'B-LOC',
 2: 'B-PER',
 3: 'B-ORG',
 4: 'I-PER',
 5: 'I-ORG',
 6: 'B-MISC',
 7: 'I-LOC',
 8: 'I-MISC',
 9: '<START>',
 10: '<STOP>'}

In [19]:
model = Tagger(tensor(embeddings),  parameters["hidden_size"], idx2ner, char_embeddings=tensor(char_embeddings),
               char_hidden_dim = parameters["char_hidden_size"], dropout=parameters["dropout"], 
               crf=parameters["crf"])

trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())

if parameters["optimizer"].lower() == "adam":
    optimizer = optim.Adam(trainable_parameters, lr= parameters["lr"])
elif parameters["optimizer"].lower() == "sgd":
    optimizer = optim.SGD(trainable_parameters, lr= parameters["lr"])

if torch.cuda.is_available():
    model.cuda()

In [20]:
reload = 0
model_path = "models/ner/%s/" % param_str

if not os.path.exists(model_path):
    os.makedirs(model_path)
    
if reload and os.path.exists(model_path + "last_state_dict"):
    model.load_state_dict(torch.load(model_path + "last_state_dict"))
    model = model.cuda()
    with open(model_path + "metrics.p", "rb") as file:
        metrics = pickle.load(file)
    best_ner = np.max(metrics["ner"]["val_loss_dev"])
    
else:
    metrics = {"ner":{"precision":[], "recall":[], "f1":[], "accuracy":[], "ent_f1":[], "loss": [], "val_loss_dev": [],
                      "precision_test":[], "recall_test":[], "f1_test":[], "accuracy_test":[], "ent_f1_test":[], 
                      "val_loss_test": []}}
    best_ner = np.inf

### Training 

In [None]:
for epoch in range(epochs):
    print("Epoch %s/%s :" % (epoch+1, epochs))

    losses = []
    val_loss_epoch = []
    loss_epoch = []
    
    test_lim = 3500 
    for i, (words, chars, tags, lens, wlens, wsorted) in enumerate(tqdm(loader_train)):
#     test_lim = 10
#     for i in tqdm(range(10)):
                      
        words_in = autograd.Variable(cuda(words[:,:lens.numpy()[0]]))
        chars_in = autograd.Variable(cuda(chars[:,:lens.numpy()[0]]))
        targets = autograd.Variable(cuda(tags[:,:lens.numpy()[0]]))
        
#         packed_targets = pack_padded_sequence(targets, lens.numpy(), batch_first=True) 
        
        if parameters["crf"]:        
            loss = model.neg_log_likelihood(words_in, lens, targets, chars=chars_in, wlens=wlens, wsorted=wsorted,
                                            gradient_clipping=parameters["gradient_clipping"])
        else:
            scores = model(sentences_in, lens)
            padded_scores = pad_packed_sequence(scores, batch_first=True) 
            
#             print(padded_scores[0].size())
#             print(targets.size())
            
            loss = nn.CrossEntropyLoss()(padded_scores[0].contiguous().view(-1,len(ner2idx)),
                                                          targets.contiguous().view(-1))

        loss.backward()        
        optimizer.step()
        
        losses.append(loss.cpu().data.numpy())
    
    preds_dev, ner_loss_dev = model.test(loader_dev)  
    
    eval_metrics_crf(preds_dev, metrics, idner_dev[:test_lim], sents_dev_03[:test_lim], 
                     ner2idx, idx2ner, model_path, dev=True)
    
    val_loss_epoch = ner_loss_dev
    
    preds_test, ner_loss_test = model.test(loader_test)     
    
    eval_metrics_crf(preds_test, metrics, idner_test[:test_lim], sents_test_03[:test_lim], 
                         ner2idx, idx2ner, model_path)
    
    loss_epoch = np.mean(losses)
    
    print("Loss :  NER %s" % (loss_epoch))
    print("Dev loss : NER %s" % (val_loss_epoch))
    print("Test loss : NER %s" % (ner_loss_test))
    
    torch.save(model.state_dict(), model_path + "last_state_dict")
    if ner_loss_dev < best_ner: 
        print("New best score on dev.")
        print("Saving model...")
        torch.save(model.state_dict(), model_path + "best_state_dict")
        
        best_ner = ner_loss_dev
    
    metrics["ner"]["val_loss_dev"].append(val_loss_epoch)
    metrics["ner"]["val_loss_test"].append(ner_loss_test)    
    metrics["ner"]["loss"].append(loss_epoch)    

    # Save learning curve
    save_plot(metrics, model_path)
    with open(model_path + "metrics.p", "wb") as file:
        pickle.dump(metrics, file)

print("Done")

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 1/1000 :


100%|██████████| 703/703 [01:37<00:00,  7.21it/s]
163it [00:36, 11.49it/s]


ner : p 0.940446892457, r 0.943801944488, f 0.942121431506, acc 0.9374440247653908


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42654     37     11     21      3     11     17      0      5      0      0   99.754
 1  B-LOC   1837    163   1625      7     26      2      0     10      2      2      0      0   88.459
 2  B-PER   1842    455     61   1302     12      9      1      2      0      0      0      0   70.684
 3  B-ORG   1341    377     86      8    856      1      4      9      0      0      0      0   63.833
 4  I-PER   1307    598     21     37      1    647      2      0      0      1      0      0   49.503
 5  I-ORG    751    343     24      3     79      2    288      1      8      3      0      0   38.349
 6 B-MISC    922    226     45      5     13      0      0    629      0      4      0      0   68.221
 7  I-LOC    257     43     34      0      0      2     25      0    148      5      0      0   57.588
 8 I-MISC    346    179      3      0      0      0      2     16      2 

173it [00:34, 11.47it/s]


ner : p 0.923208191126, r 0.924706289621, f 0.923956633122, acc 0.9204048670184128
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38091     54     31     39      7     23     43      3     32      0      0   99.395
 1  B-LOC   1668    157   1423      8     62      2      3     11      1      1      0      0   85.312
 2  B-PER   1617    587     43    946     35      5      0      0      0      1      0      0   58.503
 3  B-ORG   1661    458    111     27   1021      5     12     26      0      1      0      0   61.469
 4  I-PER   1156    645     26     32      5    445      1      1      1      0      0      0   38.495
 5  I-ORG    835    307     28      3    171      8    286      4      8     20      0      0   34.251
 6 B-MISC    702    253     29      2     13      0      0    400      0      5      0      0   56.980
 7  I-LOC    257     53     24      0      3      1     48      0    127      1      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 2/1000 :


100%|██████████| 703/703 [01:39<00:00,  7.09it/s]
163it [00:35, 11.51it/s]


ner : p 0.944445529662, r 0.947722283205, f 0.946081069182, acc 0.941337954129512


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42698     14     10      6      3     10     10      2      6      0      0   99.857
 1  B-LOC   1837    187   1603     12     21      0      1     10      1      2      0      0   87.262
 2  B-PER   1842    436     18   1368      8      8      1      3      0      0      0      0   74.267
 3  B-ORG   1341    471     44     26    776      2     11     11      0      0      0      0   57.867
 4  I-PER   1307    566      4     24      1    709      2      0      1      0      0      0   54.246
 5  I-ORG    751    335     16      2     33      1    354      0      6      4      0      0   47.137
 6 B-MISC    922    225     20      9      4      0      0    661      0      3      0      0   71.692
 7  I-LOC    257     43     12      0      0      0     19      0    180      3      0      0   70.039
 8 I-MISC    346    180      3      0      0      0      2     10      0 

173it [00:34, 11.28it/s]


ner : p 0.927471435668, r 0.929076786603, f 0.928273417065, acc 0.9247550339183805
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38126     27     35     12      8     35     43      4     33      0      0   99.486
 1  B-LOC   1668    176   1394     17     57      2      4     15      2      1      0      0   83.573
 2  B-PER   1617    580     20    990     17      6      3      1      0      0      0      0   61.224
 3  B-ORG   1661    523     79     40    987      1     10     21      0      0      0      0   59.422
 4  I-PER   1156    643      4     19      2    482      4      0      2      0      0      0   41.696
 5  I-ORG    835    305     12      3     86     10    399      3      7     10      0      0   47.784
 6 B-MISC    702    255     16      8      7      0      0    413      0      3      0      0   58.832
 7  I-LOC    257     54     11      0      1      3     32      0    150      6      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 3/1000 :


100%|██████████| 703/703 [01:38<00:00,  7.12it/s]
163it [00:36,  4.48it/s]


ner : p 0.94983375709, r 0.95195624902, f 0.950893818652, acc 0.9455433978427631


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42691     11     11     11      4      9     11      2      9      0      0   99.841
 1  B-LOC   1837    183   1603      6     21      2      0     13      5      4      0      0   87.262
 2  B-PER   1842    436     22   1364     10      3      0      5      2      0      0      0   74.050
 3  B-ORG   1341    406     42     15    857      2      8     11      0      0      0      0   63.908
 4  I-PER   1307    501      1     17      1    780      3      0      2      2      0      0   59.679
 5  I-ORG    751    283     15      2     38      6    393      0      7      7      0      0   52.330
 6 B-MISC    922    194     18      7      4      0      0    695      0      4      0      0   75.380
 7  I-LOC    257     34      9      0      0      4     17      0    182     11      0      0   70.817
 8 I-MISC    346    135      3      0      0      0      1     12      0 

173it [00:35, 11.42it/s]


ner : p 0.931713013017, r 0.932257296783, f 0.931985075434, acc 0.9279207494346936
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38117     19     22     29     12     23     42      7     52      0      0   99.462
 1  B-LOC   1668    172   1389      9     69      1      3     22      0      3      0      0   83.273
 2  B-PER   1617    589     15    979     24      7      0      2      0      1      0      0   60.544
 3  B-ORG   1661    457     93     30   1033      0     15     32      0      1      0      0   62.191
 4  I-PER   1156    569      3     23      3    550      6      0      1      1      0      0   47.578
 5  I-ORG    835    268     10      3     98      9    414      2     13     18      0      0   49.581
 6 B-MISC    702    225     12      5     10      0      0    446      0      4      0      0   63.533
 7  I-LOC    257     51      4      0      0      3     35      0    160      4      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 4/1000 :


100%|██████████| 703/703 [01:37<00:00,  7.22it/s]
163it [00:36,  4.53it/s]


ner : p 0.93494409258, r 0.937509800847, f 0.936225188897, acc 0.931194268135976


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42719      3      5      3      3      8      5      8      5      0      0   99.906
 1  B-LOC   1837    370   1368     42     23      1      1     21      7      4      0      0   74.469
 2  B-PER   1842    593      6   1229      8      5      0      1      0      0      0      0   66.721
 3  B-ORG   1341    582      8     29    707      1      5      9      0      0      0      0   52.722
 4  I-PER   1307    590      0      7      1    707      1      0      0      1      0      0   54.093
 5  I-ORG    751    360      6      3     10      3    358      0      5      6      0      0   47.670
 6 B-MISC    922    331      1      9      2      0      0    573      0      6      0      0   62.148
 7  I-LOC    257     59      2      0      0      2     11      0    167     16      0      0   64.981
 8 I-MISC    346    169      1      1      0      0      1      6      0 

173it [00:34, 11.32it/s]


ner : p 0.919106367349, r 0.920379064887, f 0.919742275843, acc 0.9160977710778507
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38177     12     12      6      5     30     29      7     45      0      0   99.619
 1  B-LOC   1668    346   1204     29     56      1      4     23      4      1      0      0   72.182
 2  B-PER   1617    710      4    879     13      7      1      2      0      1      0      0   54.360
 3  B-ORG   1661    649     30     47    909      1      9     14      1      1      0      0   54.726
 4  I-PER   1156    639      0     15      0    498      3      0      0      1      0      0   43.080
 5  I-ORG    835    378      7      6     42     10    373      1     11      7      0      0   44.671
 6 B-MISC    702    329      3      4      3      0      1    356      0      6      0      0   50.712
 7  I-LOC    257     78      1      0      0      3     28      0    143      4      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 5/1000 :


100%|██████████| 703/703 [01:39<00:00,  7.04it/s]
163it [00:35, 11.56it/s]


ner : p 0.955064752142, r 0.956974282578, f 0.956018563847, acc 0.9505276274288385


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42656     17     12     20      4     12     24      6      8      0      0   99.759
 1  B-LOC   1837    153   1619     10     31      2      2     14      4      2      0      0   88.133
 2  B-PER   1842    404     15   1399     17      4      0      3      0      0      0      0   75.950
 3  B-ORG   1341    301     36     32    945      0     11     16      0      0      0      0   70.470
 4  I-PER   1307    451      2     22      1    824      5      0      1      1      0      0   63.045
 5  I-ORG    751    226     11      2     32      3    460      1      5     11      0      0   61.252
 6 B-MISC    922    152      8      9     10      0      0    740      0      3      0      0   80.260
 7  I-LOC    257     30      8      0      0      2     31      0    178      8      0      0   69.261
 8 I-MISC    346    113      1      1      0      2      4     14      0 

173it [00:35, 11.60it/s]


ner : p 0.936359311478, r 0.936865791125, f 0.936612482831, acc 0.9325078066113923
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38083     27     24     41      3     41     51      9     44      0      0   99.374
 1  B-LOC   1668    139   1398     18     79      2      3     26      1      2      0      0   83.813
 2  B-PER   1617    556     13    999     39      6      1      1      0      2      0      0   61.781
 3  B-ORG   1661    404     71     34   1106      3     13     30      0      0      0      0   66.586
 4  I-PER   1156    536      2     17      4    587      7      1      1      1      0      0   50.779
 5  I-ORG    835    215     10      4     81      6    492      3      9     15      0      0   58.922
 6 B-MISC    702    182      8      7     14      0      2    484      0      5      0      0   68.946
 7  I-LOC    257     41      7      0      1      2     44      0    152     10      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 6/1000 :


100%|██████████| 703/703 [01:37<00:00,  7.24it/s]
163it [00:34,  4.73it/s]


ner : p 0.954450292512, r 0.956190214835, f 0.955319461444, acc 0.9497488415560141


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42644     29      8      9      5     17     17     14     16      0      0   99.731
 1  B-LOC   1837    150   1634     15     17      1      1     10      5      4      0      0   88.949
 2  B-PER   1842    422     24   1380      7      5      0      3      1      0      0      0   74.919
 3  B-ORG   1341    341     56     40    875      1     12     15      0      1      0      0   65.250
 4  I-PER   1307    392      3     16      0    887      3      0      3      3      0      0   67.865
 5  I-ORG    751    211     11      4     23      5    468      0     10     19      0      0   62.317
 6 B-MISC    922    176     18     10      6      0      1    707      0      4      0      0   76.681
 7  I-LOC    257     27     11      0      0      6     23      0    186      4      0      0   72.374
 8 I-MISC    346    121      3      0      0      2      7     11      0 

173it [00:33,  5.15it/s]


ner : p 0.936646983756, r 0.936930699496, f 0.936788820145, acc 0.9325724130505006
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38084     38     22     19     11     32     41     33     43      0      0   99.376
 1  B-LOC   1668    138   1413     20     64      0      4     24      3      2      0      0   84.712
 2  B-PER   1617    554     24   1000     21      9      3      3      1      2      0      0   61.843
 3  B-ORG   1661    419    124     43   1023      1     21     30      0      0      0      0   61.589
 4  I-PER   1156    465      3     21      1    651      9      0      3      3      0      0   56.315
 5  I-ORG    835    185     11      3     71     15    499      3     25     23      0      0   59.760
 6 B-MISC    702    195     17      4      7      0      1    470      0      8      0      0   66.952
 7  I-LOC    257     39      7      0      2      5     35      0    164      5      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 7/1000 :


100%|██████████| 703/703 [01:34<00:00,  7.46it/s]
163it [00:35, 10.94it/s]


ner : p 0.95962009204, r 0.960541790811, f 0.960080720212, acc 0.9540711031501888


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42576     25     14     15      5     46     40     10     28      0      0   99.572
 1  B-LOC   1837    131   1636     15     23      4      5     16      4      3      0      0   89.058
 2  B-PER   1842    349     24   1435     21      5      2      5      0      1      0      0   77.904
 3  B-ORG   1341    255     63     40    927      1     24     29      0      2      0      0   69.128
 4  I-PER   1307    359      1     10      1    927      7      0      0      2      0      0   70.926
 5  I-ORG    751    126      8      6     15      4    558      1      8     25      0      0   74.301
 6 B-MISC    922    130     13      8     13      0      1    753      0      4      0      0   81.670
 7  I-LOC    257     21      5      0      0      5     29      0    191      6      0      0   74.319
 8 I-MISC    346     90      2      1      1      2      7     16      1 

173it [00:35, 11.38it/s]


ner : p 0.941203212608, r 0.940673748891, f 0.940938406268, acc 0.9362980510390869
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  37999     39     31     23     11     71     60     29     60      0      0   99.155
 1  B-LOC   1668    127   1403     17     71      2      4     39      2      3      0      0   84.113
 2  B-PER   1617    473     30   1063     33     11      3      2      0      2      0      0   65.739
 3  B-ORG   1661    361    108     36   1073      3     26     51      2      1      0      0   64.600
 4  I-PER   1156    431      3     20      2    690      8      0      0      2      0      0   59.689
 5  I-ORG    835    128      8      1     55     11    587      2     15     28      0      0   70.299
 6 B-MISC    702    155     19      2     11      0      5    504      0      6      0      0   71.795
 7  I-LOC    257     29      9      0      0      4     45      0    158     12      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 8/1000 :


100%|██████████| 703/703 [01:37<00:00,  7.20it/s]
163it [00:35, 11.38it/s]


ner : p 0.956207666105, r 0.957444723224, f 0.956825794825, acc 0.950994898952533


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42661     16      8     18      3     14     19      9     11      0      0   99.771
 1  B-LOC   1837    175   1580     16     35      2      1     21      3      4      0      0   86.010
 2  B-PER   1842    463     13   1320     38      3      0      4      0      1      0      0   71.661
 3  B-ORG   1341    300     19     18    968      0     13     22      0      1      0      0   72.185
 4  I-PER   1307    383      0     11      3    905      2      0      0      3      0      0   69.243
 5  I-ORG    751    191     13      3     25      3    490      1      5     20      0      0   65.246
 6 B-MISC    922    161      5      6     11      0      0    734      0      5      0      0   79.610
 7  I-LOC    257     33      5      0      0      2     18      0    187     12      0      0   72.763
 8 I-MISC    346     97      2      0      1      2      6     15      0 

173it [00:34, 11.64it/s]


ner : p 0.936734605565, r 0.936714338259, f 0.936724471802, acc 0.9323570582534726
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38082     17     16     41      4     48     54     14     47      0      0   99.371
 1  B-LOC   1668    176   1345     12     80      1      6     47      0      1      0      0   80.635
 2  B-PER   1617    586     10    946     63      9      1      0      0      2      0      0   58.503
 3  B-ORG   1661    409     52     27   1119      0     17     37      0      0      0      0   67.369
 4  I-PER   1156    459      1      9      3    669     12      0      1      2      0      0   57.872
 5  I-ORG    835    197     11      1     86      5    495      5     14     21      0      0   59.281
 6 B-MISC    702    193      9      1     12      0      1    479      0      7      0      0   68.234
 7  I-LOC    257     45      7      0      1      3     34      0    159      8      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 9/1000 :


100%|██████████| 703/703 [01:39<00:00,  7.09it/s]
163it [00:35, 11.35it/s]


ner : p 0.950475854776, r 0.951407401599, f 0.950941400051, acc 0.9449982477317862


0it [00:00, ?it/s]

ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  42759  42655     13      8     11      3     14     26     13     16      0      0   99.757
 1  B-LOC   1837    212   1550     17     27      0      2     18      5      6      0      0   84.377
 2  B-PER   1842    538      9   1273     17      2      0      1      0      2      0      0   69.110
 3  B-ORG   1341    388     14     13    894      0      9     23      0      0      0      0   66.667
 4  I-PER   1307    516      0     12      1    766      3      0      3      6      0      0   58.607
 5  I-ORG    751    228      5      3     16      1    462      2      7     27      0      0   61.518
 6 B-MISC    922    160      5      6      7      0      0    740      0      4      0      0   80.260
 7  I-LOC    257     27      2      0      0      5     16      0    197     10      0      0   76.654
 8 I-MISC    346     97      1      1      0      1      5     15      1 

173it [00:34, 11.49it/s]


ner : p 0.932415167067, r 0.931608213073, f 0.932011515401, acc 0.9272746850436093
ID     NE  Total      O  B-LOC  B-PER  B-ORG  I-PER  I-ORG B-MISC  I-LOC I-MISC<START> <STOP>  Percent
 0      O  38323  38109     22     18     14      4     37     54     17     48      0      0   99.442
 1  B-LOC   1668    228   1317     16     62      2      3     35      2      3      0      0   78.957
 2  B-PER   1617    664      7    910     26      6      0      1      0      3      0      0   56.277
 3  B-ORG   1661    509     44     27   1017      0     21     43      0      0      0      0   61.228
 4  I-PER   1156    546      0     12      1    582      9      0      2      4      0      0   50.346
 5  I-ORG    835    241      7      2     46      7    468      4     21     39      0      0   56.048
 6 B-MISC    702    186      7      2      6      0      1    485      0     15      0      0   69.088
 7  I-LOC    257     45      4      0      0      1     30      0    170      7      0      0

  0%|          | 0/703 [00:00<?, ?it/s]

Epoch 10/1000 :


 60%|██████    | 424/703 [01:14<00:26, 10.42it/s]