In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

## Parameters

In [2]:
class Config:
    # Embeddings
    word_embedding_dim = 128
    char_embedding_dim = 128
    pos_embedding_dim = 50
    
    # RNN
    hidden_size_word = 128
    hidden_size_char = 128
    l2_reg_lambda = 1e-5
    
    # Training parameters
    batch_size = 20
    num_epochs = 10
    display_every = 500
    evaluate_every = 1000
    num_checkpoints = 5
    
    learning_rate = 1.0 
    decay_rate = 0.9
    
    # Testing parameters
    checkpoint_dir = ''
    
    UNK = "$UNK$"
    NUM = "$NUM$"
    NONE = "O"
    PAD = '$PAD$'

## Dataset 

ACE2005 dataset

In [3]:
import nltk
import re
import os

class Dataset:
    def __init__(self):
        self.all_tags, self.all_words, self.all_chars = [], [], []
        self.dir_path = './data/CoNLL2005'
        
    def processing_word(self, word):
        word = word.lower()
        if word.isdigit():
            word = Config.NUM
        return word
        
    def load_dataset(self, name):
        pred_idx_col, words_col, tags_col = [], [], []
        
        path = os.path.join(self.dir_path, 'conll05.{}.txt'.format(name))
        
        with open(path) as f:
            for line in f:
                line = line.strip()
                line = line.split('|||')
                
                words = line[0].split()
                tags = line[1].split()
                
                pred_idx = int(words[0])
                words = words[1:]
                
                for word in words:
                    self.all_words.append(word)
                    self.all_chars.extend(list(word))
                    
                for tag in tags:
                    self.all_tags.append(tag)
                
                tags_col.append(' '.join(tags))
                words_col.append(' '.join(words))
                pred_idx_col.append(pred_idx)
                
        return pd.DataFrame({'pred_idx': pred_idx_col, 'words': words_col, 'tags': tags_col})
        
    def load_datasets(self):
        train_df = self.load_dataset('train')
        dev_df =  self.load_dataset('devel')
        test_wsj_df = self.load_dataset('test.wsj')
        
        return train_df, dev_df, test_wsj_df

dataset = Dataset()
train_df, dev_df, test_wsj_df = dataset.load_datasets()

print('[Data Size]')
print('train : ', len(train_df))
print('dev : ', len(dev_df))
print('test.wsj : ', len(test_wsj_df))

train_df.head(10)

[Data Size]
train :  90750
dev :  3248
test.wsj :  5267


Unnamed: 0,pred_idx,tags,words
0,17,O O O O O O O O O O O O O O O O O B-V B-A1 O O...,In an Oct. 19 review of `` The Misanthrope '' ...
1,19,O O O O O O O O O O O O O O O O O B-A0 I-A0 B-...,In an Oct. 19 review of `` The Misanthrope '' ...
2,37,O O O O O O O O O O O O O O O O O O O O O O O ...,In an Oct. 19 review of `` The Misanthrope '' ...
3,44,B-AM-LOC I-AM-LOC I-AM-LOC I-AM-LOC I-AM-LOC I...,In an Oct. 19 review of `` The Misanthrope '' ...
4,2,B-A0 I-A0 B-V B-A1 O,Ms. Haag plays Elianti .
5,4,B-A0 I-A0 I-A0 I-A0 B-V B-A1 I-A1 I-A1 I-A1 I-...,Rolls-Royce Motor Cars Inc. said it expects it...
6,6,O O O O O B-A0 B-V B-A1 I-A1 I-A1 I-A1 I-A1 I-...,Rolls-Royce Motor Cars Inc. said it expects it...
7,11,O O O O O O O B-A1 I-A1 I-A1 O B-V B-A3 B-AM-M...,Rolls-Royce Motor Cars Inc. said it expects it...
8,6,B-A0 I-A0 I-A0 I-A0 B-AM-TMP I-AM-TMP B-V B-A1...,"The luxury auto maker last year sold 1,214 car..."
9,9,B-A0 I-A0 I-A0 I-A0 I-A0 I-A0 I-A0 I-A0 I-A0 B...,"Howard Mosher , president and chief executive ..."


In [4]:
word_list = list(set(dataset.all_words)) + [Config.PAD, Config.UNK]
word2idx = {w: i for i, w in enumerate(word_list)}
idx2word = {i: w for i, w in enumerate(word_list)}

tag_list = list(set(dataset.all_tags))
tag2idx = {w: i for i, w in enumerate(tag_list)}
idx2tag = {i: w for i, w in enumerate(tag_list)}

char_list = list(set(dataset.all_chars)) + [Config.PAD, Config.UNK]
char2idx = {w: i for i, w in enumerate(char_list)}
idx2char = {i: w for i, w in enumerate(char_list)}

## Model

In [5]:
class Model:
    def __init__(self, 
               num_classes, 
               vocab_size, 
               char_size,
               word_embedding_dim, 
               char_embedding_dim,
               pos_embedding_dim,
               hidden_size_word,
               hidden_size_char):
        
        self.word_ids = tf.placeholder(tf.int32, shape=[None, None], name='word_ids') 
        self.pos = tf.placeholder(tf.int32, shape=[None, None], name='pos')
        self.sequence_lengths = tf.placeholder(tf.int32, shape=[None], name="sequence_lengths")
        
        self.labels = tf.placeholder(tf.int32, shape=[None, None], name='labels')
        
        self.char_ids = tf.placeholder(tf.int32, shape=[None, None, None], name='char_ids') # [batch_size, max_sequence_length, max_word_length]
        self.word_lengths = tf.placeholder(tf.int32, shape=[None, None], name="word_lengths") # [batch_size, max_sequence_length]
        
        self.dropout = tf.placeholder(dtype=tf.float32, shape=[],name="dropout")
        
        initializer = tf.contrib.layers.xavier_initializer()
        
        # Word Embedding layer
        with tf.device('/cpu:0'), tf.variable_scope('word-embedding'):
            self._word_embeddings = tf.Variable(tf.random_uniform([vocab_size, word_embedding_dim], -0.25, 0.25), name='_word_embeddings')
            self.word_embeddings = tf.nn.embedding_lookup(self._word_embeddings, self.word_ids) # [batch_size, max_sequence_length, word_embedding_dim]
        
        with tf.variable_scope('position-embedding'):
            self._pos_embeddings = tf.get_variable('_pos_embeddings', [vocab_size, pos_embedding_dim], initializer=tf.keras.initializers.glorot_normal())
            self.pos_embeddings = tf.nn.embedding_lookup(self._pos_embeddings, self.pos)
            
        # Char Embedding Layer
        with tf.variable_scope('char-embedding'):
            self._char_embeddings = tf.get_variable(dtype=tf.float32, shape=[char_size, char_embedding_dim], name='_char_embeddings')
            
            # [batch_size, max_sequence_length, max_word_length, char_embedding_dim]
            self.char_embeddings = tf.nn.embedding_lookup(self._char_embeddings, self.char_ids) 
            
            s = tf.shape(self.char_embeddings)
            
            # [batch_size*max_sequence_length, max_word_length, char_embedding_dim]
            char_embeddings = tf.reshape(self.char_embeddings, shape=[s[0]*s[1], s[2], char_embedding_dim])
            word_lengths = tf.reshape(self.word_lengths, shape=[-1])
            
            fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size_char, state_is_tuple=True)
            bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size_char, state_is_tuple=True)
            
            _, ((_, output_fw), (_, output_bw)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell, 
                                                                                   cell_bw=bw_cell, 
                                                                                   inputs=char_embeddings,
                                                                                   sequence_length=word_lengths,
                                                                                   dtype=tf.float32)
            # shape: [batch_size*max_sequnce_length, 2*hidden_size_char]
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.reshape(output, shape=[s[0], s[1], 2*hidden_size_char])
            
            # shape: # [batch_size, max_sequence_length, word_embedding_dim + 2*hidden_size_char + pos_embedding_dim]
            self.word_embeddings = tf.concat([self.word_embeddings, output, self.pos_embeddings], axis=-1) 
            # self.word_embeddings = tf.nn.dropout(self.word_embeddings, self.dropout)
            
        # Bidirectional LSTM
        with tf.variable_scope("bi-lstm"):
            fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size_word)
            bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size_word)
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell,
                                                                  cell_bw=bw_cell,
                                                                  inputs=self.word_embeddings,
                                                                  sequence_length= self.sequence_lengths, # [batch_size],
                                                                  dtype=tf.float32)
            
            self.rnn_outputs = tf.concat([output_fw, output_bw], axis=-1)  # [batch_size, max_sequence_length, 2*hidden_size_word]
            self.rnn_outputs = tf.nn.dropout(self.rnn_outputs, self.dropout)
        
        
        # Fully connected layer
        with tf.variable_scope('output'):
            self.W_output = tf.get_variable('W_output', shape=[2*hidden_size_word, num_classes],  dtype=tf.float32)
            self.b_output = tf.get_variable('b_output', shape=[num_classes], dtype=tf.float32, initializer=tf.zeros_initializer())
            
            nsteps = tf.shape(self.rnn_outputs)[1]
            rnn_outputs_flat = tf.reshape(self.rnn_outputs, [-1, 2*hidden_size_word])
            pred = tf.matmul(rnn_outputs_flat, self.W_output) + self.b_output
            
            self.logits = tf.reshape(pred, [-1, nsteps, num_classes]) # [batch_size, max_sequence_length, num_classes]
    
        # Calculate mean corss-entropy loss
        with tf.variable_scope('loss'):
            log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(self.logits, self.labels, self.sequence_lengths)
            self.trans_params = trans_params  # need to evaluate it for decoding
            
            self.l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            self.loss = tf.reduce_mean(-log_likelihood) + Config.l2_reg_lambda * self.l2
            
#             When CRF is not in use
#             self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
#             mask = tf.sequence_mask(self.sequence_lengths)
#             losses = tf.boolean_mask(self.losses, mask)
#             self.loss = tf.reduce_mean(losses) 
        
    # Length of the sequence data
    @staticmethod
    def _length(seq):
        relevant = tf.sign(tf.abs(seq))
        length = tf.reduce_sum(relevant, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length
    
    @staticmethod
    def viterbi_decode(logits, trans_params):
        # get tag scores and transition params of CRF
        viterbi_sequences = []

        # iterate over the sentences because no batching in vitervi_decode
        for logit, sequence_length in zip(logits, sequence_lengths):
            logit = logit[:sequence_length]  # keep only the valid steps
            viterbi_seq, viterbi_score = tf.contrib.crf.viterbi_decode(
                logit, trans_params)
            viterbi_sequences += [viterbi_seq]

        return np.array(viterbi_sequences)

## Function for train

In [6]:
# Pre-trained glove
def load_glove(word_embedding_dim, word2idx):
    download_path = tf.keras.utils.get_file(
      fname="glove.6B.zip", 
      origin="http://nlp.stanford.edu/data/glove.6B.zip", 
      extract=True)
    
    embedding_path = os.path.join(os.path.dirname(download_path), 'glove.6B.300d.txt')
    print('embedding_path :', embedding_path)

    # initial matrix with random uniform
    initW = np.random.randn(len(word2idx), word_embedding_dim).astype(np.float32) / np.sqrt(len(word2idx))
    # load any vectors from the glove
    print("Load glove file {0}".format(embedding_path))
    f = open(embedding_path, 'r', encoding='utf8')
    for line in f:
        splitLine = line.split(' ')
        word = splitLine[0]
        embedding = np.asarray(splitLine[1:], dtype='float32')
        if word in word2idx:
            initW[word2idx[word]] = embedding
    return initW

def batch_iter(df, batch_size, num_epochs, shuffle=True, tqdm_disable=False):
    """
    Generates a batch iterator for a dataset.
    """
    data_size = len(df)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in tqdm(range(num_epochs), disable=tqdm_disable):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_df= df.iloc[shuffle_indices]
        else:
            shuffled_df = df
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_df.iloc[start_index:end_index]    
            

def get_feed_dict(batch_df):
    max_length = max(map(lambda x : len(x.split(' ')), batch_df['words'].tolist()))
    
    max_length_word = 0
    for seq in batch_df['words'].tolist():
        for word in seq.split(' '):
            max_length_word = max(max_length_word, len(word))
    
    word_ids, sequence_lengths, labels, char_ids, word_lengths, pos = [], [], [], [], [], []
    for index, row in batch_df.iterrows():
        sentence = row['words'].split(' ')
        tags = row['tags'].split(' ')
        pred_idx = row['pred_idx']

        word_ids_row, labels_row, char_ids_row, word_lengths_row, pos_row = [], [], [], [], []
        for word in sentence:
            word_ids_row.append(word2idx[word])
        
            char_ids_row.append([char2idx[char] for char in word] + [char2idx[Config.PAD]]* (max_length_word - len(word)) )
            word_lengths_row.append(len(word))
            
        
        empty_char_ids = [char2idx[Config.PAD]]* max_length_word
        char_ids_row += [empty_char_ids] * (max_length - len(char_ids_row))
        word_lengths_row += [0] * (max_length - len(word_lengths_row))
        
        for tag in tags:
            labels_row.append(tag2idx[tag])

        if len(sentence) < max_length:
            word_ids_row += [word2idx[Config.PAD]]* (max_length - len(sentence))
            labels_row += [tag2idx[Config.NONE]]* (max_length - len(sentence))
            
        for i in range(max_length):
            pos_row.append(i - pred_idx)

        word_ids.append(word_ids_row)
        labels.append(labels_row)
        sequence_lengths.append(len(sentence))
        char_ids.append(char_ids_row)
        word_lengths.append(word_lengths_row)
        pos.append(pos_row)
    
    word_ids = np.array(word_ids)
    labels = np.array(labels)
    sequence_lengths = np.array(sequence_lengths)
    char_ids = np.array(char_ids)
    word_lengths = np.array(word_lengths)
    pos = np.array(pos)
    
    return word_ids, labels, sequence_lengths, char_ids, word_lengths, pos

def evaluation(y, preds, lengths):
    from sklearn.metrics import classification_report
    arg_answers, arg_preds = [], []
    
    accs = []
    correct_preds, total_correct, total_preds = 0.0, 0.0, 0.0
    for i in range(len(y)):
        sent_answers,sent_preds = [], []
        sent_answer_chunks, sent_pred_chunks = [], []
        
        for j in range(lengths[i]):
            sent_answers.append(idx2tag[y[i][j]])
            sent_preds.append(idx2tag[preds[i][j]])
            
            if idx2tag[y[i][j]] != Config.NONE:
                sent_answer_chunks.append(idx2tag[y[i][j]] + '-' + str(j))
            if idx2tag[preds[i][j]] != Config.NONE:
                sent_pred_chunks.append(idx2tag[preds[i][j]] + '-' + str(j))
    
        arg_answers.extend(sent_answers)
        arg_preds.extend(sent_preds)
        
        accs += [a == b for (a, b) in zip(sent_answers, sent_preds)]
        
        sent_answer_chunks = set(sent_answer_chunks)
        sent_pred_chunks = set(sent_pred_chunks)

        correct_preds += len(sent_answer_chunks & sent_pred_chunks)
        total_preds += len(sent_pred_chunks)
        total_correct += len(sent_answer_chunks)
    
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
        
    print(classification_report(arg_answers, arg_preds))
    
    print('Tag based evaluation: acc: {}, f1: {}'.format(acc, f1))

## Train

In [7]:
import datetime
import time

from sklearn.metrics import f1_score
import warnings
import sklearn.exceptions
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

tf.reset_default_graph()
sess = tf.Session()
with sess.as_default():
    model = Model(
        num_classes=len(tag_list),
        vocab_size=len(word_list),
        char_size=len(char_list),
        word_embedding_dim=Config.word_embedding_dim,
        char_embedding_dim=Config.char_embedding_dim,
        pos_embedding_dim=Config.pos_embedding_dim,
        hidden_size_word=Config.hidden_size_word,
        hidden_size_char=Config.hidden_size_char
    )
    
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # train_op = tf.train.AdamOptimizer(Config.learning_rate).minimize(model.loss, global_step=global_step)
    
    optimizer = tf.train.AdadeltaOptimizer(Config.learning_rate, Config.decay_rate, 1e-6)
    gvs = optimizer.compute_gradients(model.loss)
    capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var) for grad, var in gvs]
    train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)
    
    # Output directory for models and summary
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "34.runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    sess.run(tf.global_variables_initializer())

#     pretrain_W = load_glove(Config.word_embedding_dim, word2idx)
#     sess.run(model._word_embeddings.assign(pretrain_W))
#     print("Success to load pre-trained glove model!\n")
    
    # Generate batches
    batches = batch_iter(train_df, Config.batch_size, Config.num_epochs)
    for batch_df in batches:
        word_ids, labels, sequence_lengths, char_ids, word_lengths, pos = get_feed_dict(batch_df)
        feed_dict = {
            model.word_ids: word_ids,
            model.labels: labels,
            model.sequence_lengths: sequence_lengths,
            model.char_ids: char_ids,
            model.word_lengths: word_lengths,
            model.dropout: 0.5,
            model.pos: pos
        }
        _, step, loss, logits, trans_params = sess.run([
            train_op, global_step, model.loss, model.logits, model.trans_params], feed_dict)
        
        predictions = model.viterbi_decode(logits, trans_params)
        
        # Training log display
        if step % Config.display_every == 0:
            time_str = datetime.datetime.now().isoformat()
            print("Train Evaluation {}: step {}, loss {:g}".format(time_str, step, loss))
#             evaluation(labels, predictions, sequence_lengths)
            
            
        # Evaluation
        if step % Config.evaluate_every == 0:
            batches = batch_iter(dev_df, Config.batch_size, 1, tqdm_disable=True)
            
            total_loss, predictions_all, labels_all, sequence_lengths_all  = 0, [], [], []
            for batch_df in batches:
                word_ids, labels, sequence_lengths, char_ids, word_lengths, pos = get_feed_dict(batch_df)
                feed_dict = {
                    model.word_ids: word_ids,
                    model.labels: labels,
                    model.sequence_lengths: sequence_lengths,
                    model.char_ids: char_ids,
                    model.word_lengths: word_lengths,
                    model.dropout: 1.0,
                    model.pos: pos
                }
                loss, logits, trans_params = sess.run([model.loss, model.logits, model.trans_params], feed_dict)
                predictions = model.viterbi_decode(logits, trans_params)
                
                total_loss += loss
                predictions_all += predictions.tolist()
                labels_all += labels.tolist()
                sequence_lengths_all += sequence_lengths.tolist()
        
            time_str = datetime.datetime.now().isoformat()
            print("\nDev Evaluation\n{}: loss {:g}\n".format(time_str, total_loss/len(predictions_all)))
            evaluation(labels_all, predictions_all, sequence_lengths_all)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Writing to /home/seungwon/project/tf-notes/34.runs/1559299841



  0%|          | 0/10 [00:00<?, ?it/s]

Train Evaluation 2019-05-31T19:51:43.112539: step 500, loss 10.9968
Train Evaluation 2019-05-31T19:52:41.776053: step 1000, loss 6.32168

Dev Evaluation
2019-05-31T19:52:52.744927: loss 0.430784

              precision    recall  f1-score   support

        B-A0       0.78      0.66      0.72      2081
        B-A1       0.74      0.77      0.75      2994
        B-A2       0.45      0.22      0.29       673
        B-A3       1.00      0.03      0.05       114
        B-A4       0.69      0.14      0.23        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.55      0.15      0.24       279
    B-AM-CAU       0.00      0.00      0.00        45
    B-AM-DIR       0.00      0.00      0.00        36
    B-AM-DIS       0.69      0.43      0.53       202
    B-AM-EXT       1.00      0.04      0.07        28
    B-AM-LOC       0.50      0.34      0.40       194
    B-AM-MNR       0.55      0.19      0.29    

Train Evaluation 2019-05-31T19:58:16.412778: step 3500, loss 4.37075
Train Evaluation 2019-05-31T19:59:16.865511: step 4000, loss 4.39849

Dev Evaluation
2019-05-31T19:59:27.856915: loss 0.248154

              precision    recall  f1-score   support

        B-A0       0.81      0.80      0.80      2081
        B-A1       0.82      0.85      0.84      2994
        B-A2       0.66      0.37      0.48       673
        B-A3       0.72      0.18      0.29       114
        B-A4       0.69      0.52      0.60        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.51      0.48      0.49       279
    B-AM-CAU       0.74      0.44      0.56        45
    B-AM-DIR       0.00      0.00      0.00        36
    B-AM-DIS       0.78      0.59      0.68       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.56      0.47      0.51       194
    B-AM-MNR       0.51      0.38      0.43   

 10%|█         | 1/10 [09:50<1:28:30, 590.09s/it]

Train Evaluation 2019-05-31T20:01:28.238145: step 5000, loss 6.63184

Dev Evaluation
2019-05-31T20:01:39.225756: loss 0.236946

              precision    recall  f1-score   support

        B-A0       0.86      0.79      0.82      2081
        B-A1       0.81      0.87      0.84      2994
        B-A2       0.59      0.47      0.53       673
        B-A3       0.66      0.25      0.37       114
        B-A4       0.49      0.60      0.54        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.62      0.41      0.50       279
    B-AM-CAU       0.81      0.47      0.59        45
    B-AM-DIR       0.45      0.14      0.21        36
    B-AM-DIS       0.75      0.64      0.69       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.52      0.52      0.52       194
    B-AM-MNR       0.59      0.38      0.46       242
    B-AM-MOD       0.97      0.89      0.93       317
    B-A

Train Evaluation 2019-05-31T20:07:03.591718: step 7500, loss 6.32982
Train Evaluation 2019-05-31T20:08:04.055783: step 8000, loss 3.40295

Dev Evaluation
2019-05-31T20:08:15.035197: loss 0.217106

              precision    recall  f1-score   support

        B-A0       0.87      0.82      0.84      2081
        B-A1       0.85      0.86      0.85      2994
        B-A2       0.68      0.51      0.58       673
        B-A3       0.73      0.32      0.45       114
        B-A4       0.89      0.38      0.54        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.47      0.54      0.51       279
    B-AM-CAU       0.91      0.47      0.62        45
    B-AM-DIR       0.44      0.11      0.18        36
    B-AM-DIS       0.76      0.69      0.73       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.60      0.46      0.52       194
    B-AM-MNR       0.48      0.48      0.48   

 20%|██        | 2/10 [19:54<1:19:14, 594.33s/it]

Train Evaluation 2019-05-31T20:11:27.023517: step 9500, loss 3.79931
Train Evaluation 2019-05-31T20:12:27.545326: step 10000, loss 3.99216

Dev Evaluation
2019-05-31T20:12:38.527297: loss 0.211695

              precision    recall  f1-score   support

        B-A0       0.89      0.82      0.85      2081
        B-A1       0.85      0.88      0.87      2994
        B-A2       0.68      0.51      0.58       673
        B-A3       0.68      0.41      0.51       114
        B-A4       0.69      0.62      0.65        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.53      0.52      0.52       279
    B-AM-CAU       0.93      0.58      0.71        45
    B-AM-DIR       0.57      0.22      0.32        36
    B-AM-DIS       0.81      0.70      0.75       202
    B-AM-EXT       0.90      0.32      0.47        28
    B-AM-LOC       0.61      0.53      0.57       194
    B-AM-MNR       0.61      0.41      0.49  

Train Evaluation 2019-05-31T20:18:02.676335: step 12500, loss 2.86902
Train Evaluation 2019-05-31T20:19:02.883634: step 13000, loss 3.89338

Dev Evaluation
2019-05-31T20:19:13.937601: loss 0.201354

              precision    recall  f1-score   support

        B-A0       0.87      0.84      0.86      2081
        B-A1       0.86      0.89      0.88      2994
        B-A2       0.69      0.59      0.64       673
        B-A3       0.75      0.35      0.48       114
        B-A4       0.70      0.57      0.63        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.61      0.48      0.54       279
    B-AM-CAU       0.89      0.56      0.68        45
    B-AM-DIR       0.43      0.08      0.14        36
    B-AM-DIS       0.78      0.72      0.75       202
    B-AM-EXT       0.90      0.32      0.47        28
    B-AM-LOC       0.64      0.53      0.58       194
    B-AM-MNR       0.59      0.43      0.50 

 30%|███       | 3/10 [29:47<1:09:17, 593.91s/it]

Train Evaluation 2019-05-31T20:21:15.773069: step 14000, loss 3.25524

Dev Evaluation
2019-05-31T20:21:26.770995: loss 0.20529

              precision    recall  f1-score   support

        B-A0       0.89      0.83      0.86      2081
        B-A1       0.88      0.88      0.88      2994
        B-A2       0.66      0.63      0.64       673
        B-A3       0.73      0.41      0.53       114
        B-A4       0.65      0.68      0.66        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.59      0.49      0.54       279
    B-AM-CAU       0.83      0.33      0.48        45
    B-AM-DIR       0.71      0.14      0.23        36
    B-AM-DIS       0.76      0.76      0.76       202
    B-AM-EXT       0.90      0.32      0.47        28
    B-AM-LOC       0.59      0.58      0.59       194
    B-AM-MNR       0.63      0.50      0.56       242
    B-AM-MOD       0.96      0.95      0.96       317
    B-A

Train Evaluation 2019-05-31T20:26:50.571365: step 16500, loss 2.98406
Train Evaluation 2019-05-31T20:27:52.069656: step 17000, loss 2.50031

Dev Evaluation
2019-05-31T20:28:03.087110: loss 0.200879

              precision    recall  f1-score   support

        B-A0       0.88      0.84      0.86      2081
        B-A1       0.88      0.88      0.88      2994
        B-A2       0.65      0.67      0.66       673
        B-A3       0.67      0.46      0.54       114
        B-A4       0.78      0.65      0.71        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.68      0.47      0.55       279
    B-AM-CAU       0.94      0.38      0.54        45
    B-AM-DIR       0.47      0.22      0.30        36
    B-AM-DIS       0.79      0.73      0.76       202
    B-AM-EXT       0.89      0.29      0.43        28
    B-AM-LOC       0.64      0.54      0.58       194
    B-AM-MNR       0.58      0.54      0.56 

 40%|████      | 4/10 [39:51<59:41, 596.99s/it]  

Train Evaluation 2019-05-31T20:31:16.136817: step 18500, loss 2.28311
Train Evaluation 2019-05-31T20:32:15.955616: step 19000, loss 2.65561

Dev Evaluation
2019-05-31T20:32:26.927993: loss 0.20064

              precision    recall  f1-score   support

        B-A0       0.89      0.85      0.87      2081
        B-A1       0.88      0.90      0.89      2994
        B-A2       0.70      0.66      0.68       673
        B-A3       0.72      0.45      0.55       114
        B-A4       0.79      0.65      0.71        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.62      0.53      0.57       279
    B-AM-CAU       0.87      0.44      0.59        45
    B-AM-DIR       0.50      0.11      0.18        36
    B-AM-DIS       0.77      0.73      0.75       202
    B-AM-EXT       0.90      0.32      0.47        28
    B-AM-LOC       0.62      0.52      0.56       194
    B-AM-MNR       0.60      0.46      0.52  

Train Evaluation 2019-05-31T20:37:51.331351: step 21500, loss 1.87389
Train Evaluation 2019-05-31T20:38:51.648001: step 22000, loss 2.1328

Dev Evaluation
2019-05-31T20:39:02.604990: loss 0.199197

              precision    recall  f1-score   support

        B-A0       0.88      0.85      0.87      2081
        B-A1       0.88      0.89      0.88      2994
        B-A2       0.75      0.62      0.68       673
        B-A3       0.64      0.51      0.57       114
        B-A4       0.68      0.66      0.67        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.53      0.53      0.53       279
    B-AM-CAU       0.89      0.56      0.68        45
    B-AM-DIR       0.67      0.33      0.44        36
    B-AM-DIS       0.77      0.73      0.75       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.72      0.54      0.62       194
    B-AM-MNR       0.63      0.54      0.58  

 50%|█████     | 5/10 [49:44<49:39, 595.81s/it]

Train Evaluation 2019-05-31T20:41:04.123276: step 23000, loss 2.42257

Dev Evaluation
2019-05-31T20:41:15.110720: loss 0.200592

              precision    recall  f1-score   support

        B-A0       0.89      0.85      0.87      2081
        B-A1       0.88      0.89      0.89      2994
        B-A2       0.69      0.65      0.67       673
        B-A3       0.79      0.46      0.58       114
        B-A4       0.80      0.69      0.74        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.56      0.51      0.53       279
    B-AM-CAU       0.85      0.49      0.62        45
    B-AM-DIR       0.50      0.25      0.33        36
    B-AM-DIS       0.78      0.72      0.75       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.58      0.56      0.57       194
    B-AM-MNR       0.64      0.48      0.55       242
    B-AM-MOD       0.96      0.96      0.96       317
    B-

Train Evaluation 2019-05-31T20:46:38.554925: step 25500, loss 3.34834
Train Evaluation 2019-05-31T20:47:39.036993: step 26000, loss 2.50213

Dev Evaluation
2019-05-31T20:47:50.090216: loss 0.202597

              precision    recall  f1-score   support

        B-A0       0.87      0.85      0.86      2081
        B-A1       0.89      0.89      0.89      2994
        B-A2       0.79      0.63      0.70       673
        B-A3       0.59      0.51      0.54       114
        B-A4       0.81      0.68      0.74        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.63      0.51      0.56       279
    B-AM-CAU       0.93      0.56      0.69        45
    B-AM-DIR       0.52      0.33      0.41        36
    B-AM-DIS       0.78      0.76      0.77       202
    B-AM-EXT       0.90      0.32      0.47        28
    B-AM-LOC       0.65      0.57      0.61       194
    B-AM-MNR       0.66      0.53      0.59 

 60%|██████    | 6/10 [59:49<39:53, 598.43s/it]

Train Evaluation 2019-05-31T20:51:04.033060: step 27500, loss 2.46162
Train Evaluation 2019-05-31T20:52:04.249233: step 28000, loss 1.72644

Dev Evaluation
2019-05-31T20:52:15.208228: loss 0.200966

              precision    recall  f1-score   support

        B-A0       0.88      0.86      0.87      2081
        B-A1       0.90      0.89      0.89      2994
        B-A2       0.71      0.68      0.69       673
        B-A3       0.65      0.48      0.56       114
        B-A4       0.75      0.72      0.73        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.53      0.58      0.55       279
    B-AM-CAU       0.84      0.58      0.68        45
    B-AM-DIR       0.59      0.28      0.38        36
    B-AM-DIS       0.79      0.75      0.77       202
    B-AM-EXT       0.91      0.36      0.51        28
    B-AM-LOC       0.66      0.60      0.63       194
    B-AM-MNR       0.63      0.51      0.57 

Train Evaluation 2019-05-31T20:57:39.229467: step 30500, loss 3.68529
Train Evaluation 2019-05-31T20:58:39.512454: step 31000, loss 2.17844

Dev Evaluation
2019-05-31T20:58:50.498619: loss 0.207728

              precision    recall  f1-score   support

        B-A0       0.90      0.84      0.87      2081
        B-A1       0.89      0.89      0.89      2994
        B-A2       0.73      0.65      0.69       673
        B-A3       0.84      0.36      0.50       114
        B-A4       0.80      0.62      0.70        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.60      0.54      0.57       279
    B-AM-CAU       0.97      0.64      0.77        45
    B-AM-DIR       0.58      0.31      0.40        36
    B-AM-DIS       0.82      0.73      0.77       202
    B-AM-EXT       0.91      0.36      0.51        28
    B-AM-LOC       0.65      0.55      0.60       194
    B-AM-MNR       0.65      0.50      0.57 

 70%|███████   | 7/10 [1:09:41<29:49, 596.57s/it]

Train Evaluation 2019-05-31T21:00:51.853871: step 32000, loss 10.9977

Dev Evaluation
2019-05-31T21:01:03.020796: loss 0.219925

              precision    recall  f1-score   support

        B-A0       0.89      0.87      0.88      2081
        B-A1       0.90      0.88      0.89      2994
        B-A2       0.65      0.69      0.67       673
        B-A3       0.77      0.39      0.51       114
        B-A4       0.78      0.60      0.68        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.61      0.52      0.56       279
    B-AM-CAU       0.82      0.62      0.71        45
    B-AM-DIR       0.56      0.28      0.37        36
    B-AM-DIS       0.78      0.70      0.74       202
    B-AM-EXT       0.75      0.32      0.45        28
    B-AM-LOC       0.62      0.59      0.61       194
    B-AM-MNR       0.64      0.49      0.55       242
    B-AM-MOD       0.98      0.93      0.95       317
    B-

Train Evaluation 2019-05-31T21:06:27.720207: step 34500, loss 2.3276
Train Evaluation 2019-05-31T21:07:27.594439: step 35000, loss 2.17418

Dev Evaluation
2019-05-31T21:07:38.545411: loss 0.214922

              precision    recall  f1-score   support

        B-A0       0.89      0.85      0.87      2081
        B-A1       0.89      0.88      0.89      2994
        B-A2       0.64      0.74      0.68       673
        B-A3       0.84      0.38      0.52       114
        B-A4       0.87      0.63      0.73        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.61      0.53      0.57       279
    B-AM-CAU       0.90      0.58      0.70        45
    B-AM-DIR       0.60      0.25      0.35        36
    B-AM-DIS       0.78      0.73      0.76       202
    B-AM-EXT       0.82      0.32      0.46        28
    B-AM-LOC       0.63      0.63      0.63       194
    B-AM-MNR       0.64      0.54      0.59  

 80%|████████  | 8/10 [1:19:44<19:57, 598.70s/it]

Train Evaluation 2019-05-31T21:10:51.061070: step 36500, loss 2.21314
Train Evaluation 2019-05-31T21:11:51.300370: step 37000, loss 2.76894

Dev Evaluation
2019-05-31T21:12:02.282594: loss 0.225014

              precision    recall  f1-score   support

        B-A0       0.88      0.86      0.87      2081
        B-A1       0.87      0.90      0.89      2994
        B-A2       0.74      0.66      0.70       673
        B-A3       0.78      0.46      0.57       114
        B-A4       0.86      0.58      0.70        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.61      0.55      0.58       279
    B-AM-CAU       0.81      0.64      0.72        45
    B-AM-DIR       0.47      0.22      0.30        36
    B-AM-DIS       0.79      0.78      0.78       202
    B-AM-EXT       0.91      0.36      0.51        28
    B-AM-LOC       0.60      0.62      0.61       194
    B-AM-MNR       0.64      0.55      0.60 

Train Evaluation 2019-05-31T21:17:25.884365: step 39500, loss 3.02459
Train Evaluation 2019-05-31T21:18:25.822935: step 40000, loss 11.5988

Dev Evaluation
2019-05-31T21:18:36.821883: loss 0.223531

              precision    recall  f1-score   support

        B-A0       0.89      0.85      0.87      2081
        B-A1       0.87      0.90      0.89      2994
        B-A2       0.69      0.69      0.69       673
        B-A3       0.59      0.49      0.54       114
        B-A4       0.78      0.65      0.71        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.56      0.57      0.56       279
    B-AM-CAU       0.81      0.58      0.68        45
    B-AM-DIR       0.62      0.22      0.33        36
    B-AM-DIS       0.75      0.80      0.77       202
    B-AM-EXT       0.77      0.36      0.49        28
    B-AM-LOC       0.61      0.66      0.63       194
    B-AM-MNR       0.61      0.51      0.55 

 90%|█████████ | 9/10 [1:29:37<09:56, 596.78s/it]

Train Evaluation 2019-05-31T21:20:38.499665: step 41000, loss 3.11211

Dev Evaluation
2019-05-31T21:20:49.492051: loss 0.229712

              precision    recall  f1-score   support

        B-A0       0.89      0.85      0.87      2081
        B-A1       0.89      0.89      0.89      2994
        B-A2       0.70      0.65      0.67       673
        B-A3       0.74      0.42      0.54       114
        B-A4       0.80      0.57      0.67        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.52      0.59      0.55       279
    B-AM-CAU       0.93      0.60      0.73        45
    B-AM-DIR       0.59      0.28      0.38        36
    B-AM-DIS       0.79      0.75      0.77       202
    B-AM-EXT       0.85      0.39      0.54        28
    B-AM-LOC       0.69      0.51      0.59       194
    B-AM-MNR       0.64      0.53      0.58       242
    B-AM-MOD       0.98      0.94      0.96       317
    B-

Train Evaluation 2019-05-31T21:26:14.098330: step 43500, loss 1.91097
Train Evaluation 2019-05-31T21:27:13.848913: step 44000, loss 2.88428

Dev Evaluation
2019-05-31T21:27:24.861968: loss 0.235118

              precision    recall  f1-score   support

        B-A0       0.88      0.86      0.87      2081
        B-A1       0.89      0.89      0.89      2994
        B-A2       0.73      0.68      0.71       673
        B-A3       0.78      0.46      0.58       114
        B-A4       0.77      0.66      0.71        65
        B-A5       0.00      0.00      0.00         2
        B-AA       0.00      0.00      0.00         1
    B-AM-ADV       0.56      0.56      0.56       279
    B-AM-CAU       0.92      0.49      0.64        45
    B-AM-DIR       0.79      0.31      0.44        36
    B-AM-DIS       0.80      0.67      0.73       202
    B-AM-EXT       0.83      0.36      0.50        28
    B-AM-LOC       0.62      0.66      0.64       194
    B-AM-MNR       0.63      0.52      0.57 

100%|██████████| 10/10 [1:39:40<00:00, 598.80s/it]
