In [11]:
import sys
sys.path.insert(0,'../models/')
sys.path.insert(0,'../datasets/')
sys.path.insert(0,'..')

import pandas as pd
import numpy as np
import json
from subprocess import Popen, PIPE, STDOUT
import re
from collections import defaultdict

import tensorflow as tf
import tqdm
from models import PropbankEncoder
import config

INPUT_DIR = '../datasets/binaries/'
PROPBANK_GLO50_PATH = '{:}deep_glo50.pickle'.format(INPUT_DIR)

<h1><center>Structured Predictions Network CWIS SRL (BR)</center></h1>

<center>In this notebook we solve the semantic role labeling task using structured predictions networks.</center>

## 1. Builds a "human friendly" version of the dataset

In [12]:
dfgs = pd.read_csv('../datasets/csvs/gs.csv', index_col=0, sep=',', encoding='utf-8')
column_files = [
    '../datasets/csvs/column_chunks/chunks.csv',
    '../datasets/csvs/column_predmarker/predicate_marker.csv',
    '../datasets/csvs/column_shifts_ctx_p/form.csv',
    '../datasets/csvs/column_shifts_ctx_p/gpos.csv',
    '../datasets/csvs/column_shifts_ctx_p/lemma.csv',
    '../datasets/csvs/column_t/t.csv',
    '../datasets/csvs/column_iob/iob.csv'
]

for col_f in column_files:
    _df = pd.read_csv(col_f, index_col=0, encoding='utf-8')
    dfgs = pd.concat((dfgs, _df), axis=1)

DISPLAY_COLUMNS = ['ID', 'P', 'FORM', 'ARG', 'T', 
                   'CHUNK_ID', 'CHUNK_START', 'CHUNK_FINISH', 'CHUNK_LEN', 'CHUNK_CANDIDATE_ID']            
dfgs[DISPLAY_COLUMNS].head(33)    

Unnamed: 0_level_0,ID,P,FORM,ARG,T,CHUNK_ID,CHUNK_START,CHUNK_FINISH,CHUNK_LEN,CHUNK_CANDIDATE_ID
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1,1,Brasília,*,*,1,0,1,1,0
1,2,1,Pesquisa_Datafolha,(A0*,A0,2,1,4,3,35
2,3,1,publicada,*,A0,2,1,4,3,35
3,4,1,hoje,*),A0,2,1,4,3,35
4,5,1,revela,(V*),V,3,4,5,1,126
5,6,1,um,(A1*,A1,4,5,32,27,181
6,7,1,dado,*,A1,4,5,32,27,181
7,8,1,supreendente,*,A1,4,5,32,27,181
8,9,1,:,*,A1,4,5,32,27,181
9,10,1,recusando,*,A1,4,5,32,27,181


## 2. Gets encodings

Propbank Encoder holds an indexed version of propbank dataset an answers to FOUR different dataformats: 
* CAT: this is the raw categorical data.
* EMB: tokens are embedding using GloVe embeddings.
* HOT: onehot encoding of the words and tokens.
* IDX: dense indexed representations.

In [16]:
# LOAD ENCODER
propbank_encoder = PropbankEncoder.recover(PROPBANK_GLO50_PATH)
db = propbank_encoder.db
lex2idx = propbank_encoder.lex2idx
idx2lex = propbank_encoder.idx2lex

# FOR TEXTUAL DATA ONLY
lex2tok = propbank_encoder.lex2tok
tok2idx = propbank_encoder.tok2idx
embeddings = propbank_encoder.embeddings

In [17]:
print('attributes\t',
       len(db),
      '\n',             
      'records\t',
       len(db['ARG'].keys()))

attributes	 44 
 records	 141730


In [18]:
def filter_type(ds_type, db):
    '''Filters only records from train dataset
    '''
    ds_types = ('train', 'test', 'valid')
    if ds_type not in ds_types:
        _msg = 'ds_type must be in {:} got {:}'
        _msg = _msg.format(ds_types, ds_type)
        raise ValueError(_msg)
    elif ds_type in ('train',):
        lb = 0 
        ub = config.DATASET_TRAIN_SIZE
    elif ds_type in ('test',):        
        lb = config.DATASET_TRAIN_SIZE
        ub = lb + config.DATASET_VALID_SIZE         
    elif ds_type in ('valid',):                
        lb = config.DATASET_TRAIN_SIZE + config.DATASET_VALID_SIZE
        ub = lb + config.DATASET_TEST_SIZE         

    sel_keys_ = {key_ for key_, prop_ in db['P'].items() if prop_ > lb and prop_ <= ub}

    return {
                attr_:{ idx_: i_
                        for idx_, i_ in dict_.items() if idx_ in sel_keys_
                      }        
                for attr_, dict_  in db.items()
            }

def make_propositions_dict(db):
    '''Reindex db by propositions creating a nested dict in which the
        outer key is the proposition        
    '''
    
    triple_list = []
    prev_prop = -1
    for idx, prop in db['P'].items():
        if prev_prop != prop:
            if idx > 0:
                ub = idx-1
                triple_list.append((lb, ub, prev_prop))
            lb = idx
        prev_prop = prop
    triple_list.append((lb, ub, prev_prop))
            

        
    prop_set = set(db['P'].values())
    return { prop_:
                    {
                        attr_:{ idx_: dict_[idx_]
                                for idx_ in range(lb_, ub_ + 1, 1)
                          }        
                        for attr_, dict_ in db.items() if attr_ not in ('P',)
                    }
             for lb_, ub_, prop_ in  triple_list
            }, {prop_: ub_ - lb_ + 1 for lb_, ub_, prop_ in  triple_list}   


def numpfy_propositions_dict(prop_dict, proplen_dict):
    '''Converts inner dict examples into numpy arrays
    '''
    prop_dict_ = defaultdict(dict)    
    for prop, columns_dict in prop_dict.items():
        len_ = proplen_dict[prop]
        shape_ = (len_, 1)
        for column, values_dict in columns_dict.items():
            tuple_list = [idx_value 
                          for idx_value in values_dict.items()]
            
            tuple_list = sorted(tuple_list, key=lambda x: x[0])            
            # Converts lexicon (raw/indexed) into token (embedded/indexed)
            if (('FORM' in column) or ('LEMMA' in column)):
                values_list = [tok2idx[lex2tok[idx2lex[column][tuple_[1]]]]                
                                   for tuple_ in tuple_list]
            else:
                values_list = [tuple_[1] for tuple_ in tuple_list]
            
            prop_dict_[prop][column]  = np.array(values_list).reshape(shape_)
    
    return prop_dict_        


In [19]:
traindb  = filter_type('train', db)
print('attributes\t',
       len(traindb),
      '\n',             
      'records\t',
       len(traindb['ARG'].keys()),
       '\n',             
      'vocab\t',
        max([form for _, form in traindb['FORM'].items()]))

attributes	 44 
 records	 123846 
 vocab	 13289


In [20]:
prop_dict, proplen_dict = make_propositions_dict(traindb)
print('attributes\t',
       len(prop_dict[1]) + 1,
      '\n',             
      'records\t',
       sum([len(d['ARG']) for p, d in prop_dict.items()]),
        '\n',             
      'vocab\t',
        max([form for _, prop in prop_dict.items() for _, form in prop['FORM'].items()]))

attributes	 44 
 records	 123837 
 vocab	 13289


In [21]:
prop_dict1 = numpfy_propositions_dict(prop_dict, proplen_dict)
print('attributes\t',
       len(prop_dict1[1]) + 1,
      '\n',             
      'records\t',
       sum([len_ for _, len_ in proplen_dict.items()]),
        '\n',             
      'vocab\t',
        max([max(form) for _, prop in prop_dict1.items() for form in prop['FORM']]))

attributes	 44 
 records	 123837 
 vocab	 12037


In [31]:
def get_inputs(db1, propid):
    '''Generate inputs
    '''
    propdb = db1[propid] # nested dict of columns and idx value
    proplen = len(propdb['ID'])
    if 'CHUNK_SPACE' not in propdb:
        propdb['CHUNK_SPACE'] = generate_chunk_space(proplen)

    word    = propdb['FORM']
    ctx_pm  = propdb['FORM_CTX_P-1']
    ctx_p0  = propdb['FORM_CTX_P+0']
    ctx_pp  = propdb['FORM_CTX_P+1']
    
    marker  = propdb['MARKER']
    pos     = propdb['GPOS']
    chunk_type  = propdb['T']
    chunk_start, chunk_finish = propdb['CHUNK_SPACE']
    
    return word, ctx_pm, ctx_p0, ctx_pp, marker, pos, chunk_type, chunk_start, chunk_finish
            
def generate_chunk_space(n):
    '''Generates all possible spaces for chunks
    '''
    start_list = []
    end_list = []
    for i in range(n):
        for j in range(i,n,1):
            start_list.append(i)
            end_list.append(j+1)
    shape_ = (len(start_list), 1)
    start_ = np.array(start_list).reshape(shape_)
    finish_ = np.array(end_list).reshape(shape_)
    return start_, finish_
            

def get_outputs(db1, propid):
    ''' Generate outputs
    '''
    propdb_ = db1[propid] # nested dict of columns and idx value
    plen_ = len(propdb_['ID'])
    if 'OUTPUTS' not in propdb_: 
        propdb_['OUTPUTS'] = propdb_['T'].reshape((plen_,))

    return propdb_['OUTPUTS']

In [33]:
%%timeit
propid = 1119
# propid = 2
word, ctx_pm, ctx_p0, ctx_pp, marker, pos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
y = get_outputs(prop_dict1, propid)
# worst proposition 1120 size 92!

1.03 µs ± 21 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


 ## MODEL

In [38]:
propid = 1
word, ctx_pm, ctx_p0, ctx_pp, marker, pos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1,  propid)
y = get_outputs(prop_dict1, propid)
print(y.shape)
print(y)
# _start  = np.repeat(chunk_start, len(lex2idx['T']))
# _finish = np.repeat(chunk_finish, len(lex2idx['T']))
# print([(_start[y_].flatten(), _finish[y_].flatten()) for y_ in y])
# print(list(zip(_start[y].flatten(), _finish[y].flatten())))


(33,)
[ 0  1  1  1 35  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  0]


In [39]:
import struct_perc.colored_weighted_interval_scheduling as cwis
import struct_perc.weighted_interval_scheduling as wis
import struct_perc.utils as spu

 ## Tensorflow Graph

In [13]:
# vocab_size = len(lex2idx['FORM']) + 1
# embed_size = 50

# n_pos = len(lex2idx['GPOS'])
# # n_type = len(lex2idx['T'])
# n_classes  = len(lex2idx['T'])

# tf.reset_default_graph()

# # word index and gpos 
# tf_words = tf.placeholder(tf.int32, shape=(None,1))
# tf_pos = tf.placeholder(tf.int32, shape=(None,1))
# # t_x_type = tf.placeholder(tf.int32, shape=(None,1))

# # índices de inicio de intervalo
# tf_s = tf.placeholder(tf.int32, shape=(None,1))
# # índices de fim de intervalo
# tf_f = tf.placeholder(tf.int32, shape=(None,1))

# # replicamos os indicies de inicio e fim para cada classe de chunk possivel
# tf_sc = tf.reshape(
#       tf.tile(tf_s,  [1, n_classes]), [-1,1])
# tf_fc = tf.reshape(
#       tf.tile(tf_f,  [1, n_classes]), [-1,1])

# # n_features = (embed_size + n_pos + n_type)
# n_features = (embed_size + n_pos)
# # hidden_features = 300
# W_shape = (n_features, n_classes)
# EMBS = tf.constant(embeddings)
# # tf_token = tf.Variable(initial_value=None, expected_shape=(embed_size,), dtype=tf.float32, trainable=False)

# # geramos os paramteros do modelo
# with tf.variable_scope("model"):
#     W = tf.Variable(
#         tf.random_normal(W_shape, 0, 1/np.sqrt(n_features * n_classes), name='W')
#     )
#     b = tf.Variable(
#         tf.random_normal((n_classes,), 0, 1/np.sqrt(n_classes), name='b')
#     )
    

# # tf_token = tf.nn.embedding_lookup(tf_embeddings, id) 
# # Recuperamos os embeddings de cada palavra
# tf_word_features = tf.gather_nd(EMBS, tf_words)

# tf_pos_flat = tf.reshape(tf_pos, [-1])
# tf_pos_features = tf.one_hot(tf_pos_flat, depth=n_pos)

# # t_x_type_flat = tf.reshape(t_x_type,[-1])
# # t_type_features = tf.one_hot(t_x_type_flat, depth=n_type)

# # t_tok_features = tf.concat((t_word_features,t_pos_features,t_type_features),axis=1)
# tf_tok_features = tf.concat((tf_word_features,tf_pos_features),axis=1)

# # a partir das features do intervalo computamos o score
# tf_scores = tf.matmul(tf_tok_features, W) + b

# tf_pred = tf.argmax(tf_scores, axis=1)


In [46]:
vocab_size = len(tok2idx)
embed_size = 50
n_pos = len(lex2idx['GPOS'])
# n_type = len(id_to_type)
n_classes = len(lex2idx['T'])

# índices das palavras
t_x_words = tf.placeholder(tf.int64, shape=(None,1), name='word')


t_x_ctx_pm = tf.placeholder(tf.int64, shape=(None,1), name='ctx_pm')
t_x_ctx_p0 = tf.placeholder(tf.int64, shape=(None,1), name='ctx_p0')
t_x_ctx_pp = tf.placeholder(tf.int64, shape=(None,1), name='ctx_pp')

t_x_pos = tf.placeholder(tf.int64, shape=(None,1), name='gpos')
t_x_marker = tf.cast( tf.placeholder(tf.int64, shape=(None,1), name='marker'), tf.float32 )

# t_x_type = tf.placeholder(tf.int32, shape=(None,1))

W_shape = (embed_size * 4 + 1 + n_pos, n_classes)
b_shape = (1,n_classes)
# geramos os paramteros do modelo
with tf.variable_scope("model"):
    # parâmetros de embedding
    t_W_tok = tf.Variable(tf.random_normal((vocab_size, embed_size), 0, 0.01, dtype=tf.float32), name='embeddings')
    # parâmetros que computam o score a partir das features do intervalo
#     t_W_interval = tf.Variable(np.zeros((embed_size + n_pos, n_classes)).astype(np.float32))
    t_W_interval = tf.Variable(tf.zeros(W_shape, dtype=tf.float32), name='W')

#     t_b_interval = tf.Variable(np.zeros((1,n_classes)).astype(np.float32))
# np.zeros((1,n_classes)).astype(np.float32)
    t_b_interval = tf.Variable(tf.zeros(b_shape, dtype=tf.float32), name='b')
#     np.zeros((1,n_classes)).astype(np.float32)
#     t_b_interval = tf.Variable()

# Recuperamos os embeddings de cada palavra
t_word_features = tf.gather_nd(t_W_tok, t_x_words, name='word_features')

t_ctx_pm_features = tf.gather_nd(t_W_tok, t_x_ctx_pm, name='ctx_pm')
t_ctx_p0_features = tf.gather_nd(t_W_tok, t_x_ctx_p0, name='ctx_p0')
t_ctx_pp_features = tf.gather_nd(t_W_tok, t_x_ctx_pp, name='ctx_pp')

t_x_pos_flat = tf.reshape(t_x_pos,[-1], name='gpos_flat')
t_pos_features = tf.one_hot(t_x_pos_flat, depth=n_pos, name='gpos_features')

# t_x_type_flat = tf.reshape(t_x_type,[-1])
# t_type_features = tf.one_hot(t_x_type_flat, depth=n_type)

# t_tok_features = tf.concat((t_word_features,t_pos_features,t_type_features),axis=1)
# t_tok_features = tf.concat((t_word_features,t_pos_features),axis=1, name='tok_features')
t_tok_features = tf.concat((t_word_features, 
                            t_ctx_pm_features, t_ctx_p0_features, t_ctx_pp_features, 
                            t_pos_features, t_x_marker),axis=1, name='tok_features')

# a partir das features do intervalo computamos o score
t_scores = tf.matmul(t_tok_features, t_W_interval, name='xW') + t_b_interval

t_pred = tf.argmax(t_scores, axis=1, name='t_pred')

 ## Tensorflow test session

In [48]:
propid  = 1
words, ctx_pm, ctx_p0, ctx_pp, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
y = get_outputs(prop_dict1, propid)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    a = sess.run(t_tok_features, feed_dict={
        t_x_words:words,
        t_x_ctx_pm: ctx_pm,
        t_x_ctx_p0: ctx_p0,
        t_x_ctx_pp: ctx_pp,        
        t_x_marker: marker,                
        t_x_pos:gpos
    })
print(a.shape)

(33, 226)


In [53]:
def pred(sess, x_words, x_ctx_pm, x_ctx_p0, x_ctx_pp, x_marker, x_pos):
    result = sess.run(t_pred,feed_dict={
        t_x_words: x_words,
        t_x_ctx_pm: x_ctx_pm,
        t_x_ctx_p0: x_ctx_p0,
        t_x_ctx_pp: x_ctx_pp,        
        t_x_marker: x_marker,                
        t_x_pos:x_pos
    })
    return result

In [54]:
# índices dos intervalos computados pelo Weighted Interval Scheduling
t_p = tf.placeholder(tf.int32, shape=(None,), name='predictions')
# índices dos intervalos corretos
t_y = tf.placeholder(tf.int32, shape=(None,), name='y')
t_len = tf.placeholder(tf.int32, shape=(), name='proplen')
t_indices = tf.to_int32(tf.range(t_len), name='indices')

# score da estrutura predita
t_scores_p = tf.gather_nd(t_scores, tf.stack((t_indices, t_p),-1), name='predicted_score')
# score da estrutura correta
t_scores_y = tf.gather_nd(t_scores, tf.stack((t_indices,t_y),-1), name='target_score')

# função de custo do perceptron estruturado
t_cost = tf.reduce_sum(t_scores_p) - tf.reduce_sum(t_scores_y)

# gradiente descendente no custo do perceptron estruturado
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(t_cost)

In [56]:
# x_words, x_pos, x_types = get_input_features(df_chunk,train_sentences[10],word_to_id, pos_to_id, type_to_id)
# y_ck_ids = get_output_features(df_chunk, train_sentences[10], target_to_id)

prop_id = 1
words, ctx_pm, ctx_p0, ctx_pp, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
y = get_outputs(prop_dict1, propid)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
predictions = pred(sess, words, ctx_pm, ctx_p0, ctx_pp, marker, gpos)
print(y.shape)
print(len(predictions))

(33,)
33


 ## Can it memorize ?

In [59]:
for i in range(10000):
    
    predictions = pred(sess, words, ctx_pm, ctx_p0, ctx_pp, marker, gpos)
    
    
    _, cost = sess.run([train, t_cost], feed_dict={
        t_x_words: words,
        t_x_ctx_pm: ctx_pm,
        t_x_ctx_p0: ctx_p0,
        t_x_ctx_pp: ctx_pp,        
        t_x_marker: marker,        
        t_x_pos: gpos,
        t_p: predictions,
        t_len: proplen_dict[propid],
        t_y:y})


    if i % 100 == 0:
        err = np.sum(y.flatten() != predictions.flatten())
        acc = 1 - err/len(predictions.flatten())
        print(acc, ' ', cost)
        if acc == 1:
            break

0.0606060606061   0.0
0.939393939394   0.17409
1.0   0.0


 ## The model reproduces the sentence

In [61]:
p = sess.run(t_pred, 
             feed_dict={
                t_x_words: words,
                t_x_ctx_pm: ctx_pm,
                t_x_ctx_p0: ctx_p0,
                t_x_ctx_pp: ctx_pp,        
                t_x_marker: marker,        
                t_x_pos: gpos
             })
print(p)
print()
print(y)

[ 0  1  1  1 35  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  0]

[ 0  1  1  1 35  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  0]


## Training

In [None]:
sess= tf.Session()
sess.run(tf.global_variables_initializer())

epochs = 30
for i in range(epochs):
    total_err = 0
    total_size = 0
    nprops = len(proplen_dict)
    for propid in tqdm.tqdm(range(1, nprops + 1)):
        words, ctx_pm, ctx_p0, ctx_pp, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1,  propid)
        y = get_outputs(prop_dict1, propid)

        predictions = pred(sess, words, ctx_pm, ctx_p0, ctx_pp, marker, gpos)

        _, cost = sess.run([train, t_cost], feed_dict={
            t_x_words:words,
            t_x_ctx_pm: ctx_pm,
            t_x_ctx_p0: ctx_p0,
            t_x_ctx_pp: ctx_pp,        
            t_x_marker: marker,
            t_x_pos: gpos,
            t_p:predictions,
            t_len: proplen_dict[propid],
            t_y:y})
        total_err += np.sum(predictions.flatten()!=y.flatten())
        total_size += len(predictions.flatten())

    print('epoch ', i, ' acc: ', 1 - total_err/total_size)

100%|██████████| 5099/5099 [00:15<00:00, 331.30it/s]
  1%|          | 35/5099 [00:00<00:14, 345.12it/s]

epoch  0  acc:  0.431502701131


100%|██████████| 5099/5099 [00:14<00:00, 353.68it/s]
  1%|          | 33/5099 [00:00<00:15, 327.45it/s]

epoch  1  acc:  0.481075930457


100%|██████████| 5099/5099 [00:14<00:00, 359.28it/s]
  1%|          | 33/5099 [00:00<00:15, 322.91it/s]

epoch  2  acc:  0.510679360773


100%|██████████| 5099/5099 [00:14<00:00, 353.95it/s]
  1%|          | 28/5099 [00:00<00:18, 277.95it/s]

epoch  3  acc:  0.540581570936


100%|██████████| 5099/5099 [00:14<00:00, 350.99it/s]
  1%|          | 36/5099 [00:00<00:14, 353.54it/s]

epoch  4  acc:  0.560494844029


100%|██████████| 5099/5099 [00:14<00:00, 346.20it/s]
  1%|          | 34/5099 [00:00<00:15, 335.46it/s]

epoch  5  acc:  0.579495627317


100%|██████████| 5099/5099 [00:14<00:00, 355.57it/s]
  1%|          | 35/5099 [00:00<00:14, 343.04it/s]

epoch  6  acc:  0.592004005265


100%|██████████| 5099/5099 [00:14<00:00, 348.46it/s]
  1%|          | 32/5099 [00:00<00:15, 318.05it/s]

epoch  7  acc:  0.605392572494


100%|██████████| 5099/5099 [00:14<00:00, 356.93it/s]
  1%|          | 34/5099 [00:00<00:15, 333.71it/s]

epoch  8  acc:  0.614339817664


100%|██████████| 5099/5099 [00:14<00:00, 356.24it/s]
  1%|          | 29/5099 [00:00<00:17, 285.41it/s]

epoch  9  acc:  0.617464893368


100%|██████████| 5099/5099 [00:14<00:00, 354.75it/s]
  1%|          | 31/5099 [00:00<00:16, 302.88it/s]

epoch  10  acc:  0.625120117574


100%|██████████| 5099/5099 [00:14<00:00, 353.47it/s]
  1%|          | 29/5099 [00:00<00:17, 287.39it/s]

epoch  11  acc:  0.640745496096


100%|██████████| 5099/5099 [00:14<00:00, 347.79it/s]
  1%|          | 34/5099 [00:00<00:14, 338.24it/s]

epoch  12  acc:  0.641674136163


100%|██████████| 5099/5099 [00:14<00:00, 351.27it/s]
  1%|          | 33/5099 [00:00<00:15, 323.19it/s]

epoch  13  acc:  0.639784555504


100%|██████████| 5099/5099 [00:14<00:00, 355.64it/s]
  1%|          | 31/5099 [00:00<00:16, 303.36it/s]

epoch  14  acc:  0.646365787285


100%|██████████| 5099/5099 [00:14<00:00, 359.32it/s]
  1%|          | 35/5099 [00:00<00:14, 349.14it/s]

epoch  15  acc:  0.65076673369


100%|██████████| 5099/5099 [00:14<00:00, 358.39it/s]
  1%|          | 30/5099 [00:00<00:17, 292.61it/s]

epoch  16  acc:  0.658438108158


100%|██████████| 5099/5099 [00:14<00:00, 359.33it/s]
  1%|          | 31/5099 [00:00<00:16, 302.79it/s]

epoch  17  acc:  0.659649377811


100%|██████████| 5099/5099 [00:15<00:00, 331.04it/s]
  1%|          | 33/5099 [00:00<00:15, 328.21it/s]

epoch  18  acc:  0.665810702779


100%|██████████| 5099/5099 [00:14<00:00, 361.88it/s]
  1%|          | 30/5099 [00:00<00:17, 296.60it/s]

epoch  19  acc:  0.676502176248


100%|██████████| 5099/5099 [00:14<00:00, 357.90it/s]
  1%|          | 36/5099 [00:00<00:14, 354.54it/s]

epoch  20  acc:  0.676623303213


100%|██████████| 5099/5099 [00:14<00:00, 358.16it/s]
  1%|          | 36/5099 [00:00<00:14, 353.77it/s]

epoch  21  acc:  0.674556069672


100%|██████████| 5099/5099 [00:14<00:00, 358.13it/s]
  1%|          | 27/5099 [00:00<00:19, 264.66it/s]

epoch  22  acc:  0.679158894353


100%|██████████| 5099/5099 [00:14<00:00, 358.89it/s]
  1%|          | 34/5099 [00:00<00:15, 337.21it/s]

epoch  23  acc:  0.683939371916


100%|██████████| 5099/5099 [00:14<00:00, 343.42it/s]
  1%|          | 37/5099 [00:00<00:14, 361.53it/s]

epoch  24  acc:  0.684359278729


100%|██████████| 5099/5099 [00:15<00:00, 325.83it/s]
  1%|          | 28/5099 [00:00<00:18, 276.34it/s]

epoch  25  acc:  0.680200586255


100%|██████████| 5099/5099 [00:16<00:00, 318.51it/s]
  1%|          | 35/5099 [00:00<00:14, 345.22it/s]

epoch  26  acc:  0.691546145336


100%|██████████| 5099/5099 [00:15<00:00, 339.87it/s]
  1%|          | 26/5099 [00:00<00:19, 257.60it/s]

epoch  27  acc:  0.689543512843


 97%|█████████▋| 4959/5099 [00:14<00:00, 349.33it/s]