In [2]:
import sys
sys.path.insert(0,'../models/')
sys.path.insert(0,'../datasets/')
sys.path.insert(0,'..')

import pandas as pd
import numpy as np
import json
from subprocess import Popen, PIPE, STDOUT
import re
from collections import defaultdict
from utils.info import get_db_bounds
from datasets import propbankbr_arg2se

import tensorflow as tf
# import tqdm
from models import PropbankEncoder
import config 

# Directory holding the pre-built binary artefacts.
INPUT_DIR = '../datasets/binaries/1.0/'
# Pickle handed to PropbankEncoder.recover() in the "Load Encodings" section.
PROPBANK_WAN50_PATH = '{:}wan50/deep_wan50.pickle'.format(INPUT_DIR)
# Path to a Perl evaluation script; presumably the CoNLL-2004 SRL scorer.
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
PEARL_SRLEVAL_PATH = '../srlconll04/srl-eval.pl'

  return f(*args, **kwds)
  return f(*args, **kwds)


# SPN Chunker

Uma mente insana realiza um experimento *lúcido* com o dataset de chunking da conll e seu script de avaliação :)

## 1.1 Carregar dados

In [5]:
# Gold-standard table; the first CSV column is used as the row index.
dfgs = pd.read_csv('../datasets/csvs/1.0/gs.csv', index_col=0, sep=',', encoding='utf-8')
column_files = [
    '../datasets/csvs/1.0/column_chunks/chunks.csv',
    '../datasets/csvs/1.0/column_predmarker/predicate_marker.csv',
    '../datasets/csvs/1.0/column_shifts_ctx_p/form.csv',
    '../datasets/csvs/1.0/column_shifts_ctx_p/gpos.csv',
    '../datasets/csvs/1.0/column_shifts_ctx_p/lemma.csv',
    '../datasets/csvs/1.0/column_iob/iob.csv',
    '../datasets/csvs/1.0/column_t/t.csv'
]

# Read every feature file first and concatenate a single time: calling
# pd.concat inside the loop re-copies the growing frame on each iteration.
column_frames = [
    pd.read_csv(col_f, index_col=0, encoding='utf-8') for col_f in column_files
]
dfgs = pd.concat([dfgs] + column_frames, axis=1)

# Subset of columns shown in the preview below.
DISPLAY_COLUMNS = ['ID', 'P', 'FORM', 'GPOS', 'MARKER', 'ARG', 'T',
                   'CHUNK_ID', 'CHUNK_START', 'CHUNK_FINISH', 'CHUNK_LEN', 'CHUNK_CANDIDATE_ID']
dfgs[DISPLAY_COLUMNS].head(33)

Unnamed: 0_level_0,ID,P,FORM,GPOS,MARKER,ARG,T,CHUNK_ID,CHUNK_START,CHUNK_FINISH,CHUNK_LEN,CHUNK_CANDIDATE_ID
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,1,1,Brasília,PROP,0,*,*,1,0,1,1,0
1,2,1,Pesquisa_Datafolha,N,0,(A0*,A0,2,1,4,3,35
2,3,1,publicada,V-PCP,0,*,A0,2,1,4,3,35
3,4,1,hoje,ADV,0,*),A0,2,1,4,3,35
4,5,1,revela,V-FIN,1,(V*),V,3,4,5,1,126
5,6,1,um,ART,1,(A1*,A1,4,5,32,27,181
6,7,1,dado,N,1,*,A1,4,5,32,27,181
7,8,1,supreendente,ADJ,1,*,A1,4,5,32,27,181
8,9,1,:,PU,1,*,A1,4,5,32,27,181
9,10,1,recusando,V-GER,1,*,A1,4,5,32,27,181


## 1.2 Load Encodings

PropbankEncoder holds an indexed version of the PropBank dataset and serves it in FOUR different data formats: 
* CAT: this is the raw categorical data.
* EMB: tokens are embedded using GloVe embeddings.
* HOT: onehot encoding of the words and tokens.
* IDX: dense indexed representations.

In [7]:
# Restore the pickled PropbankEncoder and expose its lookup structures as
# notebook-level names.
propbank_encoder = PropbankEncoder.recover(PROPBANK_WAN50_PATH)
db = propbank_encoder.db
lex2idx = propbank_encoder.lex2idx
idx2lex = propbank_encoder.idx2lex

# Token-level lookups and the embedding table; only meaningful for textual
# columns.
tok2idx = propbank_encoder.tok2idx
lex2tok = propbank_encoder.lex2tok
idx2word = propbank_encoder.idx2word
embeddings = propbank_encoder.embeddings

# Number of entries registered under the 'T' column of lex2idx
# (presumably the number of target classes — TODO confirm).
n_targets = len(lex2idx['T'])

In [11]:
# Size of the encoder database: how many attributes it holds and how many
# records are stored under the 'ARG' attribute.
print('attributes\t', len(db), '\n', 'records\t', len(db['ARG'].keys()))

attributes	 43 
 records	 138378


# Segment datasets

## Separar datasets

In [4]:
# Unique sentence ids of each dataset split, then one name per split.
data_sentences = df_ck.groupby('dataset')['sentence_id'].unique()
train_sentences, dev_sentences, test_sentences = (
    data_sentences[split] for split in ('train', 'dev', 'test')
)

## Separar informações das sentenças

In [5]:
# Group the token table once and collect, per sentence, the list of values of
# each column the model consumes.
_by_sentence = df_ck.groupby('sentence_id')
sentence_str = _by_sentence['word'].apply(list)
sentence_words = _by_sentence['word_norm-id'].apply(list)
sentence_capts = _by_sentence['capt-id'].apply(list)
sentence_pos = _by_sentence['cpos-id'].apply(list)
sentence_ck = _by_sentence['chunk-id'].apply(list)

## Função de geração da entrada

In [6]:
def get_input(sentence_id):
    """Assemble the model inputs for one sentence.

    Every per-token sequence gets an extra '#END_SENT' entry appended.

    Returns
    -------
    tuple
        ``(a_words, a_capt, a_chars, a_pos, char_matrix, token_lens)`` where
        ``a_words``/``a_capt``/``a_pos`` are column vectors of ids,
        ``a_chars`` is a list of per-token character-id lists,
        ``char_matrix`` is ``a_chars`` right-padded with ``len(char_to_id)``,
        and ``token_lens`` holds ``len(token) - 1`` for each row (int32).
    """
    def as_column(ids, end_id):
        # list(...) copies so the cached per-sentence list is never mutated.
        return np.array(list(ids) + [end_id]).reshape((-1, 1))

    tokens = sentence_str[sentence_id]
    a_words = as_column(sentence_words[sentence_id], word_to_id['#END_SENT'])
    a_capt = as_column(sentence_capts[sentence_id], capt_to_id['#END_SENT'])
    a_pos = as_column(sentence_pos[sentence_id], pos_to_id['#END_SENT'])

    # Character ids per token; characters missing from the vocabulary map to '#UNK'.
    a_chars = []
    for token in tokens:
        a_chars.append(
            [char_to_id[ch] if ch in char_to_id else char_to_id['#UNK'] for ch in token]
        )
    a_chars.append([char_to_id['#END_SENT']])

    # Index of the last character of every token (length minus one).
    token_lens = np.array([len(ids) for ids in a_chars]).astype(np.int32) - 1

    n_rows = len(token_lens)
    n_cols = np.max(np.array(token_lens) + 1)

    # Padding value is len(char_to_id), i.e. one past the largest real char id.
    char_matrix = np.full((n_rows, n_cols), len(char_to_id), dtype=np.float64)
    for row, ids in enumerate(a_chars):
        char_matrix[row][:len(ids)] = ids

    return a_words, a_capt, a_chars, a_pos, char_matrix, token_lens

## Função de geração da saída

In [7]:
def get_output(sentence_id):
    """Return the chunk-tag ids of a sentence as an array, followed by the
    '#END_SENT' tag id."""
    gold_ids = list(sentence_ck[sentence_id]) + [ck_to_id['#END_SENT']]
    return np.array(gold_ids)

## Funções Viterbi

In [8]:
def longest_path(t_out, t_edge_scores, n_tags):
    """Forward pass of the Viterbi recursion over per-edge score matrices.

    For every element ``et`` of ``t_edge_scores`` the running scores are
    updated as ``new[i] = max_j(prev[j] + et[i, j])`` and the maximising ``j``
    is recorded. ``t_out`` is accepted but not used.

    Returns
    -------
    (score_matrix, selection_matrix)
        Stacked best scores and stacked arg-max indices, one row per edge.
    """
    def advance(carry, edge):
        running_scores, _ = carry
        # After the transpose, axis 0 ranges over the previous tag.
        candidates = tf.transpose(running_scores + edge)
        return tf.reduce_max(candidates, axis=0), tf.argmax(candidates, axis=0)

    start = (tf.zeros(n_tags), tf.zeros(n_tags, dtype=tf.int64))
    score_matrix, selection_matrix = tf.scan(
        fn=advance,
        elems=t_edge_scores,
        initializer=start,
    )
    return score_matrix, selection_matrix

In [9]:
def retrieve_path(selection_matrix, last_tag):
    """Follow the back-pointers in ``selection_matrix`` from ``last_tag``.

    Rows are visited from the last to the first; at each row the entry indexed
    by the previously recovered tag is taken. The recovered tags are put back
    in forward order and ``last_tag`` is appended at the end.
    """
    def backtrack(carry, row):
        pointers, following_tag = carry
        return pointers, pointers[row][following_tag]

    n_rows = tf.shape(selection_matrix)[0]
    rows_descending = n_rows - 1 - tf.range(n_rows)
    _, tags_reversed = tf.scan(
        fn=backtrack,
        elems=rows_descending,
        initializer=(selection_matrix, last_tag),
    )

    tags_forward = tf.reverse(tags_reversed, axis=[0])
    return tf.concat((tags_forward, [last_tag]), axis=0)

In [10]:
def path_score(t_edge_scores, path, n):
    """Sum the edge scores along ``path``.

    For ``t`` in ``0 .. n-2`` the term ``t_edge_scores[t][path[t+1], path[t]]``
    is accumulated; the final running total (a length-1 tensor) is returned.
    """
    def accumulate(carry, position):
        scores, tags, running_total = carry
        edge = scores[position]
        tag_here = tags[position]
        tag_next = tags[position + 1]
        return scores, tags, edge[tag_next, tag_here] + running_total

    _, _, cumulative = tf.scan(
        fn=accumulate,
        elems=tf.range(n - 1),
        initializer=(t_edge_scores, path, tf.zeros(1)),
    )
    return cumulative[-1]


## Embeddings

In [11]:
import numpy as np

In [12]:
# Read the whole GloVe text file. The encoding is given explicitly so the
# result does not depend on the platform's default locale encoding.
with open('../../../data/glove.6B.100d.txt', 'r', encoding='utf-8') as f:
    content = f.read()

Fazer matriz de embeddings

In [13]:
# One GloVe entry per line; the text after the final newline is discarded.
lines = content.split('\n')[:-1]

# word -> float32 vector, parsed from "<word> <v1> <v2> ..." lines.
word_embeds = {}
for line in lines:
    features = line.split()
    word_embeds[features[0]] = np.array(features[1:]).astype(np.float32)

# Vector width is taken from the entry for 'the'.
embed_size = len(word_embeds['the'])
# One row per vocabulary entry plus one extra row (presumably for
# padding/out-of-range ids — TODO confirm); rows without a GloVe vector stay zero.
embedding_matrix = np.zeros((len(id_to_word)+1, embed_size))

found_words = 0
for word in id_to_word:
    if word not in word_embeds:
        continue
    embedding_matrix[word_to_id[word]] = word_embeds[word]
    found_words += 1
print(found_words)

17101


## Modelo

In [24]:
import tensorflow as tf
import numpy as np
import json
import time

Meta-informação do modelo

In [15]:
# Hyper-parameters and vocabulary sizes consumed by make_model().
# 'char_hidden_features' is required by make_model(); without it the call
# fails with a KeyError.
model_meta = {
    'vocab_size':len(id_to_word),
    'capt_size':len(id_to_capt),
    'pos_size':len(id_to_pos),
    'char_size':len(id_to_char),
    'ck_size':len(id_to_ck),
    'embed_size':embedding_matrix.shape[1],
    'capt_embed_size':10,
    'char_hidden_features':30,
    'char_embed_size':30,
    'pos_embed_size':10,
    'hidden_features':200,
    'state_size':200,
    'learning_rate':0.001,
    'spn_layer':False
}

### Gerar modelo a partir da Meta-informação e Embeddings

In [16]:
def make_model(model_meta, embedding_matrix):
    """Build the tagging graph and return handles to train it and predict with it.

    The default TensorFlow graph is reset before anything is created, so any
    previously built model becomes unusable.

    Parameters
    ----------
    model_meta : dict
        Must provide 'capt_size', 'pos_size', 'char_size', 'ck_size',
        'embed_size', 'char_embed_size', 'char_hidden_features',
        'hidden_features', 'state_size' (may be None to skip the extra LSTM),
        'learning_rate' and 'spn_layer' (True: edge-scoring layer decoded with
        longest_path/retrieve_path; False: linear layer + CRF).
    embedding_matrix : numpy.ndarray
        Initial value of the word-embedding variable (cast to float32).

    Returns
    -------
    tuple
        ``(t_inputs, t_targets, t_outputs, my_t_train, t_pred)`` where
        ``my_t_train(sess, inputs, targets, x_words, x_capt, x_pos,
        x_char_matrix, x_char_lens, y_ck)`` runs one optimisation step and
        returns the cost, and ``t_pred(sess, inputs, x_words, x_capt, x_pos,
        x_char_matrix, x_char_lens)`` returns the predicted tag ids.
    """
    capt_size = model_meta['capt_size']
    pos_size = model_meta['pos_size']
    char_size = model_meta['char_size']
    ck_size = model_meta['ck_size']
    embed_size = model_meta['embed_size']
    char_embed_size = model_meta['char_embed_size']
    char_hidden = model_meta['char_hidden_features']
    hidden_features = model_meta['hidden_features']
    state_size = model_meta['state_size']
    lr = model_meta['learning_rate']
    spn_layer = model_meta['spn_layer']

    tf.reset_default_graph()

    t_x_words = tf.placeholder(tf.int32,(None,1)) # word ids
    t_x_capt = tf.placeholder(tf.int32,(None,1)) # capitalisation ids
    t_x_pos = tf.placeholder(tf.int32,(None,1)) # POS ids
    t_x_chars = tf.placeholder(tf.int32, (None,None)) # padded char ids, one row per token
    # `(None)` is plain None, i.e. the shape of this placeholder is unconstrained.
    t_x_lens = tf.placeholder(tf.int32, (None)) # column of the last real char of each token
    t_y_ck = tf.placeholder(tf.int32, shape=(None,)) # gold chunk-tag id of each token

    t_inputs = [t_x_words, t_x_capt, t_x_pos, t_x_chars, t_x_lens]
    t_targets = [t_y_ck]

    with tf.variable_scope('Feature_Vars'):
        t_W_embed = tf.Variable(embedding_matrix.astype(np.float32))
        # One extra row for the padding id used in the char matrix.
        t_W_char = tf.Variable(np.random.normal(0,0.1,(char_size+1, char_embed_size)).astype(np.float32))

        # Scale and shift applied after normalising the edge scores (SPN branch only).
        t_gamma = tf.Variable(np.random.normal(0,1.0, 1).astype(np.float32))
        t_beta = tf.Variable(np.random.normal(0,1.0, 1).astype(np.float32))

        # Width of the representation fed to the output layer.
        dim = state_size
        if dim is None:
            dim = 2*hidden_features
        if spn_layer:
            t_W_tran = tf.Variable(np.random.normal(0,1.0/np.sqrt(3*dim),(3*dim, ck_size*ck_size)).astype(np.float32))
        else:
            t_W_ck = tf.Variable(np.random.normal(0,0.1,(dim, ck_size)).astype(np.float32))

    t_words = tf.gather_nd(t_W_embed, t_x_words)
    t_x_pos_flat = tf.squeeze(t_x_pos,axis=1)
    t_x_capt_flat = tf.squeeze(t_x_capt,axis=1)

    # Capitalisation and POS enter the model as one-hot vectors.
    t_capt = tf.to_float(tf.one_hot(indices=t_x_capt_flat, depth=capt_size+1,on_value=1,off_value=0))
    t_pos = tf.to_float(tf.one_hot(indices=t_x_pos_flat,depth=pos_size+1,on_value=1,off_value=0))

    # t_capt = tf.gather_nd(t_W_capt, t_x_capt)
    # t_pos = tf.gather_nd(t_W_pos, t_x_pos)
    t_char = tf.nn.embedding_lookup(t_W_char, t_x_chars)

    t_lstmcell_char = tf.nn.rnn_cell.LSTMCell(num_units=char_hidden, state_is_tuple=True)

    with tf.variable_scope('LSTM_Chars'):
        t_h, t_ls = tf.nn.dynamic_rnn(
            cell=t_lstmcell_char,
            dtype=tf.float32,
            inputs=t_char,
        )

    # For every token pick the char-LSTM output at column t_x_lens[token].
    t_x_ind = tf.range(tf.shape(t_x_lens)[0])
    t_x_get = tf.transpose(tf.stack([t_x_ind, t_x_lens]))
    t_x_token_char = tf.gather_nd(t_h, t_x_get)

    t_word_feats = tf.concat((t_words, t_capt, t_pos, t_x_token_char), axis=1)

    t_sq_len = tf.shape(t_x_words)[0]

    # Add a leading batch dimension of 1: the whole sentence is one sequence.
    t_words_shp = tf.reshape(
        t_word_feats, (1,t_sq_len, pos_size+1 +embed_size+capt_size+1+char_hidden))

    t_lstmcellf = tf.nn.rnn_cell.LSTMCell(num_units=hidden_features, state_is_tuple=True)
    t_lstmcellb = tf.nn.rnn_cell.LSTMCell(num_units=hidden_features, state_is_tuple=True)

    with tf.variable_scope("Bilstm"):

        t_h1, t_last_states =tf.nn.bidirectional_dynamic_rnn(
            cell_fw=t_lstmcellf,
            cell_bw=t_lstmcellb,
            dtype=tf.float32,
            inputs=t_words_shp)

        t_hidden = tf.concat((t_h1[0][0],t_h1[1][0]),axis=1)

    if state_size is not None:
        t_lstmcell = tf.nn.rnn_cell.LSTMCell(num_units=state_size, state_is_tuple=True)
        t_hidden_shp = tf.reshape(t_hidden, (1,t_sq_len, 2*hidden_features))

        with tf.variable_scope('LSTM_last'):
            t_h2, t_last_states2 =tf.nn.dynamic_rnn(
                cell=t_lstmcell,
                dtype=tf.float32,
                sequence_length=[t_sq_len],
                inputs=t_hidden_shp)

        t_out = t_h2[0]
    else:
        t_out = t_hidden

    n = tf.shape(t_out)[0]

    def _feed_inputs(inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens):
        # Single place that maps the five input placeholders to their values.
        return {
            inputs[0]: x_words,
            inputs[1]: x_capt,
            inputs[2]: x_pos,
            inputs[3]: x_char_matrix,
            inputs[4]: x_char_lens,
        }

    t_outputs = []
    if spn_layer:
        # One feature vector per pair of consecutive positions.
        t_edges = tf.concat((t_out[1:],t_out[:-1],t_out[1:]*t_out[:-1]),axis=1)
        t_edge_scores = tf.matmul(t_edges, t_W_tran)

        t_edge_scores = tf.reshape(t_edge_scores, ((n-1)*ck_size*ck_size,))

        # Normalise all edge scores of the sentence to zero mean / unit
        # variance, then rescale with the learned gamma and beta.
        t_es_mean = tf.reduce_mean(t_edge_scores)
        t_es_m2 = tf.reduce_mean(t_edge_scores**2)

        t_es_var = t_es_m2 - t_es_mean**2
        t_es_std = tf.sqrt(t_es_var + 1e-8)

        t_es_norm = (t_edge_scores - t_es_mean)/t_es_std

        t_es_renorm = t_gamma * t_es_norm + t_beta

        t_edge_scores = tf.reshape(t_es_renorm, (n-1,ck_size,ck_size))

        t_score_matrix, t_selection_matrix = longest_path(t_out, t_edge_scores,ck_size)

        t_best_score = tf.reduce_max(t_score_matrix[-1])
        t_last_tag = tf.argmax(t_score_matrix[-1])

        t_best_path = retrieve_path(t_selection_matrix, t_last_tag)

        t_correct_score = path_score(t_edge_scores, t_y_ck, n)

        # The cost is the negated score of the gold path.
        t_cost = -t_correct_score

        t_optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        t_train = t_optimizer.minimize(t_cost)

        t_outputs.extend([t_score_matrix, t_best_path])

        def t_pred(sess, inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens):
            # Returns the full decoded path, one tag per row of the input.
            return sess.run(
                t_best_path,
                feed_dict=_feed_inputs(inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens))
    else: #CRF
        t_ck_score = tf.matmul(t_out,t_W_ck)

        t_ck_score_ext = tf.expand_dims(t_ck_score, 0)
        t_y_ck_ext = tf.expand_dims(t_y_ck, 0)

        t_sequence_lengths = tf.shape(t_x_words)[0]
        t_sequence_lengths = tf.expand_dims(t_sequence_lengths,0)

        t_log_likelihood, t_transition_params = tf.contrib.crf.crf_log_likelihood(
            t_ck_score_ext,
            t_y_ck_ext,
            t_sequence_lengths)

        t_outputs.extend([t_ck_score, t_transition_params])

        # The cost is the negative CRF log-likelihood of the gold tags.
        t_cost = -t_log_likelihood

        # Use the configured learning rate (this branch used to hard-code
        # 0.001, which made any learning-rate search a no-op for CRF models).
        t_optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        # optimizer = tf.train.GradientDescentOptimizer(0.003)
        t_train = t_optimizer.minimize(t_cost)

        def t_pred(sess, inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens):
            score, tparams = sess.run(
                [t_ck_score, t_transition_params],
                feed_dict=_feed_inputs(inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens))

            # NOTE(review): the last decoded tag is dropped here, whereas the
            # SPN predictor above returns the complete path; callers must not
            # assume both predictors return the same length.
            return tf.contrib.crf.viterbi_decode(score=score,transition_params=tparams)[0][:-1]

    def my_t_train(sess, inputs,targets, x_words, x_capt, x_pos, x_char_matrix, x_char_lens, y_ck):
        # Feed the caller-supplied x_char_lens; previously a notebook-level
        # global named x_lens was read here by accident.
        feed = _feed_inputs(inputs, x_words, x_capt, x_pos, x_char_matrix, x_char_lens)
        feed[targets[0]] = y_ck
        _, result = sess.run([t_train, t_cost], feed_dict=feed)
        return result

    return t_inputs, t_targets, t_outputs, my_t_train, t_pred


In [17]:
# Configuration of the first model built in this notebook: vocabulary sizes
# come from the id lookups, the remaining entries are fixed hyper-parameters.
# spn_layer=False selects the CRF output layer in make_model().
model_meta = dict(
    vocab_size=len(id_to_word),
    capt_size=len(id_to_capt),
    pos_size=len(id_to_pos),
    char_size=len(id_to_char),
    ck_size=len(id_to_ck),
    embed_size=embedding_matrix.shape[1],
    capt_embed_size=10,
    char_hidden_features=30,
    char_embed_size=30,
    pos_embed_size=10,
    hidden_features=200,
    state_size=200,
    learning_rate=0.001,
    spn_layer=False,
)

t_inputs, t_targets, t_outputs, t_train, t_pred = make_model(
    model_meta, embedding_matrix
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


----

In [18]:
# Smoke test: run the freshly initialised (untrained) model on one sentence.
sample = 968
x_words, x_capt, x_chars, x_pos, x_char_matrix, x_lens = get_input(sample)
y_ck = get_output(sample)
# The session only lives for this prediction and is closed by the `with` block.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o_ck = t_pred(sess, t_inputs, x_words, x_capt, x_pos, x_char_matrix, x_lens)
print(o_ck)

[17, 10, 17, 10, 17, 10, 17, 10, 17, 10, 17, 10, 17, 10, 17, 10, 17, 10, 17, 10]


## Decorar uma sentença

In [19]:
# Sanity check: train repeatedly on a single sentence and print the cost at
# iterations 0 and 100.
sample = 968
x_words, x_capt, x_chars, x_pos, x_char_matrix, x_lens = get_input(sample)
y_ck = get_output(sample)

# The context manager closes the session when the check is done, instead of
# leaving it open for the rest of the kernel's life.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(200):
        L = t_train(sess, t_inputs, t_targets, x_words, x_capt, x_pos, x_char_matrix, x_lens, y_ck)
        if i % 100 == 0:
            print(L)

[66.59537]
[0.1083374]


## Avaliação

In [20]:
from subprocess import Popen, PIPE, STDOUT
import re

In [21]:
def tag_to_conll(sess, df_chunk,sentences, t_inputs,t_pred):
    """Render predictions in the text format piped to the conlleval script.

    For every sentence id in ``sentences`` one line ``"<word> <gold> <pred>"``
    is written per token, followed by an empty line.

    Parameters
    ----------
    sess : TensorFlow session used to run ``t_pred``.
    df_chunk : DataFrame with 'sentence_id', 'word' and 'chunk' columns.
    sentences : iterable of sentence ids.
    t_inputs, t_pred : input placeholders and predictor returned by make_model().

    Returns
    -------
    str
        The concatenated text (empty string when ``sentences`` is empty).
    """
    lines = []
    for sentence in sentences:
        df_sentence = df_chunk[df_chunk['sentence_id']==sentence]
        s_tags = np.array(df_sentence['chunk'])
        s_words = np.array(df_sentence['word'])

        x_words, x_capt, _, x_pos, x_char_matrix, x_lens = get_input(sentence)
        predicted = t_pred(sess, t_inputs, x_words, x_capt, x_pos, x_char_matrix, x_lens)

        # Keep exactly one prediction per word. The two predictors built by
        # make_model() disagree on whether the trailing end-of-sentence tag is
        # included, so blindly dropping the last element loses the final real
        # token whenever the predictor has already removed the end marker.
        o_ck = predicted[:len(s_words)]
        tags = [id_to_ck[x] for x in o_ck]

        for word, gold_tag, pred_tag in zip(s_words, s_tags, tags):
            lines.append(word + ' ' + gold_tag + ' ' + pred_tag)
        lines.append('')  # blank line separates sentences

    # Join once instead of growing a string inside the loop.
    return ''.join(line + '\n' for line in lines)

In [22]:
def evaluate_conll(sess, df_chunk,sentences,t_inputs,t_pred,verbose=True,
                   conlleval_path='conlleval.txt'):
    """Score the model with the external conlleval Perl script.

    The predictions produced by ``tag_to_conll`` are piped to
    ``perl <conlleval_path>``; stderr is merged into stdout.

    Parameters
    ----------
    sess, df_chunk, sentences, t_inputs, t_pred :
        Forwarded unchanged to ``tag_to_conll``.
    verbose : bool
        When True the raw script output is printed.
    conlleval_path : str
        Location of the evaluation script (defaults to the path that used to
        be hard-coded).

    Returns
    -------
    float
        The value parsed from the script output, or 0 when it cannot be
        parsed (the problem and the raw output are printed in that case).
    """
    # Position, among all numbers found in the script output, of the value
    # returned as F1. NOTE(review): tied to the exact output layout of the
    # script — confirm if the script is replaced.
    f1_match_index = 7

    result = tag_to_conll(sess, df_chunk, sentences,t_inputs,t_pred)
    f1 = 0
    p = Popen(['perl', conlleval_path], stdout=PIPE, stdin=PIPE, stderr=STDOUT)
    report = p.communicate(input=result.encode())[0].decode()
    if verbose:
        print(report)
    try:
        f1 = float(re.findall(r'(\d+\.?\d+)', report)[f1_match_index])
    except (IndexError, ValueError) as e:
        # Too few numbers in the output, or an unparsable one: report and
        # fall back to 0 so a long training run is not aborted.
        print(e)
        print(report)
    return f1

## Treino Parcial

In [24]:
# model_meta = {
#     'vocab_size':len(id_to_word),
#     'capt_size':len(id_to_capt),
#     'pos_size':len(id_to_pos),
#     'char_size':len(id_to_char),
#     'ck_size':len(id_to_ck),
#     'embed_size':embedding_matrix.shape[1],
#     'capt_embed_size':10,
#     'char_hidden_features':30,
#     'char_embed_size':30,
#     'pos_embed_size':10,
#     'hidden_features':200,
#     'state_size':200,
#     'learning_rate':0.001,
#     'spn_layer':True
# }

# t_inputs, t_targets, t_outputs, t_train, t_pred = make_model(model_meta, embedding_matrix)

# sess = tf.Session()
# sess.run(tf.global_variables_initializer())

# n_epochs = 3

# indices = np.arange(len(train_sentences)//100)    
# n = len(indices)

# best_path = None
# best_f1 = 0

# for epoch in range(n_epochs):
#     j = 0
#     np.random.shuffle(indices)
#     for sid in indices:
#         j += 1
#         x_words, x_capt, x_chars, x_pos, x_char_matrix, x_lens = get_input(train_sentences[sid])
#         y_ck = get_output(train_sentences[sid])
#         L = t_train(sess,t_inputs,t_targets,x_words,x_capt,x_pos,x_char_matrix,x_lens,y_ck)
#         if j % (n//10) == 0:
#             print('{:.2f} %'.format(100*j/n))
        
#     print("Epoca ", (epoch+1))
#     dev_f1 = evaluate_conll(sess, df_ck, dev_sentences[:len(indices)],t_inputs,t_pred,verbose=False)
#     print("dev f1: ", dev_f1)
#     train_f1 = evaluate_conll(sess, df_ck, train_sentences[:len(indices)],t_inputs,t_pred,verbose=False)
#     print("train f1: ", train_f1)
    
#     if dev_f1 > best_f1:
#         best_f1 = dev_f1
# sess.close()

## Treino Total

In [25]:
import time
import datetime

In [None]:
# Grid search: for every hidden size and learning rate, train one model with
# the SPN output layer (s == 0) and one with the CRF output layer (s == 1).
lr_search = [0.0001]
hidden_features_search = [50,150,250]

for j, hf in enumerate(hidden_features_search):
    for lr in lr_search:
        for s in range(2):
            print(j, ' ', len(hidden_features_search))
            model_meta = {
                'vocab_size':len(id_to_word),
                'capt_size':len(id_to_capt),
                'pos_size':len(id_to_pos),
                'char_size':len(id_to_char),
                'ck_size':len(id_to_ck),
                'embed_size':embedding_matrix.shape[1],
                'capt_embed_size':10,
                'char_embed_size':30,
                'pos_embed_size':10,
                'char_hidden_features':30,
                'hidden_features':hf,
                'state_size':hf,
                'learning_rate':lr,
                'spn_layer':s==0
            }
            print('Training Model', model_meta)

            last_layer = 'crf'
            if s == 0:
                last_layer = 'spn'

            n_epochs = 30
            model_name = 'bilstm_viterbi_h' + str(hf) + '_lr_' + str(lr) + '_' + last_layer
            model = make_model(model_meta, embedding_matrix)

            t_inputs, t_targets, t_outputs, t_train, t_pred = model

            indices = np.arange(len(train_sentences))
            n = len(indices)

            best_f1 = 0

            save_dir = '../../../data/CHUNK/' + language + '/models/'
            exp_desc_dir = 'results/'

            dev_f1_list = []
            train_f1_list = []
            consecutive_bad_dev = 0

            # New session per model. It is named `session_config` so that the
            # imported `config` module is not overwritten, and the `with`
            # block closes the session before the next model is built.
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True
            with tf.Session(config=session_config) as sess:
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver(max_to_keep=n_epochs*20)

                for epoch in range(n_epochs):
                    start = time.time()
                    np.random.shuffle(indices)
                    # One optimisation step per training sentence.
                    for sid in indices:
                        x_words, x_capt, x_chars, x_pos, x_char_matrix, x_lens = get_input(train_sentences[sid])
                        y_ck = get_output(train_sentences[sid])
                        L = t_train(sess,t_inputs, t_targets, x_words,x_capt,x_pos,x_char_matrix,x_lens,y_ck)
                    print("Epoca ", (epoch+1))
                    dev_f1 = evaluate_conll(sess, df_ck, dev_sentences,t_inputs,t_pred,verbose=False)
                    print("dev f1: ", dev_f1)
                    train_f1 = evaluate_conll(sess, df_ck, train_sentences,t_inputs,t_pred,verbose=False)
                    print("train f1: ", train_f1)
                    dev_f1_list.append(dev_f1)
                    train_f1_list.append(train_f1)
                    end = time.time()
                    print('Tempo por Epoca: ', (end-start), ' s')
                    if dev_f1 > best_f1:
                        # Checkpoint every time the dev score improves.
                        best_f1 = dev_f1
                        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
                        save_path = save_dir + model_name + '_' + timestamp
                        saver.save(sess, save_path)
                        print('## BEST MODEL saved at ', save_path)
                        consecutive_bad_dev = 0
                    else:
                        consecutive_bad_dev += 1
                    # Early stopping after five epochs without improvement on dev.
                    if consecutive_bad_dev == 5:
                        break

            # Persist the configuration and the per-epoch scores of this run.
            timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
            exp_path = exp_desc_dir + model_name + timestamp + '.txt'
            with open(exp_path,'w') as f:
                exp_desc = {
                    'model':model_meta,
                    'train_f1':train_f1_list,
                    'dev_f1':dev_f1_list
                }
                json.dump(exp_desc, f)
                print('save results on ' + exp_path)

0   3
Training Model {'vocab_size': 18687, 'capt_size': 9, 'pos_size': 45, 'char_size': 85, 'ck_size': 24, 'embed_size': 100, 'capt_embed_size': 10, 'char_embed_size': 30, 'pos_embed_size': 10, 'char_hidden_features': 30, 'hidden_features': 50, 'state_size': 50, 'learning_rate': 0.0001, 'spn_layer': True}
Epoca  1
dev f1:  78.3
train f1:  76.87
Tempo por Epoca:  547.1610581874847  s
## BEST MODEL saved at  ../../../data/CHUNK/en/models/bilstm_viterbi_h50_lr_0.0001_spn_2018-10-09_11-13-17
Epoca  2
dev f1:  83.84
train f1:  82.68
Tempo por Epoca:  510.7972779273987  s
## BEST MODEL saved at  ../../../data/CHUNK/en/models/bilstm_viterbi_h50_lr_0.0001_spn_2018-10-09_11-21-48
Epoca  3
dev f1:  85.89
train f1:  84.73
Tempo por Epoca:  552.3075170516968  s
## BEST MODEL saved at  ../../../data/CHUNK/en/models/bilstm_viterbi_h50_lr_0.0001_spn_2018-10-09_11-31-01
Epoca  4
dev f1:  88.4
train f1:  87.26
Tempo por Epoca:  496.2450067996979  s
## BEST MODEL saved at  ../../../data/CHUNK/en/models/