In [1]:
import sys
sys.path.insert(0,'../models/')
sys.path.insert(0,'../datasets/')
sys.path.insert(0,'..')

import pandas as pd
import numpy as np
import json
from subprocess import Popen, PIPE, STDOUT
import re
from collections import defaultdict
from utils.info import get_db_bounds
from datasets import propbankbr_arg2se, propbankbr_iob2arg

import tensorflow as tf
# import tqdm
from models import PropbankEncoder
import config 

LANG = 'en'
# INPUT_DIR = '../datasets/binaries/1.0/'
# PROPBANK_WAN50_PATH = '{:}wan50/deep_wan50.pickle'.format(INPUT_DIR)
# PEARL_SRLEVAL_PATH = '../srlconll04/srl-eval.pl'

  
  


In [2]:
if LANG == 'pt':
    LANG_DIR = 'pt/1.0/'
    LM = 'wan50'
    PEARL_SRLEVAL_PATH = '../srlconll04/srl-eval.pl'
else:
    LANG_DIR = 'en/'
    LM = 'glo50'
    PEARL_SRLEVAL_PATH = '../srlconll05/bin/srl-eval.pl'

GS_DIR = '../datasets/csvs/{:}'.format(LANG_DIR)
LM_DIR = '{:}/'.format(LM)
INPUT_DIR = '../datasets/binaries/{:}'.format(LANG_DIR)
PROPBANK_PATH = '{:}{:}deep_{:}.pickle'.format(INPUT_DIR, LM_DIR, LM)


# SPN Chunker

Uma mente insana realiza um experimento *lúcido* com o dataset de chunking da conll e seu script de avaliação :)

## 1.1 Carregar dados

In [3]:
GS_PATH = '{:}gs.csv'.format(GS_DIR)
dfgs = pd.read_csv(GS_PATH, index_col=0, sep=',', encoding='utf-8')
column_files = [
    '{:}column_chunks/chunks.csv'.format(GS_DIR),
    '{:}column_predmarker/predicate_marker.csv'.format(GS_DIR),
    '{:}column_shifts_ctx_p/form.csv'.format(GS_DIR),
    '{:}column_shifts_ctx_p/gpos.csv'.format(GS_DIR),
    '{:}column_iob/iob.csv'.format(GS_DIR),
    '{:}column_t/t.csv'.format(GS_DIR)
]

if LANG == 'pt':
    column_files.append(
        '{:}column_shifts_ctx_p/lemma.csv'.format(GS_DIR)
    )

for col_f in column_files:
    _df = pd.read_csv(col_f, index_col=0, encoding='utf-8')
    dfgs = pd.concat((dfgs, _df), axis=1)

DISPLAY_COLUMNS = ['ID', 'P', 'FORM', 'GPOS', 'MARKER', 'ARG', 'T', 'IOB',
                   'CHUNK_ID', 'CHUNK_START', 'CHUNK_FINISH', 'CHUNK_LEN', 'CHUNK_CANDIDATE_ID']            
dfgs[DISPLAY_COLUMNS].head(66)  

  mask |= (ar1 == a)


Unnamed: 0_level_0,ID,P,FORM,GPOS,MARKER,ARG,T,IOB,CHUNK_ID,CHUNK_START,CHUNK_FINISH,CHUNK_LEN,CHUNK_CANDIDATE_ID
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,1,1,In,IN,0,*,*,O,1,0,17,17,16
1,2,1,an,DT,0,*,*,O,1,0,17,17,16
2,3,1,Oct.,NNP,0,*,*,O,1,0,17,17,16
3,4,1,19,CD,0,*,*,O,1,0,17,17,16
4,5,1,review,NN,0,*,*,O,1,0,17,17,16
5,6,1,of,IN,0,*,*,O,1,0,17,17,16
6,7,1,``,'',0,*,*,O,1,0,17,17,16
7,8,1,The,DT,0,*,*,O,1,0,17,17,16
8,9,1,Misanthrope,NN,0,*,*,O,1,0,17,17,16
9,10,1,'','',0,*,*,O,1,0,17,17,16


## 1.2 Load Encodings

Propbank Encoder holds an indexed version of propbank dataset an answers to FOUR different dataformats: 
* CAT: this is the raw categorical data.
* EMB: tokens are embedding using GloVe embeddings.
* HOT: onehot encoding of the words and tokens.
* IDX: dense indexed representations.

In [4]:
# LOAD ENCODER
propbank_encoder = PropbankEncoder.recover(PROPBANK_PATH)
db = propbank_encoder.db
lex2idx = propbank_encoder.lex2idx
idx2lex = propbank_encoder.idx2lex

# FOR TEXTUAL DATA ONLY
tok2idx = propbank_encoder.tok2idx
lex2tok = propbank_encoder.lex2tok
idx2word = propbank_encoder.idx2word

#Numpyfy embeddings
embeddings = propbank_encoder.embeddings

embeddings = np.concatenate([np.array(embs).reshape((1,50))
                             for embs in embeddings], axis=0)
n_targets = len(lex2idx['IOB'])

In [50]:
import json
# dfgs.to_csv('ptb.en-set.csv', sep='\t', encoding='utf-8')
# with open('idx2word.en-set.json', mode='w') as f:
#     json.dump(idx2word, f)
np.save('glo50.eng.npy', embeddings)

In [5]:
print(
'''Overall:
  \tattributes:{:}\trecords:{:}\tvocab:{:}\tpropositions:{:}'''
    .format(len(db), len(db['ARG'].keys()), 
            len(set([form for _, form in db['FORM'].items()])),
            len(set([p for _, p in db['P'].items()]))))

Overall:
  	attributes:34	records:2681866	vocab:43440	propositions:93998


# 2. Data Wrangling

## 2.1 Helpful  function

In [6]:
def filter_type(ds_type, db):
    '''Filters only records from train dataset
    '''
    lb, ub = get_db_bounds(ds_type, lang=LANG)

    sel_keys_ = {key_ for key_, prop_ in db['P'].items() if prop_ >= lb and prop_ < ub}

    return {
                attr_:{ idx_: i_
                        for idx_, i_ in dict_.items() if idx_ in sel_keys_
                      }        
                for attr_, dict_  in db.items()
            }

def make_propositions_dict(db):
    '''Reindex db by propositions creating a nested dict in which the
        outer key is the proposition        
    '''
    
    triple_list = []
    prev_idx = min(db['P'].keys())
    prev_prop = min(db['P'].values()) - 1 # Always enter first time
    first = True
    for idx, prop in db['P'].items():        
        if prev_prop != prop:
            if not first:
                ub = prev_idx
                triple_list.append((lb, ub, prev_prop))
            lb = idx
            first = False
        prev_prop = prop
        prev_idx = idx
    triple_list.append((lb, prev_idx, prev_prop))
            

        
    prop_set = set(db['P'].values())
    return { prop_:
                    {
                        attr_:{ idx_: dict_[idx_]
                                for idx_ in range(lb_, ub_ + 1, 1)
                          }        
                        for attr_, dict_ in db.items() if attr_ not in ('P',)
                    }
             for lb_, ub_, prop_ in  triple_list
            }, {prop_: ub_ - lb_ + 1 for lb_, ub_, prop_ in  triple_list}   


def numpfy_propositions_dict(prop_dict, proplen_dict):
    '''Converts inner dict examples into numpy arrays
    '''
    prop_dict_ = defaultdict(dict)    
    for prop, columns_dict in prop_dict.items():
        len_ = proplen_dict[prop]
        shape_ = (len_, 1)
        for column, values_dict in columns_dict.items():
            tuple_list = [idx_value 
                          for idx_value in values_dict.items()]

            tuple_list = sorted(tuple_list, key=lambda x: x[0])            
            # Converts lexicon (raw/indexed) into token (embedded/indexed)
            if (('FORM' in column) or ('LEMMA' in column)):
                values_list = [tok2idx[lex2tok[idx2word[tuple_[1]]]]                
                                   for tuple_ in tuple_list]
            else:
                values_list = [tuple_[1] for tuple_ in tuple_list]

            prop_dict_[prop][column]  = np.array(values_list).reshape(shape_)

    return prop_dict_        


In [7]:
print(lex2tok['77'])

77


## 2.2 Data segmentation

In [8]:
dbtrain = filter_type('train', db)
dbvalid = filter_type('valid', db)
if LANG == 'pt':
    dbtest = filter_type('test', db)

In [9]:
print(
'''Train:
  \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
    .format(len(dbtrain), 
            len(dbtrain['ARG'].keys()), 
            len(set([form for _, form in dbtrain['FORM'].items()])),
            len(set([p for _, p in dbtrain['P'].items()]))))

print(
'''Valid:
  \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
    .format(len(dbvalid),
            len(dbvalid['ARG'].keys()), 
            len(set([form for _, form in dbvalid['FORM'].items()])),
            len(set([p for _, p in dbvalid['P'].items()]))))

if LANG == 'pt':
    print(
    '''Test:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtest),
                len(dbtest['ARG'].keys()), 
                len(set([form for _, form in dbtest['FORM'].items()])),
                len(set([p for _, p in dbtest['P'].items()]))))

    print(
    '''Overall:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtrain) + len(dbvalid) + len(dbtest), 
                len(set(list(dbtrain['ARG'].keys()) + list(dbvalid['ARG'].keys()) + list(dbtest['ARG'].keys()))), 
                len(set([form for _, form in dbtrain['FORM'].items()] +
                        [form for _, form in dbvalid['FORM'].items()] +
                        [form for _, form in dbtest['FORM'].items()])),
                 len(set([form for _, form in dbtrain['P'].items()] +
                        [form for _, form in dbvalid['P'].items()] +
                        [form for _, form in dbtest['P'].items()]))))
else:
    print(
    '''Overall:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtrain) + len(dbvalid), 
                len(set(list(dbtrain['ARG'].keys()) + list(dbvalid['ARG'].keys()))), 
                len(set([form for _, form in dbtrain['FORM'].items()] +
                        [form for _, form in dbvalid['FORM'].items()])),
                 len(set([form for _, form in dbtrain['P'].items()] +
                        [form for _, form in dbvalid['P'].items()]))))
    

Train:
  	attributes:34	records:2587103	vocab:0042711	propositions:90750
Valid:
  	attributes:34	records:0094763	vocab:0006126	propositions:3248
Overall:
      	attributes:68	records:2681866	vocab:0043440	propositions:93998


## 2.3 Nested proposition

In [10]:
dbtrain, lentrain = make_propositions_dict(dbtrain)
dbvalid, lenvalid = make_propositions_dict(dbvalid)
if LANG == 'pt':
    dbtest, lentest = make_propositions_dict(dbtest)

In [11]:
print(
'''Train:
  \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
    .format(len(dbtrain[1]) + 1, 
            sum([len(d['ARG']) for p, d in dbtrain.items()]), 
            len(set([v                    
                     for p, d in dbtrain.items()
                     for v in d['FORM'].values()])),
            len(lentrain)))

print(
'''Valid:
  \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
    .format(len(dbvalid[min(dbvalid)]) + 1,
            sum([len(d['ARG']) for p, d in dbvalid.items()]), 
            len(set([v
                     for p, d in dbvalid.items()
                     for v in d['FORM'].values()])),
            len(lenvalid)))

if LANG == 'pt':
    print(
    '''Test:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtest[min(dbtest)]) + 1,
                sum([len(d['ARG']) for p, d in dbtest.items()]), 
                len(set([v
                         for p, d in dbtest.items()
                         for v in d['FORM'].values()])),
                len(lentest)))

    print(
    '''Overall:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtrain[1]) + 1, 
                sum([len(d['ARG']) for p, d in dbtrain.items()] +
                    [len(d['ARG']) for p, d in dbvalid.items()] +
                    [len(d['ARG']) for p, d in dbtest.items()]), 
                len(set([v                    
                         for p, d in dbtrain.items()
                         for v in d['FORM'].values()] +
                        [v                    
                         for p, d in dbvalid.items()
                         for v in d['FORM'].values()] +
                        [v
                         for p, d in dbtest.items()  
                         for v in d['FORM'].values()])),
                 len(lentrain) + len(lenvalid) + len(lentest)))
else:

    print(
    '''Overall:
      \tattributes:{:}\trecords:{:07d}\tvocab:{:07d}\tpropositions:{:}'''
        .format(len(dbtrain[1]) + 1, 
                sum([len(d['ARG']) for p, d in dbtrain.items()] +
                    [len(d['ARG']) for p, d in dbvalid.items()]), 
                len(set([v                    
                         for p, d in dbtrain.items()
                         for v in d['FORM'].values()] +
                        [v                    
                         for p, d in dbvalid.items()
                         for v in d['FORM'].values()])),
                 len(lentrain) + len(lenvalid)))


Train:
  	attributes:34	records:2587103	vocab:0042711	propositions:90750
Valid:
  	attributes:34	records:0094763	vocab:0006126	propositions:3248
Overall:
      	attributes:34	records:2681866	vocab:0043440	propositions:93998


## 2.4 Numpfy

In [12]:
dbtrain = numpfy_propositions_dict(dbtrain, lentrain)
dbvalid = numpfy_propositions_dict(dbvalid, lenvalid)
if LANG == 'pt':
    dbtest = numpfy_propositions_dict(dbtest, lentest)

## 3. Data I/O

In [13]:
def get_inputs(db1, propid):
    '''Generate inputs
    '''
    propdb = db1[propid] # nested dict of columns and idx value

    
    
    # Replaces word with tokens
    word    = propdb['FORM']
    ctx_p_left  = propdb['FORM_CTX_P-1']
    ctx_p0  = propdb['FORM_CTX_P+0']
    ctx_p_right  = propdb['FORM_CTX_P+1']

    marker  = propdb['MARKER']
    pos     = propdb['GPOS']
    chunk_type  = propdb['T']
    
    return word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos
            
# def generate_chunk_space(n):
#     '''Generates all possible spaces for chunks
#     '''
#     start_list = []
#     end_list = []
#     for i in range(n):
#         for j in range(i,n,1):
#             start_list.append(i)
#             end_list.append(j+1)
#     shape_ = (len(start_list), 1)
#     start_ = np.array(start_list).reshape(shape_)
#     finish_ = np.array(end_list).reshape(shape_)
#     return start_, finish_
            

def get_outputs(db1, propid, n_targets):
    ''' Generate outputs
    '''
    return db1[propid]['IOB']

In [14]:
%%timeit
propid = 1120
word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos = get_inputs(dbtrain, propid)
targets = get_outputs(dbtrain, propid, n_targets)


718 ns ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [15]:
propid = 1120
labels = get_outputs(dbtrain, propid, n_targets)
print(lex2idx['IOB'])
print(labels)

OrderedDict([('B-A0', 0), ('B-A1', 1), ('B-A2', 2), ('B-A3', 3), ('B-A4', 4), ('B-A5', 5), ('B-AA', 6), ('B-AM', 7), ('B-AM-ADV', 8), ('B-AM-CAU', 9), ('B-AM-DIR', 10), ('B-AM-DIS', 11), ('B-AM-EXT', 12), ('B-AM-LOC', 13), ('B-AM-MNR', 14), ('B-AM-MOD', 15), ('B-AM-NEG', 16), ('B-AM-PNC', 17), ('B-AM-PRD', 18), ('B-AM-REC', 19), ('B-AM-TM', 20), ('B-AM-TMP', 21), ('B-V', 22), ('I-A0', 23), ('I-A1', 24), ('I-A2', 25), ('I-A3', 26), ('I-A4', 27), ('I-A5', 28), ('I-AA', 29), ('I-AM', 30), ('I-AM-ADV', 31), ('I-AM-CAU', 32), ('I-AM-DIR', 33), ('I-AM-DIS', 34), ('I-AM-EXT', 35), ('I-AM-LOC', 36), ('I-AM-MNR', 37), ('I-AM-MOD', 38), ('I-AM-NEG', 39), ('I-AM-PNC', 40), ('I-AM-PRD', 41), ('I-AM-REC', 42), ('I-AM-TM', 43), ('I-AM-TMP', 44), ('I-V', 45), ('O', 46)])
[[46]
 [46]
 [46]
 [46]
 [ 0]
 [46]
 [16]
 [22]
 [10]
 [46]
 [46]]


In [16]:
df = dfgs[dfgs['P'] == 1120]
df[DISPLAY_COLUMNS].head(18)

Unnamed: 0_level_0,ID,P,FORM,GPOS,MARKER,ARG,T,IOB,CHUNK_ID,CHUNK_START,CHUNK_FINISH,CHUNK_LEN,CHUNK_CANDIDATE_ID
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
32051,1,1120,(,(,0,*,*,O,6708,0,4,4,3
32052,2,1120,So,RB,0,*,*,O,6708,0,4,4,3
32053,3,1120,long,RB,0,*,*,O,6708,0,4,4,3
32054,4,1120,as,IN,0,*,*,O,6708,0,4,4,3
32055,5,1120,you,PRP,0,(A0*),A0,B-A0,6709,4,5,1,38
32056,6,1120,do,AUX,0,*,*,O,6710,5,6,1,45
32057,7,1120,n't,RB,0,(AM-NEG*),AM-NEG,B-AM-NEG,6711,6,7,1,51
32058,8,1120,look,VB,1,(V*),V,B-V,6712,7,8,1,56
32059,9,1120,down,RB,1,(AM-DIR*),AM-DIR,B-AM-DIR,6713,8,9,1,60
32060,10,1120,.,.,1,*,*,O,6714,9,11,2,64


## 4. Viterbi Functions

In [32]:
def longest_path(t_out, t_edge_scores, n_tags):
    
    def step(prev, et):
        # last computed scores and last computed transitions
        prev_scores, prev_selections = prev
        
        current_scores = tf.transpose(prev_scores + et)

        best_scores = tf.reduce_max(current_scores, axis=0)
        best_options = tf.argmax(current_scores, axis=0)

        return best_scores, best_options
    
    score_matrix, selection_matrix = tf.scan(
        fn=step,
        elems=t_edge_scores,
        initializer=(tf.zeros(n_tags), tf.to_int64(tf.zeros(n_tags))),
    )
    
    return score_matrix, selection_matrix

In [33]:
def retrieve_path(selection_matrix, last_tag):
    
    def step(prev, t):
        selection_matrix, prev_best = prev

        current = selection_matrix[t][prev_best]

        return selection_matrix, current

    m = tf.shape(selection_matrix)[0]
    _, rev_path = tf.scan(
        fn = step,
        elems=m-1-tf.range(m),
        initializer=(selection_matrix, last_tag)

    )

    best_path = tf.concat((tf.reverse(rev_path,axis=[0]),[last_tag]),axis=0)
    return best_path

In [34]:
def path_score(t_edge_scores, path, n):
    
    def step(prev, t):
        edge_scores, path, prev_score = prev
        
        transition_score = edge_scores[t]
        
        p_t = path[t]
        p_tp1 = path[t+1]

        current_score = transition_score[p_tp1,p_t] + prev_score

        return edge_scores, path, current_score

    _, _, path_score = tf.scan(
    fn = step,
    elems=tf.range(n-1),
    initializer=(t_edge_scores, path, tf.zeros(1))
    )
    return path_score[-1]


## 5. Build the computation graph

In [35]:
import tensorflow as tf
import numpy as np
import json
import time

In [36]:
hparams = {
    'hidden_features':200,
    'state_size':200,
    'learning_rate':0.001,
    'spn_layer':True
}

In [37]:
def make_graph(hparams):

    # Determine parameters for the computation graph
#     vocab_size = len(set(lex2tok.values()))
#     capt_size = model_meta['capt_size']
    pos_size = len(lex2idx['GPOS'])
#     char_size = model_meta['char_size']
    ck_size = len(lex2idx['IOB'])
    embed_size = embeddings.shape[1]
#     capt_embed_size = model_meta['capt_embed_size']
#     char_embed_size = model_meta['char_embed_size']
#     char_hidden = model_meta['char_hidden_features']
    pos_embed_size = len(idx2lex['GPOS'])

    hidden_features = hparams['hidden_features']
    state_size = hparams['state_size']
    lr = hparams['learning_rate']
    spn_layer = hparams['spn_layer']

    tf.reset_default_graph()
# word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, 

    t_x_words = tf.placeholder(tf.int32,(None,1))   # ids form
    t_x_ctx_p_left = tf.placeholder(tf.int32,(None,1))
    t_x_ctx_p0 = tf.placeholder(tf.int32,(None,1)) 
    t_x_ctx_p_right = tf.placeholder(tf.int32,(None,1))
    
    t_x_pos = tf.placeholder(tf.int32,(None,1))     # ids POS
    t_x_marker = tf.placeholder(tf.int32, (None,1))
    
    t_y_ck = tf.placeholder(tf.int32, shape=(None,)) # ids IOB-SRL
    
    
    t_inputs = [t_x_words, t_x_ctx_p_left, t_x_ctx_p0, t_x_ctx_p_right, t_x_marker, t_x_pos]
    t_targets = [t_y_ck]

    with tf.variable_scope('Feature_Vars'):
        t_W_embed = tf.Variable(embeddings.astype(np.float32))
#         t_W_char = tf.Variable(np.random.normal(0,0.1,(char_size+1, char_embed_size)).astype(np.float32))

        t_gamma = tf.Variable(np.random.normal(0,1.0, 1).astype(np.float32))
        t_beta = tf.Variable(np.random.normal(0,1.0, 1).astype(np.float32))

        dim = state_size
        if dim is None:
            dim = 2*hidden_features
        if spn_layer:
            t_W_tran = tf.Variable(np.random.normal(0,1.0/np.sqrt(3*dim),(3*dim, ck_size*ck_size)).astype(np.float32))
        else:
            t_W_ck = tf.Variable(np.random.normal(0,0.1,(dim, ck_size)).astype(np.float32))
        


    t_words = tf.gather_nd(t_W_embed, t_x_words)
    t_ctx_p_left = tf.gather_nd(t_W_embed, t_x_ctx_p_left)
    t_ctx_p0 = tf.gather_nd(t_W_embed, t_x_ctx_p0)
    t_ctx_p_right = tf.gather_nd(t_W_embed, t_x_ctx_p_right)
                                     
    t_x_pos_flat = tf.squeeze(t_x_pos, axis=1)

    t_pos = tf.to_float(tf.one_hot(indices= t_x_pos_flat, depth=pos_size, on_value=1,off_value=0))
    t_marker = tf.cast(t_x_marker, tf.float32)
    
    t_word_feats = tf.concat((t_words, t_ctx_p_left, t_ctx_p0, t_ctx_p_right, t_marker, t_pos), axis=1)
    print(t_word_feats.get_shape())

    t_sq_len = tf.shape(t_x_words)[0]
    print(t_sq_len)

    n_features = embed_size

    t_words_shp = tf.reshape(
        t_word_feats, (1,t_sq_len, pos_size + embed_size * 4 + 1)
    )

    cell_fw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_features, state_is_tuple=True)
    cell_bw = tf.nn.rnn_cell.LSTMCell(num_units=hidden_features, state_is_tuple=True)

    with tf.variable_scope("Bilstm"):
        t_h1, t_last_states =tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            dtype=tf.float32,
            inputs=t_words_shp)

        t_hidden = tf.concat((t_h1[0][0],t_h1[1][0]),axis=1)

    if state_size is not None:
        t_lstmcell = tf.nn.rnn_cell.LSTMCell(num_units=state_size, state_is_tuple=True)
        t_hidden_shp = tf.reshape(t_hidden, (1,t_sq_len, 2*hidden_features))

        with tf.variable_scope('LSTM_last'):
            t_h2, t_last_states2 =tf.nn.dynamic_rnn(
                cell=t_lstmcell,
                dtype=tf.float32,
                sequence_length=[t_sq_len],
                inputs=t_hidden_shp)

        t_out = t_h2[0]
    else:
        t_out = t_hidden
        
    n = tf.shape(t_out)[0]
    
    t_outputs = []
    if spn_layer:
        t_edges = tf.concat((t_out[1:],t_out[:-1],t_out[1:]*t_out[:-1]),axis=1)
        t_edge_scores = tf.matmul(t_edges, t_W_tran)

        t_edge_scores = tf.reshape(t_edge_scores, ((n-1)*ck_size*ck_size,))

        # Batch Normalization
        t_es_mean = tf.reduce_mean(t_edge_scores)
        t_es_m2 = tf.reduce_mean(t_edge_scores**2)

        t_es_var = t_es_m2 - t_es_mean**2
        t_es_std = tf.sqrt(t_es_var + 1e-8)

        t_es_norm = (t_edge_scores - t_es_mean)/t_es_std

        t_es_renorm = t_gamma * t_es_norm + t_beta

        t_edge_scores = tf.reshape(t_es_renorm, (n-1,ck_size,ck_size))

        t_score_matrix, t_selection_matrix = longest_path(t_out, t_edge_scores,ck_size)

        t_best_score = tf.reduce_max(t_score_matrix[-1])
        t_last_tag = tf.argmax(t_score_matrix[-1])

        t_best_path = retrieve_path(t_selection_matrix, t_last_tag)

        t_correct_score = path_score(t_edge_scores, t_y_ck, n)

        t_cost = -t_correct_score

        # # gradiente descendente no custo do perceptron estruturado
        t_optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        t_train = t_optimizer.minimize(t_cost)

        t_outputs.extend([t_score_matrix, t_best_path])
        
        def t_pred(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos):
            result = sess.run(t_best_path, feed_dict={
                t_inputs[0]:x_word,
                t_inputs[1]:x_ctx_p_left,
                t_inputs[2]:x_ctx_p0,
                t_inputs[3]:x_ctx_p_right,                
                t_inputs[4]:x_marker,
                t_inputs[5]:x_pos
            })
            return result

        def my_t_train(sess, word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, y_ck):
            _, result = sess.run([t_train, t_cost], feed_dict={
                t_inputs[0]:x_word,
                t_inputs[1]:x_ctx_p_left,
                t_inputs[2]:x_ctx_p0,
                t_inputs[3]:x_ctx_p_right,                
                t_inputs[4]:x_marker,
                t_inputs[5]:x_pos,
                t_targets[0]:y_ck
            })
            return result
    else: #CRF
        t_ck_score = tf.matmul(t_out,t_W_ck)

        t_ck_score_ext = tf.expand_dims(t_ck_score, 0)
        t_y_ck_ext = tf.expand_dims(t_y_ck, 0)

        t_sequence_lengths = tf.shape(t_x_words)[0]
        t_sequence_lengths = tf.expand_dims(t_sequence_lengths,0)

        t_log_likelihood, t_transition_params = tf.contrib.crf.crf_log_likelihood(
            t_ck_score_ext, 
            t_y_ck_ext, 
            t_sequence_lengths)
        
        t_outputs.extend([t_ck_score, t_transition_params])
    
        t_cost = -t_log_likelihood

        # # gradiente descendente no custo do perceptron estruturado
        t_optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        # optimizer = tf.train.GradientDescentOptimizer(0.003)
        t_train = t_optimizer.minimize(t_cost)

        def t_pred(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos):
            score, tparams = sess.run([t_ck_score,t_transition_params], feed_dict={
                t_inputs[0]:x_word,
                t_inputs[1]:x_ctx_p_left,
                t_inputs[2]:x_ctx_p0,
                t_inputs[3]:x_ctx_p_right,                
                t_inputs[4]:x_marker,
                t_inputs[5]:x_pos
            })

            return tf.contrib.crf.viterbi_decode(score=score,transition_params=tparams)[0][:-1]
        
        def my_t_train(sess, word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, y_ck):
            _, result = sess.run([t_train, t_cost], feed_dict={
                t_inputs[0]:x_word,
                t_inputs[1]:x_ctx_p_left,
                t_inputs[2]:x_ctx_p0,
                t_inputs[3]:x_ctx_p_right,                
                t_inputs[4]:x_marker,
                t_inputs[5]:x_pos,
                t_targets[0]:y_ck
            })
            return result
    
    return t_inputs, t_targets, t_outputs, my_t_train, t_pred


In [38]:
t_inputs, t_targets, t_outputs, t_train, t_pred = make_graph(hparams)

(?, 249)
Tensor("strided_slice:0", shape=(), dtype=int32)


In [24]:
sample = 146
x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos = get_inputs(dbtrain, sample)

y_ck = get_outputs(dbtrain, sample, n_targets)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o_ck = t_pred(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos)
print(o_ck)
print(y_ck.shape)
print([idx2lex['IOB'][ck] for ck in list(o_ck)])

[ 0 18 34  5 34  5 17 42 27 30 26 34 40 40 40 40 40 40 40 40 40 40 40 40
 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40]
(44, 1)
['B-A0', 'B-AM-PRD', 'I-AM-DIS', 'B-A5', 'I-AM-DIS', 'B-A5', 'B-AM-PNC', 'I-AM-REC', 'I-A4', 'I-AM', 'I-A3', 'I-AM-DIS', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC', 'I-AM-PNC']


## 5.1 Overfit one proposition

In [25]:
sample =146

x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos = get_inputs(dbtrain, sample)
y_ck = get_outputs(dbtrain, sample, n_targets)
y_ck = np.squeeze(y_ck, axis=1)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(200):
    L = t_train(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos, y_ck)
    if i % 100 == 0:
        print(L)
print(L)

[-32.895603]
[-1502.0531]
[-1928.2069]


## 5.2 Evaluate one proposition

In [26]:
from subprocess import Popen, PIPE, STDOUT
import re

In [27]:
def tag_to_conll(sess, prop_dict, propid, idx2lex):
    gold_list = []
    eval_list = []
        
    x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos = get_inputs(prop_dict, propid)
    targets = get_outputs(prop_dict, propid, n_targets)
    predictions = t_pred(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos)



    n_words = len(x_word)
    default_ck_list_ = [(i,i + 1, '*') for i in range(n_words)]
    
    pred_array = prop_dict[propid]['PRED']
    pred_array = pred_array.flatten()
    
    arg_array = prop_dict[propid]['ARG']
    arg_array = arg_array.flatten()
    
    pred_list = [idx2lex['PRED'][i] for i in pred_array.tolist()]
    gold_list_ = [idx2lex['ARG'][i] for i in arg_array.tolist()]
     
    gold_list += list(zip(pred_list, gold_list_))
    prop_list =  [propid for _ in range(len(gold_list))]
    eval_list = [idx2lex['IOB'][i] for i in predictions]
    
    #FIXME: removed #end of sentence token
    # now predictions are shorter then gold
    if len(eval_list) < len(prop_list):
        eval_list.append('O')

    eval_list = propbankbr_iob2arg(prop_list, eval_list)
    
#     ck_list_ = []     
#     for triple_ in sorted(chunk_ext, key= lambda x: x[0]):
#         lb, ub, ckid = triple_
#         # filters default value
#         default_ck_list_ = [
#             dck_
#             for dck_ in default_ck_list_ if dck_[0] < lb or dck_[1] > ub
#         ]
#         ck_list_.append((lb, ub, idx2lex['T'][ckid]))        

#     ck_list_ = default_ck_list_ + ck_list_ 

#     arg_list_ = []
#     for triple_ in sorted(ck_list_, key= lambda x: x[0]):
#         lb, ub, cktype = triple_
#         flat_list_ = [ cktype if i == lb else '*' for i in range(lb, ub) ]
            
#         if cktype != '*':
#             flat_list_[0] = '({:}*'.format(flat_list_[0])
#             flat_list_[-1] = '{:})'.format(flat_list_[-1])
#         arg_list_ += flat_list_
        
    eval_list = list(zip(pred_list, eval_list))
#     eval_list.append(None)
#     gold_list.append(None)

    #Change to use CoNLL2004
    if LANG == 'pt':
        gold_list = propbankbr_arg2se(gold_list)
        eval_list = propbankbr_arg2se(eval_list)

    return gold_list, eval_list

In [28]:
def evaluate(gold_list, eval_list, ds_type='train',verbose=True):
    gold_path = '{}_gold.props'.format(ds_type)    
    eval_path = '{}_eval.props'.format(ds_type)

    with open(gold_path, mode='w') as f:        
        for tuple_ in gold_list:
            if tuple_ is None:
                f.write('\n')
            else:
                f.write('{:}\t{:}\n'.format(*tuple_))

    with open(eval_path, mode='w') as f:        
        for tuple_ in eval_list:
            if tuple_ is None:
                f.write('\n')
            else:
                f.write('{:}\t{:}\n'.format(*tuple_))

    pipe = Popen(['perl',PEARL_SRLEVAL_PATH, gold_path, eval_path], stdout=PIPE, stderr=PIPE)

    txt, err = pipe.communicate()
    txt = txt.decode('UTF-8')
    err = err.decode('UTF-8')
    
    print(err)
    if verbose:
        print(txt)
        with open('{}.conll'.format(ds_type), mode='w') as f:
            f.write(txt)

    # overall is a summary from the list
    # is the seventh line
    lines_list = txt.split('\n')        
    
    # get the numbers from the row 
    overall_list = re.findall(r'[-+]?[0-9]*\.?[0-9]+.', lines_list[6])
    f1 = float(overall_list[-1])

    return f1



In [29]:
def evaluate_dataset(sess, prop_dict, idx2lex, ds_type='train'):
    gold_list = []
    eval_list = []
    first = True
    for pid in prop_dict:        
        g_list, e_list = tag_to_conll(sess, prop_dict, pid, idx2lex)

        if not first:
            gold_list.append(None)
            eval_list.append(None)
        else:
            first = False

        gold_list += g_list
        eval_list += e_list

    return evaluate(gold_list, eval_list, ds_type=ds_type, verbose=True)

In [30]:
gold_tags, eval_tags = tag_to_conll(sess, dbtrain, sample, idx2lex)
f1 = evaluate(gold_tags, eval_tags, 'prop_{:}'.format(sample), verbose=True)



Number of Sentences    :           1
Number of Propositions :           1
Percentage of perfect props : 100.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall        5       0       0   100.00  100.00  100.00
----------
        A0        1       0       0   100.00  100.00  100.00
        A1        1       0       0   100.00  100.00  100.00
    AM-MOD        1       0       0   100.00  100.00  100.00
    AM-TMP        2       0       0   100.00  100.00  100.00
------------------------------------------------------------
         V        1       0       0   100.00  100.00  100.00
------------------------------------------------------------



In [31]:
f1_valid = evaluate_dataset(sess, dbvalid, idx2lex, ds_type='valid')





Number of Sentences    :        3248
Number of Propositions :        3248
Percentage of perfect props :   0.12

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       49    6962    8402     0.70    0.58    0.63
----------
        A0       34    2630    2198     1.28    1.52    1.39
        A1        2     214    3171     0.93    0.06    0.12
        A2        0       0     681     0.00    0.00    0.00
        A3        0       0     114     0.00    0.00    0.00
        A4        0       1      65     0.00    0.00    0.00
        A5        0       0       2     0.00    0.00    0.00
        AA        0       0       1     0.00    0.00    0.00
        AM        0       1       0     0.00    0.00    0.00
    AM-ADV        0       0     279     0.00    0.00    0.00
    AM-CAU        0       0      48     0.00    0.00    0.00
    AM-DIR        0       0      36     0.00    0.00    0.00
    AM-DIS        0   

## Treino Total

In [40]:
import time
import datetime

train_sentences = range(*get_db_bounds('train', lang=LANG))
dev_sentences = range(*get_db_bounds('valid', lang=LANG))
if LANG == 'pt':
    test_sentences = range(*get_db_bounds('test', lang=LANG))

In [42]:
lr_search = [0.0005]
hidden_features_search = [50,150,250]

for j in range(len(hidden_features_search)):
    for i in range(len(lr_search)):
        for s in range(2):
            lr = lr_search[i]
            hf = hidden_features_search[j]

            print(j, ' ', len(hidden_features_search))
#             model_meta = {
#                 'vocab_size':len(id_to_word),
#                 'capt_size':len(id_to_capt),
#                 'pos_size':len(id_to_pos),
#                 'char_size':len(id_to_char),
#                 'ck_size':len(id_to_ck),
#                 'embed_size':embedding_matrix.shape[1],
#                 'capt_embed_size':10,
#                 'char_embed_size':30,
#                 'pos_embed_size':10,
#                 'char_hidden_features':30,
#                 'hidden_features':hf,
#                 'state_size':hf,
#                 'learning_rate':lr,
#                 'spn_layer':s==0
#             }
            model_meta = {
                'hidden_features':hf,
                'state_size':hf,
                'learning_rate':lr,
                'spn_layer':s==0
            }
            print('Training Model', model_meta)
            
            last_layer = 'crf'
            if s == 0:
                last_layer = 'spn'

            n_epochs = 50
            model_name = 'bilstm_viterbi_h' + str(hf) + '_lr_' + str(lr) + '_' + last_layer
            model = make_graph(model_meta)


            t_inputs, t_targets, t_outputs, t_train, t_pred = model
            
            #NEW SESSION PER MODEL
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            sess.run(tf.global_variables_initializer())

            indices = np.arange(1, len(train_sentences))
            n = len(indices)

            best_path = None
            best_f1 = 0

            save_dir = '../outputs/1.0/notebooks/spn/models/'
            timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
            exp_desc_dir = '{}results/'.format(save_dir)
            saver = tf.train.Saver(max_to_keep=n_epochs*20)

            dev_f1_list = []
            train_f1_list = []
            consecutive_bad_dev = 0

            for epoch in range(n_epochs):
                start = time.time()
                np.random.shuffle(indices)
                it = 0
                for sid in indices:
                    it += 1
                    x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos  = get_inputs(dbtrain, sid)
                    targets = get_outputs(dbtrain, sid, n_targets)
                    y_ck = np.squeeze(targets, axis=1)
                    L = t_train(sess, x_word, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos, y_ck)
                print("Epoca ", (epoch+1))
                f1_valid = evaluate_dataset(sess, dbvalid, idx2lex, ds_type='valid')            

                print("valid f1: ", f1_valid)

                f1_train = evaluate_dataset(sess, dbtrain, idx2lex, ds_type='train')            
                print("train f1: ", f1_train)
                dev_f1_list.append(f1_valid)
                train_f1_list.append(f1_train)
                end = time.time()
                print('Tempo por Epoca: ', (end-start), ' s')
                if f1_valid > best_f1:
                    best_f1 = f1_valid
                    save_path = save_dir + model_name + '_' + timestamp
                    saver.save(sess, save_path)
                    print('## BEST MODEL saved at ', save_path)
                    consecutive_bad_dev = 0
                else:
                    consecutive_bad_dev += 1
                if consecutive_bad_dev == 5:
                    break

            exp_path = exp_desc_dir + model_name + timestamp + '.txt'
            with open(exp_path,'w') as f:
                exp_desc = {
                    'model':model_meta,
                    'f1_train':train_f1_list,
                    'f1_valid':dev_f1_list
                }
                json.dump(exp_desc, f)
                print('save results on ' + exp_path)

0   3
Training Model {'hidden_features': 50, 'state_size': 50, 'learning_rate': 0.0005, 'spn_layer': True}
(?, 249)
Tensor("strided_slice:0", shape=(), dtype=int32)


InvalidArgumentError: slice index -1 of dimension 0 out of bounds.
	 [[Node: gradients/strided_slice_10_grad/StridedSliceGrad = StridedSliceGrad[Index=DT_INT32, T=DT_FLOAT, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/strided_slice_10_grad/Shape, strided_slice_4/stack_1, gradients/Mean_1_grad/Const, gradients/Mean_1_grad/Reshape/shape, gradients/Neg_grad/Neg)]]

Caused by op 'gradients/strided_slice_10_grad/StridedSliceGrad', defined at:
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-42-39af4d95bda2>", line 41, in <module>
    model = make_graph(model_meta)
  File "<ipython-input-37-2f8826b72841>", line 138, in make_graph
    t_train = t_optimizer.minimize(t_cost)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 399, in minimize
    grad_loss=grad_loss)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 511, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 532, in gradients
    gate_gradients, aggregation_method, stop_gradients)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 701, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 396, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 701, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_grad.py", line 278, in _StridedSliceGrad
    shrink_axis_mask=op.get_attr("shrink_axis_mask")), None, None, None
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 8420, in strided_slice_grad
    shrink_axis_mask=shrink_axis_mask, name=name)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
    op_def=op_def)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'strided_slice_10', defined at:
  File "/Users/Varela/.pyenv/versions/3.6.5/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
[elided 23 identical lines from previous traceback]
  File "<ipython-input-42-39af4d95bda2>", line 41, in <module>
    model = make_graph(model_meta)
  File "<ipython-input-37-2f8826b72841>", line 132, in make_graph
    t_correct_score = path_score(t_edge_scores, t_y_ck, n)
  File "<ipython-input-34-682be46d9e63>", line 20, in path_score
    return path_score[-1]
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 523, in _slice_helper
    name=name)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 689, in strided_slice
    shrink_axis_mask=shrink_axis_mask)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 8232, in strided_slice
    name=name)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
    op_def=op_def)
  File "/Users/Varela/.venv/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): slice index -1 of dimension 0 out of bounds.
	 [[Node: gradients/strided_slice_10_grad/StridedSliceGrad = StridedSliceGrad[Index=DT_INT32, T=DT_FLOAT, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/strided_slice_10_grad/Shape, strided_slice_4/stack_1, gradients/Mean_1_grad/Const, gradients/Mean_1_grad/Reshape/shape, gradients/Neg_grad/Neg)]]
