In [1]:
import sys
sys.path.insert(0,'../models/')
sys.path.insert(0,'../datasets/')
sys.path.insert(0,'..')

import pandas as pd
import numpy as np
import json
from subprocess import Popen, PIPE, STDOUT
import re
from collections import defaultdict

import tensorflow as tf
import tqdm
from models import PropbankEncoder
import config

INPUT_DIR = '../datasets/binaries/'
PROPBANK_GLO50_PATH = '{:}deep_glo50.pickle'.format(INPUT_DIR)
PEARL_SRLEVAL_PATH = '../srlconll-1.1/bin/srl-eval.pl'

<h1><center>Structured Prediction Network CWIS SRL (BR)</center></h1>

<center>In this notebook we solve the semantic role labeling task using structured predictions networks.</center>

## 1. Builds a "human friendly" version of the dataset

In [2]:
dfgs = pd.read_csv('../datasets/csvs/gs.csv', index_col=0, sep=',', encoding='utf-8')
column_files = [
    '../datasets/csvs/column_chunks/chunks.csv',
    '../datasets/csvs/column_predmarker/predicate_marker.csv',
    '../datasets/csvs/column_shifts_ctx_p/form.csv',
    '../datasets/csvs/column_shifts_ctx_p/gpos.csv',
    '../datasets/csvs/column_shifts_ctx_p/lemma.csv',
    '../datasets/csvs/column_t/t.csv',
    '../datasets/csvs/column_iob/iob.csv'
]

for col_f in column_files:
    _df = pd.read_csv(col_f, index_col=0, encoding='utf-8')
    dfgs = pd.concat((dfgs, _df), axis=1)

DISPLAY_COLUMNS = ['ID', 'P', 'FORM', 'ARG', 'T', 
                   'CHUNK_ID', 'CHUNK_START', 'CHUNK_FINISH', 'CHUNK_LEN', 'CHUNK_CANDIDATE_ID']            
dfgs[DISPLAY_COLUMNS].head(33)    

Unnamed: 0_level_0,ID,P,FORM,ARG,T,CHUNK_ID,CHUNK_START,CHUNK_FINISH,CHUNK_LEN,CHUNK_CANDIDATE_ID
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1,1,Brasília,*,*,1,0,1,1,0
1,2,1,Pesquisa_Datafolha,(A0*,A0,2,1,4,3,35
2,3,1,publicada,*,A0,2,1,4,3,35
3,4,1,hoje,*),A0,2,1,4,3,35
4,5,1,revela,(V*),V,3,4,5,1,126
5,6,1,um,(A1*,A1,4,5,32,27,181
6,7,1,dado,*,A1,4,5,32,27,181
7,8,1,supreendente,*,A1,4,5,32,27,181
8,9,1,:,*,A1,4,5,32,27,181
9,10,1,recusando,*,A1,4,5,32,27,181


## 2. Gets encodings

Propbank Encoder holds an indexed version of propbank dataset an answers to FOUR different dataformats: 
* CAT: this is the raw categorical data.
* EMB: tokens are embedding using GloVe embeddings.
* HOT: onehot encoding of the words and tokens.
* IDX: dense indexed representations.

In [3]:
# LOAD ENCODER
propbank_encoder = PropbankEncoder.recover(PROPBANK_GLO50_PATH)
db = propbank_encoder.db
lex2idx = propbank_encoder.lex2idx
idx2lex = propbank_encoder.idx2lex

# FOR TEXTUAL DATA ONLY
embeddings = propbank_encoder.embeddings

n_targets = len(lex2idx['T'])

In [4]:
print('attributes\t',
       len(db),
      '\n',             
      'records\t',
       len(db['ARG'].keys()))

attributes	 44 
 records	 141730


In [5]:
def filter_type(ds_type, db):
    '''Filters only records from train dataset
    '''
    ds_types = ('train', 'test', 'valid')
    if ds_type not in ds_types:
        _msg = 'ds_type must be in {:} got {:}'
        _msg = _msg.format(ds_types, ds_type)
        raise ValueError(_msg)
    elif ds_type in ('train',):
        lb = 0 
        ub = config.DATASET_TRAIN_SIZE
    elif ds_type in ('test',):        
        lb = config.DATASET_TRAIN_SIZE
        ub = lb + config.DATASET_VALID_SIZE         
    elif ds_type in ('valid',):                
        lb = config.DATASET_TRAIN_SIZE + config.DATASET_VALID_SIZE
        ub = lb + config.DATASET_TEST_SIZE         

    sel_keys_ = {key_ for key_, prop_ in db['P'].items() if prop_ > lb and prop_ <= ub}

    return {
                attr_:{ idx_: i_
                        for idx_, i_ in dict_.items() if idx_ in sel_keys_
                      }        
                for attr_, dict_  in db.items()
            }

def make_propositions_dict(db):
    '''Reindex db by propositions creating a nested dict in which the
        outer key is the proposition        
    '''
    
    triple_list = []
    prev_idx = -1
    prev_prop = -1
    for idx, prop in db['P'].items():
        if prev_prop != prop:
            if idx > 0:
                ub = prev_idx
                triple_list.append((lb, ub, prev_prop))
            lb = idx
        prev_prop = prop
        prev_idx = idx
    triple_list.append((lb, prev_idx, prev_prop))
            

        
    prop_set = set(db['P'].values())
    return { prop_:
                    {
                        attr_:{ idx_: dict_[idx_]
                                for idx_ in range(lb_, ub_ + 1, 1)
                          }        
                        for attr_, dict_ in db.items() if attr_ not in ('P',)
                    }
             for lb_, ub_, prop_ in  triple_list
            }, {prop_: ub_ - lb_ + 1 for lb_, ub_, prop_ in  triple_list}   


def numpfy_propositions_dict(prop_dict, proplen_dict):
    '''Converts inner dict examples into numpy arrays
    '''
    prop_dict_ = defaultdict(dict)    
    for prop, columns_dict in prop_dict.items():
        len_ = proplen_dict[prop]
        shape_ = (len_, 1)
        for column, values_dict in columns_dict.items():
            tuple_list = [idx_value 
                          for idx_value in values_dict.items()]
            
            tuple_list = sorted(tuple_list, key=lambda x: x[0])            
            # Converts lexicon (raw/indexed) into token (embedded/indexed)
            if (('FORM' in column) or ('LEMMA' in column)):
                values_list = [idx2lex[column][tuple_[1]]                
                                   for tuple_ in tuple_list]
            else:
                values_list = [tuple_[1] for tuple_ in tuple_list]
            
            prop_dict_[prop][column]  = np.array(values_list).reshape(shape_)
    
    return prop_dict_        


In [6]:
traindb  = filter_type('train', db)
print('attributes\t',
       len(traindb),
      '\n',             
      'records\t',
       len(traindb['ARG'].keys()),
       '\n',             
      'vocab\t',
        max([form for _, form in traindb['FORM'].items()]))

attributes	 44 
 records	 123846 
 vocab	 13289


In [7]:
prop_dict, proplen_dict = make_propositions_dict(traindb)
print('attributes\t',
       len(prop_dict[1]) + 1,
      '\n',             
      'records\t',
       sum([len(d['ARG']) for p, d in prop_dict.items()]),
        '\n',             
      'vocab\t',
        max([form for _, prop in prop_dict.items() for _, form in prop['FORM'].items()]))

attributes	 44 
 records	 123846 
 vocab	 13289


In [8]:
prop_dict1 = numpfy_propositions_dict(prop_dict, proplen_dict)
print('attributes\t',
       len(prop_dict1[1]) + 1,
      '\n',             
      'records\t',
       sum([len_ for _, len_ in proplen_dict.items()]),
        '\n',             
      'vocab\t',
        max([max(form) for _, prop in prop_dict1.items() for form in prop['FORM']]))

attributes	 44 
 records	 123846 
 vocab	 12037


In [9]:
def get_inputs(db1, propid):
    '''Generate inputs
    '''
    propdb = db1[propid] # nested dict of columns and idx value
    if 'CHUNK_SPACE' not in propdb:
        proplen = len(propdb['ID'])
        propdb['CHUNK_SPACE'] = generate_chunk_space(proplen)

    word    = propdb['FORM']
    ctx_p_left  = propdb['FORM_CTX_P-1']
    ctx_p0  = propdb['FORM_CTX_P+0']
    ctx_p_right  = propdb['FORM_CTX_P+1']
    
    marker  = propdb['MARKER']
    pos     = propdb['GPOS']
    chunk_type  = propdb['T']
    chunk_start, chunk_finish = propdb['CHUNK_SPACE']
    
    return word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, chunk_type, chunk_start, chunk_finish
            
def generate_chunk_space(n):
    '''Generates all possible spaces for chunks
    '''
    start_list = []
    end_list = []
    for i in range(n):
        for j in range(i,n,1):
            start_list.append(i)
            end_list.append(j+1)
    shape_ = (len(start_list), 1)
    start_ = np.array(start_list).reshape(shape_)
    finish_ = np.array(end_list).reshape(shape_)
    return start_, finish_
            

def get_outputs(db1, propid, n_targets):
    ''' Generate outputs
    '''
    propdb = db1[propid] # nested dict of columns and idx value
    if 'OUTPUTS' not in propdb: 
        id_type = np.concatenate(
            ( propdb['CHUNK_CANDIDATE_ID'], propdb['T']), axis=1
        )

        id_type = np.unique( id_type, axis=0)
        propdb['OUTPUTS'] = id_type[:,0] * n_targets + id_type[:,1]

    return propdb['OUTPUTS']

In [10]:
%%timeit
propid = 1120
word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
targets = get_outputs(prop_dict1, propid, n_targets)
# worst proposition 1120 size 92!

901 ns ± 20.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


 ## MODEL

In [11]:
propid = 1
word, ctx_p_left, ctx_p0, ctx_p_right, marker, pos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1,  propid)
targets = get_outputs(prop_dict1, propid, n_targets)
print(targets)
_start  = np.repeat(chunk_start, n_targets)
_finish = np.repeat(chunk_finish, n_targets)
print(_start)
print(_finish)
print(list(zip(_start[targets].flatten(), _finish[targets].flatten())))


[    0  1261 20160  4571  6518]
[ 0  0  0 ..., 32 32 32]
[ 1  1  1 ..., 33 33 33]
[(0, 1), (1, 4), (32, 33), (4, 5), (5, 32)]


In [12]:
import struct_perc.colored_weighted_interval_scheduling as cwis
import struct_perc.weighted_interval_scheduling as wis
import struct_perc.utils as spu

 ## Tensorflow Graph

In [13]:
# Parameters
vocab_size = len(embeddings)
embed_size = 50

n_pos = len(lex2idx['GPOS'])
n_classes = len(lex2idx['T'])
n_features = embed_size * 4 + 1 + n_pos

n_hidden = 100
# W_shape = (n_features, n_hidden)
W_shape = (n_hidden, n_hidden)
b_shape = (1, n_hidden)

W_interval_shape = (2 * n_hidden, n_classes)
# W_shape = (hidden_features, n_classes)
b_interval_shape = (1, n_classes)

# word index 
X_words = tf.placeholder(tf.int64, shape=(None,1), name='word')

# predicate context index (left, predicate, right)
X_ctx_p_left = tf.placeholder(tf.int64, shape=(None,1), name='ctx_p_left')
X_ctx_p = tf.placeholder(tf.int64, shape=(None,1), name='ctx_p0')
X_ctx_p_right = tf.placeholder(tf.int64, shape=(None,1), name='ctx_p_right')

# POS tagging feature
X_pos = tf.placeholder(tf.int64, shape=(None,1), name='gpos')
X_marker = tf.cast( tf.placeholder(tf.int64, shape=(None,1), name='marker'), tf.float32 )
EMBS = tf.Variable(embeddings, trainable=False)

# Embedded representation
with tf.variable_scope("features"):
    EMBS_words = tf.gather_nd(EMBS, X_words, name='word_features')

    EMBS_ctx_pleft = tf.gather_nd(EMBS, X_ctx_p_left, name='EMBS_ctx_pleft')
    EMBS_ctx_p0 = tf.gather_nd(EMBS, X_ctx_p, name='EMBS_ctx_p0')
    EMBS_ctx_pright = tf.gather_nd(EMBS, X_ctx_p_right, name='EMBS_ctx_pright')

    X_pos_flat = tf.reshape(X_pos, [-1], name='gpos_flat')
    X_pos_onehot = tf.one_hot(X_pos_flat, depth=n_pos, name='gpos_onehot')

    X = tf.concat((EMBS_words, EMBS_ctx_pleft, EMBS_ctx_p0,
                   EMBS_ctx_pright, X_pos_onehot, X_marker),
                  axis=1, name='X')
    X_batch = tf.expand_dims(X, 0)

with tf.variable_scope('gru', reuse=tf.AUTO_REUSE):

    fw = tf.nn.rnn_cell.GRUCell(num_units=n_hidden / 2)
    bw = tf.nn.rnn_cell.GRUCell(num_units=n_hidden / 2)
    
    Wo = tf.Variable(tf.truncated_normal(W_shape, stddev=1.0 / np.sqrt(n_features * n_hidden)), name='W' )
    bo = tf.Variable(tf.zeros(b_shape, dtype=tf.float32), name='b')

    hidden_outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=fw,
        cell_bw=bw,
        inputs=X_batch,
        dtype=tf.float32
    )
    hidden_fw, hidden_bw = hidden_outputs
    Ho = tf.concat((tf.squeeze(hidden_fw, axis=0) ,tf.squeeze(hidden_fw, axis=0)),axis=1)

    Z = tf.nn.tanh( tf.matmul( Ho, Wo ) + bo, name='hidden_layer' )

# Those are the interval parameters
with tf.variable_scope("interval"):
    W_interval = tf.Variable(tf.random_normal(W_interval_shape, mean=0.0, stddev=1.0 / np.sqrt(1.0 * n_hidden * n_classes)), name='W_interval')
    b_interval = tf.Variable(tf.zeros(b_interval_shape, dtype=tf.float32), name='b_interval')
    
    # begin of interval
    IntervalStart = tf.placeholder(tf.int32, shape=(None,1))
    # end of interval
    IntervalFinish = tf.placeholder(tf.int32, shape=(None,1))

# features from intervals
IntervalFinishZ = tf.gather_nd(Z, IntervalFinish-1)
IntervalStartZ = tf.gather_nd(Z, IntervalStart)

IntervalZ = tf.concat((IntervalFinishZ, IntervalStartZ), axis=1)
IntervalScores = tf.matmul(IntervalZ, W_interval) + b_interval

ScoresFlat = tf.reshape(IntervalScores, (-1,1)) # column array n_classes * ((len + 1 ) * len) / 2 
ScoresMean = tf.reduce_mean(ScoresFlat) # scalar
ScoresDiff = ScoresFlat - ScoresMean   # centralize data --> mean zero

ScoresStd = tf.sqrt(tf.reduce_sum(ScoresDiff * ScoresDiff))
ScoresOp = ScoresDiff /( ScoresStd + 1e-8 )

 ## Tensorflow test session

In [14]:
propid  = 1
words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
targets = get_outputs(prop_dict1, propid, n_targets)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    arg_list = [Z, IntervalZ, IntervalStartZ, IntervalFinishZ, W_interval, b_interval, IntervalScores]
    data_list = sess.run(arg_list, feed_dict={
        X_words:words,
        X_ctx_p_left: ctx_p_left,
        X_ctx_p: ctx_p0,
        X_ctx_p_right: ctx_p_right,        
        X_marker: marker,                
        X_pos:gpos,
        IntervalStart: chunk_start,
        IntervalFinish: chunk_finish
    })
    ZZ, scores, start_scores, finish_scores, Wi, bi, inteval_scores = data_list
    # flat gives the score for each candidate

print(ZZ.shape, scores.shape, start_scores.shape, finish_scores.shape, Wi.shape, bi.shape, inteval_scores.shape)


(33, 100) (561, 200) (561, 100) (561, 100) (200, 36) (1, 36) (561, 36)


## Testing prediction

In [17]:
def pred(sess, x_words, x_ctx_p_left, x_ctx_p0, x_ctx_p_right, x_marker, x_pos, x_chunk_start, x_chunk_finish):
    scores = sess.run(ScoresOp,feed_dict={
        X_words: x_words,
        X_ctx_p_left: x_ctx_p_left,
        X_ctx_p: x_ctx_p0,
        X_ctx_p_right: x_ctx_p_right,        
        X_marker: x_marker,                
        X_pos:x_pos,
        IntervalStart: x_chunk_start,
        IntervalFinish: x_chunk_finish
    })
    # scores is a ((proplen + 1) * (proplen) / 2) * n_classes  
    starts = np.repeat(x_chunk_start, n_classes).reshape((-1,1))
    ends = np.repeat(x_chunk_finish, n_classes).reshape((-1,1))
    
    ck_len = len(x_chunk_start)
    colors = np.array(list(np.arange(n_classes))*ck_len)

    # Finds best allocation given the scores and the chunk_space
    r_int = cwis.compute_schedule(starts.flatten(), ends.flatten(), scores, colors) # index of the cadidates of predicted solution
    r_ext = list(zip(starts[r_int].flatten(),ends[r_int].flatten(), colors[r_int].flatten())) # from integer to triple
    return r_int, r_ext

In [18]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
p, pe = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)
print(p)
print(pe)

[20163 20091 19983 19839 19659 19471 19219 18903 18579 18247 17823 17391
 16923 16419 15879 15303 14691 14043 13359 12639 11911 11119 10263  9427
  8527  7591  6591  5583  4539  3476  2360  1219    20]
[(32, 33, 3), (31, 32, 3), (30, 31, 3), (29, 30, 3), (28, 29, 3), (27, 28, 31), (26, 27, 31), (25, 26, 3), (24, 25, 3), (23, 24, 31), (22, 23, 3), (21, 22, 3), (20, 21, 3), (19, 20, 3), (18, 19, 3), (17, 18, 3), (16, 17, 3), (15, 16, 3), (14, 15, 3), (13, 14, 3), (12, 13, 31), (11, 12, 31), (10, 11, 3), (9, 10, 31), (8, 9, 31), (7, 8, 31), (6, 7, 3), (5, 6, 3), (4, 5, 3), (3, 4, 20), (2, 3, 20), (1, 2, 31), (0, 1, 20)]


## Training code

In [19]:
# indices of the correct intervals
T = tf.placeholder(tf.int32, shape=(None,), name='T')
L = tf.placeholder(tf.int32, shape=(), name='L')
# I = tf.to_int32(tf.range(L), name='indices')

T_flat = tf.reshape(T, (-1,1), name='T_flat') # column array
MarginFactor = tf.placeholder(tf.float32)
MarginIndex = tf.ones(tf.shape(T_flat)) * MarginFactor
MarginScores = tf.scatter_nd(T_flat, -MarginIndex, tf.shape(ScoresOp)) # oppposite of gather_nd
ScoresWithMargin = ScoresOp + MarginScores


In [20]:
# indices containing the predicted labels from Weighted Interval Scheduling
Y = tf.placeholder(tf.int32, shape=(None,), name='predictions')
Y_flat = tf.reshape(Y, (-1, 1))

# score da estrutura predita
ScoreY = tf.gather_nd(ScoresWithMargin, Y_flat, name='predicted_score')
# score da estrutura correta
ScoreT = tf.gather_nd(ScoresWithMargin, T_flat, name='target_score')

# função de custo do perceptron estruturado
CostOp = tf.reduce_sum(ScoreY) - tf.reduce_sum(ScoreT)

# gradiente descendente no custo do perceptron estruturado
Optimizer = tf.train.AdamOptimizer(0.001)
TrainOp = Optimizer.minimize(CostOp)

In [21]:
predictions, _ = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)
print(targets.shape)
print(targets)
print(predictions.shape)
print(predictions)

(5,)
[    0  1261 20160  4571  6518]
(33,)
[20163 20091 19983 19839 19659 19471 19219 18903 18579 18247 17823 17391
 16923 16419 15879 15303 14691 14043 13359 12639 11911 11119 10263  9427
  8527  7591  6591  5583  4539  3476  2360  1219    20]


 ## Testing cost operation

In [22]:
predictions, chunk_ext = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)

interval_start = np.repeat(chunk_start, n_classes).reshape((-1,1))
interval_finish = np.repeat(chunk_finish, n_classes).reshape((-1,1))

scores_, cost_ = sess.run([ScoresOp, CostOp], feed_dict={
    X_words: words,
    X_ctx_p_left: ctx_p_left,
    X_ctx_p: ctx_p0,
    X_ctx_p_right: ctx_p_right,        
    X_marker: marker,        
    X_pos: gpos,
    IntervalStart: interval_start,
    IntervalFinish: interval_finish,
    T: targets.flatten(),
    Y: predictions.flatten(),        
    L: proplen_dict[propid],
    MarginFactor:0.01})


print(scores_.shape)
print(cost_)
print(np.max(predictions))
print(np.sum(scores_[predictions]) - np.sum(scores_[targets]))

(727056, 1)
0.095378
20163
0.045378


In [23]:
colors = np.array(list(np.arange(n_classes))*len(chunk_start))
r_int = cwis.compute_schedule(interval_start.flatten(), interval_finish.flatten(), scores_, colors) 
print(np.mean(scores_[r_int]), ' ', np.mean(scores_[targets]))

0.00162731   0.000318702


In [24]:
np.sum(scores_[predictions].flatten())

0.046971507

In [25]:
np.sum(scores_[targets].flatten())

0.0015935113

In [26]:
print(sorted(predictions))
print(sorted(r_int))

[20, 1219, 2360, 3476, 4539, 5583, 6591, 7591, 8527, 9427, 10263, 11119, 11911, 12639, 13359, 14043, 14691, 15303, 15879, 16419, 16923, 17391, 17823, 18247, 18579, 18903, 19219, 19471, 19659, 19839, 19983, 20091, 20163]
[20, 1208, 2360, 3476, 4556, 5600, 6591, 7563, 8527, 9427, 10291, 11119, 11911, 12667, 13376, 14060, 14719, 15331, 15907, 16447, 16951, 17419, 17851, 18247, 18607, 18931, 19219, 19471, 19687, 19867, 20011, 20119, 20191]


In [27]:
propid  = 1
words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
targets = get_outputs(prop_dict1, propid, n_targets)

starts = np.repeat(chunk_start,n_classes).reshape((-1,1))
ends = np.repeat(chunk_finish, n_classes).reshape((-1,1))

scores_ = sess.run(ScoresOp, feed_dict={
    X_words: words,
    X_ctx_p_left: ctx_p_left,
    X_ctx_p: ctx_p0,
    X_ctx_p_right: ctx_p_right,        
    X_marker: marker,        
    X_pos: gpos,
    IntervalStart: chunk_start,
    IntervalFinish: chunk_finish,
    T: targets.flatten(),
    Y: predictions.flatten(),        
    MarginFactor:0.01
})

colors = np.repeat(np.arange(n_classes), len(chunk_start))
predictions = cwis.compute_schedule(starts.flatten(), ends.flatten(), scores_, colors) 

cost_ = sess.run(CostOp, feed_dict={
    X_words: words,
    X_ctx_p_left: ctx_p_left,
    X_ctx_p: ctx_p0,
    X_ctx_p_right: ctx_p_right,        
    X_marker: marker,        
    X_pos: gpos,
    IntervalStart: chunk_start,
    IntervalFinish: chunk_finish,
    T:targets.flatten(),
    Y:predictions.flatten(),
    MarginFactor:0.01
})

print(cost_)
print(np.sum(scores_[predictions]) - np.sum(scores_[targets]))

0.484868
0.434868


In [28]:
print(scores_.shape)

(20196, 1)


## Training Single Proposition

In [29]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

propid = 1
words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
targets = get_outputs(prop_dict1, propid, n_targets)

starts = np.repeat(chunk_start,n_classes).reshape((-1,1))
ends = np.repeat(chunk_finish, n_classes).reshape((-1,1))

for i in range(10000):
    
    scores_ = sess.run(ScoresOp, feed_dict={
        X_words: words,
        X_ctx_p_left: ctx_p_left,
        X_ctx_p: ctx_p0,
        X_ctx_p_right: ctx_p_right,        
        X_marker: marker,        
        X_pos: gpos,
        IntervalStart: chunk_start,
        IntervalFinish: chunk_finish,
        T: targets.flatten(),
        Y: predictions.flatten(),        
        MarginFactor:0.01
    })
    
    predictions, chunk_ext = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)    
    cost_ = sess.run(CostOp, feed_dict={
        X_words: words,
        X_ctx_p_left: ctx_p_left,
        X_ctx_p: ctx_p0,
        X_ctx_p_right: ctx_p_right,        
        X_marker: marker,        
        X_pos: gpos,
        IntervalStart: chunk_start,
        IntervalFinish: chunk_finish,
        T: targets.flatten(),
        Y: predictions.flatten(),        
        MarginFactor:0.01
    })
    
    predictions_score = np.sum(scores_[predictions])
    targets_score = np.sum(scores_[targets])
        
    sess.run(TrainOp, feed_dict={
        X_words: words,
        X_ctx_p_left: ctx_p_left,
        X_ctx_p: ctx_p0,
        X_ctx_p_right: ctx_p_right,        
        X_marker: marker,        
        X_pos: gpos,
        IntervalStart: chunk_start,
        IntervalFinish: chunk_finish,
        T: targets.flatten(),
        Y: predictions.flatten(),        
        MarginFactor:0.01})
    
    colors = np.repeat(np.arange(n_classes), len(chunk_start))
    predictions = cwis.compute_schedule(starts.flatten(), ends.flatten(), scores_, colors) 

    targets_set = set(targets.flatten())
    predictions_set = set(predictions.flatten())
    yp_common = targets_set.intersection(predictions_set)
    yp_total = targets_set.union(predictions_set)
    
    acc_int = len(yp_common)/len(yp_total)
    
    if i % 25 == 0:
        print(acc_int, ' ', cost_)
    
    if cost_ < 0:
        break

    if acc_int == 1:
        print(acc_int, ' ', cost_, ' learnt at epoch ', i)
        break


0.0   0.616415
0.0   0.257976
0.02702702702702703   0.186476
0.05555555555555555   0.143267
0.08571428571428572   0.0922629
0.0   0.0878867
0.05555555555555555   0.0844578
0.02702702702702703   0.0726507
0.2727272727272727   0.0357467
0.5   0.0196386
0.1875   0.0233169
1.0   0.0  learnt at epoch  273


In [30]:
targets_list = list(zip(starts[targets].flatten(), ends[targets].flatten(), colors[targets]))

print(sorted(targets_list, key=lambda x: x[0]))

[(0, 1, 0), (1, 4, 2), (4, 5, 8), (5, 32, 11), (32, 33, 35)]


In [31]:
predictions_list = list(zip(starts[predictions].flatten(), ends[predictions].flatten(), colors[predictions]))

print(sorted(predictions_list, key=lambda x: x[0]))

[(0, 1, 0), (1, 4, 2), (4, 5, 8), (5, 32, 11), (32, 33, 35)]


In [32]:
# this should be equal to previous cell result predicitin doesn't changed the weights
predictions1, chunk_ext = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)    
predictions_test_list = list(zip(starts[predictions1].flatten(), ends[predictions1].flatten(), colors[predictions1]))

print(sorted(predictions_test_list, key= lambda x: x[0]))

[(0, 1, 0), (1, 4, 2), (4, 5, 8), (5, 32, 11), (32, 33, 35)]


 ## ConLL evaluation scripts

In [33]:
def tag_to_conll(sess, prop_dict, propid, idx2lex):
    gold_list = []
    eval_list = []
        
    words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict, propid)
    targets = get_outputs(prop_dict, propid, n_targets)

    predictions, chunk_ext = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)    


    n_words = len(words)
    default_ck_list_ = [(i,i + 1, '*') for i in range(n_words)]
    
    pred_array = prop_dict[propid]['PRED']
    pred_array = pred_array.flatten()
    
    arg_array = prop_dict[propid]['ARG']
    arg_array = arg_array.flatten()
    
    pred_list = [idx2lex['PRED'][i] for i in pred_array.tolist()]
    gold_list_ = [idx2lex['ARG'][i] for i in arg_array.tolist()]
    
    gold_list += list(zip(pred_list, gold_list_))

    
    ck_list_ = []     
    for triple_ in sorted(chunk_ext, key= lambda x: x[0]):
        lb, ub, ckid = triple_
        # filters default value
        default_ck_list_ = [
            dck_
            for dck_ in default_ck_list_ if dck_[0] < lb or dck_[1] > ub
        ]
        ck_list_.append((lb, ub, idx2lex['T'][ckid]))        

    ck_list_ = default_ck_list_ + ck_list_ 

    arg_list_ = []
    for triple_ in sorted(ck_list_, key= lambda x: x[0]):
        lb, ub, cktype = triple_
        flat_list_ = [ cktype if i == lb else '*' for i in range(lb, ub) ]
            
        if cktype != '*':
            flat_list_[0] = '({:}*'.format(flat_list_[0])
            flat_list_[-1] = '{:})'.format(flat_list_[-1])
        arg_list_ += flat_list_
        
    eval_list += list(zip(pred_list, arg_list_))
    eval_list.append(None)
    gold_list.append(None)
    return gold_list, eval_list

testamos o modelo na frase usada para treino

In [34]:
propid = 1
gold_list, eval_list = tag_to_conll(sess, prop_dict1, propid, idx2lex)
for i in range(proplen_dict[propid]):
    if gold_list[i] and eval_list[i]:
        print('{:}\t{:}\t{:}\t{:}'.format(*gold_list[i], *eval_list[i]))
    else:
        print('\n')
    

print(lex2idx['T'])

-	*	-	*
-	(A0*	-	(A0*
-	*	-	*
-	*)	-	*)
revelar	(V*)	revelar	(V*)
-	(A1*	-	(A1*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*	-	*
-	*)	-	*)
-	*	-	*
{'*': 0, 'A0': 1, 'A1': 2, 'A2': 3, 'A3': 4, 'A4': 5, 'A5': 6, 'AM-ADV': 7, 'AM-CAU': 8, 'AM-DIR': 9, 'AM-DIS': 10, 'AM-EXT': 11, 'AM-LOC': 12, 'AM-MED': 13, 'AM-MNR': 14, 'AM-NEG': 15, 'AM-PNC': 16, 'AM-PRD': 17, 'AM-REC': 18, 'AM-TMP': 19, 'C-A0': 20, 'C-A1': 21, 'C-A2': 22, 'C-A3': 23, 'C-AM-ADV': 24, 'C-AM-CAU': 25, 'C-AM-DIS': 26, 'C-AM-EXT': 27, 'C-AM-LOC': 28, 'C-AM-MNR': 29, 'C-AM-NEG': 30, 'C-AM-PNC': 31, 'C-AM-PRD': 32, 'C-AM-TMP': 33, 'C-V': 34, 'V': 35}


In [35]:
def evaluate(gold_list, eval_list, verbose=True):
    gold_path = 'train_gold.props'    
    eval_path = 'train_eval.props'

    with open(gold_path, mode='w') as f:        
        for tuple_ in gold_list:
            if tuple_ is None:
                f.write('\n')
            else:
                f.write('{:}\t{:}\n'.format(*tuple_))

    with open(eval_path, mode='w') as f:        
        for tuple_ in eval_list:
            if tuple_ is None:
                f.write('\n')
            else:
                f.write('{:}\t{:}\n'.format(*tuple_))

    pipe = Popen(['perl',PEARL_SRLEVAL_PATH, gold_path, eval_path], stdout=PIPE, stderr=PIPE)

    txt, err = pipe.communicate()
    txt = txt.decode('UTF-8')
    err = err.decode('UTF-8')
    
    if verbose:
        print(txt)
        with open('train.conll', mode='w') as f:
            f.write(txt)

    # overall is a summary from the list
    # is the seventh line
    lines_list = txt.split('\n')        
    
    # get the numbers from the row 
    overall_list = re.findall(r'[-+]?[0-9]*\.?[0-9]+.', lines_list[6])
    f1 = float(overall_list[-1])

    return f1



In [36]:
propid = 1
gold_list, eval_list = tag_to_conll(sess, prop_dict1, propid, idx2lex)
f1_score = evaluate(gold_list, eval_list, verbose=True)
print('f1_score: ', f1_score)

Number of Sentences    :           1
Number of Propositions :           1
Percentage of perfect props : 100.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall        2       0       0   100.00  100.00  100.00
----------
        A0        1       0       0   100.00  100.00  100.00
        A1        1       0       0   100.00  100.00  100.00
------------------------------------------------------------
         V        1       0       0   100.00  100.00  100.00
------------------------------------------------------------

f1_score:  100.0


In [37]:
import time
start = time.time()
print(1111)
gold_list, eval_list = tag_to_conll(sess, prop_dict1, propid, idx2lex)
f1_score = evaluate(gold_list, eval_list, verbose=True)
end = time.time()
print('evaluate: ', (end-start), 's')
print('f1_score: ', f1_score)

1111
Number of Sentences    :           1
Number of Propositions :           1
Percentage of perfect props : 100.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall        2       0       0   100.00  100.00  100.00
----------
        A0        1       0       0   100.00  100.00  100.00
        A1        1       0       0   100.00  100.00  100.00
------------------------------------------------------------
         V        1       0       0   100.00  100.00  100.00
------------------------------------------------------------

evaluate:  0.6659700870513916 s
f1_score:  100.0


## Training

In [38]:
sess= tf.Session()
sess.run(tf.global_variables_initializer())

EPOCHS = 1000
NPROPS = 40
# indices = np.arange(config.DATASET_TRAIN_SIZE)
indices = np.arange(NPROPS) + 1

best_score = 0
saver = tf.train.Saver()

iter_dict = {'iteration': 0, 'epochs': 0, 'accuracy': 0.0, 'cost': 999.0, 'f1_score': 0.0, 'best_score': 0.0}
iter_message = 'iteration:{iteration:05d}\tepochs:{epochs:05d}\t' + \
                'accuracy:{accuracy:0.2f}\tcost:{cost:0.2f}\t' + \
                'f1_score:{f1_score:0.2f}\tbest_score:{best_score:0.2f}'

for j in range(EPOCHS):
    np.random.shuffle(indices)
    total_err = 0
    total_size = 0
    total_cost = 0
    gold_list = []
    eval_list = []         
    for i, propid in enumerate(indices):     
        try: 
            words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_type, chunk_start, chunk_finish = get_inputs(prop_dict1, propid)
            targets = get_outputs(prop_dict1, propid, n_targets)
        except KeyError:
            print(propid)

        predictions, chunk_ext = pred(sess, words, ctx_p_left, ctx_p0, ctx_p_right, marker, gpos, chunk_start, chunk_finish)

        _, cost = sess.run([TrainOp, CostOp], feed_dict={
            X_words: words,
            X_ctx_p_left: ctx_p_left,
            X_ctx_p: ctx_p0,
            X_ctx_p_right: ctx_p_right,        
            X_marker: marker,        
            X_pos: gpos,
            IntervalStart: chunk_start,
            IntervalFinish: chunk_finish,
            T: targets.flatten(),
            Y: predictions.flatten(),        
            MarginFactor:0.01})

        predictions_set = set(predictions.tolist())
        targets_set = set(targets.tolist())
        
        union_set = predictions_set.union(targets_set)
        inter_set = predictions_set.intersection(targets_set)
        
        total_err += len(union_set - inter_set)
        total_size += len(union_set)
        total_cost += cost
        
        gold_list_, eval_list_ = tag_to_conll(sess, prop_dict1, propid, idx2lex)
        gold_list += gold_list_
        eval_list += eval_list_

        iter_dict['iteration'] = i + j * len(indices)
        iter_dict['epochs'] = j
        iter_dict['accuracy'] = 1 - total_err/total_size
        iter_dict['cost'] = total_cost

        
    iter_dict['f1_score'] = evaluate(gold_list, eval_list, verbose=False)    
    if iter_dict['f1_score'] > iter_dict['best_score']:
        iter_dict['best_score'] = iter_dict['f1_score']
        evaluate(gold_list, eval_list, verbose=True)    
        save_path = saver.save(sess, "/tmp/model_spn-pt.ckpt")
    
    print(iter_message.format(**iter_dict))        
    
    if iter_dict['best_score'] > 98:
        print('best_score:',iter_dict['best_score'],'exiting')
        break    

Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :   0.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall        1     463      97     0.22    1.02    0.36
----------
        A0        0      55      21     0.00    0.00    0.00
        A1        1      72      37     1.37    2.63    1.80
        A2        0       3      10     0.00    0.00    0.00
        A3        0       6       2     0.00    0.00    0.00
        A4        0      44       1     0.00    0.00    0.00
        A5        0      47       0     0.00    0.00    0.00
    AM-ADV        0       0       2     0.00    0.00    0.00
    AM-CAU        0       3       2     0.00    0.00    0.00
    AM-DIS        0      14       3     0.00    0.00    0.00
    AM-EXT        0      52       0     0.00    0.00    0.00
    AM-LOC        0      38       5     0.00    0.00    0.00
    AM-MNR        0    

Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :   0.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall        3     180      95     1.64    3.06    2.14
----------
        A0        1      27      20     3.57    4.76    4.08
        A1        2      68      36     2.86    5.26    3.70
        A2        0       7      10     0.00    0.00    0.00
        A3        0       2       2     0.00    0.00    0.00
        A4        0       1       1     0.00    0.00    0.00
    AM-ADV        0       2       2     0.00    0.00    0.00
    AM-CAU        0      12       2     0.00    0.00    0.00
    AM-DIS        0       1       3     0.00    0.00    0.00
    AM-EXT        0       9       0     0.00    0.00    0.00
    AM-LOC        0      17       5     0.00    0.00    0.00
    AM-MNR        0       5       1     0.00    0.00    0.00
    AM-NEG        0    

iteration:01039	epochs:00025	accuracy:0.07	cost:0.05	f1_score:8.89	best_score:8.89
iteration:01079	epochs:00026	accuracy:0.11	cost:0.05	f1_score:8.33	best_score:8.89
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :   0.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       14      86      84    14.00   14.29   14.14
----------
        A0        1      29      20     3.33    4.76    3.92
        A1        6      22      32    21.43   15.79   18.18
        A2        3       6       7    33.33   30.00   31.58
        A3        0       1       2     0.00    0.00    0.00
        A4        1       6       0    14.29  100.00   25.00
    AM-ADV        0       1       2     0.00    0.00    0.00
    AM-CAU        0       5       2     0.00    0.00    0.00
    AM-DIS        0       0       3     0.00    0.00    0.00
    AM-LOC        2      10       3    1

iteration:01639	epochs:00040	accuracy:0.21	cost:0.04	f1_score:31.58	best_score:31.58
iteration:01679	epochs:00041	accuracy:0.17	cost:0.04	f1_score:26.42	best_score:31.58
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :  17.50

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       32      69      66    31.68   32.65   32.16
----------
        A0        8      17      13    32.00   38.10   34.78
        A1       14      26      24    35.00   36.84   35.90
        A2        1       9       9    10.00   10.00   10.00
        A3        0       0       2     0.00    0.00    0.00
        A4        1       0       0   100.00  100.00  100.00
    AM-ADV        1       1       1    50.00   50.00   50.00
    AM-CAU        1       3       1    25.00   50.00   33.33
    AM-DIS        1       0       2   100.00   33.33   50.00
    AM-LOC        3       1       2 

iteration:02199	epochs:00054	accuracy:0.34	cost:0.03	f1_score:45.83	best_score:45.83
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :  22.50

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       49      46      49    51.58   50.00   50.78
----------
        A0        7      11      14    38.89   33.33   35.90
        A1       21      14      17    60.00   55.26   57.53
        A2        6       5       4    54.55   60.00   57.14
        A3        1       1       1    50.00   50.00   50.00
        A4        1       0       0   100.00  100.00  100.00
    AM-ADV        1       0       1   100.00   50.00   66.67
    AM-CAU        1       6       1    14.29   50.00   22.22
    AM-DIS        1       4       2    20.00   33.33   25.00
    AM-LOC        4       1       1    80.00   80.00   80.00
    AM-MNR        0       0       1     0.00    0.00    0.00

iteration:03919	epochs:00097	accuracy:0.51	cost:0.02	f1_score:64.52	best_score:64.52
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :  47.50

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       67      32      31    67.68   68.37   68.02
----------
        A0       15       9       6    62.50   71.43   66.67
        A1       28      12      10    70.00   73.68   71.79
        A2        8       1       2    88.89   80.00   84.21
        A3        2       4       0    33.33  100.00   50.00
        A4        1       0       0   100.00  100.00  100.00
    AM-ADV        2       0       0   100.00  100.00  100.00
    AM-CAU        0       0       2     0.00    0.00    0.00
    AM-DIS        2       1       1    66.67   66.67   66.67
    AM-LOC        3       1       2    75.00   60.00   66.67
    AM-MNR        0       1       1     0.00    0.00    0.00

iteration:04839	epochs:00120	accuracy:0.77	cost:0.01	f1_score:86.60	best_score:86.60
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :  77.50

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       89      14       9    86.41   90.82   88.56
----------
        A0       17       4       4    80.95   80.95   80.95
        A1       36       2       2    94.74   94.74   94.74
        A2       10       1       0    90.91  100.00   95.24
        A3        2       0       0   100.00  100.00  100.00
        A4        1       0       0   100.00  100.00  100.00
    AM-ADV        1       2       1    33.33   50.00   40.00
    AM-CAU        2       1       0    66.67  100.00   80.00
    AM-DIS        2       2       1    50.00   66.67   57.14
    AM-LOC        4       1       1    80.00   80.00   80.00
    AM-MNR        1       0       0   100.00  100.00  100.00

iteration:06719	epochs:00167	accuracy:0.92	cost:0.00	f1_score:97.44	best_score:97.44
iteration:06759	epochs:00168	accuracy:0.91	cost:0.00	f1_score:96.94	best_score:97.44
iteration:06799	epochs:00169	accuracy:0.95	cost:0.00	f1_score:97.44	best_score:97.44
Number of Sentences    :          40
Number of Propositions :          40
Percentage of perfect props :  90.00

              corr.  excess  missed    prec.    rec.      F1
------------------------------------------------------------
   Overall       97       4       1    96.04   98.98   97.49
----------
        A0       20       3       1    86.96   95.24   90.91
        A1       38       0       0   100.00  100.00  100.00
        A2       10       0       0   100.00  100.00  100.00
        A3        2       0       0   100.00  100.00  100.00
        A4        1       0       0   100.00  100.00  100.00
    AM-ADV        2       0       0   100.00  100.00  100.00
    AM-CAU        2       0       0   100.00  100.00  100.00
    AM-DIS  