In [1]:
import codecs
import numpy as np
import tensorflow as tf
import tensorflow.contrib.keras as keras
from tensorflow.contrib.keras import utils

In [2]:
# Locations of the train / dev / test CoNLL files for each data fold.
fold_1 = dict(
    train="./seg_data/egy/data_1.train.conll",
    dev="./seg_data/egy/data_1.dev.conll",
    test="./seg_data/egy/data_1.test.conll",
)

fold_2 = dict(
    train="./seg_data/egy/data_2.train.conll",
    dev="./seg_data/egy/data_2.dev.conll",
    test="./seg_data/egy/data_2.test.conll",
)

# Folds are looked up by their number given as a string key.
folds = dict([('1', fold_1), ('2', fold_2)])

Prepare data for training
===

In [3]:
# Training corpus: one list of characters per tweet, plus a parallel list
# holding the segmentation tag of every character.
train_sentences = list()
train_trgs = list()

# Tag inventory and its inverse lookup (tag -> position in index2trg).
index2trg = ['S', 'B', 'E', 'M', 'WB', 'EOT']
trg2index = {trg: idx for idx, trg in enumerate(index2trg)}

ch_set = set()
# Indices 0 and 1 are reserved for the padding and unknown-character symbols.
index2ch = ['<PAD>', '<UNK>']
ch2index = dict()


def _flush_sentence(sentence, sentence_trg):
    """Close the current tweet with the EOT marker and store it.

    Appends "EOT" to both the character list and the tag list, registers
    "EOT" in the character vocabulary, and appends both lists to
    `train_sentences` / `train_trgs`.
    """
    ch_set.add("EOT")
    sentence.append("EOT")
    sentence_trg.append("EOT")
    train_sentences.append(sentence)
    train_trgs.append(sentence_trg)


with codecs.open(folds['1']['train'], encoding="utf-8") as eg_tr1:
    sentence = list()
    sentence_trg = list()
    for line in eg_tr1:
        line_elements = line.strip().split()
        if line_elements:
            # First column is the character, second column is its tag.
            ch_set.add(line_elements[0])
            sentence.append(line_elements[0])
            sentence_trg.append(line_elements[1])
        else:
            # A blank line separates two tweets.
            _flush_sentence(sentence, sentence_trg)
            sentence = list()
            sentence_trg = list()

    # If the file does not end with a blank line, the last tweet is still
    # pending here; without this flush it would be silently dropped.
    if sentence:
        _flush_sentence(sentence, sentence_trg)
        sentence = list()
        sentence_trg = list()

# Sort the vocabulary so that character indices are identical on every run:
# iterating a set of strings depends on per-process hash randomisation, which
# would otherwise shuffle the one-hot columns between kernel restarts.
chars = sorted(ch_set)
index2ch.extend(chars)
ch2index.update((ch, idx) for idx, ch in enumerate(index2ch))

n_instances = len(train_sentences)
# default=0 keeps the cell from crashing on an empty training file.
n_steps = max(map(len, train_sentences), default=0)
n_inputs = len(index2ch)

print('number of tweets (instances):',n_instances)
print('number of steps (cells):', n_steps)
print('number of units in the input layer (# features)', n_inputs)
print()
print('The shape of the X_batch should be: (m, {}, {}), where `m` is the number of instances'.format(n_steps, n_inputs))

number of tweets (instances): 257
number of steps (cells): 156
number of units in the input layer (# features) 127

The shape of the X_batch should be: (m, 156, 127), where `m` is the number of instances


Prepare inputs for the network
===

In [4]:
# ---------------------------------------------------------------
# Step 1: translate every character of every tweet to its index
# ---------------------------------------------------------------
train_char_index = [
    [ch2index[ch] for ch in sentence]
    for sentence in train_sentences
]

# Unpadded length of each instance, in the same order as train_sentences.
the_length_of_the_input_sequence_for_each_instance = [
    len(sentence) for sentence in train_sentences
]

# ---------------------------------------------------------------
# Step 2: pad short tweets at the end up to n_steps
# ---------------------------------------------------------------
# https://stackoverflow.com/questions/38592324/one-hot-encoding-using-numpy
# https://stackoverflow.com/questions/37674306/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-t
X = keras.preprocessing.sequence.pad_sequences(
    train_char_index,
    maxlen=n_steps,
    padding='post',
)
print(X.shape)

# ---------------------------------------------------------------
# Step 3: swap each character index for its one-hot vector
# ---------------------------------------------------------------
# https://stackoverflow.com/questions/36960320/convert-a-2d-matrix-to-a-3d-one-hot-matrix-numpy
# Row k of the identity matrix is the one-hot vector for index k, so indexing
# it with the flattened index matrix yields one one-hot row per character.
targets = np.array(X).ravel()
identity = np.eye(n_inputs)
X_one_hot = identity[targets]
X_batch = X_one_hot.reshape((n_instances, n_steps, n_inputs))
print(X_batch.shape)

(257, 156)
(257, 156, 127)


In [5]:
# Toy example: one-hot encode an integer array of arbitrary rank.
# A holds class ids in the range 0..4, so 5 classes are needed.
A = np.array([[[1, 0, 4], [2, 4, 2], [3, 3, 0]], [[1, 4, 3], [1, 0, 0], [2, 2, 0]]])

# Flattened view of the class ids.
y_1 = A.ravel()

# Previously incidence_y was allocated as zeros and never filled in.
# Row k of the 5x5 identity matrix is the one-hot vector of class k, so
# indexing it with A appends a one-hot axis: result shape is (*A.shape, 5).
incidence_y = np.eye(5)[A]

Use the `sequence_length` argument of the `static_rnn()` or `dynamic_rnn()` functions to specify each sentence’s length (as discussed earlier).

Recurrent Neural Network (Static Unrolling Through Time)
===

This approach still builds a graph containing one cell per time step. With such a large graph, you may even get out-of-memory (OOM) errors during backpropagation (especially with the limited memory of GPU cards), since it must store all tensor values during the forward pass so it can use them to compute gradients during the reverse pass.

In [6]:
# Start from an empty default graph before building the static RNN.
tf.reset_default_graph()

n_neurons = 500

# Batch-major input placeholder: (n_examples, n_steps, n_inputs).
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])

# static_rnn wants one tensor per time step, so make the data time-major,
# i.e. (n_steps, n_examples, n_inputs), and split along the leading axis.
X_swap = tf.transpose(X, perm=[1, 0, 2])
X_seqs = tf.unstack(X_swap, axis=0)

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

# output_seqs: Python list with one output tensor per time step.
# states: tensor with the final state of the network; for a basic cell this
# is simply the last output.
output_seqs, states = tf.contrib.rnn.static_rnn(
    cell=basic_cell,
    inputs=X_seqs,
    dtype=tf.float32,
)

# Stack the per-step outputs and go back to batch-major order.
outputs = tf.transpose(tf.stack(output_seqs, axis=0), perm=[1, 0, 2])

init = tf.global_variables_initializer()


In [7]:
# Initialise the variables and evaluate the RNN outputs on the one-hot batch.
with tf.Session() as sess:
    sess.run(init)
    outputs_val = sess.run(outputs, feed_dict={X: X_batch})

# print(outputs_val)

The `dynamic_rnn()` function uses a `while_loop()` operation to run over the cell the appropriate number of times, and you can set `swap_memory=True` if you want it to swap the GPU’s memory to the CPU’s memory during backpropagation to avoid OOM errors.

Conveniently, it also accepts a single tensor for all inputs at every time step (shape [None, n_steps, n_inputs]) and it outputs a single tensor for all outputs at every time step (shape [None, n_steps, n_neurons]); there is no need to stack, unstack, or transpose.

In [11]:
# Start from an empty default graph before building the dynamic RNN.
tf.reset_default_graph()
n_neurons = 500

# One integer per instance, passed to dynamic_rnn as sequence_length.
seq_length = tf.placeholder(tf.int32, shape=[None])

# Batch-major input placeholder: (n_examples, n_steps, n_inputs).
X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

# dynamic_rnn consumes the 3-D input tensor directly, so no transpose,
# unstack or stack is needed here.
outputs, states = tf.nn.dynamic_rnn(
    cell=basic_cell,
    inputs=X,
    sequence_length=seq_length,
    dtype=tf.float32,
)

init = tf.global_variables_initializer()

In [14]:
# Initialise the variables, then fetch the per-step outputs and final states,
# feeding the one-hot batch together with each instance's unpadded length.
with tf.Session() as sess:
    sess.run(init)
    outputs_val, states_val = sess.run(
        [outputs, states],
        feed_dict={
            X: X_batch,
            seq_length: the_length_of_the_input_sequence_for_each_instance,
        },
    )

In [19]:
# Display the evaluated RNN outputs fetched by the preceding session run.
outputs_val

array([[[ 0.01603885,  0.04096248,  0.07111038, ...,  0.0232278 ,
          0.03350134,  0.00360562],
        [ 0.05326501,  0.11043419,  0.04752845, ...,  0.09994518,
         -0.03508453, -0.0374379 ],
        [ 0.00754924,  0.00409045, -0.02264069, ...,  0.08742078,
         -0.10536283, -0.02145722],
        ..., 
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]],

       [[-0.04733785, -0.01651455,  0.03473307, ..., -0.02926578,
          0.01879792, -0.01249318],
        [-0.02724044, -0.10794573,  0.06978463, ...,  0.03191889,
          0.02742284, -0.10099661],
        [ 0.0310605 , -0.09034449,  0.04587358, ..., -0.00392174,
         -0.07773848, -0.07894513],
        ..., 
        [ 0.        ,  0.        ,  0.        , ...,