In [1]:
# Reference : 
# https://github.com/RiaanZoetmulder/Master-Thesis/tree/master/rationale
# https://github.com/taolei87/rcnn/tree/master/code/rationale

import sys
import os

# os.environ['THEANO_FLAGS'] = "device=cuda*"
os.environ['KERAS_BACKEND']='tensorflow'
os.environ['MKL_THREADING_LAYER']='GNU'

import numpy as np
import pandas as pd
from collections import defaultdict
import re

In [2]:
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Dropout, LSTM, GRU, Bidirectional, TimeDistributed, concatenate, multiply, add, Add
# Merge
from keras.models import Model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
# from keras import initializations
from keras import initializers
from keras import regularizers
from keras import optimizers
from keras import constraints

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
MAX_SENT_LENGTH = 100
MAX_SENTS = 15
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

MAX_SEQUENCE = 100

In [4]:
import pickle

def save_pickle(path, X):
    """Serialize ``X`` with pickle and write it to ``path``.

    The file is opened in binary write mode, so an existing file at
    ``path`` is overwritten.
    """
    with open(path, mode='wb') as handle:
        pickle.dump(X, handle)

def open_pickle(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    Unpickling can execute arbitrary code, so only use this on files that
    come from a trusted source.
    """
    with open(path, mode='rb') as handle:
        return pickle.load(handle)

# Load the four preprocessed IMDB pickles (train/test texts and labels) in a
# fixed order and unpack them into the names used by the rest of the notebook.
X_train, X_test, y_tr, y_test = [
    open_pickle(pickle_path)
    for pickle_path in (
        "../../data/imdb/imdb_original_preprocessed_xtrain.pickle",
        "../../data/imdb/imdb_original_preprocessed_xtest.pickle",
        "../../data/imdb/imdb_original_preprocessed_ytrain.pickle",
        "../../data/imdb/imdb_original_preprocessed_ytest.pickle",
    )
]

In [5]:
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(X_train)

In [6]:
# create a placeholder

data = np.zeros((len(X_train), MAX_SEQUENCE),dtype='int32')

In [7]:
data.shape

(25000, 100)

In [8]:
wordTokens = text_to_word_sequence(X_train[0])

In [9]:
wordTokens[0]

'silent'

In [10]:
tokenizer.word_index['silent']

1234

In [11]:
# Fill `data` in place: row i receives the tokenizer ids of document i's tokens.
# Only the first MAX_SEQUENCE positions are written, and ids at or above
# MAX_NB_WORDS are skipped, which leaves the placeholder 0 at that position.
for i, doc in enumerate(X_train):
    wordTokens = text_to_word_sequence(doc)
    for j, word in enumerate(wordTokens):
        if j >= MAX_SEQUENCE:
            # Past the fixed sequence length: nothing more to write for this token.
            continue
        word_id = tokenizer.word_index[word]
        if word_id < MAX_NB_WORDS:
            data[i, j] = word_id

In [12]:
# Keep the labels as a plain 1-D array, which is what the single-unit sigmoid
# output and the binary_crossentropy loss used below expect. The former
# `to_categorical(...)` one-hot conversion was overwritten immediately by this
# assignment, so it was dead work and has been dropped.
labels = np.asarray(y_tr)

In [13]:
word_index = tokenizer.word_index

In [14]:
print('Total %s unique tokens.' % len(word_index))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

Total 85439 unique tokens.
Shape of data tensor: (25000, 100)
Shape of label tensor: (25000,)


In [15]:
# Shuffle documents and labels with one shared permutation so rows stay aligned.
indices = np.arange(data.shape[0])
# A fixed seed makes the shuffle (and therefore the split) repeatable. This seeds
# NumPy's *global* RNG, which later np.random calls in the notebook also draw from.
np.random.seed(1234)
np.random.shuffle(indices)
# NOTE: `data` and `labels` are rebound to their shuffled versions, so re-running
# this cell permutes the already-shuffled arrays a second time.
data = data[indices]
labels = labels[indices]
# Number of rows reserved for validation (VALIDATION_SPLIT fraction, truncated).
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

In [16]:
# Hold out the last `nb_validation_samples` shuffled rows for validation and
# train on everything before them.
# NOTE: the negative slices assume nb_validation_samples > 0; with 0 the training
# slice `[:-0]` would be empty and the validation slice `[-0:]` would be all rows.
x_train = data[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]

In [17]:
print('Number of positive and negative reviews in training and validation set')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

Number of positive and negative reviews in training and validation set
9964
2536


### GloVe

In [18]:
GLOVE_DIR = "../../data/glove.6B"

# Map each GloVe token to its vector (float32 array).
embeddings_index = {}

# Read the file as UTF-8 *text*. In binary mode ('rb') the tokens are `bytes`
# under Python 3 and never match the `str` keys of the tokenizer's word index,
# so every lookup misses and no pretrained vector reaches the embedding matrix.
# The `with` block also guarantees the file is closed if parsing fails.
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), encoding='utf-8') as f:
    for line in f:
        # Each line is a token followed by its whitespace-separated vector components.
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

In [19]:
print('Total %s word vectors.' % len(embeddings_index))

Total 400000 word vectors.


In [20]:
# One row per tokenizer index plus row 0, the value `data` keeps wherever no
# token id was written. Rows start out random; any word that has a pretrained
# vector in `embeddings_index` gets its row replaced by that vector.
vocab_rows = len(word_index) + 1
embedding_matrix = np.random.random((vocab_rows, EMBEDDING_DIM))

for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        # No pretrained vector for this word: keep its random initialisation.
        continue
    embedding_matrix[i] = embedding_vector

# Frozen lookup layer (trainable=False) initialised from the matrix above.
embedding_layer = Embedding(
    vocab_rows,
    EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=MAX_SEQUENCE,
    trainable=False,
)



In [21]:
len(word_index)

85439

In [22]:
# Test the embedding layer output

doc_input = Input(shape=(MAX_SEQUENCE,), dtype='int32')
embedded_sequences = embedding_layer(doc_input)

model = Model(doc_input, embedded_sequences)

In [23]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 100)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 100, 100)          8544000   
Total params: 8,544,000
Trainable params: 0
Non-trainable params: 8,544,000
_________________________________________________________________


In [24]:
len(word_index)*100

8543900

In [25]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

In [26]:
output = model.predict(x_train[:2,:])

In [27]:
output.shape

(2, 100, 100)

### Advanced Layer

Suppose you have two inputs $x_1$ and $x_2$ at each step of the RNN/LSTM, so that the recurrent update looks like:
$h(t) = (W_h * h(t-1) + W_1 * x_1 + W_2 * x_2)$,

then you can use

a Dense layer to compute $(W_1 * x_1 + b_1)$ ---> Dense1 <br>
a Dense layer to compute $(W_2 * x_2 + b_2)$ ---> Dense2 <br>
a Merge (add) layer to sum Dense1 and Dense2, which gives $(W_1 * x_1 + W_2 * x_2)$ plus the two bias terms<br>

In [34]:

# `Lambda` is used below but is not part of the notebook's import cell; import it
# here so this cell does not raise NameError on a fresh kernel.
from keras.layers import Lambda

# NOTE: `CastLayer` is defined in a cell that appears further down in this
# notebook; that cell must be executed before this one.

document_input = Input(shape=(MAX_SEQUENCE,), dtype='int32', name='document_input')
embedding = embedding_layer(document_input)

# Generator
gen_GRU = Bidirectional(GRU(50), merge_mode='concat', name='gen_GRU')(embedding)

# linear equation inside sigmoid should be multiplied using two weights. (trainable)
# `use_bias` is the Keras 2 name of the old `bias` argument, which only worked
# through a deprecation shim and emitted a warning.
dense_1 = Dense(1, activation='linear', use_bias=True)(embedding)
dense_2 = Dense(1, activation='linear', use_bias=True)(gen_GRU)
merge_dense = add([dense_1, dense_2])
flat = Flatten()(merge_dense)
# One sigmoid unit per token position: the selection probabilities.
GRU_sigmoid = Dense(MAX_SEQUENCE, activation='sigmoid', kernel_initializer='uniform')(flat)

# Threshold the probabilities into a hard 0/1 int32 mask.
cast_layer = CastLayer()(GRU_sigmoid)
# NOTE: `x` is not consumed by the encoder below, which reads `cast_layer`
# directly, so this stop-gradient tensor is currently not part of the model.
x = Lambda(lambda a: K.stop_gradient(a))(cast_layer)
# We need to implement the sampling on z layer
# and stop gradient

# Encoder
# Multiplying the token ids by the 0/1 mask sets unselected positions to id 0
# before the ids are embedded again with the shared embedding layer.
concat_layer = multiply([cast_layer, document_input])
embedding_2 = embedding_layer(concat_layer)
encoder_RNN = GRU(100)(embedding_2)
output_layer = Dense(1, activation='sigmoid')(encoder_RNN)

# Despite its name, `generator` is the full model: generator followed by encoder.
generator = Model(document_input, output_layer)

  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.


In [35]:
generator.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
document_input (InputLayer)     (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 100, 100)     8544000     document_input[0][0]             
                                                                 multiply_1[0][0]                 
__________________________________________________________________________________________________
gen_GRU (Bidirectional)         (None, 100)          45300       embedding_1[2][0]                
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 100, 1)       101         embedding_1[2][0]                
__________

In [36]:
x = generator.predict(x_val[:2])

In [37]:
x.shape

(2, 1)

In [38]:
x

array([[0.60652936],
       [0.6173408 ]], dtype=float32)

In [39]:
y_dummy = np.random.randint(0, high=2, size=(x_val.shape[0], MAX_SEQUENCE))

In [40]:
y_dummy.shape

(5000, 100)

In [41]:
generator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

In [42]:
generator.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
document_input (InputLayer)     (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 100, 100)     8544000     document_input[0][0]             
                                                                 multiply_1[0][0]                 
__________________________________________________________________________________________________
gen_GRU (Bidirectional)         (None, 100)          45300       embedding_1[2][0]                
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 100, 1)       101         embedding_1[2][0]                
__________

In [44]:
# generator.fit(x_train,y_train, validation_data=(x_val,y_val), 
#           epochs=2, batch_size=50, verbose=1)

# generator.fit(x_val, y_val, epochs=2, batch_size=50, verbose=1)

In [45]:
y = generator.evaluate(x_train, y_train)



In [46]:
y = generator.predict(x_val)

In [47]:
y.shape

(5000, 1)

In [48]:
y[0]

array([0.6065294], dtype=float32)

In [49]:
# y[0]

In [50]:
generator.layers[0]

<keras.engine.input_layer.InputLayer at 0x237fe995a90>

In [51]:
# Get an intermediate layer output by building a backend function that maps the
# model input to the output tensor of one of its layers.

from keras import backend as K

# `generator` is a functional-API Model (not a Sequential one); the layer is
# picked by its position in `generator.layers`.
# NOTE(review): the recorded output shape is (5000, 100, 1), which matches the
# per-token Dense layer in the model summary rather than gen_GRU (None, 100),
# so the name `get_bidirectional_output` looks misleading - confirm which layer
# index 3 refers to.
get_bidirectional_output = K.function([generator.layers[0].input],
                                  [generator.layers[3].output])
layer_output = get_bidirectional_output([x_val])[0]

In [52]:
layer_output.shape

(5000, 100, 1)

In [53]:
x[0]

array([0.60652936], dtype=float32)

In [33]:
class CastLayer(Layer):
    """Turn a tensor of probabilities into a hard 0/1 mask.

    Every input value strictly greater than 0.5 becomes 1 and every other
    value becomes 0; the result has dtype int32. The layer has no weights.
    The thresholding is deterministic: sampling the mask from the
    probabilities (as in the referenced rationale code) is not implemented.
    """

    def __init__(self,
                 **kwargs):
        super(CastLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to create. The layer used to register a non-trainable weight
        # of shape (input_shape[0], input_shape[1]), but input_shape[0] is the
        # batch dimension (unknown at build time) and `self.z` was overwritten
        # by a tensor in call(), so the weight was never used.
        super(CastLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        # The latest mask tensor stays available on the layer as `self.z`.
        self.z = K.cast(K.greater(x, 0.5), dtype='int32')
        return self.z

    def compute_output_shape(self, input_shape):
        # First and last input dimensions; for a 2-D input this is the input shape.
        return input_shape[0], input_shape[-1]

In [30]:
import tensorflow as tf

In [32]:
# https://gist.github.com/cbaziotis/7ef97ccf71cbc14366835198c09809d2
# other reference : 
# https://gist.github.com/cbaziotis/6428df359af27d58078ca5ed9792bd6d

def dot_product(x, kernel):
    """Backend-aware wrapper around ``K.dot(x, kernel)``.

    On every backend except TensorFlow this is a plain ``K.dot``. On
    TensorFlow the kernel first gets an extra axis via ``K.expand_dims`` and
    the last axis of the product is squeezed away again.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    expanded_kernel = K.expand_dims(kernel)
    return K.squeeze(K.dot(x, expanded_kernel), axis=-1)
    
class Zlayer(Layer):
    """Work-in-progress layer meant to produce the selection variable ``z``.

    The layer owns two weight tensors, ``w1`` and ``w2``, and an optional bias
    ``b``, each with its own regularizer and constraint. It expects a 3-D
    input and does not support masking.

    The forward pass is not implemented yet: ``call`` raises
    ``NotImplementedError`` (see the note there).

    Arguments:
        w1_regularizer, w2_regularizer, b_regularizer: regularizer
            identifiers, resolved with ``regularizers.get``.
        w1_constraint, w2_constraint, b_constraint: constraint identifiers,
            resolved with ``constraints.get``.
        bias: whether to create the bias weight ``b``.
    """

    def __init__(self,
                 w1_regularizer=None, w2_regularizer=None,
                 w1_constraint=None, w2_constraint=None,
                 bias=True,
                 b_regularizer=None, b_constraint=None,
                 **kwargs):

        self.supports_masking = False
        self.init = initializers.get('uniform')

        self.w1_regularizer = regularizers.get(w1_regularizer)
        self.w2_regularizer = regularizers.get(w2_regularizer)
        # build() reads these two attributes when `bias` is true; they used to
        # be missing, which made build() fail with AttributeError.
        self.b_regularizer = regularizers.get(b_regularizer)

        self.w1_constraint = constraints.get(w1_constraint)
        self.w2_constraint = constraints.get(w2_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias

        super(Zlayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        feature_dim = input_shape[-1]

        # `shape` is passed by keyword instead of relying on the legacy
        # positional form of add_weight.
        self.w1 = self.add_weight(shape=(feature_dim, 1),
                                  initializer=self.init,
                                  name='{}_w1'.format(self.name),
                                  regularizer=self.w1_regularizer,
                                  constraint=self.w1_constraint)

        self.w2 = self.add_weight(shape=(feature_dim,),
                                  initializer=self.init,
                                  name='{}_w2'.format(self.name),
                                  regularizer=self.w2_regularizer,
                                  constraint=self.w2_constraint)

        if self.bias:
            self.b = self.add_weight(shape=(feature_dim,),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        super(Zlayer, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # The layer never propagates a mask.
        return None

    def call(self, x, mask=None):
        # The draft body was
        #     self.z_sampling = K.sigmoid(tf.matmul(w))
        #     return z
        # where `w` and `z` are undefined and `tf.matmul` needs two operands,
        # so it could only fail with NameError. The intended steps, according
        # to the original comments, are: compute the selection probabilities
        # from `x`, sample `z` from them, and stop the gradient through the
        # sample. Fail explicitly until that is written.
        raise NotImplementedError(
            'Zlayer.call is not implemented yet: the sampling of z and the '
            'stop-gradient step are still missing.')

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]

    def sample_all(self, x):
        # x is already the hidden unit
        return x
        

In [None]:
# z-layer