In [1]:
import numpy as np
import tensorflow as tf
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pylab as pl
from IPython import display
import sys
%matplotlib inline

In [2]:
class LSTM_cell(object):

    """
    Simplified LSTM-style recurrent cell built with the TensorFlow 1.x graph API.

    The cell has a single (forget) gate ``f`` and a candidate memory ``c_``.
    The new memory is ``f * c_prev + (1 - f) * c_`` and the hidden state is
    the memory itself; there are no separate input or output gates.

    The object takes 3 arguments for initialization.
    input_size = Input Vector size
    hidden_layer_size = Hidden layer size
    target_size = Output vector size

    """

    def __init__(self, input_size, hidden_layer_size, target_size):

        # Initialization of given values
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.target_size = target_size

        # Weights of the forget gate (input, recurrent, bias)
        self.Wf = tf.Variable(tf.zeros(
            [self.input_size, self.hidden_layer_size]))
        self.Uf = tf.Variable(tf.zeros(
            [self.hidden_layer_size, self.hidden_layer_size]))
        self.bf = tf.Variable(tf.zeros([self.hidden_layer_size]))

        # Weights of the candidate memory (input, recurrent, bias)
        self.Wc = tf.Variable(tf.zeros(
            [self.input_size, self.hidden_layer_size]))
        self.Uc = tf.Variable(tf.zeros(
            [self.hidden_layer_size, self.hidden_layer_size]))
        self.bc = tf.Variable(tf.zeros([self.hidden_layer_size]))

        # Weights for output layers
        self.Wo = tf.Variable(tf.truncated_normal(
            [self.hidden_layer_size, self.target_size], mean=0, stddev=.01))
        self.bo = tf.Variable(tf.truncated_normal(
            [self.target_size], mean=0, stddev=.01))

        # Placeholder for a batch of sequences: [batch, time, input_size]
        self._inputs = tf.placeholder(tf.float32,
                                      shape=[None, None, self.input_size],
                                      name='inputs')

        # Processing inputs to work with scan function (time-major layout).
        # process_batch_input_for_RNN is a module-level helper; it only has
        # to exist by the time the class is instantiated.
        self.processed_input = process_batch_input_for_RNN(self._inputs)

        # The initial state must already have shape
        # [batch_size, hidden_layer_size]: starting from a [1, hidden] state
        # and relying on broadcasting in the first step caused a low-level
        # error during back propagation. The batch size is unknown at graph
        # construction time, so as a small hack the first time step of the
        # input ([batch, input_size]) is multiplied by an all-zero matrix,
        # which yields zeros of exactly the required shape.
        zero_state = tf.matmul(
            self._inputs[:, 0, :],
            tf.zeros([input_size, hidden_layer_size]))

        # Hidden state and memory are carried through tf.scan as one stacked
        # tensor of shape [2, batch, hidden]; both start at zero.
        self.initial_hidden = tf.stack([zero_state, zero_state])

    # Function for LSTM cell.
    def Lstm(self, previous_hidden_memory_tuple, x):
        """
        This function takes previous hidden state and memory tuple with input and
        outputs current hidden state.

        previous_hidden_memory_tuple = stacked [hidden_state, memory] tensor
        x = input of the current time step
        Returns the stacked [current_hidden_state, memory] tensor.
        """

        previous_hidden_state, c_prev = tf.unstack(previous_hidden_memory_tuple)

        # Forget Gate
        f = tf.sigmoid(
            tf.matmul(x, self.Wf)
            + tf.matmul(previous_hidden_state, self.Uf)
            + self.bf
        )

        # Candidate memory
        c_ = tf.tanh(
            tf.matmul(x, self.Wc)
            + tf.matmul(previous_hidden_state, self.Uc)
            + self.bc
        )

        # Final Memory cell: the forget gate also acts as the (inverted)
        # input gate, blending old memory with the new candidate.
        c = f * c_prev + (1 - f) * c_

        # Current Hidden state (no output gate: the memory is exposed as is)
        current_hidden_state = c

        return tf.stack([current_hidden_state, c])

    # Function for getting all hidden state.
    def get_states(self):
        """
        Iterates through time/ sequence to get all hidden state
        """

        # Getting all hidden state through time
        all_hidden_states = tf.scan(self.Lstm,
                                    self.processed_input,
                                    initializer=self.initial_hidden,
                                    name='states')

        # scan returns the stacked [hidden, memory] pair for every step;
        # index 0 on the second axis keeps only the hidden state.
        all_hidden_states = all_hidden_states[:, 0, :, :]

        return all_hidden_states

    # Function to get output from a hidden layer
    def get_output(self, hidden_state):
        """
        This function takes hidden state and returns output
        (a linear projection; no activation is applied here)
        """
        output = tf.matmul(hidden_state, self.Wo) + self.bo

        return output

    # Function for getting all output layers
    def get_outputs(self):
        """
        Iterating through hidden states to get outputs for all timestamp
        """
        all_hidden_states = self.get_states()

        all_outputs = tf.map_fn(self.get_output, all_hidden_states)

        return all_outputs


# Function to convert batch input data to use scan ops of tensorflow.
def process_batch_input_for_RNN(batch_input):
    """
    Swap the first two axes of a rank-3 tensor so that tf.scan, which
    iterates over the leading axis, walks through the sequence dimension.

    Process tensor of size [5,3,2] to [3,5,2]
    """
    # A single transpose with perm=[1, 0, 2] gives the same result as the
    # former perm=[2, 0, 1] transpose followed by a full axis reversal.
    return tf.transpose(batch_input, perm=[1, 0, 2])


In [3]:
# Network dimensions: one scalar input per time step, one scalar output,
# and a small recurrent state.
input_size = 1
target_size = 1
hidden_layer_size = 3

In [4]:
# Placeholder for the targets, one row of `target_size` values per batch
# element. It is named 'targets' so it does not clash with the network's own
# 'inputs' placeholder (TensorFlow would otherwise silently rename one of them).
y = tf.placeholder(tf.float32, shape=[None, target_size], name='targets')

In [5]:
# Build the recurrent cell (this creates its variables and input placeholder)
rnn = LSTM_cell(
    input_size=input_size,
    hidden_layer_size=hidden_layer_size,
    target_size=target_size,
)

In [6]:
# Getting the outputs of the rnn for every time step
# (time is the leading axis of the result).
outputs = rnn.get_outputs()

In [7]:
# Getting the output of the final time step: `outputs` is indexed by time on
# its first axis, so [-1] selects the last step for the whole batch.
last_output = outputs[-1]

In [8]:
# The rnn emits raw (un-activated) values for the final step; a sigmoid
# squashes them into (0, 1) so they can be rounded to a binary prediction.
output=tf.nn.sigmoid(last_output)

In [9]:
# Sigmoid cross-entropy between the targets and the final-step logits,
# averaged over the batch.
per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=y,
    logits=last_output,
)
cross_entropy = tf.reduce_mean(per_example_loss)

In [10]:
# Training with the Adam optimizer; the learning rate is supplied at run time
# through the scalar placeholder `lr`.
lr = tf.placeholder(dtype=tf.float32,shape=[])
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

In [11]:
# Calculation of correct predictions and accuracy (in percent): the sigmoid
# output is rounded to 0/1 and compared element-wise with the targets.
predicted_labels = tf.round(output)
correct_prediction = tf.equal(y, predicted_labels)
fraction_correct = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracy = fraction_correct * 100

In [12]:
# Open a session and initialise every variable created so far
# (model weights as well as the optimizer's internal slots).
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Dataset Preparation, Training and Evaluation

In [13]:
# Curriculum training on the parity task: the target is the sum of a random
# bit sequence modulo 2. Training starts with sequences of length k = 2; every
# time the loss on a fresh validation batch drops below a (length-dependent)
# threshold, the accuracy is printed and the sequence length grows by one.
# Training stops once k reaches 9.
k = 2  # current sequence length
j = 0  # number of training iterations performed
while True:
    j += 1

    # One optimisation step on a freshly sampled batch of 64 bit sequences
    X = np.random.binomial(n=1, p=0.5, size=(64, k, 1))
    Y = np.sum(X, 1) % 2
    sess.run(train_step, feed_dict={rnn._inputs: X, y: Y, lr: 0.001})

    # Evaluate on an independent batch of 128 sequences of the same length
    X = np.random.binomial(n=1, p=0.5, size=(128, k, 1))
    Y = np.sum(X, 1) % 2
    Loss, acc = sess.run([cross_entropy, accuracy],
                         feed_dict={rnn._inputs: X, y: Y})

    # The threshold tightens slightly as the sequences get longer
    if Loss < 0.2 - k/100:
        print(acc)
        k += 1
    if k == 9:
        break

# Generalisation test on sequences of length 16, longer than any seen
# during training.
X_test = np.random.binomial(n=1, p=0.5, size=(2**16, 16, 1))
y_test = np.sum(X_test, 1) % 2
Test_accuracy = str(sess.run(accuracy,
                             feed_dict={rnn._inputs: X_test, y: y_test}))
print('Result', Test_accuracy, 'Iterations:', j)


100.0
100.0
98.4375
97.65625
94.53125
98.4375
98.4375
Result 91.88385 Iterations: 5942
