In [1]:
import os
import numpy as np
import time
import tensorflow as tf
import datetime
import matplotlib.pyplot as plt
seed = 5693
np.random.seed(seed)
tf.random.set_seed(seed)

In [2]:
import numpy as np

train_val_test_file = np.load('./data/g2_train_val_test_data_lstar.npz')


x_train = train_val_test_file['train_x']
m_train = np.sum(train_val_test_file['train_m'], axis=1)
y_train = train_val_test_file['train_y']

x_val = train_val_test_file['val_x']
m_val = np.sum(train_val_test_file['val_m'], axis=1)
y_val = train_val_test_file['val_y']

x_test = train_val_test_file['test_x']
m_test = np.sum(train_val_test_file['test_m'], axis=1)
y_test = train_val_test_file['test_y']


one_hot_y = lambda t: [1 if i == t else 0 for i in range(2)]
y_train, y_val, y_test = np.array([one_hot_y(y) for y in y_train]), np.array([one_hot_y(y) for y in y_val]), np.array([one_hot_y(y) for y in y_test])




In [4]:
size_input = int(tf.shape(x_train)[1])
size_output = int(tf.shape(y_train)[1])
number_of_train_examples = int(tf.shape(x_train)[0])
number_of_test_examples = int(tf.shape(x_val)[0])
print(size_input,size_output,number_of_train_examples,number_of_test_examples)

14 2 236 160


236

In [14]:
class RNNModel(tf.keras.Model):
    def __init__(self, size_input, size_hidden, size_output, device=None):
        super(RNNModel, self).__init__()
        """
        size_input: int, size of input layer
        size_hidden: int, size of hidden layer
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        self.size_input, self.size_hidden, self.size_output, self.device =\
        size_input, size_hidden, size_output, device
        
        # Initialize weights between input layer and hidden layer 1
        self.W_xh = tf.Variable(tf.random.normal([self.size_input, self.size_hidden], seed=seed, stddev=0.1))
        # Initialize weights between hidden layer t-1 and hidden layer t
        self.W_hh = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden], seed=seed, stddev=0.1))
        self.b_h = tf.Variable(tf.random.normal([1, self.size_hidden], seed=seed))
        # Initialize weights between hidden layer and output layer
        self.W_hq = tf.Variable(tf.random.normal([self.size_hidden, self.size_output], seed=seed, stddev=0.1))
        self.b_q = tf.Variable(tf.random.normal([1, self.size_output], seed=seed))

        # Define variables to be updated during backpropagation
        self.RNN_variables = [self.W_xh, self.W_hh, self.b_h, self.W_hq, self.b_q]

    
    def forward(self, X, m):
        """
        forward pass
        X: Tensor, inputs
        """
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X, m)
        else:
            self.y = self.compute_output(X, m)

        return self.y
  
    def loss(self, y_pred, y_true, L1=0, L2=0):
        '''
        y_pred - Tensor of shape (batch_size, size_output)
        y_true - Tensor of shape (batch_size, size_output)
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        l2_penlty = (tf.nn.l2_loss(self.W_xh)+tf.nn.l2_loss(self.W_hh)+tf.nn.l2_loss(self.W_hq))*L2
        loss_with_l2 = l2_penlty+tf.losses.categorical_crossentropy(y_true_tf, y_pred_tf)
#         print("y_true_tf",y_true_tf,"y_pred_tf",y_pred_tf)
        return loss_with_l2
  
    def backward(self, X_train, y_train, m, learning_rate, L1=0, L2=0.1):
        """
        backward pass
        """
#         optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train, m)
            current_loss = self.loss(predicted, y_train, L2=L2)
        grads = tape.gradient(current_loss, self.RNN_variables)
        
        
        optimizer.apply_gradients(zip(grads, self.RNN_variables))
        return current_loss, predicted
        
        
    def compute_output(self, X, m):
        """
        Custom method to obtain output tensor during forward pass
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        # Initialize hidden layer
        h = tf.zeros((X_tf.shape[0], self.size_hidden))
        for i in range(m):
            h = X_tf[0][i] * self.W_xh + tf.matmul(h, self.W_hh) + self.b_h
            h = tf.nn.relu(h)
        
        # Compute output
        output = tf.matmul(h, self.W_hq) + self.b_q
        #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
        #Second add tf.Softmax(output) and then return this variable
        return tf.nn.softmax(output)


In [16]:
BATCH_SIZE = 1
HIDDEN_SIZE = 6
NUM_EPOCHS = 10
LEARNING_RATE = 0.001


size_hidden = HIDDEN_SIZE
size_input = 1
size_output = int(tf.shape(y_train)[1])
number_of_train_examples = int(tf.shape(x_train)[0])
number_of_test_examples = int(tf.shape(x_val)[0])


rnn_on_gpu = RNNModel(size_input, size_hidden, size_output, device='cpu')
time_start = time.time()
epoch = 1
loss_diff,last_loss = 1,0

while epoch <= NUM_EPOCHS and abs(loss_diff) > 0.00001:
    loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train, m_train)).shuffle(BATCH_SIZE+BATCH_SIZE//4, seed=epoch*(seed)).batch(BATCH_SIZE)
    for inputs, outputs, m in train_ds:
        cur_loss, preds = rnn_on_gpu.backward(inputs, outputs, m.numpy()[0], learning_rate=LEARNING_RATE)
        loss_total_gpu += cur_loss
        
  # Calculate Accuracy
#     train_accuracy, test_accuracy = tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.CategoricalAccuracy()
#     train_accuracy.update_state(y_train, rnn_on_gpu.forward(x_train, m_train))
#     test_accuracy.update_state(y_val, rnn_on_gpu.forward(x_val, m_train))
    train_loss = np.sum(loss_total_gpu) / x_train.shape[0]
    # acc
    val_acc = 0
    train_acc = 0
    for i in range(len(x_train)):
        predicts = rnn_on_gpu.forward([x_train[i]], m_train[i])
        if (predicts[0][0] > predicts[0][1] and y_train[i][0] > y_train[i][1]) or (predicts[0][0] < predicts[0][1] and y_train[i][0] < y_train[i][1]):
            train_acc += 1
    for i in range(len(x_val)):
        predicts = rnn_on_gpu.forward([x_val[i]], m_val[i])
        if (predicts[0][0] > predicts[0][1] and y_val[i][0] > y_val[i][1]) or (predicts[0][0] < predicts[0][1] and y_val[i][0] < y_val[i][1]):
            val_acc += 1
#     test_loss = np.sum(rnn_on_gpu.loss(rnn_on_gpu.forward(x_val, m_train), y_val)) / x_val.shape[0]



    loss_diff = train_loss - last_loss
    last_loss = train_loss
    print(f'Number of Epoch = {epoch} - Training Cross Entropy:= {np.sum(loss_total_gpu) / x_train.shape[0]} - Training Accuracy:= {train_acc/len(x_train)} - Test Accuracy:= {val_acc/len(x_val)}')
    time_taken = time.time() - time_start
    print('Time taken (in seconds): {:.2f}'.format(time_taken))
    time_start = time.time()
    epoch += 1

# record loss and accuracy for final test set:
# final_test_loss = np.sum(rnn_on_gpu.loss(rnn_on_gpu.forward(x_final_test), y_final_test)) / x_final_test.shape[0]
# final_test_acc = tf.keras.metrics.CategoricalAccuracy()
# final_test_acc.update_state(y_final_test, rnn_on_gpu.forward(x_final_test))
# result_for_ten.append([final_test_loss,final_test_acc.result().numpy()])

    

Number of Epoch = 1 - Training Cross Entropy:= 0.17788093372926875 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 18.81
Number of Epoch = 2 - Training Cross Entropy:= 0.17180761240296444 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 18.12
Number of Epoch = 3 - Training Cross Entropy:= 0.16657974760411148 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 18.00
Number of Epoch = 4 - Training Cross Entropy:= 0.16203060796705343 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 17.98
Number of Epoch = 5 - Training Cross Entropy:= 0.15803548845194154 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 18.71
Number of Epoch = 6 - Training Cross Entropy:= 0.15449826192047636 - Training Accuracy:= 0.9788135593220338 - Test Accuracy:= 0.9875
Time taken (in seconds): 17.95
Number of Epoch 

In [20]:
for i in range(len(x_train)):
#     if y_train[i] == []
    print(train_val_test_file['train_y'][i])
    

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [70]:
m_val

array([16, 19, 22, 22, 22, 16, 19, 13,  7, 13, 10, 19, 19, 19, 16,  7, 16,
       19, 16, 25, 16, 19, 22, 19, 10, 16, 22, 16, 16, 22, 19,  7, 16,  1,
       16,  4, 22, 22,  7, 13, 10, 19, 10, 19, 19, 19, 25, 22,  1, 13, 16,
       16, 13, 25, 25,  4,  4,  7, 10,  7, 10, 13, 19, 22, 16, 16, 22, 10,
       16, 22, 25,  4, 10, 13, 13, 25, 22, 22, 10, 10, 10,  7, 13,  4, 10,
        7,  7, 25, 13, 10,  4,  4, 25,  4,  4, 25,  7, 10, 25, 13,  7, 16,
       25, 10, 25,  7, 10, 25, 13, 19, 19, 13,  7, 16, 25, 13,  4, 16, 19,
       13, 10, 10, 10,  4,  4, 10, 10,  7, 13,  7, 19, 10, 22, 13,  4,  4,
       22,  7, 19, 25, 25, 13, 22, 25, 22, 25, 13, 16,  7,  7, 25, 25, 19,
       22,  7,  7, 22, 13,  4,  4])