In [7]:
import os
import numpy as np
import time
import tensorflow as tf
import datetime
import matplotlib.pyplot as plt
# Number of label classes; also the width of the one-hot label vectors built below.
NUM_OF_CATEGORY = 10

# One seed value drives both the TensorFlow and the NumPy global generators.
seed = 5693
tf.random.set_seed(seed)
np.random.seed(seed)

In [8]:
# Dataset selection: swap in tf.keras.datasets.mnist to run on the digits data instead.
mnist = tf.keras.datasets.fashion_mnist
# mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The dataset's own test split is kept aside as the *final* test set ...
x_final_test, y_final_test = x_test, y_test

# ... while samples 50000..59999 of the training split are carved off and reused
# under the x_test / y_test names as the per-epoch evaluation set.
x_test = x_train[50000:60000]
x_train = x_train[0:50000]
y_test = y_train[50000:60000]
y_train = y_train[0:50000]


def _flatten_and_scale(images):
    """Divide pixel values by 255.0 and flatten each sample to one row."""
    # ndarray.shape gives the sample count directly; no need to build a tensor
    # from the whole array just to read its length.
    return np.reshape(images / 255.0, (images.shape[0], -1))


def one_hot_y(t):
    """Return the one-hot list of length NUM_OF_CATEGORY for a single label t."""
    return [1 if i == t else 0 for i in range(NUM_OF_CATEGORY)]


def _one_hot_labels(labels):
    """Vectorized one-hot encoding: row k is the one-hot vector of labels[k].

    Labels are expected to lie in [0, NUM_OF_CATEGORY); indexing the identity
    matrix replaces the per-sample Python loop.
    """
    return np.eye(NUM_OF_CATEGORY, dtype=int)[np.asarray(labels)]


x_train, x_test, x_final_test = (
    _flatten_and_scale(x_train),
    _flatten_and_scale(x_test),
    _flatten_and_scale(x_final_test),
)
y_train, y_test, y_final_test = (
    _one_hot_labels(y_train),
    _one_hot_labels(y_test),
    _one_hot_labels(y_final_test),
)


In [9]:
class MLP(tf.keras.Model):
    """Multi-layer perceptron with two ReLU hidden layers and a softmax output.

    Weights and biases are plain ``tf.Variable`` objects. Training is done by
    ``backward``, which applies a hand-written Adam-style update that also
    tracks a third gradient moment (``u``) next to the usual first (``m``) and
    second (``v``) moments.
    """

    def __init__(self, size_input, size_hidden, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden: int, size of hidden layer (used for both hidden layers)
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        super(MLP, self).__init__()
        self.size_input, self.size_hidden, self.size_output, self.device =\
        size_input, size_hidden, size_output, device

        # NOTE: the variables are created in the order W1, b1, W2, b2, W3, b3;
        # every draw uses the module-level `seed` as its op-level seed.
        # Initialize weights between input layer and hidden layer 1
        self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden], seed=seed, stddev=0.1))
        # Initialize biases for hidden layer 1
        self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden], seed=seed))
        # Initialize weights between hidden layer 1 and hidden layer 2
        self.W2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_hidden], seed=seed, stddev=0.1))
        # Initialize biases for hidden layer 2
        self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden], seed=seed))
        # Initialize weights between hidden layer 2 and output layer
        self.W3 = tf.Variable(tf.random.normal([self.size_hidden, self.size_output], seed=seed, stddev=0.1))
        # Initialize biases for output layer
        self.b3 = tf.Variable(tf.random.normal([1, self.size_output], seed=seed))

        # Define variables to be updated during backpropagation
        self.MLP_variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

        # Decay rates of the first, second and third moment estimates
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.beta3 = 0.999987
        # Small constant used in the denominator of the parameter update
        self.epsilon = pow(10,-8)
        # Optimizer state: step counter plus one m/v/u slot per trainable variable
        self.t = 0
        self.m = []
        self.v = []
        self.u = []
        for var in self.MLP_variables:
            self.m.append(tf.Variable(tf.zeros_like(var), trainable=False))
            self.v.append(tf.Variable(tf.zeros_like(var), trainable=False))
            self.u.append(tf.Variable(tf.zeros_like(var), trainable=False))

    def forward(self, X, dropout_rate=0):
        """
        forward pass
        X: Tensor, inputs
        dropout_rate: float, dropout rate applied after each hidden layer (0 disables dropout)

        Returns the softmax output; the result is also cached on ``self.y``.
        """
        if self.device is not None:
            # Any device string other than 'gpu' selects the CPU.
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X, dropout_rate=dropout_rate)
        else:
            self.y = self.compute_output(X, dropout_rate=dropout_rate)

        return self.y

    def loss(self, y_pred, y_true, L1=0, L2=0):
        '''
        y_pred - Tensor of shape (batch_size, size_output)
        y_true - Tensor of shape (batch_size, size_output)
        L1 - accepted for interface compatibility; currently not applied
        L2 - coefficient of the L2 weight penalty on W1, W2 and W3

        Returns the categorical cross-entropy of each example with the L2
        penalty added to every entry (the value is not reduced to a scalar
        here; callers sum it themselves).
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        l2_penlty = (tf.nn.l2_loss(self.W1)+tf.nn.l2_loss(self.W2)+tf.nn.l2_loss(self.W3))*L2
        loss_with_l2 = l2_penlty+tf.losses.categorical_crossentropy(y_true_tf, y_pred_tf)
        return loss_with_l2

    def backward(self, X_train, y_train, dropout_rate, learning_rate, L1=0, L2=0):
        """
        backward pass: run one forward pass under a gradient tape, then update
        every variable in ``self.MLP_variables`` in place with the custom
        three-moment Adam-style rule.

        X_train, y_train: one mini-batch of inputs and one-hot targets
        dropout_rate: float, forwarded to ``forward``
        learning_rate: float, step size of the update
        L1: accepted for interface compatibility; currently not applied
        L2: coefficient of the L2 weight penalty, forwarded to ``loss``

        Returns (current_loss, predicted) for the batch, both computed before
        the update.
        """
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train,dropout_rate=dropout_rate)
            current_loss = self.loss(predicted, y_train, L2=L2)
        # current_loss is not a scalar; tape.gradient then differentiates the
        # sum of its elements.
        grads = tape.gradient(current_loss, self.MLP_variables)

        self.t += 1
        # The bias-correction factors depend only on the step count, so they
        # are computed once per step instead of once per variable.
        m_correction = 1-pow(self.beta1,self.t)
        v_correction = 1-pow(self.beta2,self.t)
        u_correction = 1-pow(self.beta3,self.t)

        for var, m, v, u, grad in zip(self.MLP_variables, self.m, self.v, self.u, grads):
            # Exponential moving averages of g, g^2 and g^3
            m.assign(self.beta1*m+(1-self.beta1)*grad)
            v.assign(self.beta2*v+(1-self.beta2)*grad*grad)
            u.assign(self.beta3*u+(1-self.beta3)*grad*grad*grad)

            m_hat = m/m_correction
            v_hat = v/v_correction
            u_hat = u/u_correction

            # sqrt(v_hat) + epsilon * signed cube root of u_hat + epsilon
            denominator = tf.pow(v_hat, 1/2)+tf.sign(u_hat)*tf.pow(tf.abs(u_hat), 1/3)*self.epsilon+self.epsilon
            var.assign(var - learning_rate*m_hat/denominator)

        return current_loss, predicted

    def compute_output(self, X, dropout_rate=0):
        """
        Custom method to obtain output tensor during forward pass

        X: inputs, cast to float32 before use
        dropout_rate: float; when greater than 0, dropout with this rate is
            applied to the activations of both hidden layers
        Returns the softmax of the output layer.
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        #Remember to normalize your dataset before moving forward
        # Compute values in hidden layer 1
        what = tf.matmul(X_tf, self.W1) + self.b1
        hhat = tf.nn.relu(what)
        # Dropout is skipped entirely at rate 0 so evaluation stays deterministic
        if dropout_rate > 0:
            hhat = tf.nn.dropout(hhat, rate=dropout_rate, seed=seed)

        # Compute values in hidden layer 2
        what = tf.matmul(hhat, self.W2) + self.b2
        hhat = tf.nn.relu(what)
        if dropout_rate > 0:
            hhat = tf.nn.dropout(hhat, rate=dropout_rate, seed=seed)

        # Compute output; softmax is applied here because `loss` feeds the
        # result to categorical_crossentropy as probabilities.
        output = tf.matmul(hhat, self.W3) + self.b3
        return tf.nn.softmax(output)


In [10]:
# Hyperparameters are the same for every run, so they are defined once.
NUM_RUNS = 10
DROPOUT_RATE = 0
BATCH_SIZE = 200
HIDDEN_SIZE = 128
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
L2_PENLTY = 0
# Training stops early once the epoch-to-epoch change in training loss is this small.
LOSS_TOLERANCE = 0.00001
# Root directory of the TensorBoard logs (use 'logs/mnist/' for the digits dataset).
LOG_ROOT = 'logs/mnist_fashion/'

# Dataset dimensions do not change between runs; read them straight from the arrays.
size_input = x_train.shape[1]
size_hidden = HIDDEN_SIZE
size_output = y_train.shape[1]
number_of_train_examples = x_train.shape[0]
number_of_test_examples = x_test.shape[0]

result_for_ten = []
# NOTE: `s` only labels the run (it appears in the log directory name); no
# random generator is re-seeded per run in this loop.
for s in range(NUM_RUNS):
    print(s,"!!!!!!!!!!!!!!!!!")

    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    path = f"OPT-SELFALG_SEED_{s}-LearnRate_{LEARNING_RATE}-L2_{L2_PENLTY}-TIME_{current_time}"

    # Set log summary
    train_log_dir = LOG_ROOT + path + '/train'
    test_log_dir = LOG_ROOT + path + '/test'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # The variable keeps its historical name; the model is placed on the CPU.
    mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='cpu')
    time_start = time.time()
    epoch = 1
    loss_diff,last_loss = 1,0

    while epoch <= NUM_EPOCHS and abs(loss_diff) > LOSS_TOLERANCE:
        # Accumulate a scalar so a final batch smaller than BATCH_SIZE cannot
        # cause a shape mismatch.
        loss_total_gpu = 0.0
        train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(BATCH_SIZE+BATCH_SIZE//4, seed=epoch*(seed)).batch(BATCH_SIZE)
        for inputs, outputs in train_ds:
            cur_loss, preds = mlp_on_gpu.backward(inputs, outputs, dropout_rate=DROPOUT_RATE, learning_rate=LEARNING_RATE, L2=L2_PENLTY)
            loss_total_gpu += float(np.sum(cur_loss))
        train_loss = loss_total_gpu / number_of_train_examples

        # Calculate Accuracy (one forward pass per split, shared by accuracy and loss)
        train_accuracy, test_accuracy = tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.CategoricalAccuracy()
        train_accuracy.update_state(y_train, mlp_on_gpu.forward(x_train))
        test_predictions = mlp_on_gpu.forward(x_test)
        test_accuracy.update_state(y_test, test_predictions)
        test_loss = np.sum(mlp_on_gpu.loss(test_predictions, y_test)) / number_of_test_examples

        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss, step=epoch)
            tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

        with test_summary_writer.as_default():
            tf.summary.scalar('loss', test_loss, step=epoch)
            tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

        loss_diff = train_loss - last_loss
        last_loss = train_loss
        print(f'Number of Epoch = {epoch} - Training Cross Entropy:= {train_loss} - Training Accuracy:= {train_accuracy.result().numpy()} - Test Accuracy:= {test_accuracy.result().numpy()}')
        time_taken = time.time() - time_start
        print('Time taken (in seconds): {:.2f}'.format(time_taken))
        time_start = time.time()
        epoch += 1

    # Close this run's writers; otherwise every run leaves two open writers behind.
    train_summary_writer.close()
    test_summary_writer.close()

    # record loss and accuracy for final test set:
    final_predictions = mlp_on_gpu.forward(x_final_test)
    final_test_loss = np.sum(mlp_on_gpu.loss(final_predictions, y_final_test)) / x_final_test.shape[0]
    final_test_acc = tf.keras.metrics.CategoricalAccuracy()
    final_test_acc.update_state(y_final_test, final_predictions)
    result_for_ten.append([final_test_loss,final_test_acc.result().numpy()])
    
    

0 !!!!!!!!!!!!!!!!!
Number of Epoch = 1 - Training Cross Entropy:= 0.6361574609375 - Training Accuracy:= 0.8408200740814209 - Test Accuracy:= 0.8286000490188599
Time taken (in seconds): 7.50
Number of Epoch = 2 - Training Cross Entropy:= 0.419225 - Training Accuracy:= 0.8639600276947021 - Test Accuracy:= 0.8546000123023987
Time taken (in seconds): 6.96
Number of Epoch = 3 - Training Cross Entropy:= 0.368776875 - Training Accuracy:= 0.8731800317764282 - Test Accuracy:= 0.863800048828125
Time taken (in seconds): 7.33
Number of Epoch = 4 - Training Cross Entropy:= 0.341617890625 - Training Accuracy:= 0.8844600319862366 - Test Accuracy:= 0.8710000514984131
Time taken (in seconds): 7.06
Number of Epoch = 5 - Training Cross Entropy:= 0.31869255859375 - Training Accuracy:= 0.8883400559425354 - Test Accuracy:= 0.8717000484466553
Time taken (in seconds): 7.20
Number of Epoch = 6 - Training Cross Entropy:= 0.30041802734375 - Training Accuracy:= 0.8984400629997253 - Test Accuracy:= 0.877400040626

Number of Epoch = 9 - Training Cross Entropy:= 0.25737583984375 - Training Accuracy:= 0.9118000864982605 - Test Accuracy:= 0.8839000463485718
Time taken (in seconds): 6.48
Number of Epoch = 10 - Training Cross Entropy:= 0.248928046875 - Training Accuracy:= 0.9127200841903687 - Test Accuracy:= 0.8848000168800354
Time taken (in seconds): 6.28
Number of Epoch = 11 - Training Cross Entropy:= 0.241024453125 - Training Accuracy:= 0.9171200394630432 - Test Accuracy:= 0.8851000666618347
Time taken (in seconds): 6.50
Number of Epoch = 12 - Training Cross Entropy:= 0.231288828125 - Training Accuracy:= 0.9164400696754456 - Test Accuracy:= 0.8826000690460205
Time taken (in seconds): 6.59
Number of Epoch = 13 - Training Cross Entropy:= 0.2220045703125 - Training Accuracy:= 0.9248800873756409 - Test Accuracy:= 0.8873000144958496
Time taken (in seconds): 6.36
Number of Epoch = 14 - Training Cross Entropy:= 0.21379455078125 - Training Accuracy:= 0.9241800904273987 - Test Accuracy:= 0.8872000575065613


Number of Epoch = 17 - Training Cross Entropy:= 0.18955412109375 - Training Accuracy:= 0.9303000569343567 - Test Accuracy:= 0.8890000581741333
Time taken (in seconds): 6.80
Number of Epoch = 18 - Training Cross Entropy:= 0.18356037109375 - Training Accuracy:= 0.9301600456237793 - Test Accuracy:= 0.8858000636100769
Time taken (in seconds): 6.47
Number of Epoch = 19 - Training Cross Entropy:= 0.175531640625 - Training Accuracy:= 0.9346200823783875 - Test Accuracy:= 0.8908000588417053
Time taken (in seconds): 6.29
Number of Epoch = 20 - Training Cross Entropy:= 0.1703946484375 - Training Accuracy:= 0.9366400837898254 - Test Accuracy:= 0.8895000219345093
Time taken (in seconds): 6.51
5 !!!!!!!!!!!!!!!!!
Number of Epoch = 1 - Training Cross Entropy:= 0.675336796875 - Training Accuracy:= 0.8413400650024414 - Test Accuracy:= 0.8336000442504883
Time taken (in seconds): 6.56
Number of Epoch = 2 - Training Cross Entropy:= 0.4229575390625 - Training Accuracy:= 0.8660600781440735 - Test Accuracy:=

Number of Epoch = 5 - Training Cross Entropy:= 0.31472658203125 - Training Accuracy:= 0.8920600414276123 - Test Accuracy:= 0.8759000301361084
Time taken (in seconds): 7.49
Number of Epoch = 6 - Training Cross Entropy:= 0.296533203125 - Training Accuracy:= 0.8987800478935242 - Test Accuracy:= 0.8806000351905823
Time taken (in seconds): 7.25
Number of Epoch = 7 - Training Cross Entropy:= 0.2819271875 - Training Accuracy:= 0.9036000370979309 - Test Accuracy:= 0.8820000290870667
Time taken (in seconds): 6.94
Number of Epoch = 8 - Training Cross Entropy:= 0.26929544921875 - Training Accuracy:= 0.9053200483322144 - Test Accuracy:= 0.8824000358581543
Time taken (in seconds): 7.37
Number of Epoch = 9 - Training Cross Entropy:= 0.25708189453125 - Training Accuracy:= 0.9116800427436829 - Test Accuracy:= 0.8859000205993652
Time taken (in seconds): 6.95
Number of Epoch = 10 - Training Cross Entropy:= 0.2481984765625 - Training Accuracy:= 0.9163200855255127 - Test Accuracy:= 0.8884000182151794
Time

Number of Epoch = 13 - Training Cross Entropy:= 0.2218828125 - Training Accuracy:= 0.9195600748062134 - Test Accuracy:= 0.8837000131607056
Time taken (in seconds): 7.40
Number of Epoch = 14 - Training Cross Entropy:= 0.2131425390625 - Training Accuracy:= 0.9230000376701355 - Test Accuracy:= 0.882900059223175
Time taken (in seconds): 7.00
Number of Epoch = 15 - Training Cross Entropy:= 0.2072130859375 - Training Accuracy:= 0.9230000376701355 - Test Accuracy:= 0.8820000290870667
Time taken (in seconds): 7.49
Number of Epoch = 16 - Training Cross Entropy:= 0.201791875 - Training Accuracy:= 0.9254600405693054 - Test Accuracy:= 0.8819000124931335
Time taken (in seconds): 7.16
Number of Epoch = 17 - Training Cross Entropy:= 0.194202890625 - Training Accuracy:= 0.9286400675773621 - Test Accuracy:= 0.8826000690460205
Time taken (in seconds): 7.14
Number of Epoch = 18 - Training Cross Entropy:= 0.18708087890625 - Training Accuracy:= 0.9291800856590271 - Test Accuracy:= 0.886400043964386
Time ta

In [11]:
# One [final-test loss, final-test accuracy] pair per training run.
result_for_ten

[[0.366101904296875, 0.88280004],
 [0.372931298828125, 0.88280004],
 [0.379409130859375, 0.88030005],
 [0.363632275390625, 0.88310003],
 [0.375222216796875, 0.88110006],
 [0.3708097412109375, 0.8801001],
 [0.372384619140625, 0.8823],
 [0.3681942138671875, 0.88220006],
 [0.365265966796875, 0.8834],
 [0.36305341796875, 0.88060004]]

In [12]:
# Average final-test loss (column 0) and accuracy (column 1) over all runs.
print(
    sum(run[0] for run in result_for_ten) / len(result_for_ten),
    sum(run[1] for run in result_for_ten) / len(result_for_ten),
)

0.369700478515625 0.8818700432777404
