In [44]:
from numpy.core.arrayprint import format_float_scientific
import os
import numpy as np
import time
import tensorflow as tf
import keras
from sklearn.preprocessing import LabelBinarizer
seed = 1234
np.random.seed(seed)
tf.random.set_seed(seed)
dropout = False
L2 = False
FMINST = False
size_hidden1 = 1024
size_hidden2 = 512
regularizer_rate = 0.01
learning_rate = .01
drop_percent = 0.2
BATCH_SIZE = 100

In [19]:
def load_dataset():
  if FMINST:
    (X_train, y_train), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
  else:
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

  X_train = X_train.astype(float) / 255.
  X_test = X_test.astype(float) / 255.

  X_train = X_train.reshape((X_train.shape[0],X_train.shape[1]*X_train.shape[2]))
  X_test = X_test.reshape((X_test.shape[0],X_test.shape[1]*X_test.shape[2]))

  X_train, X_val = X_train[:-10000], X_train[-10000:]
  y_train, y_val = y_train[:-10000], y_train[-10000:]

  ## Changing labels to one-hot encoded vector
  lb = LabelBinarizer()
  y_train = lb.fit_transform(y_train)
  y_test = lb.transform(y_test)
  y_val = lb.transform(y_val)

  return X_train, X_val, X_test, y_train, y_val, y_test


In [20]:
X_train, X_val, X_test, y_train, y_val, y_test = load_dataset()
size_input = X_train.shape[1]
size_output = y_train.shape[1]
number_of_train_examples = X_train.shape[0]
number_of_test_examples = X_test.shape[0]

In [21]:
class MLP(tf.keras.Model):
  def __init__(self, size_input, size_hidden1, size_hidden2, size_output, device=None):
    super(MLP, self).__init__()
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    #Hyper parameters: Number of nodes in layer 1, layer 2, learning rate, activation function
    # self.size_input = 32
    # self.size_hidden = 128
    # self.size_output = 1
    # self.device = 'gpu'
    self.size_input, self.size_hidden1, self.size_hidden2, self.size_output, self.device =\
    size_input, size_hidden1, size_hidden2,size_output, device
    
    # Initialize weights between input layer and hidden layer
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden1]))
    # Initialize biases for hidden layer
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden1]))
     # Initialize weights between hidden layer and output layer
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden1, self.size_hidden2]))
    # Initialize biases for output layer
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden2]))

    self.W3 = tf.Variable(tf.random.normal([self.size_hidden2, self.size_output]))
    self.b3 = tf.Variable(tf.random.normal([1,self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.MLP_variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    #cross entropy add l1, l2
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred_tf, labels=y_true_tf))
    if L2:
      regularizers = tf.nn.l2_loss(self.W1) + tf.nn.l2_loss(self.W2)
      loss = tf.reduce_mean(loss + regularizer_rate * regularizers)
    return loss
  
  def accuracy_score(self, y_pred, y_true):
    y_pred = tf.nn.softmax(y_pred)
    correct_prediction = tf.equal(tf.argmax(y_true,1), tf.argmax(y_pred,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy

  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate= learning_rate)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      #print(predicted)
      current_loss = self.loss(predicted, y_train)

    grads = tape.gradient(current_loss, self.MLP_variables)
    optimizer.apply_gradients(zip(grads, self.MLP_variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    if dropout:
      hhat1 = tf.nn.dropout(hhat1,drop_percent)
    # Compute output
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    if dropout:
      hhat2 = tf.nn.dropout(hhat2,drop_percent)
    output = tf.matmul(hhat2, self.W3) + self.b3
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [22]:
X_train.shape[0]

50000

In [None]:
# Set number of epochs
NUM_EPOCHS = 10
# Initialize model using GPU
#mlp_on_gpu = MLP()
mlp_on_gpu = MLP(size_input, size_hidden1, size_hidden2, size_output)
time_start = time.time()
training_accuracy = []
for epoch in range(NUM_EPOCHS):
  loss_total = tf.Variable(0, dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(seed)).batch(BATCH_SIZE)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs)
    loss_total = loss_total + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs) 
  preds = mlp_on_gpu.forward(X_train)
  train_accuracy = mlp_on_gpu.accuracy_score(preds,y_train)
  print('Number of Epoch = {} - loss:= {:.4f} - Training Accuracy = {:.4f}'.format(epoch + 1, loss_total.numpy() / X_train.shape[0], train_accuracy.numpy()*100))
  val_pred = mlp_on_gpu.compute_output(X_val)
  val_accuracy = mlp_on_gpu.accuracy_score(val_pred,y_val)
  val_accuracy = val_accuracy * 100
  print ("Validation Accuracy = {:.4f}".format(val_accuracy.numpy()))

  test_pred = mlp_on_gpu.compute_output(X_test)
  test_accuracy = mlp_on_gpu.accuracy_score(test_pred,y_test)
  test_accuracy = test_accuracy*100
  print ("Test Accuracy = {:.4f}".format(test_accuracy.numpy()))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - loss:= 2.0234 - Training Accuracy = 92.7100
Validation Accuracy = 91.5100
Test Accuracy = 91.0600
Number of Epoch = 2 - loss:= 0.1710 - Training Accuracy = 94.9240
Validation Accuracy = 92.4600
Test Accuracy = 92.1600
Number of Epoch = 3 - loss:= 0.0856 - Training Accuracy = 96.3000
Validation Accuracy = 92.9400
Test Accuracy = 92.7800
Number of Epoch = 4 - loss:= 0.0500 - Training Accuracy = 96.8980
Validation Accuracy = 92.9500
Test Accuracy = 92.5300
Number of Epoch = 5 - loss:= 0.0317 - Training Accuracy = 97.3340
Validation Accuracy = 93.0800
Test Accuracy = 92.7600
Number of Epoch = 6 - loss:= 0.0194 - Training Accuracy = 98.0620
Validation Accuracy = 93.0900
Test Accuracy = 92.9900
