<a href="https://colab.research.google.com/github/deepak-ucfknight/Machine_Learning_Spring19/blob/master/Softmax_regression_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#import headers and packages
import keras
from keras.datasets import mnist
from keras import backend as K
import numpy as np

In [0]:
#variables
batch_size = 32
num_classes = 10
epochs = 12

In [0]:
#Image Dimensions of MNIST
rows = 28
cols = 28

In [0]:
#loading MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, rows, cols)
    x_test = x_test.reshape(x_test.shape[0], 1, rows, cols)
    input_shape = (1, rows, cols)
else:
    x_train = x_train.reshape(x_train.shape[0], rows, cols, 1)
    x_test = x_test.reshape(x_test.shape[0], rows, cols, 1)
    input_shape = (rows, cols, 1)

In [0]:
# Data Normalization

x_train = x_train / 255;
x_test = x_test / 255;


In [0]:
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [0]:
# Reshape data

x_train = x_train.reshape(x_train.shape[0], -1).T
x_test = x_test.reshape(x_test.shape[0], -1).T
y_train = y_train.T
y_test = y_test.T

***Helper Functions***

In [0]:
# Function to compute Softmax

def softmax(z):
  z -= np.max(z)
  return np.exp(z) / np.sum(np.exp(z), axis = 0)

In [0]:
# manual way converting train and test labels to categorical values - 
# we have used keras conversion in this assignment, 
# but professor asked us to write the logic atlest

def convert_to_categorical(Y):
  categorical_y = np.zeros(shape=(Y.shape[0],num_classes))
  
  print(categorical_y.shape)
  
  for idx,sample in enumerate(categorical_y):
    sample[Y[idx]] = 1;
  
  return categorical_y
  

In [0]:
# Function to compute minibatches

def mini_batches(X, Y, batchsize):
    for start_idx in range(0, X.shape[0] - batchsize + 1, batchsize):
        excerpt = slice(start_idx, start_idx + batchsize)
        yield X[excerpt], Y[excerpt]

In [0]:
# Function to compute cross entropy loss
def cross_entropy(Y, Y_hat):
    return -np.mean(Y * np.log(Y_hat))

In [0]:
# Function to compute sigmoid
def sigmoid(z):
   return 1.0/(1.0+np.exp(-z))

In [0]:
# Function to compute sigmoid derivative
def sigmoid_prime(z):
  return sigmoid(z)*(1-sigmoid(z))

`**Model Architecture**`

https://github.com/deepak-ucfknight/Machine_Learning_Spring19/blob/master/network.png

In [0]:
# Final Model
def model_with_hidden_layer(X_train, Y_train, X_test, Y_test, epochs,learning_rate):
  
 
  
  n_x = X_train.shape[0]
  n_h = 64 # neurons in the  hidden  layer
  n_y = Y_train.shape[0]
  
 
  #initialization of weights
  W1 = np.random.randn(n_h, n_x) * 0.01
  b1 = np.zeros(shape=(n_h, 1))
  W2 = np.random.randn(n_y, n_h) * 0.01
  b2 = np.zeros(shape=(n_y, 1))
  
  
  #training
  for i in range(epochs):
     
     #splitting the training and test  set according to bacth sizes
     for batch in mini_batches(X_train.T, Y_train.T, batch_size):
 
        X, Y = batch
  
        X = X.T
    
        Y = Y.T
  
        m = X.shape[0]
        
        #forward propogation
        Z1 = np.dot(W1, X) + b1
        A1 = sigmoid(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = softmax(Z2);

        cost = cross_entropy(Y, A2)
        
        #back propogation - gradient descent
        dZ2 = A2-Y

        dW2 = (1/m) * np.matmul(dZ2, A1.T)
        db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)

        dA1 = np.dot(W2.T, dZ2)
        dZ1 = dA1 * sigmoid_prime(Z1)
        dW1 = (1/m) * np.dot(dZ1, X.T)
        db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

        W2 = W2 - learning_rate * dW2
        b2 = b2 - learning_rate * db2
        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

     print("Epoch", i, "cost: ", cost)
    
  
  # final evaluation for training data
  Z1 = np.dot(W1, X_train) + b1
  A1 = sigmoid(Z1)
  Z2 = np.dot(W2, A1) + b2
  A2 = softmax(Z2)

  # final evaluation for test data
  Z3 = np.dot(W1, X_test) + b1
  A3 = sigmoid(Z3)
  Z4 = np.dot(W2, A3) + b2
  A4 = softmax(Z4)

  
  # converting the data to (samples, vector) format
  A4  =  A4.T
  A2  =  A2.T
  Y_train = Y_train.T
  Y_test  = Y_test.T
  
  # creating one hot encoding of the softmax ouptut
  Y_train_prediction = np.zeros_like(Y_train)
  Y_train_prediction[np.arange(len(A2)), A2.argmax(1)] = 1
  
  Y_test_prediction = np.zeros_like(Y_test)
  Y_test_prediction[np.arange(len(A4)), A4.argmax(1)] = 1
  
  
  # comparing accuracies
  print("train accuracy : {} %".format(100 - np.mean(np.abs(Y_train_prediction - Y_train)) * 100))
  print("test accuracy  : {} %".format(100 - np.mean(np.abs(Y_test_prediction - Y_test)) * 100))
      

  d = { "costs": cost,
        "Y_prediction_test": A4, 
        "Y_prediction_train" : A2, 
      }
  
  return d;

In [0]:
# Final Model - with no hidden layer - easy backprop
def model_with_no_hidden_layer(X_train, Y_train, X_test, Y_test, epochs,learning_rate):
  
 
  
  n_x = X_train.shape[0]
  n_h = 64 # neurons in the  hidden  layer
  n_y = Y_train.shape[0]
  
 
  #initialization of weights
  W1 = np.random.randn(n_y, n_x) * 0.01
  b1 = np.zeros(shape=(n_y, 1))
 
  
  
  #training
  for i in range(epochs):
     
     #splitting the training and test  set according to bacth sizes
     for batch in mini_batches(X_train.T, Y_train.T, batch_size):
 
        X, Y = batch
  
        X = X.T
    
        Y = Y.T
  
        m = X.shape[0]
        
        #forward propogation
        Z1 = np.dot(W1, X) + b1
        A1 = softmax(Z1);

        cost = cross_entropy(Y, A1)
        
        #back propogation - gradient descent
        dZ1 = A1-Y

        dW1 = (1/m) * np.dot(dZ1, X.T)
        db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

        W1 = W1 - learning_rate * dW1
        b1 = b1 - learning_rate * db1

     print("Epoch", i, "cost: ", cost)
    
  
  # final evaluation for training data
  Z1 = np.dot(W1, X_train) + b1
  A2 = softmax(Z1)

  # final evaluation for test data
  Z3 = np.dot(W1, X_test) + b1
  A4 = softmax(Z3)

  
  # converting the data to (samples, vector) format
  A4  =  A4.T
  A2  =  A2.T
  Y_train = Y_train.T
  Y_test  = Y_test.T
  
  # creating one hot encoding of the softmax ouptut
  Y_train_prediction = np.zeros_like(Y_train)
  Y_train_prediction[np.arange(len(A2)), A2.argmax(1)] = 1
  
  Y_test_prediction = np.zeros_like(Y_test)
  Y_test_prediction[np.arange(len(A4)), A4.argmax(1)] = 1
  
  
  # comparing accuracies
  print("train accuracy : {} %".format(100 - np.mean(np.abs(Y_train_prediction - Y_train)) * 100))
  print("test accuracy  : {} %".format(100 - np.mean(np.abs(Y_test_prediction - Y_test)) * 100))
      

  d = { "costs": cost,
        "Y_prediction_test": A4, 
        "Y_prediction_train" : A2, 
      }
  
  return d;

In [0]:
classifier = model_with_hidden_layer(x_train, y_train, x_test, y_test, 12, 0.1)

Epoch 0 cost:  0.23019639155660201
Epoch 1 cost:  0.22664210204209073
Epoch 2 cost:  0.21022312116952477
Epoch 3 cost:  0.17116987510963913
Epoch 4 cost:  0.12994574033607967
Epoch 5 cost:  0.10074231077307352
Epoch 6 cost:  0.08053182996860711
Epoch 7 cost:  0.06598196808650018
Epoch 8 cost:  0.055397952845652056
Epoch 9 cost:  0.04763771934760565
Epoch 10 cost:  0.0418592630249051
Epoch 11 cost:  0.037470041537769255
train accuracy : 97.18633331358433 %
test accuracy  : 97.27200008928776 %
