<a href="https://colab.research.google.com/github/deepak-ucfknight/COT5405/blob/master/Softmax_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#import headers and packages
import keras
from keras.datasets import mnist
from keras import backend as K
import numpy as np

In [0]:
#variables
batch_size = 32
num_classes = 10
epochs = 12

In [0]:
#Image Dimensions of MNIST
rows = 28
cols = 28

In [0]:
#loading MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, rows, cols)
    x_test = x_test.reshape(x_test.shape[0], 1, rows, cols)
    input_shape = (1, rows, cols)
else:
    x_train = x_train.reshape(x_train.shape[0], rows, cols, 1)
    x_test = x_test.reshape(x_test.shape[0], rows, cols, 1)
    input_shape = (rows, cols, 1)

In [0]:
# Data Normalization

x_train = x_train / 255;
x_test = x_test / 255;


In [0]:
 y_train = keras.utils.to_categorical(y_train, 10)
 y_test = keras.utils.to_categorical(y_test, 10)
 

In [0]:
# Reshape data

x_train = x_train.reshape(x_train.shape[0], -1).T
x_test = x_test.reshape(x_test.shape[0], -1).T
y_train = y_train.T
y_test = y_test.T

***Helper Functions***

In [0]:
# Function intitalize weights and bias to zero

def initialize_with_zeros(dim):
   w = np.zeros(shape=(dim, num_classes))
   b = 0
   return w,b

In [0]:
# Function to compute Minibatches.
def mini_batches(X, Y, batchsize):
    for start_idx in range(0, X.shape[0] - batchsize + 1, batchsize):
        excerpt = slice(start_idx, start_idx + batchsize)
        yield X[excerpt], Y[excerpt]

In [0]:
def softmax(z):
  z -= np.max(z)
  return np.exp(z) / np.sum(np.exp(z), axis = 0)

In [0]:
def softmax_grad(z):
  s = z.reshape(-1,1)
  return np.diagflat(s) - np.dot(s, s.T)

In [0]:
def cross_entropy(w,b,X,Y):
 
  m = X.shape[1]
  A = softmax(np.dot(w.T, X) + b)
  
  cost = -np.mean(Y * np.log(A))
  
  
  
  dw = (-1 / m) * np.dot(X, (Y - A).T)
  db = (-1 / m) * np.sum(Y-A)
  
  grads = {"dw": dw,
            "db": db }
  
  cost = np.squeeze(cost)
    
  return grads, cost

In [0]:
# Function for back propogation of gradients
def optimize(w, b, X, Y, epochs, learning_rate, print_cost = False):
  
  costs = []
    
  for i in range(epochs):
 
    for batch in mini_batches(X.T, Y.T, batch_size):
 
       x_batch, y_batch = batch
       grads, cost = cross_entropy(w, b, x_batch.T, y_batch.T)
       dw = grads["dw"]
       db = grads["db"]
       w = w - learning_rate * dw 
       b = b - learning_rate * db

       
    costs.append(cost)

    if print_cost:
       print ("Loss after iteration %i: %f" % (i, cost))
            
  params = {"w": w,
            "b": b}

  grads = {"dw": dw,
               "db": db}

  return params, grads, costs

In [0]:
# Function for predicting the values
def predict(w, b, X):
  m = X.shape[1]
  Y_prediction = np.zeros((num_classes, m))
  w = w.reshape(X.shape[0], num_classes)
  A = softmax(np.dot(w.T, X) + b)
  
  Y_prediction[np.arange(len(A)), A.argmax(1)] = 1
  
  return Y_prediction

In [0]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=50, learning_rate=0.1, print_cost=False):
  
  w, b = initialize_with_zeros(X_train.shape[0])
  parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
  w = parameters["w"]
  b = parameters["b"]


  Y_prediction_train = predict(w, b, X_train)
  Y_prediction_test = predict(w,b, X_test)
  
  
  print("train accuracy : {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
  print("test accuracy  : {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
      

  d = { "costs": costs,
        "Y_prediction_test": Y_prediction_test, 
        "Y_prediction_train" : Y_prediction_train, 
        "w" : w, 
        "b" : b,
        "learning_rate" : learning_rate,
        "num_iterations": num_iterations }
  
  

In [534]:
classifier = model(x_train, y_train, x_test, y_test, num_iterations = 12, learning_rate = 0.01, print_cost = True)

Loss after iteration 0: 0.035348
Loss after iteration 1: 0.024838
Loss after iteration 2: 0.020828
Loss after iteration 3: 0.018670
Loss after iteration 4: 0.017292
Loss after iteration 5: 0.016317
Loss after iteration 6: 0.015578
Loss after iteration 7: 0.014990
Loss after iteration 8: 0.014506
Loss after iteration 9: 0.014097
Loss after iteration 10: 0.013745
Loss after iteration 11: 0.013436
train accuracy : 90.00166666666667 %
test accuracy  : 90.01 %
