<a href="https://colab.research.google.com/github/deepak-ucfknight/COT5405/blob/master/Binary_Entropy_loss_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# import headers and packages

import keras
from keras.datasets import mnist
from keras import backend as K
import numpy as np

In [0]:
# Training hyperparameters shared by the cells below.

# Number of samples per mini-batch; optimize() reads this module-level value.
batch_size = 32
# Number of output columns in the weight matrix built by initialize_with_zeros()
# and expected by predict(); 1 because each classifier has a single binary output.
num_classes = 1
# Intended number of passes over the training set.
epochs = 12

In [0]:
# Image dimensions of MNIST: each image is reshaped below to rows x cols pixels
# with a single channel.

rows = 28
cols = 28

In [0]:
# Load MNIST and add an explicit single-channel axis in the position the
# active Keras backend expects.

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Per-sample shape: channel axis first or last depending on the backend setting.
if K.image_data_format() == 'channels_first':
    input_shape = (1, rows, cols)
else:
    input_shape = (rows, cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)
    

In [0]:
# Data normalization: divide every pixel by 255 so the features become
# floating-point values (inputs in 0..255 map to 0..1).

x_train, x_test = x_train / 255, x_test / 255


In [0]:
# Reshape data: flatten each sample to one vector, then transpose so that
# samples are stored as columns, i.e. (n_samples, ...) -> (n_features, n_samples).

x_train = np.transpose(x_train.reshape(len(x_train), -1))
x_test = np.transpose(x_test.reshape(len(x_test), -1))

# Labels are transposed the same way (a no-op if they are 1-D -- TODO confirm).
y_train = np.transpose(y_train)
y_test = np.transpose(y_test)


***Helper Functions***

In [0]:
# Function to initialize weights and bias to zero

def initialize_with_zeros(dim):
    """Create zero-valued starting parameters for one linear unit.

    Parameters
    ----------
    dim : int
        Number of input features (rows of the weight matrix).

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` is a float array of zeros with
        shape ``(dim, num_classes)`` (``num_classes`` is the module-level
        setting) and ``bias`` is the integer ``0``.
    """
    weights = np.zeros((dim, num_classes))
    bias = 0
    return weights, bias

In [0]:
# Function to compute sigmoid

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The naive formula calls ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``.  Here the exponential is only
    ever taken of a non-positive number, so it stays within ``[0, 1]``:

    * ``z >= 0``:  ``1 / (1 + exp(-z))``
    * ``z <  0``:  ``exp(z) / (1 + exp(z))``

    Parameters
    ----------
    z : scalar or array_like
        Real-valued input(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid with the same shape as ``z``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer / boolean input: promote so the arithmetic below is floating point.
        z = z.astype(float)

    decay = np.exp(-np.abs(z))  # always in [0, 1], never overflows
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves n-d arrays untouched, so scalar in -> scalar out as before.
    return out[()]

In [0]:
# Function to compute sigmoid prime

def sigmoid_prime(z):
    """Derivative of the sigmoid at ``z``: ``sigmoid(z) * (1 - sigmoid(z))``.

    Parameters
    ----------
    z : scalar or array_like
        Point(s) at which to evaluate the derivative.

    Returns
    -------
    Same type/shape as ``sigmoid(z)``.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

In [0]:
# Function to compute Minibatches.

def mini_batches(X, Y, batchsize):
    """Yield consecutive, equally sized ``(X_batch, Y_batch)`` slices.

    Batches are taken in order along the first axis.  Only full batches are
    produced: trailing samples that do not fill a whole batch are skipped.

    Parameters
    ----------
    X, Y : array_like
        Inputs and targets, sliced identically along axis 0.  The number of
        samples is taken from ``X.shape[0]``.
    batchsize : int
        Number of samples per batch.

    Yields
    ------
    tuple
        ``(X[lo:hi], Y[lo:hi])`` for each full batch.
    """
    last_start = X.shape[0] - batchsize  # largest offset that still fits a full batch
    for lo in range(0, last_start + 1, batchsize):
        hi = lo + batchsize
        yield X[lo:hi], Y[lo:hi]

In [0]:
# Function to compute the binary cross-entropy loss and its gradients

def binary_entropy(w,b,X,Y):
    """Binary cross-entropy cost and its gradients for a logistic unit.

    Parameters
    ----------
    w : ndarray
        Weights; ``w.T`` is multiplied with ``X``, so ``w`` has one row per
        feature.
    b : scalar
        Bias added to every logit.
    X : ndarray
        Inputs with samples stored as columns (``X.shape[1]`` is the number
        of samples ``m``).
    Y : ndarray
        Targets, broadcastable against the ``(1, m)`` activations
        (presumably 0/1 labels -- confirm against the caller).

    Returns
    -------
    grads : dict
        ``{"dw": ..., "db": ...}`` -- gradients of the mean cost with respect
        to ``w`` and ``b``.
    cost : ndarray (squeezed)
        Mean binary cross-entropy over the ``m`` samples.
    """
    m = X.shape[1]
    z = np.dot(w.T, X) + b  # logits
    A = sigmoid(z)          # activations, needed for the gradients

    # Evaluate the loss from the logits rather than as
    # Y*log(A) + (1-Y)*log(1-A): once the sigmoid saturates to exactly 0 or 1
    # that expression computes 0 * log(0) and the cost becomes NaN.  The form
    #     max(z, 0) - z*Y + log(1 + exp(-|z|))
    # is algebraically identical and finite for every finite z.
    per_sample = np.maximum(z, 0) - z * Y + np.log1p(np.exp(-np.abs(z)))
    cost = (1 / m) * np.sum(per_sample)

    # Gradients of the mean cost.
    error = A - Y
    dw = (1 / m) * np.dot(X, error.T)
    db = (1 / m) * np.sum(error)

    grads = {"dw": dw,
             "db": db}

    cost = np.squeeze(cost)

    return grads, cost

In [0]:
# Function that trains the weights and bias with mini-batch gradient descent
def optimize(w, b, X, Y, epochs, learning_rate, print_cost = False):
    """Fit ``w`` and ``b`` with mini-batch gradient descent.

    Each epoch walks over the data in order in batches of the module-level
    ``batch_size`` and applies one gradient step per batch.

    Parameters
    ----------
    w, b :
        Initial weights and bias.
    X : ndarray
        Inputs with samples stored as columns.
    Y : ndarray
        Targets aligned with the columns of ``X``.
    epochs : int
        Number of passes over the data.
    learning_rate : float
        Step size of each gradient update.
    print_cost : bool, optional
        If true, print the recorded cost after every epoch.

    Returns
    -------
    params : dict
        ``{"w": ..., "b": ...}`` after training.
    grads : dict
        ``{"dw": ..., "db": ...}`` from the last batch processed; both are
        ``None`` when ``epochs`` is 0 and no step was taken.
    costs : list
        One entry per epoch: the cost of that epoch's final batch.

    Raises
    ------
    ValueError
        If there are fewer samples than ``batch_size``; no full batch could
        be formed, which previously surfaced as an UnboundLocalError.
    """
    n_samples = X.shape[1]
    if n_samples < batch_size:
        raise ValueError(
            "optimize() needs at least batch_size=%d samples, got %d"
            % (batch_size, n_samples))

    costs = []
    # Defined up front so the return value is well-formed even with epochs == 0.
    dw = db = None

    for i in range(epochs):

        for x_batch, y_batch in mini_batches(X.T, Y.T, batch_size):
            # mini_batches slices along axis 0 (samples); binary_entropy
            # expects samples as columns again, hence the transposes.
            grads, cost = binary_entropy(w, b, x_batch.T, y_batch.T)

            # gradient descent step
            dw = grads["dw"]
            db = grads["db"]
            w = w - learning_rate * dw
            b = b - learning_rate * db

        # Recorded once per epoch: the cost of the last batch of this epoch.
        costs.append(cost)

        if print_cost:
            print ("Loss after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [0]:
# Function for predicting the values

def predict(w, b, X):
    """Predict hard 0/1 labels with a trained logistic unit.

    Parameters
    ----------
    w : ndarray
        Weights; reshaped to ``(X.shape[0], num_classes)`` using the
        module-level ``num_classes``.
    b : scalar
        Bias.
    X : ndarray
        Inputs with samples stored as columns.

    Returns
    -------
    ndarray
        Float array of shape ``(1, n_samples)`` holding ``1.0`` where the
        first output's sigmoid activation is strictly greater than 0.5 and
        ``0.0`` elsewhere.
    """
    w = w.reshape(X.shape[0], num_classes)
    A = sigmoid(np.dot(w.T, X) + b)  # confidence values in [0, 1]

    # Threshold the first output row in one vectorized step; this yields the
    # same (1, n_samples) float array as filling it sample by sample.
    return (A[:1] > 0.5).astype(float)

In [0]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=50, learning_rate=0.1, print_cost=False):
    """Train ten one-vs-rest logistic classifiers (digits 0-9) and report accuracy.

    For every digit the labels are binarised (1 for that digit, 0 otherwise),
    a classifier is trained from zero-initialised parameters with
    ``optimize`` and its train/test accuracy is printed.

    Parameters
    ----------
    X_train, X_test : ndarray
        Inputs with samples stored as columns.
    Y_train, Y_test : array_like
        Integer digit labels.
    num_iterations : int, optional
        Number of epochs passed to ``optimize``.
    learning_rate : float, optional
        Gradient-descent step size.
    print_cost : bool, optional
        Forwarded to ``optimize``.

    Returns
    -------
    dict
        Maps each digit to a dict with keys ``costs``, ``Y_prediction_test``,
        ``Y_prediction_train``, ``w``, ``b``, ``learning_rate`` and
        ``num_iterations``.

    Notes
    -----
    The printed "Overall" figures are the mean element-wise agreement between
    the 0/1 prediction matrix and the one-hot labels across all ten columns;
    they are not the argmax (multi-class) accuracy.
    """
    # Prediction matrices, one column per digit classifier.  to_categorical
    # supplies arrays of the right shape; each column is overwritten below.
    predict_Y_train = keras.utils.to_categorical(Y_train, 10)
    predict_Y_test = keras.utils.to_categorical(Y_test, 10)

    models = {}  # trained classifier per digit

    for digit in range(10):
        # Binary targets: 1 where the label is the current digit, else 0.
        y_train_mod = np.where(np.array(Y_train) == digit, 1, 0)
        y_test_mod = np.where(np.array(Y_test) == digit, 1, 0)

        # Train this digit's classifier from zero-initialised parameters.
        w_init, b_init = initialize_with_zeros(X_train.shape[0])
        parameters, grads, costs = optimize(
            w_init, b_init, X_train, y_train_mod,
            num_iterations, learning_rate, print_cost)
        w, b = parameters["w"], parameters["b"]

        # Hard 0/1 predictions from the trained parameters.
        Y_prediction_train = predict(w, b, X_train)
        Y_prediction_test = predict(w, b, X_test)

        # Store them as this digit's column of the prediction matrices.
        predict_Y_train[:, [digit]] = Y_prediction_train.T
        predict_Y_test[:, [digit]] = Y_prediction_test.T

        # Per-digit accuracy: 100 minus the mean absolute error, in percent.
        train_accuracy = 100 - np.mean(np.abs(Y_prediction_train - y_train_mod)) * 100
        test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - y_test_mod)) * 100
        print("train accuracy of classifer " + str(digit) +" : {} %".format(train_accuracy))
        print("test accuracy of classifer " + str(digit) +" : {} %".format(test_accuracy))

        models[digit] = {
            "costs": costs,
            "Y_prediction_test": Y_prediction_test,
            "Y_prediction_train": Y_prediction_train,
            "w": w,
            "b": b,
            "learning_rate": learning_rate,
            "num_iterations": num_iterations,
        }

    # One-hot encode the true labels so they line up with the prediction matrices.
    Y_train_onehot = keras.utils.to_categorical(Y_train, 10)
    Y_test_onehot = keras.utils.to_categorical(Y_test, 10)

    # Overall agreement between predictions and one-hot labels.
    print("Overall train accuracy: {} %".format(100 - np.mean(np.abs(predict_Y_train - Y_train_onehot)) * 100))
    print("Overall test accuracy: {} %".format(100 - np.mean(np.abs(predict_Y_test - Y_test_onehot)) * 100))

    return models
    
    
    
    

In [0]:
# Train the ten per-digit classifiers.  The epoch count comes from the
# `epochs` setting defined with the other hyperparameters instead of repeating
# the literal value here.
classifier = model(
    x_train, y_train, x_test, y_test,
    num_iterations=epochs,
    learning_rate=0.1,
    print_cost=False,
)

train accuracy of classifer 0 : 99.26333333333334 %
test accuracy of classifer 0 : 99.21 %
train accuracy of classifer 1 : 99.11833333333334 %
test accuracy of classifer 1 : 99.3 %
train accuracy of classifer 2 : 97.735 %
test accuracy of classifer 2 : 97.52 %
train accuracy of classifer 3 : 97.62166666666667 %
test accuracy of classifer 3 : 97.86 %
train accuracy of classifer 4 : 98.41666666666667 %
test accuracy of classifer 4 : 98.34 %
train accuracy of classifer 5 : 97.41333333333333 %
test accuracy of classifer 5 : 97.6 %
train accuracy of classifer 6 : 98.79 %
test accuracy of classifer 6 : 98.78 %
train accuracy of classifer 7 : 98.42166666666667 %
test accuracy of classifer 7 : 98.44 %
train accuracy of classifer 8 : 95.915 %
test accuracy of classifer 8 : 95.96 %
train accuracy of classifer 9 : 96.34833333333333 %
test accuracy of classifer 9 : 96.41 %
Overall train accuracy: 97.90433328598738 %
Overall test accuracy: 97.94200006872416 %
