In [1]:
from keras.datasets import fashion_mnist
from matplotlib import pyplot as plt
import numpy as np
import math
import statistics
from sklearn.utils import shuffle

In [2]:
import load_data as ld

# ld.load() hands back the six arrays in a fixed order:
# train, validation and test, each as a (data, labels) pair.
train_data, train_labels, validation_data, validation_labels, test_data, test_labels = ld.load()

In [3]:
# Seed NumPy's global RNG so weight initialisation is repeatable across runs.
SEED = 1234
np.random.seed(SEED)

class FeedForwardNeuralNetwork():
  """Fully connected feed-forward classifier with sigmoid hidden layers and a softmax output.

  All activations are column vectors: an input has shape ``(input_dim, 1)`` and a
  prediction has shape ``(output_dim, 1)``. The network keeps the most recent
  pre-activations (``self.A``) and activations (``self.H``) of every layer so that
  ``backPropagation`` can reuse them right after ``forwardPropagation``.

  Attributes:
    input_dim, hidden_dim, output_dim: layer widths.
    hidden_layers / L: number of hidden layers; the output layer has index ``L``.
    Weights, Biases: per-layer parameters, ``Weights[k]`` has shape (fan_out, fan_in).
    Update_Weights, Update_Biases: per-layer optimizer state (velocity for the
      momentum family, second-moment estimate for rmsprop/adam/nadam).
  """

  # Optimizer names understood by trainingAlgo. 'nestrov' is the spelling the
  # notebook has always used; 'nesterov' is accepted as an alias.
  OPTIMIZERS = ('sgd', 'momentum', 'nestrov', 'nesterov', 'rmsprop', 'adam', 'nadam')

  def __init__(self, input_dim, output_dim, hidden_dim, hidden_layers):
    """Store the layer sizes and allocate/initialise all parameters."""
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.hidden_layers = hidden_layers
    self.L = hidden_layers
    self.initialize()

  def size(self):
    """Return the list of layer widths: input, each hidden layer, output."""
    return [self.input_dim] + [self.hidden_dim] * self.hidden_layers + [self.output_dim]

  def initialize(self):
    """(Re)create activations, parameters and optimizer state.

    Weights are drawn from a standard normal scaled by sqrt(2 / (fan_in + fan_out));
    biases and all optimizer buffers start at zero.
    """
    sizes = self.size()
    shapes = list(zip(sizes[1:], sizes[:-1]))  # (fan_out, fan_in) per layer
    self.A = [np.zeros((n_out, 1)) for n_out, _ in shapes]
    self.H = [np.zeros((n_out, 1)) for n_out, _ in shapes]
    self.Weights = [np.random.randn(n_out, n_in) * np.sqrt(2 / (n_in + n_out)) for n_out, n_in in shapes]
    self.Update_Weights = [np.zeros((n_out, n_in)) for n_out, n_in in shapes]
    self.Biases = [np.zeros((n_out, 1)) for n_out, _ in shapes]
    self.Update_Biases = [np.zeros((n_out, 1)) for n_out, _ in shapes]

  def sigmoid(self, x):
    """Element-wise logistic function, evaluated without overflow.

    np.where evaluates both of its branches, so exponentiating +x and -x
    directly overflows for large |x|. exp(-|x|) is always in (0, 1] and yields
    exactly the same value for whichever branch is selected.
    """
    x = np.asarray(x, dtype=float)
    e = np.exp(-np.abs(x))
    return np.where(x >= 0, 1 / (1 + e), e / (1 + e))

  def diff_sigmoid(self, x):
    """Derivative of the logistic function at ``x``: s(x) * (1 - s(x))."""
    fx = self.sigmoid(x)
    return fx * (1 - fx)

  def preactivation(self, w, h_prev, b):
    """Affine map of a layer: ``w @ h_prev + b``."""
    return (np.dot(w, h_prev) + b)

  def output_function(self, x):
    """Numerically stable softmax over axis 0 (one distribution per column).

    The maximum is subtracted per column, matching the per-column
    normalisation; a single global maximum would underflow whole columns to
    0/0 when columns differ greatly in scale.
    """
    z = x - np.max(x, axis=0, keepdims=True)
    num = np.exp(z)
    return num / np.sum(num, axis=0, keepdims=True)

  def forwardPropagation(self, input_vector):
    """Run one forward pass and return the softmax output ``H[L]``.

    Stores every layer's pre-activation in ``self.A`` and activation in
    ``self.H``. Also works with zero hidden layers (input feeds the output
    layer directly).
    """
    L = self.L
    h = input_vector
    for k in range(L):
      self.A[k] = np.array(self.preactivation(self.Weights[k], h, self.Biases[k]))
      self.H[k] = self.sigmoid(self.A[k])
      h = self.H[k]

    self.A[L] = np.array(self.preactivation(self.Weights[L], h, self.Biases[L]))
    self.H[L] = self.output_function(self.A[L])
    return self.H[L]  # H[L] = y^

  def backPropagation(self, train_data, truelabel, prediction):
    """Compute per-layer gradients for one example.

    Must be called right after ``forwardPropagation`` on the same example,
    because it reads the cached ``self.A`` / ``self.H``.

    Args:
      train_data: the input column vector that was fed forward.
      truelabel: integer class index of the example.
      prediction: the softmax output returned by the forward pass.

    Returns:
      ``(grad_Weights, grad_Biases)``: two lists with one array per layer.
    """
    L = self.L
    grad_Weights = [0] * (L + 1)
    grad_Biases = [0] * (L + 1)
    # Gradient w.r.t. the output pre-activation: prediction minus one-hot target.
    # The target is sized from output_dim instead of a hard-coded 10.
    delta = prediction - self.oneHot(truelabel, self.output_dim)
    for k in range(L, -1, -1):
      layer_input = train_data if k == 0 else self.H[k - 1]
      grad_Weights[k] = delta.dot(layer_input.T)
      grad_Biases[k] = delta
      if k != 0:
        # Push the error through the weights and the sigmoid of the layer below.
        delta = np.multiply(self.Weights[k].T.dot(delta), self.diff_sigmoid(self.A[k - 1]))

    return (grad_Weights, grad_Biases)

  def oneHot(self, num, size = 10):
    """Return an integer column vector of length ``size`` with a 1 at index ``num``."""
    vec = np.zeros((size, 1), dtype=int)
    vec[num] = 1
    return vec

  def calculate_error(self, test_data, test_labels):
    """Return classification accuracy in percent (despite the name).

    Each row of ``test_data`` is reshaped to an ``(input_dim, 1)`` column and
    classified by arg-max over the network output.
    """
    correct = 0
    for example, label in zip(test_data, test_labels):
      y_hat = self.forwardPropagation(example.reshape(self.input_dim, 1))
      if np.argmax(y_hat) == label:
        correct += 1
    return (correct / len(test_data)) * 100

  def _optimizer_step(self, opt, g_w, g_b, m_w, m_b, step, N, gamma, eta, alpha, eps, beta):
    """Apply one parameter update from the accumulated mini-batch gradients.

    ``g_w`` / ``g_b`` hold the gradients summed over the mini-batch. ``m_w`` /
    ``m_b`` are the first-moment buffers of adam/nadam and are updated in
    place; ``step`` is the 1-based update counter used for bias correction.
    """
    beta1, beta2 = 0.9, 0.999  # fixed Adam/Nadam decay rates
    c1 = 1 - math.pow(beta1, step)  # first-moment bias correction
    c2 = 1 - math.pow(beta2, step)  # second-moment bias correction

    for j in range(self.L + 1):
      if opt == 'sgd':
        self.Weights[j] = self.Weights[j] - (eta * g_w[j])
        self.Biases[j] = self.Biases[j] - (eta * g_b[j])

      elif opt == 'momentum':
        self.Update_Weights[j] = gamma * self.Update_Weights[j] + (eta * g_w[j])
        self.Weights[j] = self.Weights[j] - self.Update_Weights[j]

        self.Update_Biases[j] = gamma * self.Update_Biases[j] + (eta * g_b[j])
        self.Biases[j] = self.Biases[j] - self.Update_Biases[j]

      elif opt in ('nestrov', 'nesterov'):
        # Look-ahead form: refresh the velocity, then step by
        # gamma * velocity + eta * gradient (same rule for weights and biases).
        self.Update_Weights[j] = (gamma * self.Update_Weights[j]) + (eta * g_w[j])
        self.Weights[j] = self.Weights[j] - (gamma * self.Update_Weights[j] + eta * g_w[j])

        self.Update_Biases[j] = (gamma * self.Update_Biases[j]) + (eta * g_b[j])
        self.Biases[j] = self.Biases[j] - (gamma * self.Update_Biases[j] + eta * g_b[j])

      elif opt == 'rmsprop':
        self.Update_Weights[j] = beta * self.Update_Weights[j] + (1 - beta) * g_w[j] ** 2
        # Decay the weights themselves (not the second-moment buffer).
        self.Weights[j] = (1 - eta * alpha) * self.Weights[j] - (eta / (np.sqrt(self.Update_Weights[j]) + eps)) * g_w[j]

        self.Update_Biases[j] = beta * self.Update_Biases[j] + (1 - beta) * g_b[j] ** 2
        self.Biases[j] = self.Biases[j] - (eta / (np.sqrt(self.Update_Biases[j]) + eps)) * g_b[j]

      elif opt == 'adam':
        self.Update_Weights[j] = beta2 * self.Update_Weights[j] + (1 - beta2) * g_w[j] ** 2
        m_w[j] = beta1 * m_w[j] + (1 - beta1) * g_w[j]
        m_w_hat = m_w[j] / c1
        v_w_hat = self.Update_Weights[j] / c2
        # NOTE(review): adam decays by eta*alpha/N while nadam/rmsprop use
        # eta*alpha; kept as-is, confirm which one is intended.
        self.Weights[j] = (1 - eta * alpha / N) * self.Weights[j] - (eta / (np.sqrt(v_w_hat) + eps)) * m_w_hat

        self.Update_Biases[j] = beta2 * self.Update_Biases[j] + (1 - beta2) * g_b[j] ** 2
        m_b[j] = beta1 * m_b[j] + (1 - beta1) * g_b[j]
        m_b_hat = m_b[j] / c1
        v_b_hat = self.Update_Biases[j] / c2
        self.Biases[j] = self.Biases[j] - (eta / (np.sqrt(v_b_hat) + eps)) * m_b_hat

      elif opt == 'nadam':
        self.Update_Weights[j] = beta2 * self.Update_Weights[j] + (1 - beta2) * g_w[j] ** 2
        m_w[j] = beta1 * m_w[j] + (1 - beta1) * g_w[j]
        m_w_hat = beta1 * (m_w[j] / c1) + ((1 - beta1) * g_w[j]) / c1
        v_w_hat = self.Update_Weights[j] / c2
        self.Weights[j] = (1 - eta * alpha) * self.Weights[j] - (eta / (np.sqrt(v_w_hat) + eps)) * m_w_hat

        self.Update_Biases[j] = beta2 * self.Update_Biases[j] + (1 - beta2) * g_b[j] ** 2
        m_b[j] = beta1 * m_b[j] + (1 - beta1) * g_b[j]
        m_b_hat = beta1 * (m_b[j] / c1) + ((1 - beta1) * g_b[j]) / c1
        v_b_hat = self.Update_Biases[j] / c2
        self.Biases[j] = self.Biases[j] - (eta / (np.sqrt(v_b_hat) + eps)) * m_b_hat

  def trainingAlgo(self, train_data, train_labels, validation_data, validation_labels, opt = 'adam', gamma = 0.9, eta = 1e-4, batch_size = 1, max_epochs = 1,alpha = 0.1, eps = 1e-6, beta = 0.9, verbose = False):
    """Train the network with mini-batch gradient descent.

    Gradients are summed over ``batch_size`` consecutive examples (the last
    batch of an epoch may be smaller) and then applied with the chosen
    optimizer. After every epoch the train and validation accuracy are printed.

    Args:
      train_data, train_labels: training examples (one per row) and integer labels.
      validation_data, validation_labels: held-out set evaluated after each epoch.
      opt: one of ``OPTIMIZERS``.
      gamma: momentum coefficient ('momentum', 'nestrov').
      eta: learning rate.
      batch_size: number of examples per update; must be >= 1.
      max_epochs: number of passes over the training data.
      alpha: weight-decay coefficient (rmsprop, adam, nadam weights only).
      eps: numerical-stability term in the adaptive optimizers.
      beta: second-moment decay rate for rmsprop.
      verbose: when True, print a banner for every training example
        (previously always printed; off by default because it floods the output).

    Raises:
      ValueError: if ``opt`` is not a known optimizer or ``batch_size < 1``.
    """
    if opt not in self.OPTIMIZERS:
      raise ValueError("unknown optimizer %r; expected one of %s" % (opt, ", ".join(self.OPTIMIZERS)))
    if batch_size < 1:
      raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

    N = train_data.shape[0]
    L = self.L

    # First-moment buffers and update counter (used by adam/nadam).
    m_w, m_b = [0] * (L + 1), [0] * (L + 1)
    step = 1

    for i in range(max_epochs):
      g_w, g_b = [0] * (L + 1), [0] * (L + 1)
      count = 0
      # NOTE(review): random_state is fixed, so presumably every epoch visits
      # the examples in the same shuffled order - confirm that is intended.
      X_train, y_train = shuffle(train_data, train_labels, random_state=0)
      for x, y in zip(X_train, y_train):
        if verbose:
          print("=====================================================================================================")
          print("training for example number : ", count)
          print("\n")
        x_col = x.reshape(self.input_dim, 1)
        predictions = self.forwardPropagation(x_col)
        (grad_Weights, grad_Biases) = self.backPropagation(x_col, y, predictions)

        for j in range(L + 1):
          g_w[j] = g_w[j] + grad_Weights[j]
          g_b[j] = g_b[j] + grad_Biases[j]
        count = count + 1

        # Update at every full batch, and once more for a trailing partial batch.
        if count % batch_size == 0 or count == N:
          self._optimizer_step(opt, g_w, g_b, m_w, m_b, step, N, gamma, eta, alpha, eps, beta)
          g_w = [0] * (L + 1)
          g_b = [0] * (L + 1)
          step = step + 1

      train_acc = self.calculate_error(train_data, train_labels)
      val_acc = self.calculate_error(validation_data, validation_labels)
      print("train_acc : ",train_acc, ", val_acc : ", val_acc, " epoch : ", i)

    return

  #============================================================================================================================================================================================================================================


In [4]:
# Width of the input layer: number of rows once an example is viewed as a 784x1 column.
input_dim = train_data[0].reshape(784, 1).shape[0]

# Remaining layer-size settings, one per line.
output_dim = 10
hidden_dim = input_dim
hidden_layers = 8

In [None]:
# Hyperparameters for this run.
optimizer = 'sgd'
eta = 1e-4
gamma = 0.9
batch_size = 1
max_epochs = 1
number_of_hidden_layers = 1
number_of_neurons_per_hidden_layer = 128

# ---------------------------------------------------------------------------
# Build the network, train it, then report accuracy on the held-out test set.
# ---------------------------------------------------------------------------
ffnn = FeedForwardNeuralNetwork(
    input_dim=784,
    output_dim=10,
    hidden_dim=number_of_neurons_per_hidden_layer,
    hidden_layers=number_of_hidden_layers,
)
ffnn.trainingAlgo(
    train_data,
    train_labels,
    validation_data,
    validation_labels,
    opt=optimizer,
    gamma=gamma,
    eta=eta,
    batch_size=batch_size,
    max_epochs=max_epochs,
)
print("testing accuracy : ",ffnn.calculate_error(test_data, test_labels))