<a href="https://colab.research.google.com/github/codewithhari98/Machine_Learning/blob/main/Neural_network_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
#https://www.youtube.com/watch?v=RSl87lqOXDE&ab_channel=CoreySchafer
#https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce654
#https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi
#http://neuralnetworksanddeeplearning.com/chap1.html
#https://www.educative.io/answers/one-hot-encoding-in-python
#https://e2eml.school/softmax.html
import numpy as np
import pickle
import pandas as pd
from keras.datasets import mnist
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
class Layer:
  """Base class for the layers of the network.

  Stores the declared input/output sizes and a weight matrix plus bias row.
  Subclasses implement forward() and backward().
  """
  def __init__(self,inputsize,outputsize,weights_arr=0,bias_arr=0):
    """Create the layer parameters.

    inputsize / outputsize: number of input and output units.
    weights_arr / bias_arr: optional initial parameters. They are used only
      when BOTH are supplied; otherwise weights of shape
      (inputsize, outputsize) and a bias of shape (1, outputsize) are drawn
      uniformly from [-0.5, 0.5). The scalar 0 (the default) and None both
      mean "not supplied".
    """
    self.input=inputsize
    self.output=outputsize
    if self._is_provided(weights_arr) and self._is_provided(bias_arr):
          # `arr != 0 and ...` is ambiguous for NumPy arrays and raised
          # ValueError, so the placeholder test is done without array truthiness
          self.weights=np.asarray(weights_arr, dtype=float)
          self.bias=np.asarray(bias_arr, dtype=float)
    else:
          self.weights = np.random.rand(inputsize, outputsize) - 0.5
          self.bias = np.random.rand(1, outputsize) - 0.5
    self.z=0
  @staticmethod
  def _is_provided(value):
    """Return False for None or the scalar placeholder 0, True otherwise."""
    if value is None:
      return False
    # only a zero-dimensional value equal to 0 counts as the placeholder;
    # an all-zero array is a legitimate explicit initialisation
    return not (np.ndim(value) == 0 and value == 0)
  def forward(self, input):
    """Compute the layer output for *input*; implemented by subclasses."""
    raise NotImplementedError
  def backward(self, output_error, learning_rate):
     """Propagate *output_error* backwards; implemented by subclasses."""
     raise NotImplementedError
    

In [79]:
class linearlayer(Layer):
  """Fully connected layer computing ``input . weights + bias``."""
  def forward(self,input):
    """Cache *input* for backward() and return the affine output."""
    self.input_data=input
    return np.dot(self.input_data, self.weights) + self.bias
  def backward(self,output_error,learning_rate):
    """Apply one gradient-descent step and return the error for the previous layer.

    output_error: derivative of the cost with respect to this layer's output.
    learning_rate: step size applied to the weight and bias updates.
    """
    # sensitivity of the cost wrt this layer's input, computed with the
    # weights as they were during the forward pass (before the update below)
    grad_input = np.dot(output_error, self.weights.T)
    # sensitivity of the cost wrt the weights: d(output)/d(weights) is the input
    grad_weights = np.dot(self.input_data.T, output_error)

    self.weights = self.weights - learning_rate*grad_weights
    # d(output)/d(bias) is 1, so the bias gradient is output_error itself
    self.bias = self.bias - learning_rate*output_error

    # handed to the preceding layer so it can compute its own updates
    return grad_input

In [80]:
class sigmoid(Layer):
  """Element-wise logistic activation ``1 / (1 + e^-x)``."""
  def __init__(self):
    # Layer.__init__ is not called: this layer creates no weights or bias
    pass
  def forward(self,input):
    """Cache *input* for backward() and return the activation."""
    self.input_data=input
    denominator = 1. + np.exp(-self.input_data)
    return 1. / denominator
  def backward(self, output_error, learning_rate):
    """Return output_error scaled by the sigmoid derivative s*(1-s).

    learning_rate is accepted for interface compatibility and not used.
    """
    activation = 1. / (1. + np.exp(-self.input_data))
    return activation*(1- activation)*output_error



In [81]:
class tanh(Layer):
  """Element-wise hyperbolic-tangent activation."""
  def __init__(self):
    # Layer.__init__ is not called: this layer creates no weights or bias
    pass
  def forward(self,input):
    """Cache *input* for backward() and return tanh(input)."""
    self.input_data=input
    # np.tanh saturates cleanly to +/-1; the hand-written
    # (e^x - e^-x) / (e^x + e^-x) overflows to inf/inf = nan for large |x|
    return np.tanh(self.input_data)
  def backward(self, output_error,learning_rate):
    """Return output_error scaled by the tanh derivative 1 - tanh(x)^2.

    learning_rate is accepted for interface compatibility and not used.
    """
    tanh_val=np.tanh(self.input_data)
    return (1-tanh_val**2)*output_error

In [82]:
class softmax(Layer):
  """Affine transform followed by a softmax over the last axis."""
  def forward(self,input):
    """Return softmax(input . weights + bias), normalised per sample.

    The pre-activation is stored in self.z.
    """
    self.input_data=input
    self.z=(np.dot(self.input_data, self.weights) + self.bias)
    # subtract each row's own maximum for numerical stability; the shift
    # cancels out in the ratio below
    shifted = self.z - np.max(self.z, axis=-1, keepdims=True)
    num = np.exp(shifted)
    # normalise across the class axis: summing over axis 0 divided every
    # entry of a (1, n) row by itself and returned all ones
    return num / np.sum(num, axis=-1, keepdims=True)

  #*****************************************************#
  def backward(self, probs, bp_err):
    """Return the gradient with respect to the softmax inputs.

    probs: softmax outputs, one row per sample.
    bp_err: gradient of the cost with respect to those outputs.

    NOTE: this signature differs from Layer.backward(output_error,
    learning_rate) and no parameters are updated here, so the method cannot
    be driven as-is by sequential_class.fit.
    """
    # Jacobian-vector product of softmax, row by row:
    #   out_j = p_j * (err_j - sum_k err_k * p_k)
    weighted = np.sum(bp_err * probs, axis=1, keepdims=True)
    return probs * (bp_err - weighted)

In [83]:
class crossentropy(Layer):
  """Cross-entropy loss helpers, callable on the class or on an instance."""
  # Both functions take no `self`; without @staticmethod they only worked
  # when accessed through the class and broke when called on an instance.
  @staticmethod
  def forward(pred, target):
    """Return the element-wise terms ``-target * log(pred)`` (not summed)."""
    return -target * np.log(pred)
  @staticmethod
  def backward(pred, target):
    """Return ``target - pred``.

    NOTE(review): sequential_class.fit calls its loss functions as
    (y_true, output) and subtracts the returned gradient; confirm the
    argument order and sign expected here before using it as loss_grad.
    """
    return target - pred

In [84]:
#to calculate the loss function and its derivative
#class output_error:
def mse(y_true, y_pred):
  """Return the mean of the squared element-wise differences."""
  residual = y_true-y_pred
  squared = np.power(residual, 2)
  return np.mean(squared)

def mse_prime(y_true, y_pred):
  """Return the derivative of mse with respect to y_pred: 2*(y_pred - y_true)/N."""
  doubled_residual = 2*(y_pred-y_true)
  # N is the number of elements in the target
  return doubled_residual/y_true.size

In [85]:
class sequential_class(Layer):
  """Ordered stack of layers trained one sample at a time."""
  def __init__(self,loss=None,loss_grad=None):
    """Create an empty model.

    loss: callable(y_true, output) returning the error for one sample.
    loss_grad: callable(y_true, output) returning the derivative of that
      error with respect to the output.
    """
    # Layer.__init__ is not called: the container owns no weights itself
    self.layers = []
    self.loss=loss
    self.loss_grad=loss_grad
  def add_layer(self,newlayer):
    """Append *newlayer* and return the list of layers."""
    self.layers.append(newlayer)
    return(self.layers)
  def display_weights(self):
    """Print the weights of every layer that has them."""
    for layer in self.layers:
      # activation layers skip Layer.__init__ and have no `weights`
      # attribute; reading it unconditionally raised AttributeError
      weights = getattr(layer, "weights", None)
      if weights is None:
        continue
      print(weights)
  def _forward(self, sample):
    """Run one sample through every layer in order and return the output."""
    output = sample
    for layer in self.layers:
      output = layer.forward(output)
    return output
  def predict(self,input):
    """Return a list with the network output for each element of *input*."""
    return [self._forward(input[i]) for i in range(len(input))]
  def fit(self, x_train, y_train, epochs, learning_rate,modelname=""):
    """Train with per-sample gradient descent and plot the loss curve.

    x_train / y_train: indexable collections of samples and targets.
    epochs: number of passes over the training data.
    learning_rate: step size forwarded to every layer's backward().
    modelname: title of the loss plot; also stored on self.modelname.
    """
    self.modelname=modelname
    samplesize=len(x_train)
    list_of_epochs = []
    list_of_errors = []
    for i in range(epochs):
      errorval=0
      for j in range(samplesize):
        # forward propagation
        output = self._forward(x_train[j])
        # accumulate the loss (for display purposes only)
        errorval += self.loss(y_train[j], output)
        # backward propagation: each layer updates itself and passes the
        # error on to the layer before it
        error = self.loss_grad(y_train[j], output)
        for layer in reversed(self.layers):
          error = layer.backward(error, learning_rate)

      # average error over all samples of this epoch
      errorval /= samplesize
      list_of_errors.append(errorval)
      list_of_epochs.append(i)
      print('epoch %d/%d   error=%f' % (i+1, epochs, errorval))
    fig = plt.figure(figsize=[6,10])
    ax = fig.add_subplot(1,1, 1)
    ax.plot(list_of_epochs,list_of_errors, color='b', linestyle="-")
    ax.set_title(modelname)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
        


In [86]:
#model1=linearlayer(5,4)
#print(model1.output)
#model=Layer()
#print("weights: ", model1.weights)
#print("bias: ",model1.bias)

In [87]:
#model1=sequential_class()
#val1=model1.add_layer(linearlayer(5,4))
#val2=model1.add_layer(linearlayer(2,2))
#val3=model1.add_layer(linearlayer(2,2))
#model1.display_weights()
#print(val3)
#print("weights: ", model1.weights)
#print("bias: ",model1.bias)

In [88]:
def get_one_hot(targets, nb_classes):
    """Return the one-hot encoding of integer class labels.

    targets: array-like of integer labels in [0, nb_classes); lists and
      nested lists are accepted as well as NumPy arrays.
    nb_classes: length of each one-hot vector.

    The result has the shape of *targets* with a trailing axis of length
    nb_classes appended.
    """
    # convert once so that both the indexing and the final reshape see an
    # array; reading `.shape` from the raw argument failed for plain lists
    labels = np.asarray(targets)
    res = np.eye(nb_classes)[labels.reshape(-1)]
    return res.reshape(list(labels.shape)+[nb_classes])

In [89]:
def XOR_Test():
  """Train a 2-2-1 network on XOR with sigmoid and then tanh activations.

  Each trained model is pickled to its own file and its predictions on the
  four training inputs are printed.
  """
  x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
  y_train = np.array([[[0]], [[1]], [[1]], [[0]]])
  # (activation class, label used for the plot title and printout, pickle file)
  runs = (
    (sigmoid, 'XOR prediction using Sigmoid Training: ', 'XOR_solved_sigmoid.w'),
    (tanh, 'XOR prediction using Tanh Training: ', 'XOR_solved_tanh.w'),
  )
  for activation, label, weight_file in runs:
    model = sequential_class(mse,mse_prime)
    model.add_layer(linearlayer(2,2))
    model.add_layer(activation())
    model.add_layer(linearlayer(2,1))
    model.add_layer(activation())
    model.fit(x_train, y_train, epochs=5000, learning_rate=0.01,modelname=label)
    predictions = model.predict(x_train)
    with open(weight_file, 'wb') as files:
      pickle.dump(model, files)
    print(label,predictions)
    print("\n----------------------------------------------------------\n")

In [90]:
def predict(network, input):
    """Feed *input* through each layer of *network* in order and return the result.

    network: iterable of objects exposing forward(x).
    """
    activation = input
    for stage in network:
        activation = stage.forward(activation)
    return activation

In [91]:
def mnist_train_test():
  """Train three MNIST classifiers and print predictions for five test digits.

  All models use the mse loss; they differ in depth, width, activations and
  learning rate.
  """
  (x_train, y_train), (x_test, y_test) = mnist.load_data()
  print("xtrain shape",len(x_train))
  print("xtest shape",len(x_test))

  def as_rows(images):
    # flatten each image to a (1, 784) float32 row scaled by 1/255
    rows = images.reshape(images.shape[0], 1, 28*28)
    rows = rows.astype('float32')
    rows /= 255
    return rows

  def build(layout):
    # layout: sequence of (inputs, outputs, activation class) per stage
    network = sequential_class(mse,mse_prime)
    for n_in, n_out, activation in layout:
      network.add_layer(linearlayer(n_in, n_out))
      network.add_layer(activation())
    return network

  def report(network, template):
    # print predicted vs. true digit for the first `samples` test images
    for test, true in zip(x_test[:samples], y_test[:samples]):
      pred = network.predict(test)
      idx = np.argmax(pred)
      idx_true = np.argmax(true)
      print(template % (idx, idx_true))

  x_train = as_rows(x_train)
  y_train = np_utils.to_categorical(y_train)
  # 10% of the training data is split off; the held-out part is not used below
  x_train, x_validation, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, stratify=y_train)
  x_test = as_rows(x_test)
  y_test = np_utils.to_categorical(y_test)
  samples = 5

  print("\n\n Model 1")
  # 784 -> 100 -> 50 -> 10, tanh throughout
  mnist_network_layer1 = build([(28*28, 100, tanh), (100, 50, tanh), (50, 10, tanh)])
  mnist_network_layer1.fit(x_train, y_train, epochs=10, learning_rate=0.1,modelname='Mnist Model 1: ')
  report(mnist_network_layer1, 'mnist_network_layer1---pred: %s, true: %d')

  print("\n\n Model 2")
  # 784 -> 50 -> 50 -> 10, sigmoid hidden activations and tanh output
  mnist_network_layer2 = build([(28*28, 50, sigmoid), (50, 50, sigmoid), (50, 10, tanh)])
  mnist_network_layer2.fit(x_train, y_train, epochs=10, learning_rate=0.1,modelname='Mnist Model 2: ')
  report(mnist_network_layer2, 'mnist_network_layer2---pred: %s, true: %d')

  print("\n\n Model 3")
  # 784 -> 200 -> 100 -> 50 -> 25 -> 10, tanh throughout, learning rate 2
  mnist_network_layer3 = build([(28*28, 200, tanh), (200, 100, tanh), (100, 50, tanh), (50, 25, tanh), (25, 10, tanh)])
  mnist_network_layer3.fit(x_train, y_train, epochs=10, learning_rate=2,modelname='Mnist Model 3: ')
  report(mnist_network_layer3, ' mnist_network_layer3--- pred: %s, true: %d')


In [92]:
def playing_with_hyperparameters():
  """Train four 784-100-50-10 tanh networks that differ in one setting each.

  Model 1: first layer initialised with all-zero weights and bias.
  Model 2: first layer initialised uniformly in [-10, 10).
  Model 3: default initialisation, learning rate 1.
  Model 4: default initialisation, learning rate 0.001.
  After each training run the predictions for five test digits are printed.
  """
  (x_train, y_train), (x_test, y_test) = mnist.load_data()
  # reshape and normalize input data
  x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
  x_train = x_train.astype('float32')
  x_train /= 255
  y_train = np_utils.to_categorical(y_train)
  # 10% of the training data is split off; the held-out part is not used below
  x_train, x_validation, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, stratify=y_train)
  x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
  x_test = x_test.astype('float32')
  x_test /= 255
  y_test = np_utils.to_categorical(y_test)
  samples = 5

  def _report(network):
    # print predicted vs. true digit for the first `samples` test images
    for test, true in zip(x_test[:samples], y_test[:samples]):
      pred = network.predict(test)
      idx = np.argmax(pred)
      idx_true = np.argmax(true)
      print('mnist_network_layer1---pred: %s, true: %d' % (idx, idx_true))

  print("\n\n Hyperparameter Model 1")
  hyperparameter_network_layer1 = sequential_class(mse,mse_prime)
  # explicit parameters must match the declared 784 -> 100 layer; the former
  # (784, 10) / (1, 10) arrays could not feed the following 100-input layer
  hyperparameter_network_layer1.add_layer(linearlayer(28*28, 100, np.zeros([28*28,100]), np.zeros([1,100])))
  hyperparameter_network_layer1.add_layer(tanh())
  hyperparameter_network_layer1.add_layer(linearlayer(100, 50))
  hyperparameter_network_layer1.add_layer(tanh())
  hyperparameter_network_layer1.add_layer(linearlayer(50, 10))
  hyperparameter_network_layer1.add_layer(tanh())
  hyperparameter_network_layer1.fit(x_train, y_train, epochs=10, learning_rate=0.1,modelname='Playing with Hyperparameter Model 1: ')
  _report(hyperparameter_network_layer1)

  print("\n\n Hyperparameter Model 2")
  high = 10
  low = -10
  hyperparameter_network_layer2 = sequential_class(mse,mse_prime)
  # same shape correction as Model 1, with values drawn from [low, high)
  hyperparameter_network_layer2.add_layer(linearlayer(28*28, 100, np.random.rand(28*28,100) * (high - low) + low, np.random.rand(1,100) * (high - low) + low ))
  hyperparameter_network_layer2.add_layer(tanh())
  hyperparameter_network_layer2.add_layer(linearlayer(100, 50))
  hyperparameter_network_layer2.add_layer(tanh())
  hyperparameter_network_layer2.add_layer(linearlayer(50, 10))
  hyperparameter_network_layer2.add_layer(tanh())
  hyperparameter_network_layer2.fit(x_train, y_train, epochs=10, learning_rate=0.1,modelname='Playing with Hyperparameter Model 2: ')
  # report the model that was just trained (Model 1 was evaluated here before)
  _report(hyperparameter_network_layer2)

  print("\n\n Hyperparameter Model 3")
  hyperparameter_network_layer3 = sequential_class(mse,mse_prime)
  hyperparameter_network_layer3.add_layer(linearlayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
  hyperparameter_network_layer3.add_layer(tanh())
  # a second, duplicated linearlayer(100, 50) used to follow this one; it
  # could not accept the (1, 50) output produced here, so it was removed
  hyperparameter_network_layer3.add_layer(linearlayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
  hyperparameter_network_layer3.add_layer(tanh())
  hyperparameter_network_layer3.add_layer(linearlayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
  hyperparameter_network_layer3.add_layer(tanh())
  hyperparameter_network_layer3.fit(x_train, y_train, epochs=10, learning_rate=1,modelname='Playing with Hyperparameter Model 3: ')
  _report(hyperparameter_network_layer3)

  print("\n\n Hyperparameter Model 4")
  hyperparameter_network_layer4 = sequential_class(mse,mse_prime)
  hyperparameter_network_layer4.add_layer(linearlayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
  hyperparameter_network_layer4.add_layer(tanh())
  hyperparameter_network_layer4.add_layer(linearlayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
  hyperparameter_network_layer4.add_layer(tanh())
  hyperparameter_network_layer4.add_layer(linearlayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
  hyperparameter_network_layer4.add_layer(tanh())
  hyperparameter_network_layer4.fit(x_train, y_train, epochs=10, learning_rate=0.001,modelname='Playing with Hyperparameter Model 4: ')
  _report(hyperparameter_network_layer4)

In [93]:
def main():
  """Run the XOR demo, the MNIST models and the hyperparameter experiments in order."""
  # (banner printed before the stage, stage to run)
  stages = (
    ("...................................Doing Xor Testing...........................................\n\n", XOR_Test),
    ("..................................Training and testing on mnist Dataset.........................\n\n", mnist_train_test),
    ("..................................Playing with Hyperparameters.........................\n\n", playing_with_hyperparameters),
  )
  for banner, stage in stages:
    print(banner)
    stage()

In [None]:
# Run every experiment only when executed as a script, not when imported.
if __name__ == "__main__" :   
    main()

Streaming output truncated to the last 5000 lines.
epoch 50/5000   error=0.257391
epoch 51/5000   error=0.257352
epoch 52/5000   error=0.257315
epoch 53/5000   error=0.257278
epoch 54/5000   error=0.257242
epoch 55/5000   error=0.257206
epoch 56/5000   error=0.257171
epoch 57/5000   error=0.257136
epoch 58/5000   error=0.257102
epoch 59/5000   error=0.257069
epoch 60/5000   error=0.257036
epoch 61/5000   error=0.257004
epoch 62/5000   error=0.256972
epoch 63/5000   error=0.256940
epoch 64/5000   error=0.256909
epoch 65/5000   error=0.256878
epoch 66/5000   error=0.256848
epoch 67/5000   error=0.256818
epoch 68/5000   error=0.256789
epoch 69/5000   error=0.256760
epoch 70/5000   error=0.256731
epoch 71/5000   error=0.256703
epoch 72/5000   error=0.256675
epoch 73/5000   error=0.256648
epoch 74/5000   error=0.256620
epoch 75/5000   error=0.256593
epoch 76/5000   error=0.256567
epoch 77/5000   error=0.256541
epoch 78/5000   error=0.256515
epoch 79/5000   error=0.256489
epoch

# New Section