# Single Neuron

In [0]:
import numpy as np

In [0]:
class Neuron():
  """
  A single feed-forward neuron: weighted sum of the inputs plus a bias,
  passed through an activation function.

  Args:
    num_inputs (int) : number of input values the neuron receives
    activation_fn (callable) : activation applied to the weighted sum

  Attributes:
    W (ndarray) : one weight per input, shape (num_inputs,)
    b (ndarray) : bias, shape (1,)
    activation (callable) : activation function

  """

  def __init__(self,num_inputs,activation_fn):
    """ draw the weights and the bias uniformly from [0, 1) """
    self.activation = activation_fn
    self.W = np.random.rand(num_inputs)
    self.b = np.random.rand(1)

  def forward(self,X):
    """ return activation(X.W + b) for the input X """
    weighted_sum = np.dot(X,self.W) # one dot product per input row
    return self.activation(weighted_sum + self.b)
    

In [0]:
x = np.random.rand(1, 3) # random input row vector (1X3)

def activation_function(y):
  """Step activation: 0 when y is negative or zero, 1 otherwise."""
  return 0 if y <= 0 else 1

perceptron = Neuron(num_inputs=x.size, activation_fn=activation_function) # build the perceptron

perceptron.forward(x) # run the forward pass

1

# Fully Connected Layer

In [0]:
import numpy

In [0]:
class FullyConnectedLayer():
  """
  Dense layer: every input value feeds every neuron of the layer.

  Args
  num_input (int) : number of input values
  layer_size (int) : number of neurons (outputs) in the layer
  activation_fn (callable) : activation function

  Attributes
  W (ndarray) : weight matrix (num_input X layer_size)
  b (ndarray) : bias vector (layer_size)
  activation_fn (callable): activation function

  """

  def __init__(self,num_input,layer_size,activation_fn):
    """
    draw weights and biases from a standard normal distribution
    """
    weight_shape = (num_input,layer_size)
    self.W = np.random.standard_normal(size=weight_shape)
    self.b = np.random.standard_normal(size=layer_size)
    self.activation_fn = activation_fn

  def forward(self,X):
    """
    compute activation_fn(X.W + b)
    """
    return self.activation_fn(np.dot(X,self.W) + self.b)




In [0]:
"""
input_size = 2 (x1,x2)
layer_size = 5
outpur_size = 5

"""

x = np.random.rand(1, 2) # random input row vector (1 X 2)

def activation_fn(z):
  """ReLU: element-wise maximum of z and 0."""
  return np.maximum(z,0)

layer = FullyConnectedLayer(num_input=x.size, layer_size=5, activation_fn=activation_fn) # initialize the layer

print(layer.forward(x)) # run the forward pass

[[0.         1.63021409 1.35667062 0.         0.        ]]


In [0]:
# the cell above ran the layer on a single input array
# now let's apply the layer to more than one input array at once

x1 = np.random.rand(1, 2) # random array (1 X 2)
x2 = np.random.rand(1, 2) # random array (1 X 2)

batch = np.concatenate([x1, x2], axis=0) # stack the two arrays row-wise
layer.forward(batch)

array([[0.0625052 , 0.73118069, 0.19824003, 0.        , 0.        ],
       [0.        , 1.1124988 , 1.19359605, 0.        , 0.        ]])



> We get a 2×5 matrix: 5 output values for each of the 2 input arrays we passed in.


> The result of concatenating several input arrays is called a batch.


> In our example the batch size is 2, since we pass 2 input arrays at a time to the forward operation.




# Feed Forward Neural Network

In [0]:
class FullyConnectedLayer():
  """
  Fully connected layer computing activation_fn(X.W + b).

  Args
  num_input (int) : number of input values
  layer_size (int) : number of neurons (outputs) in the layer
  activation_fn (callable) : activation function

  Attributes
  W (ndarray) : weight matrix (num_input X layer_size)
  b (ndarray) : bias vector (layer_size)
  activation_fn (callable): activation function

  """

  def __init__(self,num_input,layer_size,activation_fn):
    """
    weights and biases are sampled from a standard normal distribution
    """
    self.activation_fn = activation_fn
    self.W = np.random.standard_normal(size=(num_input,layer_size))
    self.b = np.random.standard_normal(size=layer_size)

  def forward(self,X):
    """
    weighted sum of the inputs plus bias, passed through the activation
    """
    pre_activation = np.dot(X,self.W) + self.b
    return self.activation_fn(pre_activation)


class FeedForwardNN():
  """
  simple feed forward neural network: a stack of fully connected layers

  Args
  num_input (int) : number of input values
  num_output (int) : number of output neurons
  hidden_layer_size (list) : size of each hidden layer
  activation_fn (callable) : activation function

  Attributes
  layers (list) : the layers making up the network, in forward order

  """

  def __init__(self,num_input,num_output,hidden_layer_size,activation_fn):
    """
    for simplicity the same activation is used in every layer
    """
    sizes = [num_input,*hidden_layer_size,num_output]
    # each pair of consecutive sizes gives one layer's (inputs, outputs)
    stack = []
    for n_in, n_out in zip(sizes[:-1],sizes[1:]):
      stack.append(FullyConnectedLayer(n_in,n_out,activation_fn))
    self.layers = stack

  def forward(self,X):
    """
    feed X through every layer in turn and return the last layer's output
    """
    signal = X
    for current_layer in self.layers:
      signal = current_layer.forward(signal)
    return signal

  def predict(self,X):
    """
    run the forward pass and return the index of the largest output
    """
    return np.argmax(self.forward(X))

  def evaluate_score(self,X_test,y_test):
    """
    fraction of samples in X_test whose prediction equals the label in y_test
    """
    total = len(X_test)
    hits = sum(1 for i in range(total) if self.predict(X_test[i]) == y_test[i])
    return hits / total

In [0]:
# activation function
def sigmoid(x):
  """
  Numerically stable logistic sigmoid 1 / (1 + exp(-x)).

  Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp, so large negative
  inputs return 0 without np.exp(-x) overflowing.

  Args:
    x (float or ndarray) : input value(s)
  Returns:
    sigmoid of x, element-wise
  """
  return np.exp(-np.logaddexp(0, -x)) # logaddexp(0, -x) == log(1 + exp(-x))

In [6]:
!pip install mnist

Collecting mnist
  Downloading https://files.pythonhosted.org/packages/c6/c4/5db3bfe009f8d71f1d532bbadbd0ec203764bba3a469e4703a889db8e5e0/mnist-0.2.2-py2.py3-none-any.whl
Installing collected packages: mnist
Successfully installed mnist-0.2.2


In [0]:
# we will use the MNIST dataset for this feed-forward network
import numpy as np
import mnist

# Load the MNIST training and test splits (images and labels):
X_train = mnist.train_images()
y_train = mnist.train_labels()
X_test = mnist.test_images()
y_test = mnist.test_labels()

# the classes are the digits 0 to 9
num_classes = 10

# Flatten each 28x28 image into a vector of 784 values (the inputs of our NN):
X_train = X_train.reshape(-1, 28*28)
X_test = X_test.reshape(-1, 28*28)

# One-hot encode the training labels: row i gets a 1 in column y_train[i]
y_train_one_hot = np.zeros((y_train.size, y_train.max()+1))
y_train_one_hot[np.arange(len(y_train)), y_train] = 1

In [0]:
y_train_one_hot.shape[1:] # shape of a single one-hot label

(10,)

In [0]:
X_train.shape[1:] # shape of a single flattened image

(784,)

In [0]:
# this network is only fed forward (no training): we just pass the inputs and read the outputs

mnist_classifier = FeedForwardNN(num_input=X_train.shape[1],
                                 num_output=num_classes,
                                 hidden_layer_size=[64,32],
                                 activation_fn=sigmoid)
accuracy = mnist_classifier.evaluate_score(X_train,y_train)

  This is separate from the ipykernel package so we can avoid doing imports until


In [0]:
print(100*accuracy) # accuracy as a percentage

10.148333333333333




> Here we get only about 10.15% accuracy (chance level for 10 classes), because the network's parameters are random.


> For better accuracy we need to train the neural network.





# Simple Neural Network With Back Propagation

In [0]:
class FullyConnectedLayer():
  """
  Fully connected layer with a forward pass, back-propagation and a
  gradient-descent update of its parameters.

  Args
  num_input (int) : number of input values
  layer_size (int) : number of neurons (outputs) in the layer
  activation_fn (callable) : activation function
  d_activation_fn (callable) : derivative of the activation function,
                               called with the layer output y_hat

  Attributes
  W (ndarray) : weight matrix (num_input X layer_size)
  b (ndarray) : bias vector (layer_size)
  activation_fn (callable): activation function
  d_activation_fn (callable): derivative of the activation function
  X, z, y_hat : input, pre-activation and output cached by the last forward call
  dL_dw, dL_db : gradients w.r.t. W and b stored by the last backward call

  """

  def __init__(self,num_input,layer_size,activation_fn,d_activation_fn):
    """
    draw weights and biases from a standard normal distribution
    """
    self.activation_fn = activation_fn
    self.d_activation_fn = d_activation_fn
    self.W = np.random.standard_normal(size=(num_input,layer_size))
    self.b = np.random.standard_normal(size=layer_size)

  def forward(self,X):
    """
    compute the layer output; z, y_hat and X are cached for back-propagation
    """
    pre_activation = np.dot(X,self.W) + self.b
    self.z = pre_activation
    self.y_hat = self.activation_fn(pre_activation)
    self.X = X
    return self.y_hat

  def backward(self,dL_dy):
    """
    back-propagate dL_dy, the derivative of the loss w.r.t. this layer's output;
    stores dL_dw and dL_db and returns the derivative w.r.t. the layer's input
    """
    # chain rule: dL/dz = dL/dy * dy/dz, with dy/dz evaluated from the cached output
    dL_dz = dL_dy * self.d_activation_fn(self.y_hat)

    # dz/dw = x, hence dL/dw = x^T . dL/dz (kept for the weight update)
    self.dL_dw = np.dot(self.X.T, dL_dz)

    # dz/db = 1 for every sample: the dot with a ones vector sums dL/dz over the batch
    batch_ones = np.ones(dL_dy.shape[0])
    self.dL_db = np.dot(batch_ones, dL_dz)

    # dz/dx = w, hence dL/dx = dL/dz . w^T, which is the previous layer's `dL_dy`
    return np.dot(dL_dz, self.W.T)

  def optimize(self, learning_rate):
    """
    Gradient-descent step on W and b using the gradients stored by backward.
    """
    self.W -= learning_rate * self.dL_dw
    self.b -= learning_rate * self.dL_db
  
    

In [0]:
# sigmoid function
def sigmoid(x):
    """
    Numerically stable logistic sigmoid 1 / (1 + exp(-x)).

    Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp, so large negative
    inputs return 0 without np.exp(-x) overflowing.

    Args:
      x (float or ndarray) : input value(s)
    Returns:
      y, the sigmoid of x, element-wise
    """
    return np.exp(-np.logaddexp(0, -x)) # logaddexp(0, -x) == log(1 + exp(-x))

# derivative of the sigmoid, expressed with the sigmoid output y
def derivated_sigmoid(y):
    """Return y * (1 - y) for the sigmoid output y."""
    complement = 1 - y
    return complement * y

# L2 (sum of squared errors) loss, averaged over the batch
def loss_L2(pred, target):
    """Return the sum of squared differences divided by pred.shape[0]."""
    residual = pred - target
    batch_size = pred.shape[0] # opt. we divide by the batch size
    return np.sum(np.square(residual)) / batch_size

# derivative of the squared error w.r.t. the prediction
def derivated_loss_L2(pred, target):
    """Return 2 * (pred - target), element-wise (not divided by the batch size)."""
    residual = pred - target
    return residual * 2

# cross-entropy loss function
def cross_entropy(pred, target, eps=1e-12):
    """
    Mean binary cross-entropy between predictions and targets.

    Args:
      pred (ndarray) : predicted probabilities
      target (ndarray) : target values, same shape as pred
      eps (float) : predictions are clipped to [eps, 1 - eps] so that log(0)
                    is never evaluated
    Returns:
      mean of -(target * log(pred) + (1 - target) * log(1 - pred))
    """
    # a prediction of exactly 0 or 1 would otherwise give log(0) = -inf and a nan/inf loss
    pred = np.clip(pred, eps, 1 - eps)
    return -np.mean(target * np.log(pred) + (1 - target) * np.log(1 - pred))

# cross-entropy derivative function
def derivated_cross_entropy(pred, target, eps=1e-12):
    """
    Element-wise derivative of the binary cross-entropy w.r.t. the prediction.

    Args:
      pred (ndarray) : predicted probabilities
      target (ndarray) : target values, same shape as pred
      eps (float) : predictions are clipped to [eps, 1 - eps] so that the
                    denominator pred * (1 - pred) is never zero
    Returns:
      (pred - target) / (pred * (1 - pred)), computed on the clipped predictions
    """
    # a prediction of exactly 0 or 1 would otherwise divide by zero (inf or nan gradient)
    pred = np.clip(pred, eps, 1 - eps)
    return (pred - target) / (pred * (1 - pred))

In [0]:
class SimpleNeuralNetwork():
  """
  Feed-forward neural network trained with back-propagation and
  mini-batch gradient descent.

  Args:

    num_input (int) : number of input values
    num_output (int) : number of output neuron
    hidden_layer_size (list) : list of size of each hidden layer
    activation_fn (callable) : activation function (shared by every layer)
    d_activation_fn (callable) : derivative of activation function
    loss_fn (callable) : loss function to train this network
    d_loss_fn (callable) : The derivative of the loss function, for back-propagation

  Attributes:
    layers (list): the network's FullyConnectedLayer objects, in forward order.
    activation_fn (callable): activation function.
    loss_fn (callable): loss function to train this network.
    d_loss_fn (callable): The derivative of the loss function, for back-propagation.
  """

  def __init__(self,num_input,num_output,hidden_layer_size = [64,32],\
               activation_fn = sigmoid,d_activation_fn = derivated_sigmoid,\
               loss_fn=loss_L2,d_loss_fn=derivated_loss_L2):
    # the default list is only unpacked here, never mutated
    layer_sizes = [num_input, *hidden_layer_size, num_output]
    # each pair of consecutive sizes gives one layer's (inputs, outputs)
    self.layers = [\
                   FullyConnectedLayer(layer_sizes[i], layer_sizes[i + 1], activation_fn, d_activation_fn)\
                   for i in range(len(layer_sizes) - 1)]
    self.activation_fn = activation_fn
    self.loss_fn = loss_fn
    self.d_loss_fn = d_loss_fn

  def forward(self,X):
    """
    feed X through every layer in turn and return the last layer's output
    """
    for layer in self.layers:
      X = layer.forward(X)
    return X

  def predict(self,X):
    """
    return the index of the largest network output for the input X
    """
    # forward() already applies the activation in the last layer, so the outputs
    # are compared directly instead of being passed through the activation again
    return np.argmax(self.forward(X))

  def backward(self,dL_dy):
    """
    back-propagation: pass the loss derivative through the layers in reverse
    order; each layer stores its own parameter gradients
    """
    for layer in reversed(self.layers):
      dL_dy = layer.backward(dL_dy)
    return dL_dy

  def optimize(self,learning_rate):
    """
    apply one parameter update to every layer
    """
    for layer in self.layers:
      layer.optimize(learning_rate)

  def evaluate_score(self,X_test,y_test):
    """
    evaluate accuracy on test set: fraction of samples whose predicted index
    equals the label in y_test
    """
    correct_predictions = 0
    for i in range(len(X_test)):
      if self.predict(X_test[i]) == y_test[i]:
        correct_predictions += 1
    return correct_predictions / len(X_test)

  def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, num_epochs=5, learning_rate=1e-3):
    """
    Train the network with mini-batch gradient descent.

    Args:
      X_train, y_train : training inputs and their targets
      X_val, y_val : optional validation inputs and labels; the accuracy is
                     measured after each epoch only when both are given
      batch_size (int) : number of samples per batch; samples left over after
                         the last full batch are not used
      num_epochs (int) : number of passes over the training set
      learning_rate (float) : step size passed to optimize

    Returns:
      (losses, accuracies) : mean batch loss per epoch, and validation accuracy
                             per epoch (empty list when no validation set is given)
    """
    num_batches_per_epoch = len(X_train) // batch_size
    do_validation = X_val is not None and y_val is not None
    losses, accuracies = [], []
    for epoch in range(num_epochs):
      epoch_loss = 0
      for b in range(num_batches_per_epoch):
        # create batch
        b_idx = b * batch_size
        b_idx_e = b_idx + batch_size
        x, y_true = X_train[b_idx:b_idx_e], y_train[b_idx:b_idx_e]

        # Optimize on batch:
        y = self.forward(x) # forward pass
        epoch_loss += self.loss_fn(y, y_true) # loss
        dL_dy = self.d_loss_fn(y, y_true) # loss derivation
        self.backward(dL_dy) # back-propagation pass
        self.optimize(learning_rate) # optimization

      losses.append(epoch_loss / num_batches_per_epoch)
      if do_validation:
        # After each epoch, we "validate" our network, i.e., we measure its accuracy over the test/validation set:
        accuracies.append(self.evaluate_score(X_val, y_val))
        print("Epoch {:4d}: training loss = {:.6f} | val accuracy = {:.2f}%".format(epoch, losses[epoch], accuracies[epoch] * 100))
      else:
        # no validation data: report the training loss only instead of scoring None
        print("Epoch {:4d}: training loss = {:.6f}".format(epoch, losses[epoch]))
    return losses, accuracies



In [0]:
import mnist
import numpy as np

# Load the MNIST training and test splits (images and labels):
X_train = mnist.train_images()
y_train = mnist.train_labels()
X_test = mnist.test_images()
y_test = mnist.test_labels()

# the classes are the digits 0 to 9
num_classes = 10

# Flatten each 28x28 image into a vector of 784 values (the inputs of our NN):
X_train = X_train.reshape(-1, 28*28)
X_test = X_test.reshape(-1, 28*28)

# One-hot encode the training labels: row i gets a 1 in column y_train[i]
y_train_one_hot = np.zeros((y_train.size, y_train.max()+1))
y_train_one_hot[np.arange(len(y_train)), y_train] = 1

In [84]:
num_classes = 10 # digits 0 to 9
mnist_classifier = SimpleNeuralNetwork(num_input=X_train.shape[1], num_output=num_classes)
# train on the one-hot labels; the test split is used to measure accuracy after every epoch
losses, accuracies = mnist_classifier.train(X_train, y_train_one_hot, X_val=X_test, y_val=y_test,
                                            batch_size=32, num_epochs=100)

  This is separate from the ipykernel package so we can avoid doing imports until


Epoch    0: training loss = 1.003200 | val accuracy = 26.59%
Epoch    1: training loss = 0.822031 | val accuracy = 37.87%
Epoch    2: training loss = 0.736707 | val accuracy = 45.53%
Epoch    3: training loss = 0.669756 | val accuracy = 51.74%
Epoch    4: training loss = 0.614017 | val accuracy = 57.32%
Epoch    5: training loss = 0.564004 | val accuracy = 61.85%
Epoch    6: training loss = 0.524683 | val accuracy = 65.27%
Epoch    7: training loss = 0.489468 | val accuracy = 67.81%
Epoch    8: training loss = 0.463183 | val accuracy = 70.21%
Epoch    9: training loss = 0.439475 | val accuracy = 72.22%
Epoch   10: training loss = 0.419056 | val accuracy = 73.56%
Epoch   11: training loss = 0.402877 | val accuracy = 75.21%
Epoch   12: training loss = 0.386778 | val accuracy = 76.34%
Epoch   13: training loss = 0.374423 | val accuracy = 76.90%
Epoch   14: training loss = 0.360682 | val accuracy = 78.03%
Epoch   15: training loss = 0.353163 | val accuracy = 78.49%
Epoch   16: training los



> Here we can see that, by implementing backpropagation, we reach around 90% validation accuracy in 100 epochs.


In [0]:
+