<a href="https://colab.research.google.com/github/hanani8/Backprop_Implementation/blob/main/BACKPROP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [113]:
import numpy as np

In [114]:
# Variables
# Network configuration and the single training example used by every cell below.

# Number of sigmoid layers; forward_propagation runs no_of_layers + 1 layers
# in total, the last one being the softmax output layer.
no_of_layers = 2
# Intended length of the input column vector (X below has 3 rows).
no_of_inputs = 3
# Size of the output layer; used when sizing the last weight matrix and bias.
no_of_outputs = 3
# Width of every non-output layer.
no_of_neurons_in_each_layer = 3
# Input sample stored as a 3x1 column vector.
X = np.array([[1], [0], [1]])
# Target stored as a 3x1 column vector; read as a module-level global by
# error() and backward_propagation().
Y = np.array([[0], [0], [1]])


In [115]:
# Auxiliary Functions

def sigmoid(x):
  """Logistic function 1 / (1 + e^-x), applied element-wise.

  Args:
    x: a scalar or NumPy array.

  Returns:
    The logistic of ``x``, with the same shape as ``x``.
  """
  decay = np.exp(-x)
  return 1 / (decay + 1)

def softmax(x):
    """Return the softmax of ``x``, normalised along its first axis.

    For the column vectors used in this notebook (shape ``(n, 1)``) this is the
    usual softmax over the ``n`` entries. A 1-D input is normalised over all of
    its entries, and a 2-D input is normalised column by column.

    The maximum is subtracted before exponentiating so large logits do not
    overflow to ``inf`` (and then ``nan``); this leaves the mathematical
    result unchanged. Summation is done with a vectorised ``np.sum`` over an
    array rather than over a generator, which NumPy deprecates.

    Args:
        x: array-like of logits with at least one dimension.

    Returns:
        A float array with the same shape as ``x`` whose entries along axis 0
        sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; return an empty float array of the same shape.
        return np.exp(x)
    # Shift by the per-column maximum for numerical stability.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def grad_sigmoid(x):
    """Derivative of the logistic function evaluated at ``x``.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).

    Args:
        x: a scalar or NumPy array of pre-activations.

    Returns:
        The element-wise derivative, with the same shape as ``x``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

def linear(w,x,b):
  """Affine pre-activation ``w . x + b``.

  The previous implementation called ``np.sum(np.dot(w, x), b)``, which hands
  the bias to ``np.sum`` as its ``axis`` argument instead of adding it.

  Args:
    w: weight matrix.
    x: input vector or matrix compatible with ``np.dot(w, x)``.
    b: bias, broadcastable against ``np.dot(w, x)``.

  Returns:
    ``np.dot(w, x) + b``.
  """
  return np.dot(w, x) + b

def error(h):
  """Cross-entropy between the module-level target ``Y`` and the network output.

  Args:
    h: sequence of layer activations; only the last entry (the output
      layer's activation) is used.

  Returns:
    ``-sum(Y * log(h[-1]))``.
  """
  log_prediction = np.log(h[-1])
  return -(Y * log_prediction).sum()

In [116]:

# Parameter Initialization
#
# W[i] has shape (fan_out, fan_in) so that `W[i] @ activation` is valid in
# forward_propagation, and B[i] is a (fan_out, 1) column so that adding it to a
# column-vector activation does not broadcast to a square matrix.
# Layer 0 reads the raw input (no_of_inputs features); the last layer emits
# no_of_outputs values; every other dimension is no_of_neurons_in_each_layer.

W = []
B = []

for i in range(no_of_layers + 1):

  fan_in = no_of_inputs if i == 0 else no_of_neurons_in_each_layer
  fan_out = no_of_outputs if i == no_of_layers else no_of_neurons_in_each_layer

  W.append(np.zeros((fan_out, fan_in)))
  B.append(np.zeros((fan_out, 1)))

for i in W:
  print(i)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [117]:
# Loading Weights and Biases
#
# The archive is expected to hold arrays named W1..Wn and b1..bn (1-based),
# which replace the zero-initialised W[0..n-1] and B[0..n-1].
# The context manager closes the underlying file once the arrays are read, and
# indexing with [] raises KeyError for a missing entry instead of silently
# storing None the way .get() would.
with np.load('/content/parameters.npz') as files:
  for i in range(1, no_of_layers + 2):
    W[i-1] = files["W{}".format(i)]
    B[i-1] = files["b{}".format(i)]

In [118]:
# Forward Propagation

def forward_propagation(input):
  """Run one forward pass through every layer of the network.

  Reads the module-level parameters ``W`` and ``B`` and the layer count
  ``no_of_layers``. All layers except the last use ``sigmoid``; the last
  layer uses ``softmax``.

  Args:
    input: the network input fed to layer 0.

  Returns:
    A tuple ``(a, h)`` of two lists with one entry per layer: ``a`` holds the
    pre-activations ``W[layer] @ previous + B[layer]`` and ``h`` holds the
    corresponding activations.
  """
  pre_activations = []
  activations = []

  # Output of the layer below; for layer 0 this is the raw input.
  previous = input

  for layer in range(no_of_layers + 1):
    z = W[layer] @ previous + B[layer]
    pre_activations.append(z)

    # The final layer is the softmax output layer; the rest are sigmoid.
    activate = softmax if layer == no_of_layers else sigmoid
    previous = activate(z)
    activations.append(previous)

  return (pre_activations, activations)

In [119]:
# Backward Propagation

def backward_propagation(a,h):
  """Back-propagate the loss gradient through every layer.

  Reads the module-level ``X`` (network input), ``Y`` (target), ``W``
  (weights) and ``no_of_layers``.

  Args:
    a: list of per-layer pre-activations, as returned by forward_propagation.
    h: list of per-layer activations, as returned by forward_propagation.

  Returns:
    A tuple ``(grads_W, grads_b, grads_h, grads_a)`` of lists with one entry
    per layer: gradients with respect to the weights, the biases, the
    activations and the pre-activations. The last entry of ``grads_h`` is
    never computed and stays at its placeholder value ``0``.
  """
  layer_count = no_of_layers + 1
  dW = [0] * layer_count
  db = [0] * layer_count
  dh = [0] * layer_count
  da = [0] * layer_count

  # Gradient at the output layer's pre-activation: prediction minus target.
  da[-1] = -(Y - h[-1])

  # Walk from the output layer down to layer 1; layer 0 is handled afterwards
  # because its input is X rather than a previous activation.
  for k in reversed(range(1, layer_count)):
    delta = da[k]
    # Gradients with respect to this layer's parameters.
    dW[k] = delta @ h[k-1].T
    db[k] = delta[:]
    # Gradient with respect to the activation of the layer below ...
    dh[k-1] = W[k].T @ delta
    # ... and with respect to its pre-activation (through the sigmoid).
    da[k-1] = dh[k-1] * grad_sigmoid(a[k-1])

  dW[0] = da[0] @ X.T
  db[0] = da[0]

  return (dW, db, dh, da)


In [120]:
# Training/Gradient Descent
#
# Plain gradient descent on the single example (X, Y): each epoch runs a
# forward pass, a backward pass, and then updates every layer's parameters.

max_epochs = 10
# Step size of the update. It used to be an implicit 1 baked into the update
# rule; 1.0 reproduces those updates exactly while making the value tunable.
learning_rate = 1.0

for epoch in range(max_epochs):
  (a,h) = forward_propagation(X)
  (grads_W, grads_b, grads_h, grads_a) = backward_propagation(a, h)

  for i in range(no_of_layers + 1):
    W[i] = W[i] - learning_rate * grads_W[i]
    B[i] = B[i] - learning_rate * grads_b[i]





[array([[ 0.00083319],
       [ 0.00141185],
       [-0.00211219]]), array([[ 0.01838198],
       [-0.01997644],
       [-0.0038401 ]]), array([[ 0.23691422],
       [ 0.33838847],
       [-0.57530268]])]


  denominator = np.sum(np.exp(i) for i in x)
