In [6]:
# OPTIMIZATION USING GRADIENT DESCENT AND STOCHASTIC GRADIENT DESCENT

import numpy as np
from sklearn.metrics import accuracy_score

def sigmoid(x):
  """Return the logistic sigmoid 1 / (1 + e^(-x)) of x.

  np.exp is applied elementwise, so x may be a scalar or a NumPy array;
  the result has the same shape as x.
  """
  denominator = 1 + np.exp(-x)
  return 1 / denominator
def sigmoidder(x):
  """Return x * (1 - x), the sigmoid derivative written in terms of its output.

  The argument is expected to already be a sigmoid activation (the callers in
  this file pass layer outputs), not the pre-activation value.
  """
  complement = 1 - x
  return x * complement

# Training data: the four rows of the XOR truth table and their targets.
ip = np.array([
  [0, 0],
  [0, 1],
  [1, 0],
  [1, 1],
])
op = np.array([
  [0],
  [1],
  [1],
  [0],
])

# Layer widths of the 2-2-1 network, followed by the training hyperparameters.
ipsize, hiddensize, opsize = 2, 2, 1
lr, epochs = 0.1, 10000

# Seed the global NumPy generator so the initial parameters are reproducible.
# The draw order (input->hidden weights, hidden->output weights, hidden bias,
# output bias) determines the values, so it must not be rearranged.
np.random.seed(42)
ihweight = np.random.rand(ipsize, hiddensize)
howeight = np.random.rand(hiddensize, opsize)
ihbias = np.random.rand(1, hiddensize)
hobias = np.random.rand(1, opsize)

# GRADIENT DESCENT
# Full-batch training: each epoch pushes all samples in `ip` through the
# network at once and applies one summed update to every parameter.
for _ in range(epochs):
  # Forward pass: input -> hidden -> output, with a sigmoid at both layers.
  hidden_act = sigmoid(ip.dot(ihweight) + ihbias)
  output_act = sigmoid(hidden_act.dot(howeight) + hobias)

  # Backward pass. sigmoidder receives the activations themselves.
  # Both deltas are computed before any parameter is modified, so the hidden
  # delta uses the pre-update hidden->output weights.
  output_delta = (op - output_act) * sigmoidder(output_act)
  hidden_delta = output_delta.dot(howeight.T) * sigmoidder(hidden_act)

  # In-place parameter updates, scaled by the learning rate.
  howeight += lr * hidden_act.T.dot(output_delta)
  ihweight += lr * ip.T.dot(hidden_delta)
  hobias += lr * output_delta.sum(axis=0, keepdims=True)
  ihbias += lr * hidden_delta.sum(axis=0, keepdims=True)

# Evaluate the trained network one sample at a time, rounding the sigmoid
# output to obtain a 0/1 label.
print("Using Gradient Descent")
print("INPUT\tEXPECTED OUTPUT\t  PREDICTED OUTPUT")
predicted = []
for sample, target in zip(ip, op):
  hidden_act = sigmoid(np.dot(sample, ihweight) + ihbias)
  label = sigmoid(np.dot(hidden_act, howeight) + hobias).round()
  predicted.append(label)
  print(sample, "  \t", target, " \t", label)

# Each collected label has shape (1, 1); flatten them into one column so the
# shape matches `op` for scoring.
predicted = np.array(predicted).reshape(-1, 1)
print("Accuracy: ", accuracy_score(op, predicted))

# STOCHASTIC GRADIENT DESCENT
# Re-seed and redraw the parameters in the same order as the initial setup so
# this run starts from the same initial weights and biases.
np.random.seed(42)
ihweight = np.random.rand(ipsize, hiddensize)
howeight = np.random.rand(hiddensize, opsize)
ihbias = np.random.rand(1, hiddensize)
hobias = np.random.rand(1, opsize)

# Per-sample training: parameters are updated after every individual sample.
# NOTE: samples are visited in the same fixed order (row 0..n-1) each epoch;
# there is no shuffling.
sample_count = len(ip)
for _ in range(epochs):
  for row in range(sample_count):
    # Slicing (rather than indexing) keeps the sample and target 2-D.
    sample = ip[row:row + 1]
    target = op[row:row + 1]

    # Forward pass for this single sample.
    hidden_act = sigmoid(sample.dot(ihweight) + ihbias)
    output_act = sigmoid(hidden_act.dot(howeight) + hobias)

    # Backward pass; both deltas are computed before any parameter changes.
    output_delta = (target - output_act) * sigmoidder(output_act)
    hidden_delta = output_delta.dot(howeight.T) * sigmoidder(hidden_act)

    # In-place parameter updates, scaled by the learning rate. With a single
    # sample the bias updates are just the deltas themselves.
    howeight += lr * hidden_act.T.dot(output_delta)
    ihweight += lr * sample.T.dot(hidden_delta)
    hobias += lr * output_delta
    ihbias += lr * hidden_delta

# Evaluate the trained network one sample at a time, rounding the sigmoid
# output to obtain a 0/1 label.
print("\nUsing Stochastic Gradient Descent")
print("INPUT\tEXPECTED OUTPUT\t  PREDICTED OUTPUT")
predictedsgd = []
for row in range(sample_count):
  hidden_act = sigmoid(np.dot(ip[row:row + 1], ihweight) + ihbias)
  label = sigmoid(np.dot(hidden_act, howeight) + hobias).round()
  predictedsgd.append(label)
  print(ip[row], "  \t", op[row], "  \t", label)

# Each collected label has shape (1, 1); flatten them into one column so the
# shape matches `op` for scoring.
predictedsgd = np.array(predictedsgd).reshape(-1, 1)
print("Accuracy: ", accuracy_score(op, predictedsgd))

Using Gradient Descent
INPUT	EXPECTED OUTPUT	  PREDICTED OUTPUT
[0 0]   	 [0]  	 [[0.]]
[0 1]   	 [1]  	 [[1.]]
[1 0]   	 [1]  	 [[1.]]
[1 1]   	 [0]  	 [[0.]]
Accuracy:  1.0

Using Stochastic Gradient Descent
INPUT	EXPECTED OUTPUT	  PREDICTED OUTPUT
[0 0]   	 [0]   	 [[0.]]
[0 1]   	 [1]   	 [[1.]]
[1 0]   	 [1]   	 [[1.]]
[1 1]   	 [0]   	 [[0.]]
Accuracy:  1.0
