<a href="https://colab.research.google.com/github/godpeny/laboratory/blob/master/Study/Deep_Learning_for_Everyone/Chatper_3/xor_NN_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# insall
%pip install numpy



In [2]:
# import
import numpy as np
import random as rand

In [109]:
data = [
    [[0,0],[0]],
    [[0,1],[1]],
    [[1,0],[1]],
    [[1,1],[0]],
]

epochs = 5000
lr = 0.1
mo = 0.4 # momentum for momentum SGD

In [110]:
# functions
# activation functions
def sigmoid(x, deriv=False):
  """
  sigmoid function
  """
  if deriv==False:
    return 1 / (1 + np.exp(-x)) # exponential of Euler's number
  else:
    return sigmoid(x, False) * (1 - sigmoid(x, False))
    # return x * (1-x)

def tanh(x, deriv=False):
  """
  hyperbolic tangent
  """
  if deriv==False:
    # return (np.exp(x) - np.exp(-x) / np.exp(x) + np.exp(-x)) # equals to np.tanh(x)
    return np.tanh(x)
  else:
    return (1 - (tanh(x, False)**2))
    # return (1 - (x**2))

# general functions
def makeMatrix(col, row, fill=0.0):
  """
  intialize matrix with col*row with value of fill
  """
  matrix = []
  for i in range(col):
    matrix.append([fill] * row)
  return matrix

In [113]:
# NN
class NN:
  """
  Class for Neural Network
  """
  def __init__(self, num_x, num_yh, num_yo, bias=1):
    """
    constructor of the class
    num_x : input x
    num_yh : hidden layer output
    num_yo : output
    """

    self.bias = bias
    self.num_x = num_x + self.bias;
    self.num_yh = num_yh
    self.num_yo = num_yo

    # initialize activation variables
    self.activation_input = [1.0] * self.num_x
    self.activation_hidden = [1.0] * self.num_yh
    self.activation_output = [1.0] * self.num_yo

    # initialize weight in
    self.weight_in = makeMatrix(self.num_x, self.num_yh)
    for i in range(self.num_x):
      for j in range(self.num_yh):
        self.weight_in[i][j] = rand.random()

    # initialize weight out
    self.weight_out = makeMatrix(self.num_yh, self.num_yo)
    for j in range(self.num_yh):
      for k in range(self.num_yo):
        self.weight_out[j][k] = rand.random()

    # initialize momentum SGD
    self.gradient_in = makeMatrix(self.num_x, self.num_yh)
    self.gradient_out = makeMatrix(self.num_yh, self.num_yo)

  def update(self, inputs):
    # update input layer activation function
    for i in range(self.num_x - self.bias):
      self.activation_input[i] = inputs[i]

    # update hidden layer activation function
    for j in range(self.num_yh):
      sum = 0.0
      for i in range(self.num_x):
        sum = sum + self.activation_input[i] * self.weight_in[i][j]
      self.activation_hidden[j] = tanh(sum, False)

    # update output layer activation function
    for k in range(self.num_yo):
      sum = 0.0
      for j in range(self.num_yh):
        sum = sum + self.activation_hidden[j] * self.weight_out[j][k]
      self.activation_output[k] = tanh(sum, False)

    return self.activation_output[:] # shallow copy

  def back_propagation(self, targets):
    """
    Delta Rule
    https://en.wikipedia.org/wiki/Delta_rule

    error = (target - output)
    deltas = (derivation of activation function) * error

    Stochastic Gradient Descent with momentum
    http://aikorea.org/cs231n/neural-networks-3/#sgd

    Momentum update
    velocity = mu * v - learning_rate * dx
    x += velocity
    """

    # output deltas
    output_deltas = [0.0] * self.num_yo
    for k in range(self.num_yo):
      err = targets[k] - self.activation_output[k]
      output_deltas[k] = err * tanh(self.activation_output[k], True)

    # hidden layer deltas
    hidden_deltas = [0.0] * self.num_yh
    for j in range(self.num_yh):
      err = 0.0
      for k in range(self.num_yo):
        err = err + output_deltas[k] * self.weight_out[j][k]
      hidden_deltas[j] = err * tanh(self.activation_hidden[j], True)

    # update gradient out
    for j in range(self.num_yh):
      for k in range(self.num_yo):
        gradient = output_deltas[k] * self.activation_hidden[j]
        velocity = mo * self.gradient_out[j][k] - (lr * gradient)
        self.weight_out[j][k] += velocity
        self.gradient_out[j][k] = gradient

    # update gradient in
    for i in range(self.num_x):
      for j in range(self.num_yh):
        gradient = hidden_deltas[j] * self.activation_input[i]
        velocity = mo * self.gradient_in[i][j] - (lr * gradient)
        self.weight_in[i][j] += velocity
        self.gradient_in[i][j] = gradient

    # mean square error
    error = 0.0
    for i in range(len(targets)):
      error = error + (1/2) * ((targets[i] - self.activation_output[i])**2)
    return error

  def train(self, patterns):
    for i in range(epochs):
      error = 0.0
      for p in patterns:
        inputs = p[0]
        targets = p[1]
        # update input
        self.update(inputs)
        # back propagation
        error = error + self.back_propagation(targets)
      if i % 500 == 0:
        print('error: %-.5f' % error)

  def result(self, patterns):
    for p in patterns:
      print('Input: %s, Predict: %s' % (p[0], self.update(p[0])))

In [114]:
# main
nn = NN(2,2,1)

nn.train(data)

nn.result(data)

error: 0.50922
error: 0.03127
error: 0.02056
error: 0.00154
error: 0.00135
error: 0.00118
error: 0.00129
error: 0.00139
error: 0.00151
error: 0.00188
Input: [0, 0], Predict: [0.037948898812758]
Input: [0, 1], Predict: [0.9999804703490038]
Input: [1, 0], Predict: [0.9999963631125318]
Input: [1, 1], Predict: [0.07078124691477407]


Q1. shouldn't derivative form of acivation function be like below?  

sigmoid : ``sigmoid(x, False) * (1 - sigmoid(x, False))``

tanh : ``(1 - (tanh(x, False)**2))``

Q2. What is ``gradient_in`` and ``gradient_out`` ? isn't it for saving previous velocity in Stochastic Gradient Descent with momentum?

