<a href="https://colab.research.google.com/github/comojin1994/Deep_Learning_Study/blob/master/3step_lecture/Training_NN_with_BP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
import time
tf.__version__

'2.1.0'

### Utility Function

In [0]:
def _t(x): return np.transpose(x)

def _m(A, B): return np.matmul(A, B)

### Sigmoid

In [0]:
class Sigmoid:
    def __init__(self):
        self.last_o = 1

    def __call__(self, x):
        self.last_o = 1 / (1. + np.exp(-x))
        return self.last_o

    def grad(self):
        return self.last_o * (1 - self.last_o)

### MSE

In [0]:
class MSE:
    def __init__(self):
        # gradient
        self.dh = 1
        self.last_diff = 1

    def __call__(self, h, y):
        self.last_diff = h - y
        return 1 / 2 * np.mean(np.square(h - y))
    
    def grad(self):
        return self.last_diff

### Neuron

In [0]:
class Neuron:
    def __init__(self, W, b, a_obj):
        # Model parameter
        self.W = W
        self.b = b
        self.a = a_obj()

        # gradient
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))

        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))

    def __call__(self, x):
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)

    def grad(self): # dy/dh = W
        return self.W * self.a.grad()

    def grad_W(self, dh): # dy/dW = x
        grad = np.ones_like(self.W)
        grad_a = self.a.grad()
        for j in range(grad.shape[1]):
            grad[:, j] = dh[j] * grad_a[j] * self.last_x
        return grad

    def grad_b(self, dh):
        return dh * self.a.grad()        

### Model

In [0]:
class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=Sigmoid):
        def init_var(i, o):
            return np.random.normal(0., 0.01, (i, o)), np.zeros((o, ))
        
        self.sequence = list()
        
        # First layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))
        
        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_gradient(self, loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)
        
        # BP
        for i in range(len(self.sequence) - 1, 0, -1):
            l1 = self.sequence[i]
            l0 = self.sequence[i - 1]

            l0.dh = _m(l0.grad(), l1.dh)
            l0.dW = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)

        self.sequence.remove(loss_obj)

### Gradient Descent

In [0]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x), y) # Forward
    network.calc_gradient(loss_obj) # BP
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

### Training

In [37]:
x = np.random.normal(0., 1., (10, ))
y = np.random.normal(0., 1., (2, ))

t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=Sigmoid)
loss_obj = MSE()

for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, alpha=0.01)
    print('Epoch {}, Test Loss: {}'.format(epoch, loss))
print('{} seconds elapse.'.format(time.time() - t))

Epoch 0, Test Loss: 0.6743494511218002
Epoch 1, Test Loss: 0.6668008735268547
Epoch 2, Test Loss: 0.6593406193678844
Epoch 3, Test Loss: 0.6519727851815206
Epoch 4, Test Loss: 0.6447011333231917
Epoch 5, Test Loss: 0.6375290876956003
Epoch 6, Test Loss: 0.6304597322520952
Epoch 7, Test Loss: 0.6234958121285906
Epoch 8, Test Loss: 0.6166397372263444
Epoch 9, Test Loss: 0.6098935880428367
Epoch 10, Test Loss: 0.6032591235292986
Epoch 11, Test Loss: 0.5967377907409557
Epoch 12, Test Loss: 0.5903307360394767
Epoch 13, Test Loss: 0.5840388176059621
Epoch 14, Test Loss: 0.5778626190264762
Epoch 15, Test Loss: 0.5718024637199841
Epoch 16, Test Loss: 0.5658584299898552
Epoch 17, Test Loss: 0.5600303664941703
Epoch 18, Test Loss: 0.5543179079461995
Epoch 19, Test Loss: 0.5487204908739629
Epoch 20, Test Loss: 0.5432373692861494
Epoch 21, Test Loss: 0.5378676301103335
Epoch 22, Test Loss: 0.5326102082879234
Epoch 23, Test Loss: 0.5274639014282492
Epoch 24, Test Loss: 0.5224273839413297
Epoch 25, 