###  Back Prop

In [10]:
import time
import numpy as np

#####  유틸리티 함수

In [11]:
def _t(x):
    return np.transpose(x)
def _m(A,B):
    return np.matmul(A,B)

#####  Sigmoid

In [12]:
class Sigmoid:
    def __init__(self):
        # 마지막 출력을 저장
        self.last_o = 1 
        
    def __call__(self, x):
        self.last_o = 1 / (1.0 + np.exp(-x))
        return self.last_o
    
    def grad(self): # sigmoid(x)(1-sigmoid(x))
        return self.last_o*(1-self.last_o)

##### Mean Squared Error 

In [13]:
class MeanSquaredError:
    def __init__(self):
        # gradiant(기울기,미분값) 저장
        self.dh = 1
        self.last_diff = 1
        
    def __call__(self, h, y): # 1/2 * mean((h - y)^2)
        self.last_diff = h - y
        return 1 / 2 *np.mean(np.square(h-y))
    def grad(self): # h - y
        return self.last_diff

#####  Neuron

In [32]:
class Neuron:
    def __init__(self, W, b, a_obj):
            # model parameters
            self.W = W
            self.b = b
            self.a = a_obj()
            
            # gradient
            self.dW = np.zeros_like(self.W)
            self.db = np.zeros_like(self.b)
            self.dh = np.zeros_like(_t(self.W)) # 이전 입력으로 인한 gradient
            
            self.last_x = np.zeros((self.W.shape[0]))
            self.last_h = np.zeros((self.W.shape[1]))
    
    def __call__(self, x):
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)
    
    # y = Wx + b 
    def grad(self): # x로 미분 -> dy/dh = W
        return self.W * self.a.grad() # chain rule에 의해 activation gradient는 항상 곱해짐

    def grad_W(self, dh): # W로 미분 
        grad = np.ones_like(self.W)
        grad_a = self.a.grad() 
        for j in range(grad.shape[1]): # W가 아니라 출력 하나하나를 기준으로 함
            # y = w^Tx + b 를 미분
            # dy/dw = x
            grad[:,j] = dh[j] * grad_a[j] * self.last_x
            return grad
    
    def grad_b(self, dh): # b로 미분 -> dy/dh = 1 
        # dh : 현재까지 넘어온 gradient
        return dh * self.a.grad() * 1

##### Deep Neural Network

In [36]:
class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=Sigmoid):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i,o)), np.zeros((o,))
        self.sequence = list()
        #First Hidden Layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))
        
        #Hidden Layers
        for index in range(hidden_depth):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))
            
        # Output Layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))
        
    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x
    def calc_gradient(self, loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)

        # back-prop loop
        for i in range(len(self.sequence) - 1, 0, -1): # 마지막 레이어부터 back-prop 함
            l1 = self.sequence[i]
            l0 = self.sequence[i - 1]

            l0.dh = _m(l0.grad(), l1.dh)
            l0.dw = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)

        self.sequence.remove(loss_obj)
            

##### Gradient Descent

In [37]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x), y) # Forward inference
    network.calc_gradient(loss_obj) # Back-propagation
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

##### Test

In [38]:
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=Sigmoid)
loss_obj = MeanSquaredError()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, alpha=0.01)
    print("Epoch {}: Test loss {}".format(epoch, loss))
print("{} seconds elapsed.".format(time.time() - t))

Epoch 0: Test loss 0.003200862554230688
Epoch 1: Test loss 0.0031968631465908795
Epoch 2: Test loss 0.0031928687555775764
Epoch 3: Test loss 0.003188879375120474
Epoch 4: Test loss 0.0031848949991555163
Epoch 5: Test loss 0.0031809156216249415
Epoch 6: Test loss 0.0031769412364772358
Epoch 7: Test loss 0.0031729718376671476
Epoch 8: Test loss 0.003169007419155678
Epoch 9: Test loss 0.0031650479749101004
Epoch 10: Test loss 0.0031610934989039228
Epoch 11: Test loss 0.003157143985116907
Epoch 12: Test loss 0.0031531994275350718
Epoch 13: Test loss 0.0031492598201506854
Epoch 14: Test loss 0.003145325156962229
Epoch 15: Test loss 0.003141395431974452
Epoch 16: Test loss 0.0031374706391983235
Epoch 17: Test loss 0.003133550772651051
Epoch 18: Test loss 0.0031296358263560816
Epoch 19: Test loss 0.003125725794343087
Epoch 20: Test loss 0.003121820670647937
Epoch 21: Test loss 0.0031179204493127764
Epoch 22: Test loss 0.003114025124385923
Epoch 23: Test loss 0.0031101346899219265
Epoch 24: Te