## 수치 미분을 이용한 심층 신경망 학습

In [1]:
import time
import numpy as np

## 유틸리티 함수

In [2]:
# Step size used for the finite-difference (numerical) gradient estimates.
epsilon = 1e-4

def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so large-magnitude inputs cannot overflow ``np.exp`` (the direct formula
    overflows and emits a RuntimeWarning for large negative ``x``).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) never exceeds 1, regardless of the sign or size of x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function,
    # rewritten so that only the non-overflowing exponential is needed.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

def mean_squared_error(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``."""
    residual = h - y
    mean_sq = np.mean(np.square(residual))
    return mean_sq / 2

## 뉴런 구현

In [3]:
class Neuron:
    """A single layer that computes ``a(W^T x + b)``.

    Attributes:
        W: Weight array; it is transposed before being multiplied with the input.
        b: Bias array added to the product.
        a: Activation callable applied to the result.
        dW, db: Gradient buffers, zero-initialised with the shapes of W and b.
    """

    def __init__(self, W, b, a):
        # Parameters and activation function.
        self.W, self.b, self.a = W, b, a

        # Gradient storage matching each parameter's shape and dtype.
        self.dW, self.db = np.zeros_like(W), np.zeros_like(b)

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output."""
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)
        

## 심층신경망 구현

In [19]:
class DNN:
    """Fully connected network whose gradients are estimated numerically.

    ``self.sequence`` holds the layers in forward order: one layer mapping
    the input to ``num_neuron`` units, ``hidden_depth - 1`` further
    ``num_neuron -> num_neuron`` layers, and one output layer.  Every layer,
    including the output layer, uses the same ``activation``.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        def init_var(i, o):
            # W ~ N(0, 0.01^2) with shape (i, o); b = zeros with shape (o,).
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()

        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Remaining hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def _forward_from(self, x, start):
        """Feed ``x`` through layers ``start`` to the last and return the output."""
        for layer in self.sequence[start:]:
            x = layer(x)
        return x

    def __call__(self, x):
        """Run a full forward pass on ``x``."""
        return self._forward_from(x, 0)

    def calc_gradient(self, x, y, loss_func):
        """Estimate the loss gradient for every weight and bias.

        Uses the forward difference ``(L(p + epsilon) - L(p)) / epsilon`` with
        the module-level ``epsilon``, perturbing one scalar parameter at a
        time.  The estimates are written into each layer's ``dW`` and ``db``.

        Args:
            x: Network input.
            y: Target passed to ``loss_func``.
            loss_func: Callable ``loss_func(prediction, y)`` returning a scalar.

        Returns:
            The loss at the current (unperturbed) parameters.
        """
        # Record the input each layer receives at the current parameters.
        # Perturbing layer k cannot change the outputs of layers before k,
        # so these are computed once instead of once per parameter.
        layer_inputs = []
        h = x
        for layer in self.sequence:
            layer_inputs.append(h)
            h = layer(h)
        loss = loss_func(h, y)

        for layer_id, layer in enumerate(self.sequence):  # for each layer
            layer_input = layer_inputs[layer_id]
            # Weights first, then biases.
            for param, grad in ((layer.W, layer.dW), (layer.b, layer.db)):
                for idx in np.ndindex(param.shape):  # for each scalar entry
                    original = param[idx]
                    # Nudge this one entry in place, evaluate, then restore
                    # the exact original value (even if evaluation raises).
                    param[idx] = original + epsilon
                    try:
                        perturbed = self._forward_from(layer_input, layer_id)
                    finally:
                        param[idx] = original

                    # (L(p + e) - L(p)) / epsilon
                    grad[idx] = (loss_func(perturbed, y) - loss) / epsilon

        return loss





## 경사하강 학습법

In [20]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent step on ``network`` and return the loss.

    ``network.calc_gradient`` is called first to fill each layer's ``dW`` and
    ``db``; every layer's ``W`` and ``b`` are then updated in place by
    ``alpha`` times the corresponding gradient.  The value returned by
    ``calc_gradient`` is passed back to the caller.
    """
    current_loss = network.calc_gradient(x, y, loss_obj)
    for unit in network.sequence:
        unit.W -= alpha * unit.dW
        unit.b -= alpha * unit.db
    return current_loss

## 동작 테스트

In [None]:
# Smoke test: repeatedly fit one random (input, target) pair and report the
# loss after each gradient-descent step.
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

# perf_counter is a monotonic, high-resolution clock, so the measured interval
# is not affected by system clock adjustments the way time.time() can be.
t = time.perf_counter()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.perf_counter() - t))

Epoch 0: Test loss 0.49067469887836956
Epoch 1: Test loss 0.4879254606874828
Epoch 2: Test loss 0.48519367996902363
Epoch 3: Test loss 0.48247966038542345
Epoch 4: Test loss 0.4797836909975817
Epoch 5: Test loss 0.4771060462533734
Epoch 6: Test loss 0.4744469860034807
Epoch 7: Test loss 0.4718067555433753
Epoch 8: Test loss 0.46918558568115953
Epoch 9: Test loss 0.4665836928294646
Epoch 10: Test loss 0.46400127912082956
Epoch 11: Test loss 0.4614385325452896
Epoch 12: Test loss 0.45889562710860293
Epoch 13: Test loss 0.45637272301066334
Epoch 14: Test loss 0.4538699668420847
Epoch 15: Test loss 0.4513874917984585
Epoch 16: Test loss 0.4489254179102646
Epoch 17: Test loss 0.4464838522879468
Epoch 18: Test loss 0.444062889380162
Epoch 19: Test loss 0.44166261124476985
Epoch 20: Test loss 0.4392830878306596
Epoch 21: Test loss 0.43692437726975675
Epoch 22: Test loss 0.43458652617743637
Epoch 23: Test loss 0.4322695699607737
Epoch 24: Test loss 0.4299735331334792
Epoch 25: Test loss 0.4276