In [2]:
import time
import numpy as np

### 유틸리티 함수

In [14]:
# Step size added to a single parameter when DNN.calc_gradient estimates
# d(loss)/d(param) with a forward finite difference.
epsilon = 1e-4

def _t(x):
    """Return the transpose of ``x`` (thin wrapper around ``np.transpose``)."""
    transposed = np.transpose(x)
    return transposed

def _m(A, B):
    """Return the matrix product of ``A`` and ``B`` (thin wrapper around ``np.matmul``)."""
    product = np.matmul(A, B)
    return product

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated element-wise via NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``."""
    squared_diff = np.square(h - y)
    return 0.5 * np.mean(squared_diff)

### Sigmoid 구현

In [1]:
class Sigmoid:
    """Sigmoid activation object that caches its latest output for ``grad``."""

    def __init__(self):
        # Placeholder until the first forward call; grad() evaluates to 0 for it.
        self.last_o = 1

    def __call__(self, x):
        """Apply ``1 / (1 + exp(-x))`` to ``x``, cache the result and return it."""
        activated = 1 / (1.0 + np.exp(-x))
        self.last_o = activated
        return activated

    def grad(self):
        """Return the sigmoid derivative at the cached output: ``o * (1 - o)``."""
        out = self.last_o
        return out * (1 - out)

### MSE

In [3]:
class MeanSquaredError:
    """Loss object for ``1/2 * mean((h - y)^2)`` that can also report its gradient.

    ``__call__`` caches ``h - y`` so that ``grad`` can be evaluated afterwards
    without being handed the inputs again.
    """

    def __init__(self):
        # gradient placeholder (not read by this class itself)
        self.dh = 1
        # Difference from the latest call; 1 until the loss is evaluated once.
        self.last_diff = 1

    def __call__(self, h, y):
        """Return ``1/2 * mean((h - y)^2)`` and remember ``h - y`` for ``grad``."""
        self.last_diff = h - y
        return 1 / 2 * np.mean(np.square(self.last_diff))

    def grad(self):
        """Return d(loss)/dh for the most recent call.

        The loss averages over every element, so the derivative is
        ``(h - y) / N`` with ``N`` the number of elements, not the bare
        ``h - y`` (which would be the gradient of ``1/2 * sum(...)``).
        """
        # np.size also handles the scalar placeholder / scalar inputs.
        return self.last_diff / np.size(self.last_diff)

### 뉴런 구현

In [15]:
class Neuron:
    """Fully connected layer computing ``a(W^T x + b)``.

    Parameters
    ----------
    W : ndarray of shape (num_input, num_output)
        Weight matrix; it is transposed before being multiplied with ``x``.
    b : ndarray of shape (num_output,)
        Bias added to ``W^T x``.
    a_obj : class or callable
        Activation. A class (e.g. ``Sigmoid``) is instantiated so the layer
        owns its own activation state. Anything else -- a plain function such
        as ``sigmoid`` or an already constructed activation object -- is used
        as-is. The analytic helpers (``grad``, ``grad_W``, ``grad_b``) require
        the activation to expose a ``grad()`` method; the forward pass does not.
    """

    def __init__(self, W, b, a_obj):
        # model params
        self.W = W
        self.b = b
        # Only classes are instantiated: calling a function or an existing
        # activation instance with no arguments would raise TypeError.
        self.a = a_obj() if isinstance(a_obj, type) else a_obj

        # gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))

        # caches written by the forward pass
        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros_like(self.b)

    def __call__(self, x):
        """Forward pass: cache ``x`` and ``W^T x + b``, return the activated value."""
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)

    def grad(self):
        """Return ``W`` scaled column-wise by the activation derivative."""
        return self.W * self.a.grad()

    def grad_W(self, dh):
        """Return the gradient w.r.t. ``W`` given the upstream gradient ``dh``.

        Entry ``[i, j]`` equals ``dh[j] * a'(h_j) * last_x[i]`` because
        ``h = W^T x + b``; the result has the same shape as ``W``.
        """
        upstream = np.asarray(dh) * self.a.grad()
        return np.outer(self.last_x, upstream)

    def grad_b(self, dh):
        """Return the gradient w.r.t. ``b``: ``dh * a'(h)`` (since dh/db = 1)."""
        return dh * self.a.grad() * 1
    

### DNN

In [27]:
class DNN:
    """Stack of ``Neuron`` layers trained with numerically estimated gradients.

    Parameters
    ----------
    hidden_depth : int
        Number of hidden layers (the first one maps the input, the remaining
        ``hidden_depth - 1`` map ``num_neuron -> num_neuron``).
    num_neuron : int
        Width of every hidden layer.
    num_input : int
        Size of the input vector.
    num_output : int
        Size of the output vector.
    activation : class or callable
        Passed unchanged to every ``Neuron`` as its activation.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        def init_var(i, o):
            # Small Gaussian weights of shape (i, o) and zero biases of shape (o,).
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()

        # first hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # remaining hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        """Forward pass: feed ``x`` through every layer in order."""
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, x, y, loss_func):
        """Estimate d(loss)/d(param) for every weight and bias by forward differences.

        Each parameter is nudged by ``epsilon``, the loss is re-evaluated, and
        ``(perturbed_loss - loss) / epsilon`` is stored in the owning layer's
        ``dW`` / ``db``. Returns the unperturbed loss.
        """
        loss = loss_func(self(x), y)

        for layer in self.sequence:
            # One pass over the weights and one over the biases per layer;
            # the bias estimates do not depend on which weight row is visited.
            self._fill_numerical_grad(layer.W, layer.dW, x, y, loss_func, loss)
            self._fill_numerical_grad(layer.b, layer.db, x, y, loss_func, loss)
        return loss

    def _fill_numerical_grad(self, params, grads, x, y, loss_func, base_loss):
        """Write the forward-difference gradient of every entry of ``params`` into ``grads``."""
        for idx in np.ndindex(*params.shape):
            saved = params[idx]
            # Perturb in place instead of copying the array and rebuilding a layer.
            params[idx] = saved + epsilon
            try:
                perturbed_loss = loss_func(self(x), y)
            finally:
                # Always restore the exact original value, even if the forward pass fails.
                params[idx] = saved
            grads[idx] = (perturbed_loss - base_loss) / epsilon
                

### Gradient Descent

In [28]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Run one gradient-descent step on ``network`` and return the loss.

    ``network.calc_gradient`` fills each layer's ``dW`` / ``db``; every layer's
    ``W`` and ``b`` are then updated in place by ``alpha`` times those gradients.
    The returned loss is the value reported by ``calc_gradient``.
    """
    current_loss = network.calc_gradient(x, y, loss_obj)
    for neuron in network.sequence:
        neuron.W -= alpha * neuron.dW
        neuron.b -= alpha * neuron.db
    return current_loss

### Test

In [30]:
# One random input vector (10 values) and one random target vector (2 values).
# NOTE(review): no RNG seed is set, so data and initial weights differ on every run.
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

# 5 hidden layers of 32 units mapping 10 inputs to 2 outputs.
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

# Wall-clock start, used for the elapsed-time report after the loop.
t = time.time()

# 100 gradient-descent steps on the single (x, y) pair with learning rate 0.01.
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    # The printed value is the loss on the same (x, y) used for the update;
    # the format string has no separator between "loss" and the number.
    print('Epoch {}: Test loss{}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))

Epoch 0: Test loss0.7885620805778328
Epoch 1: Test loss0.776781443658949
Epoch 2: Test loss0.7651782456011257
Epoch 3: Test loss0.7537505190823045
Epoch 4: Test loss0.7424962759971061
Epoch 5: Test loss0.7314135094425132
Epoch 6: Test loss0.7205001957050653
Epoch 7: Test loss0.7097542962407211
Epoch 8: Test loss0.6991737596405375
Epoch 9: Test loss0.6887565235748756
Epoch 10: Test loss0.6785005167099863
Epoch 11: Test loss0.6684036605906546
Epoch 12: Test loss0.6584638714840345
Epoch 13: Test loss0.6486790621796903
Epoch 14: Test loss0.6390471437413442
Epoch 15: Test loss0.629566027206956
Epoch 16: Test loss0.6202336252338465
Epoch 17: Test loss0.6110478536857811
Epoch 18: Test loss0.6020066331600094
Epoch 19: Test loss0.5931078904524391
Epoch 20: Test loss0.5843495599592796
Epoch 21: Test loss0.5757295850141226
Epoch 22: Test loss0.5672459191601822
Epoch 23: Test loss0.5588965273567525
Epoch 24: Test loss0.5506793871204074
Epoch 25: Test loss0.5425924896007346
Epoch 26: Test loss0.534