In [3]:
import time
import numpy as np

###  Utility Function

In [12]:
epsilon = 0.0001

def _t(x):
    return np.transpose(x)

def _m(A,B):
    return np.matmul(A,B)

def sigmoid(x):
    return 1/ (1+np.exp(-x))

def mean_squared_error(h, y):
    return 1 / 2 * np.mean(np.squared(h - y))
    

### Define Neuron 

#### numpy function
###### numpy.random.normal(loc=0.0, scale=1.0, size=None)
loc = float or array_like of floats
mean of the distribution

scale = float or array_like of floats
standard deviation(spread or "width") of distribution. Must be non-negative

size = int or tuple of ints, optional
Output shape. if the given shape is `(m,n,k)`, then `m*n*k` smaples are drawn. if size is None, a single value is returned if `loc` and `scale` are both scalars.

###### numpy.zeros_like(a, dtype=None, order='K', subok=True, shape=None)
a = array_like
the shape and data-type of a define these same attributes of the retured array
```python
>>> x = np.arange(6)
>>> x = x.reshape((2, 3))
>>> x
array([[0, 1, 2],
       [3, 4, 5]])

>>> np.zeros_like(x)
array([[0, 0, 0],
       [0, 0, 0]])       
```

In [35]:
class Neuron:
    def __init__(self, W, b, a):
        # Model Parametor
        self.W = W
        self.b = b 
        self.a = a # activation function
        
        # Gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
    def __call__(self, x):
        return self.a(_m(_t(self.W), x) + self.b) # activation((W^T)x + b)

###  Deep Neural Network

In [36]:
class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i,o)), np.zeros((o,))
        self.sequence = list()
        
        # Fisrt hidden layer
        W, b= init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W,b, activation))
        
        # Hidden layers
        for _ in range(hidden_depth):
            W, b= init_var(num_input, num_neuron)
            self.sequence.append(Neuron(W,b, activation))
        
        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W,b, activation))
        
    def __call__(self,x):
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_gradient(self, x, y, loss_func):
        def get_new_sequence(layer_index, new_neuron):
            new_sequence = list()
            for i, layer in enumerate(self.sequence):
                if i == layer_index :
                    new_sequence.append(new_neuron)
                else:
                    new_sequence.append(layer)
            return new_sequence
        
        def eval_sequence(x, sequence):
            for layer in sequence:
                x = layer(x)
            return x
        
        # 기준이 되는 현재 상태의 loss 계산
        loss = loss_func(self(x), y) # 추정값, 결과값
        # loss = loss_func(self.__init__(x), y) 과 동일
        
        for layer_id, layer in enumerate(self.sequence):
            for w_i, w in enumerate(layer.W): # iterate W (row)
                for w_j, ww in enumerate(w): # iterate w (col)
                    W = np.copy(layer.W)
                    W[w_i][w_j] = ww + epsilon
                    
                    new_neuron = Neuron(W, layer.b, layer.a)
                    new_seq = get_new_sequence(layer_id, new_neuron)
                    h = eval_sequence(x, new_seq)
                    
                    num_grad = (loss_func(h,y) - loss) / epsilon # f(x+eps) - f(x) / eps
                    layer.dW[w_i][w+j] = grad
                for b_j, bb in enumerate(layer.b): # iterate w (col)
                    b = np.copy(layer.b)
                    b[b_i] = bb + epsilon
                    
                    new_neuron = Neuron(layer.W, b, layer.a)
                    new_seq = get_new_sequence(layer_id, new_neuron)
                    h = eval_sequence(x, new_seq)
                    
                    num_grad = (loss_func(h,y) - loss) / epsilon # f(x+eps) - f(x) / eps
                    layer.db[w_i][w+j] = grad

###  Gradient Descent

In [37]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = network.calc_gradient(x,y,loss_obj)
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

###  동작 테스트

In [38]:
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

t = time.time()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    print('Epoch {}: Test loss{}'.format(epoch, loss))
print('{} seconds elapsed.').format(time.time() - t)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 32 is different from 10)