In [23]:
import time
import numpy as np

In [29]:
# Step size of the forward finite difference used by DNN.calc_gradient:
# each parameter is shifted by this amount and the loss change is divided by it.
epsilon = 1e-4

def _t(x):
    return np.transpose(x)

def _m(A,B):
    return np.matmul(A,B)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook form computes ``exp(-x)`` directly, which overflows to
    ``inf`` (and emits a RuntimeWarning) for large negative ``x``. Here the
    same value is obtained as ``exp(-log(1 + exp(-x)))``; ``np.logaddexp``
    evaluates ``log(exp(0) + exp(-x))`` without forming the huge intermediate.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        Element-wise sigmoid of ``x``, with values in ``[0, 1]``.
    """
    # log(1 + exp(-x)) is the softplus of -x; it stays finite for every finite x.
    return np.exp(-np.logaddexp(0.0, np.negative(x)))

def mean_squared_error(h,y):
    """Return the mean of the squared element-wise differences between ``h`` and ``y``.

    Args:
        h: prediction(s).
        y: target(s); combined with ``h`` via ``h - y``.

    Returns:
        The average of ``(h - y) ** 2`` over all elements.
    """
    residual = h - y
    squared = np.square(residual)
    return np.mean(squared)

In [30]:
class Dense:
    """One fully connected layer computing ``a(W^T x + b)``.

    Attributes:
        W: weight array; it is transposed before being multiplied with the input.
        b: bias added to the product.
        a: activation callable applied to the result.
        dW: gradient buffer with the shape and dtype of ``W``, initially zero.
        db: gradient buffer with the shape and dtype of ``b``, initially zero.
    """

    def __init__(self, W, b, a):
        self.W, self.b, self.a = W, b, a

        # Gradient slots start at zero; they are filled in from outside the layer.
        self.dW = np.zeros_like(W)
        self.db = np.zeros_like(b)

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output."""
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)

In [31]:
class DNN:
    """Stack of ``Dense`` layers whose gradients are estimated numerically.

    Layout built by the constructor:
      * one layer ``num_input -> num_neuron``,
      * ``hidden_depth - 1`` layers ``num_neuron -> num_neuron``,
      * one layer ``num_neuron -> num_output``.
    Every layer, including the last one, uses the same ``activation``.
    """

    def __init__(self, 
                 hidden_depth, num_neuron, num_input, num_output, 
                 activation = sigmoid):
        """Create the layers with small random weights and zero biases.

        Args:
            hidden_depth: number of layers of width ``num_neuron``.
            num_neuron: width of every hidden layer.
            num_input: size of the input vector.
            num_output: size of the output vector.
            activation: callable applied by every layer.
        """
        def init_var(i, o):
            # Weights ~ N(0, 0.01) with shape (i, o); biases are zeros of shape (o,).
            return np.random.normal(0.0,0.01,(i,o)), np.zeros((o,))
        
        self.sequence = list()
        
        # Input layer.
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Dense(W,b,activation))
        
        # Remaining hidden layers.
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Dense(W,b,activation))
            
        # Output layer.
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Dense(W,b,activation))
        
    def __call__(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_gradient(self, x, y, loss_func):
        """Estimate d(loss)/d(parameter) for every weight and bias.

        A forward difference is used: each parameter is shifted by the
        module-level ``epsilon``, the loss is re-evaluated, and
        ``(perturbed_loss - loss) / epsilon`` is written to the matching
        entry of ``layer.dW`` / ``layer.db``.

        Args:
            x: network input.
            y: target passed to ``loss_func``.
            loss_func: callable ``loss_func(prediction, y)`` returning a scalar.

        Returns:
            The loss at the current (unperturbed) parameters.
        """
        def forward_from(start, inputs):
            # Evaluate only the layers from ``start`` onwards.
            out = inputs
            for layer in self.sequence[start:]:
                out = layer(out)
            return out

        def perturbed_loss(start, inputs, param, index):
            # Shift one entry in place, evaluate the loss, then restore the
            # exact original value even if the evaluation raises.
            original = param[index]
            param[index] = original + epsilon
            try:
                return loss_func(forward_from(start, inputs), y)
            finally:
                param[index] = original

        # One unperturbed forward pass, remembering the input of every layer.
        # Layers before the perturbed one are unaffected by the perturbation,
        # so their outputs never need to be recomputed.
        layer_inputs = list()
        out = x
        for layer in self.sequence:
            layer_inputs.append(out)
            out = layer(out)
        loss = loss_func(out, y)

        for layer_id, layer in enumerate(self.sequence):
            inputs = layer_inputs[layer_id]

            for index in np.ndindex(layer.W.shape):
                shifted = perturbed_loss(layer_id, inputs, layer.W, index)
                layer.dW[index] = (shifted - loss) / epsilon

            for index in np.ndindex(layer.b.shape):
                shifted = perturbed_loss(layer_id, inputs, layer.b, index)
                layer.db[index] = (shifted - loss) / epsilon

        return loss

In [32]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent update on every layer of ``network``.

    Args:
        network: object exposing ``calc_gradient(x, y, loss_obj)`` and a
            ``sequence`` of layers with ``W``, ``b``, ``dW`` and ``db``.
        x: input handed to ``calc_gradient``.
        y: target handed to ``calc_gradient``.
        loss_obj: loss callable handed to ``calc_gradient``.
        alpha: learning rate.

    Returns:
        The value returned by ``network.calc_gradient`` (computed before the update).
    """
    loss = network.calc_gradient(x, y, loss_obj)
    step = -alpha
    for layer in network.sequence:
        # Augmented assignment keeps the update in place for array parameters.
        layer.W += step * layer.dW
        layer.b += step * layer.db
    return loss

In [33]:
# Seed NumPy's global RNG so the sample below and the initial weights (both
# drawn with np.random.normal) are identical on every Restart & Run All.
SEED = 0
np.random.seed(SEED)

# A single random training sample: 10 input values and 2 target values.
x = np.random.normal(0.0,1.0,(10,))
y = np.random.normal(0.0,1.0,(2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

# perf_counter() is monotonic, so the elapsed time cannot be distorted by
# system clock adjustments during the run.
t = time.perf_counter()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    # NOTE: despite the label, this is the loss on the training sample (x, y),
    # measured before the epoch's parameter update.
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed'.format(time.perf_counter()-t))

Epoch 0: Test loss 0.1563400725152985
Epoch 1: Test loss 0.15459126238882911
Epoch 2: Test loss 0.15286247430645988
Epoch 3: Test loss 0.1511536966969475
Epoch 4: Test loss 0.14946490662211548
Epoch 5: Test loss 0.14779607017116858
Epoch 6: Test loss 0.14614714285970398
Epoch 7: Test loss 0.144518070032078
Epoch 8: Test loss 0.14290878726587614
Epoch 9: Test loss 0.14131922077737594
Epoch 10: Test loss 0.13974928782666332
Epoch 11: Test loss 0.13819889712163735
Epoch 12: Test loss 0.13666794921959838
Epoch 13: Test loss 0.13515633692571277
Epoch 14: Test loss 0.1336639456873806
Epoch 15: Test loss 0.13219065398374347
Epoch 16: Test loss 0.13073633370954177
Epoch 17: Test loss 0.12930085055277646
Epoch 18: Test loss 0.12788406436543998
Epoch 19: Test loss 0.1264858295268727
Epoch 20: Test loss 0.12510599529915573
Epoch 21: Test loss 0.12374440617421399
Epoch 22: Test loss 0.12240090221226496
Epoch 23: Test loss 0.12107531937111012
Epoch 24: Test loss 0.11976748982629226
Epoch 25: Test l