In [1]:
import time
import numpy as np

In [2]:
def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

In [3]:
class Sigmoid:
    """Logistic sigmoid activation that caches its last output for backprop."""

    def __init__(self):
        # Placeholder until the first forward pass; with this value grad()
        # evaluates to 1 * (1 - 1) = 0.
        self.last_o = 1

    def __call__(self, x):
        """Compute sigmoid(x) element-wise, cache the result and return it.

        Args:
            x: NumPy array or scalar of pre-activations.

        Returns:
            1 / (1 + exp(-x)), also stored in ``self.last_o`` for ``grad``.
        """
        # sigmoid(x) = 1 / (1 + exp(-x)) = exp(-log(1 + exp(-x))).
        # np.logaddexp(0, -x) evaluates log(exp(0) + exp(-x)) without ever
        # forming exp(-x) directly, so large negative inputs no longer
        # trigger an overflow warning (or error under np.errstate(over="raise")).
        self.last_o = np.exp(-np.logaddexp(0, -x))
        return self.last_o

    def grad(self):
        """Return d sigmoid / dx at the last input: sigmoid(x) * (1 - sigmoid(x))."""
        return self.last_o * (1 - self.last_o)

In [4]:
class MeanSquaredError:
    """Half mean-squared-error loss that remembers the residual of its last call."""

    def __init__(self):
        # Gradient storage. `dh` is initialised here but not used inside this
        # class; `last_diff` is replaced on every forward call.
        self.dh = 1
        self.last_diff = 1

    def __call__(self, h, y):
        """Return 1/2 * mean((h - y)^2) and cache the residual ``h - y``.

        Args:
            h: predictions.
            y: targets; ``h - y`` must be a valid NumPy operation.
        """
        residual = h - y
        self.last_diff = residual
        squared = np.square(residual)
        return 0.5 * np.mean(squared)

    def grad(self):
        """Return the residual ``h - y`` cached by the last forward call."""
        # NOTE(review): the loss averages with np.mean, but the 1/N factor is
        # not applied here -- presumably intentional; confirm with the caller.
        return self.last_diff

In [None]:
class Neuron:
    """Dense layer computing ``activation(W^T x + b)`` with cached state for backprop.

    ``W`` is laid out as (inputs, outputs); the forward pass multiplies the
    input by the transpose of ``W``.
    """

    def __init__(self, W, b, a_obj):
        """Store the parameters and build the activation.

        Args:
            W: weight array; ``W.shape[0]`` is the input size and
                ``W.shape[1]`` the output size.
            b: bias added to ``W^T x``.
            a_obj: activation class (not an instance). It is called with no
                arguments, and the resulting object must be callable and
                expose ``grad()``.
        """
        # Model parameters
        self.W = W
        self.b = b
        self.a = a_obj()

        # Gradient storage
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))

        # Differentiating with respect to W needs the input that produced the
        # output, so the most recent x (and pre-activation h) are kept.
        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))

    def __call__(self, x):
        """Forward pass: cache ``x`` and ``h = W^T x + b``, return ``activation(h)``."""
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)

    def grad(self):
        """Return the derivative of the layer output with respect to its input.

        With h = W^T x + b and out = a(h), d out_j / d x_i = W[i, j] * a'(h_j).
        Assumes ``self.a.grad()`` yields one value per output so that it
        broadcasts across the columns of ``W``.
        """
        return self.W * self.a.grad()

    def grad_W(self, dh):
        """Return the gradient with respect to ``W`` given the upstream gradient ``dh``.

        Args:
            dh: upstream gradient, one entry per output.

        Returns:
            Array whose entry (i, j) is ``last_x[i] * dh[j] * a'(h_j)``.
        """
        # h_j = w_j^T x + b_j  ->  dh_j / dw_j = x, so the full gradient is the
        # outer product of the cached input with the per-output delta. Building
        # it directly (rather than filling an array created with
        # np.ones_like(W)) avoids a Python loop and keeps the result from being
        # cast down to W's dtype, e.g. truncated to zeros for an integer W.
        delta = np.asarray(dh) * self.a.grad()
        return np.outer(self.last_x, delta)

    def grad_b(self, dh):
        """Return the gradient with respect to ``b`` given the upstream gradient ``dh``."""
        # h = W^T x + b  ->  dh / db = 1, so only the activation derivative remains.
        return dh * self.a.grad()
        