In [1]:
import numpy as np

In [2]:
class Softmax:
    def __call__(self, x):
        exps = np.exp(x)
        self._softmax = exps / np.sum(exps)
        return self._softmax
        

In [3]:
class Sigmoid:
    def __call__(self, x):
        exps = np.exp(x)
        self._sigmoid = exps / (1 + exps)
        return self._sigmoid

    def derivative(self):
        return self._sigmoid * (1-self._sigmoid)
    

In [4]:
# def softmax(arr):
#     exps = np.exp(arr)
#     return exps / np.sum(exps)

In [5]:
# def sigmoid(arr):
#     exps = np.exp(arr)
#     return exps / (1 + exps)

In [15]:
def mseloss(preds, labels):
    matrix = np.square(preds - labels)
    return np.mean(matrix), matrix

In [4]:
class MSELoss:
    def __call__(self, preds, labels):
        self._loss = np.square(preds - labels)
        return np.mean(self._loss)
    
    def derivative(self):
        return np.sqrt(self._loss) * (-1/2)

In [8]:
#y = w*x + b 
#dy/dw =  x
#dy/db = 1
#loss = sum(out-y)**2 / len
#dloss/dy = 1/2 * sum(out-y) * (-1) * len
#dloss/dw = dloss/dy * dy/dw = -(sum(out-y)* len) /2 * x = -1 * root(loss) /2 * x
#dloss/db = dloss/dy * dy/db = -sum(out-y) * len /2 * 1 = -1 * root(loss) / 2 

In [9]:
x = np.array([[0,0,0], [10,10,10], [2,0,0]]) 
y = np.array([[0],[5],[3]])
w = np.array([[0,0,0], [0,0,0], [0,0,0]])
b =  np.array([[0],[0],[0]])

In [5]:
class PReLU:
    def __init__(self, a=0.25):
        self.a = a
    
    def __call__(self, x):
        zeros = np.zeros(x.shape)
        self._z = np.max([zeros, x], axis = 0) + self.a * np.min([zeros, x], axis = 0) 
        return self._z
    
    def derivative(self):
        x, y = self._z.shape
        zeros = np.zeros((x,y))
        for i in range(x):
            for j in range(y):
                if self._z[i][j] >0:
                    zeros[i][j] = 1
                elif self._z[i][j] <0:
                    zeros[i][j] =  -self.a
        return zeros

In [11]:
# def PReLU(x, a = 0.25):
#     zeros = np.zeros(x.shape)
#     return np.max([zeros, x], axis = 0) + a * np.min([zeros, x], axis = 0)

In [6]:
def backpropagation(x, w, b, y, loss, learning_rate=1e-3):
    dy_dw = x #shape : (n, in_size)
    dy_db = 1 
    dloss_dy = np.sqrt(loss) * (-1/2) #shape : (n, out_size)
    dloss_dw = np.matmul(np.transpose(dy_dw),dloss_dy)#shape : (in_size, out_size)
    dloss_db = dloss_dy* dy_db
    w = w + learning_rate * dloss_dw
    b = b + learning_rate * dloss_db 
    return w, b

In [7]:
# 이전 :  y = w*x + b 
#           loss = MSELoss(y', y)

# 이후 :  y = w*x + b
#            z = act_fn(y)
#           loss = MSELoss(y', z)

In [8]:
def backpropagation_with_actfn(x, w, b, z, dz_dy,learning_rate=1e-3):
    #dz_dy shape :  n, out_size
        
    dy_dw = x #shape : n, in_size
    dy_db = 1 
    dz_dw = np.matmul(np.transpose(dy_dw), dz_dy) #dz_dy * dy_dw , shape : in_size, out_size
    dz_db = dz_dy * dy_db #dz_dy * dy_db , shape n, out_size
    w = w + learning_rate * dz_dw
    b = b + learning_rate * dz_db 
    return w, b

In [16]:
class Linear:
    def __init__(self, in_size, out_size, epochs = 10, learning_rate = 1e-3):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.in_size = in_size
        self.out_size = out_size
        self._initialize_weights()
        self.act_fn = Sigmoid()
    
    def _initialize_weights(self):
        self.weight = np.random.rand(self.in_size, self.out_size)
        self.bias = np.random.rand(self.out_size)
        
    def _forward(self, x):
        out = np.matmul(x, self.weight) + self.bias
        out = self.act_fn(out)
        return out
    
    def train(self, x, y):
        for e in range(self.epochs):
            out = self._forward(x)
            loss, loss_matrix = mseloss(out, y)
            self.weight, self.bias = backpropagation_with_actfn(x, self.weight, self.bias, y, self.act_fn.derivative(), learning_rate = self.learning_rate)
            print(f'{e+1}번째 epoch의 loss는 {loss}')

In [10]:
w = np.random.rand(1, 4)
x = np.array([[0,1,0,1], [10,10,10,2], [0,0,0,3]]) 
y = np.array([[0],[1],[0]])
b = np.random.rand(1)

In [18]:
linear = Linear(x.shape[-1], y.shape[-1])

In [19]:
linear.train(x, y)

1번째 epoch의 loss는 0.29056577915144804
2번째 epoch의 loss는 0.29098802998841194
3번째 epoch의 loss는 0.2914101107585081
4번째 epoch의 loss는 0.2918320183292913
5번째 epoch의 loss는 0.2922537495808795
6번째 epoch의 loss는 0.2926753014060545
7번째 epoch의 loss는 0.2930966707103592
8번째 epoch의 loss는 0.29351785441219524
9번째 epoch의 loss는 0.2939388494429187
10번째 epoch의 loss는 0.29435965274693315


In [22]:
class NewLinear:
    def __init__(self, in_size, out_size, act_fn = 'PReLU'):
        self.in_size = in_size
        self.out_size = out_size
        
        if act_fn == 'PReLU':
            self.act_fn = PReLU()
        elif act_fn == 'Softmax':
            self.act_fn = Softmax()
        elif act_fn == 'Sigmoid':
            self.act_fn = Sigmoid()
        
        self._initialize_weights()
    
    def _initialize_weights(self):
        self.weight = np.random.rand(self.in_size, self.out_size)
        self.bias = np.random.rand(self.out_size)
        
    def __call__(self, x):
        out = np.matmul(x,self.weight) + self.bias
        out = self.act_fn(out)
        return out
    
    def update(self, x, z, learning_rate):
        dz_dy = self.act_fn.derivative()
        self.weight, self.bias = backpropagation_with_actfn(x, self.weight, self.bias, z, dz_dy, learning_rate = learning_rate)
    

In [None]:
#linear = NewLinear(x.shape[-1], y.shape[-1])

In [20]:
class Train:
    def __init__(self, x, y, n_layers =3, n_node = 32, epochs=10, learning_rate=1e-3):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.layers = []
        self.loss_fcn = MSELoss()
        self.n_layers = n_layers
        
        for i in range(n_layers):
            act_fn = 'PReLU'
            in_shape = n_node
            out_shape = n_node
            if i == 0: #첫번째 레이어
                in_shape = x.shape[-1]

            if i == n_layers-1: #마지막 레이어
                out_shape = y.shape[-1]
                act_fn = 'Sigmoid'
            self.layers.append(NewLinear(in_shape, out_shape, act_fn))
            
        self._train(x, y)
        
    def _forward(self, x):
        outs = []
        out = x
        for layer in self.layers:
            out = layer(out)
            outs.append(out)
        return outs, out #outs에는 지금까지 layer들의 출력값이 담겨있다. 
    
    def _backpropagation(self, x, outs):
        last = x
        zipped = list(zip(self.layers, outs))
        for i, (layer, out) in enumerate(zipped):
            if i == self.n_layers - 1: #마지막 레이어일 때
                out = out * self.loss_fcn.derivative()
            layer.update(last, out, self.learning_rate)
            last = out
        
    def _train(self, x, y):
        for e in range(self.epochs):
            outs, out = self._forward(x)
            loss = self.loss_fcn(out, y)
            outs.append(loss) #각 layer의 역전파를 위해 쓰일 배열이다. 
            self._backpropagation(x, outs)
        
            print(f'{e+1}번째 epoch의 loss는 {loss}')

In [23]:
Train(x, y,n_layers=1, n_node=32)

1번째 epoch의 loss는 0.4342514791432084
2번째 epoch의 loss는 0.43449342348115433
3번째 epoch의 loss는 0.4347349468984185
4번째 epoch의 loss는 0.43497605009271473
5번째 epoch의 loss는 0.4352167337624741
6번째 epoch의 loss는 0.4354569986068239
7번째 epoch의 loss는 0.43569684532556624
8번째 epoch의 loss는 0.43593627461915646
9번째 epoch의 loss는 0.43617528718868287
10번째 epoch의 loss는 0.43641388373584594


<__main__.Train at 0x7fc970852070>