https://github.com/hyona-yu/python_machine_learning/blob/main/rnn_python.ipynb

In [2]:
import numpy as np

In [17]:
class Softmax:  # used to compute the output-layer state
    """Softmax activation that caches its most recent output.

    Args:
        axis: Axis along which the values are normalised. ``None`` (the
            default) normalises over every element of the input, so the
            whole returned array sums to 1.
    """

    def __init__(self, axis=None):
        self.axis = axis

    def __call__(self, x):
        """Return the softmax of ``x`` and store it in ``self._softmax``."""
        x = np.asarray(x, dtype=float)
        if x.size == 0:
            # Nothing to normalise; np.max would raise on an empty input.
            self._softmax = x.copy()
            return self._softmax

        # Subtracting the maximum does not change the result mathematically,
        # but keeps np.exp from overflowing to inf (which would yield NaN).
        shifted = x - np.max(x, axis=self.axis, keepdims=True)
        exps = np.exp(shifted)
        self._softmax = exps / np.sum(exps, axis=self.axis, keepdims=True)
        return self._softmax

class Sigmoid:  # used to compute the hidden-layer state
    """Logistic sigmoid activation that caches its most recent output."""

    def __call__(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise and cache the result.

        The value is computed from ``exp(-|x|)``, which always lies in
        (0, 1], so large positive or negative inputs cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function,
        # written so the exponent is never positive.
        self._sigmoid = np.where(x >= 0, 1.0, decay) / (1.0 + decay)
        return self._sigmoid

    def derivative(self):
        """Return ``s * (1 - s)`` for the output ``s`` of the latest call.

        Raises:
            AttributeError: if the activation has not been called yet.
        """
        return self._sigmoid * (1 - self._sigmoid)

# Evaluation: loss function (and its derivative) that can be used to score
# the predictions of the RNN model.
class MSELoss:
    """Mean squared error that caches what it needs for its gradient."""

    def __call__(self, preds, labels):
        """Return ``mean((preds - labels) ** 2)``.

        The element-wise difference and squared error are stored on the
        instance (``_diff`` and ``_loss``) for use by ``derivative``.
        """
        self._diff = np.asarray(preds, dtype=float) - np.asarray(labels, dtype=float)
        self._loss = np.square(self._diff)
        return np.mean(self._loss)

    def derivative(self):
        """Return the gradient of the mean squared error w.r.t. ``preds``.

        For ``L = mean((p - y) ** 2)`` over ``N`` elements the gradient is
        ``2 * (p - y) / N``. The sign of the error is kept, which a square
        root of the squared error cannot do.

        Raises:
            AttributeError: if the loss has not been called yet.
        """
        return 2.0 * self._diff / self._diff.size

`tanh`는 은닉 상태·입력과 가중치 행렬의 곱에 편향을 더한 값에 적용되어 출력값을 계산한다.

In [18]:
def tanh(x):
    """Return the hyperbolic tangent of ``x`` element-wise.

    tanh(x) = (e^x - e^-x) / (e^x + e^-x). NumPy's implementation is used
    because evaluating the exponentials directly overflows for large |x|.
    """
    return np.tanh(x)

In [19]:
class Tanh:  # a class, so the derivative can reuse the cached output
    """Hyperbolic-tangent activation that caches its most recent output."""

    def __call__(self, x):
        """Return ``tanh(x)`` element-wise and store it in ``self.tanh``.

        tanh(x) = (e^x - e^-x) / (e^x + e^-x); np.tanh evaluates it without
        overflowing for large |x|.
        """
        self.tanh = np.tanh(x)
        return self.tanh

    def derivative(self):
        """Return ``1 - tanh(x) ** 2`` for the output of the latest call.

        Raises:
            AttributeError: if the activation has not been called yet.
        """
        return (1 + self.tanh) * (1 - self.tanh)

In [20]:
# NOTE(review): a module-level function also named `PReLU` is defined later in
# this file; once that definition runs, the name no longer refers to this class.
class PReLU:
    """Parametric ReLU: ``x`` for positive inputs, ``a * x`` for negative ones.

    Args:
        a: Slope applied to negative inputs.
    """

    def __init__(self, a=0.25):
        self.a = a

    def __call__(self, x):
        """Return ``max(0, x) + a * min(0, x)`` element-wise.

        The input and the output are cached (``_x`` and ``_z``) for use by
        ``derivative``.
        """
        self._x = np.asarray(x, dtype=float)
        self._z = np.maximum(self._x, 0.0) + self.a * np.minimum(self._x, 0.0)
        return self._z

    def derivative(self):
        """Return d(PReLU)/dx for the input of the latest call.

        The slope is 1 where the input was positive, ``a`` where it was
        negative and 0 where it was exactly zero. The sign test uses the
        cached input rather than the output, so it stays correct for a
        non-positive ``a``. Works for inputs of any dimensionality.

        Raises:
            AttributeError: if the activation has not been called yet.
        """
        x = self._x
        return np.where(x > 0, 1.0, np.where(x < 0, self.a, 0.0))

In [21]:
# NOTE(review): this definition rebinds the name `PReLU`, which an earlier
# cell in this file uses for a class.
def PReLU(x, a = 0.25):
    """Return ``max(0, x) + a * min(0, x)`` element-wise.

    ``x`` must expose ``.shape`` (e.g. a NumPy array); ``a`` is the slope
    applied to the negative part.
    """
    # Stack a zero array on top of x so that one reduction over axis 0 picks
    # the element-wise maximum / minimum against zero.
    stacked = np.array([np.zeros(x.shape), x])
    positive_part = stacked.max(axis=0)
    negative_part = stacked.min(axis=0)
    return positive_part + a * negative_part

In [22]:
class SimpleRNN:
    """Unrolled recurrent layer with one weight matrix and bias per time step.

    Args:
        input_shape: ``(batch_size, length, embedding_size)`` of the input.
        hidden_dim: Width of the hidden state and of each step's output.
        h: Optional pre-allocated hidden-state buffer. When ``None`` a zero
            array of shape ``(batch_size, length + 1, hidden_dim)`` is
            created. A supplied buffer is written to in place by ``__call__``.
        act_fn: ``'tanh'`` or ``'sigmoid'``.

    Raises:
        ValueError: if ``act_fn`` is not one of the supported names.
    """

    def __init__(self, input_shape, hidden_dim, h=None, act_fn='tanh'):
        self.batch_size, self.length, self.embedding_size = input_shape
        self.weight, self.bias = [], []
        self.hidden_dim = hidden_dim

        if act_fn == 'tanh':
            self.act_fn = Tanh()
        elif act_fn == 'sigmoid':
            self.act_fn = Sigmoid()
        else:
            # Fail here instead of with an AttributeError on the first call.
            raise ValueError(f"unsupported act_fn {act_fn!r}; expected 'tanh' or 'sigmoid'")

        # Without a supplied hidden state, start from zeros. Slot 0 is the
        # initial state; slot t + 1 receives the state produced at step t.
        if h is None:
            self.h = np.zeros((self.batch_size, self.length + 1, hidden_dim))
        else:
            self.h = h

        self.in_size = self.embedding_size
        for _ in range(self.length):
            w_h = np.random.rand(self.hidden_dim, self.hidden_dim)
            w_x = np.random.rand(self.in_size, self.hidden_dim)
            # Stacked so one matmul handles [h_t, x_t]:
            # shape (hidden_dim + in_size, hidden_dim).
            self.weight.append(np.concatenate((w_h, w_x), axis=0))
            self.bias.append(np.random.rand(self.hidden_dim))

    def __call__(self, x):
        """Run the forward pass over all ``length`` time steps.

        Args:
            x: Array indexed as ``x[:, t, :]`` per time step.

        Returns:
            Tuple ``(states, outs)``: ``states`` is ``self.h[:, 1:]`` (the
            value stored for each step) and ``outs`` holds the activated
            outputs with shape ``(batch_size, length, hidden_dim)``.
        """
        outs = []
        for t in range(self.length):
            step_input = np.concatenate((self.h[:, t, :], x[:, t, :]), axis=1)
            out = np.matmul(step_input, self.weight[t]) + self.bias[t]
            # `out` is already (batch_size, hidden_dim); store all of it as the
            # state for the next step. Slicing its first axis by hidden_dim
            # would cut the batch and fail whenever batch_size > hidden_dim.
            # NOTE(review): the stored state is the pre-activation value, as
            # before; confirm whether the activated output was intended.
            self.h[:, t + 1, :] = out
            outs.append(self.act_fn(out))  # activation

        outs = np.array(outs)  # shape (length, batch_size, hidden_dim)
        outs = np.transpose(outs, (1, 0, 2))
        return self.h[:, 1:], outs

In [23]:
# Toy data: two sequences, three time steps each, four features per step,
# with one target value per sequence.
x = np.array([
    [[1, 2, 3, 4], [3, 4, 5, 6], [4, 5, 6, 7]],
    [[3, 4, 1, 1], [4, 5, 1, 1], [5, 6, 1, 1]],
])
y = np.array([[0], [1]])
x.shape

(2, 3, 4)

batch_size 2, timestamp_length 3, embedding_size 4

In [24]:
# Build a SimpleRNN sized for x with an 8-wide hidden state, run the forward
# pass twice and show the shapes of the returned states and outputs.
layer = SimpleRNN(input_shape=x.shape, hidden_dim=8)
first_pass, second_pass = layer(x), layer(x)
first_pass[0].shape, second_pass[1].shape

((2, 3, 8), (2, 3, 8))

In [35]:
class RNN:
    """Unrolled recurrent layer with per-step weights and a simple update rule.

    Args:
        input_shape: ``(batch_size, length, embedding_size)`` of the input.
        hidden_dim: Width of the hidden state and of each step's output.
        h: Optional pre-allocated hidden-state buffer. When ``None`` a zero
            array of shape ``(batch_size, length + 1, hidden_dim)`` is
            created. A supplied buffer is written to in place by ``__call__``.
        act_fn: ``'Tanh'`` or ``'Sigmoid'``.

    Raises:
        ValueError: if ``act_fn`` is not one of the supported names.
    """

    def __init__(self, input_shape, hidden_dim, h=None, act_fn='Tanh'):
        self.batch_size, self.length, self.embedding_size = input_shape
        self.weight, self.bias = [], []
        self.hidden_dim = hidden_dim

        if act_fn == 'Tanh':
            self.act_fn = Tanh()
        elif act_fn == 'Sigmoid':
            self.act_fn = Sigmoid()
        else:
            # Fail here instead of with an AttributeError on the first call.
            raise ValueError(f"unsupported act_fn {act_fn!r}; expected 'Tanh' or 'Sigmoid'")

        # Slot 0 is the initial state; slot t + 1 receives the state of step t.
        if h is None:
            self.h = np.zeros((self.batch_size, self.length + 1, hidden_dim))
        else:
            self.h = h

        self.in_size = self.embedding_size
        for _ in range(self.length):
            w_h = np.random.rand(self.hidden_dim, self.hidden_dim)
            w_x = np.random.rand(self.in_size, self.hidden_dim)
            # Stacked so one matmul handles [h_t, x_t]:
            # shape (hidden_dim + in_size, hidden_dim).
            self.weight.append(np.concatenate((w_h, w_x), axis=0))
            self.bias.append(np.random.rand(self.hidden_dim))

    def __call__(self, x, h=None, verbose=True):
        """Run the forward pass over all ``length`` time steps.

        Args:
            x: Array indexed as ``x[:, t, :]`` per time step.
            h: Optional hidden-state buffer that replaces ``self.h``.
            verbose: When true (the default) print the shapes involved at
                every step.

        Returns:
            Activated outputs with shape ``(batch_size, length, hidden_dim)``.
        """
        if h is not None:
            self.h = h
        outs = []
        for t in range(self.length):
            if verbose:
                print(f'{t}th timestamp, x[t] shape {x[:,t,:].shape}, h[t] shape {self.h[:,t,:].shape}, self.weight[t] shape {self.weight[t].shape}')
            new_x = np.concatenate((self.h[:, t, :], x[:, t, :]), axis=1)
            if verbose:
                print(f'new_x shape {new_x.shape} bias shape {self.bias[t].shape}')
            out = np.matmul(new_x, self.weight[t]) + self.bias[t]
            # `out` is already (batch_size, hidden_dim); store all of it as the
            # state for the next step. Slicing its first axis by hidden_dim
            # would cut the batch and fail whenever batch_size > hidden_dim.
            # NOTE(review): the stored state is the pre-activation value, as
            # before; confirm whether the activated output was intended.
            self.h[:, t + 1, :] = out
            outs.append(self.act_fn(out))  # activation

        outs = np.array(outs)  # shape (length, batch_size, hidden_dim)
        outs = np.transpose(outs, (1, 0, 2))
        return outs

    # Backpropagation step.
    def backpropagation(self, x, z, learning_rate):
        """Update every step's weight and bias from the cached activation slope.

        Args:
            x: The input that was fed to the latest forward pass.
            z: Accepted for interface compatibility; not used.
            learning_rate: Step size of the update.

        NOTE(review): ``self.act_fn.derivative()`` reflects only the last
        time step of the latest forward pass and is reused for every step;
        ``z`` (the upstream signal) is ignored and the update is added rather
        than subtracted. This is therefore not a full backpropagation through
        time -- confirm the intended algorithm before relying on it.
        """
        dz_dy = self.act_fn.derivative()  # (batch_size, hidden_dim)
        # Reduce over the batch so each bias keeps its (hidden_dim,) shape;
        # adding the un-reduced array would turn it into (batch, hidden_dim).
        dz_db = np.sum(dz_dy, axis=0)
        # One weight/bias pair per time step; walk them from last to first.
        for l in reversed(range(self.length)):
            dy_dw = np.concatenate((self.h[:, l, :], x[:, l, :]), axis=-1)
            dz_dw = np.matmul(dy_dw.T, dz_dy)  # also sums over the batch
            self.weight[l] = self.weight[l] + learning_rate * dz_dw
            self.bias[l] = self.bias[l] + learning_rate * dz_db

In [36]:
# Build an RNN layer sized for x with an 8-wide, zero-initialised hidden state.
layer = RNN(input_shape=x.shape, hidden_dim=8)

In [41]:
# Toy data again: inputs of shape (2, 3, 4) and one target per sequence.
x = np.array([
    [[1, 2, 3, 4], [3, 4, 5, 6], [4, 5, 6, 7]],
    [[3, 4, 1, 1], [4, 5, 1, 1], [5, 6, 1, 1]],
])
y = np.array([[0], [1]])
x.shape, y.shape

((2, 3, 4), (2, 1))

## RNN 학습하는 train 클래스

In [38]:
class Train:  # a stack of RNN layers (three by default)
    """Build a stack of ``RNN`` layers and train it as part of construction.

    Every layer but the last uses the ``'Tanh'`` activation with ``n_node``
    hidden units; the last layer uses ``'Sigmoid'`` with ``y.shape[-1]``
    units. ``__init__`` ends by calling ``_train(x, y)``.

    Args:
        x: Input whose shape unpacks to (batch_size, length, embedding_size).
        y: Targets; ``y.shape[-1]`` sets the width of the last layer.
        n_layers: Number of RNN layers.
        n_node: Hidden width of the non-final layers.
        epochs: Number of passes made by ``_train``.
        learning_rate: Step size handed to each layer's ``backpropagation``.
    """

    def __init__(self, x, y, n_layers=3, n_node=32, epochs=10, learning_rate=1e-3):
        self.batch_size, self.length, self.embedding_size = x.shape
        self.n_layers = n_layers  # number of RNN layers
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.loss_fcn = MSELoss()
        self.layers = []

        final_index = n_layers - 1
        for i in range(n_layers):
            # Every layer after the first consumes the previous layer's
            # n_node-wide output.
            if i != 0:
                self.embedding_size = n_node

            # The last layer maps to the target width and uses a sigmoid.
            is_final = i == final_index
            out_shape = y.shape[-1] if is_final else n_node
            act_fn = 'Sigmoid' if is_final else 'Tanh'

            print(f'{i}th layer input_shape({self.batch_size, self.length,self.embedding_size},hidden dim {out_shape})')
            layer_input_shape = (self.batch_size, self.length, self.embedding_size)
            self.layers.append(
                RNN(input_shape=layer_input_shape, hidden_dim=out_shape, h=None, act_fn=act_fn)
            )

        self._train(x, y)

    def _forward(self, x):
        """Feed ``x`` through every layer in order.

        Returns:
            Tuple ``(outs, last)``: ``outs`` lists each layer's output and
            ``last`` is the output of the final layer.
        """
        per_layer = []
        current = x
        for rnn in self.layers:
            # Each layer's output becomes the next layer's input.
            current = rnn(current)
            per_layer.append(current)
        return per_layer, current

    def _backpropagation(self, x, outs, loss, learning_rate):
        """Call ``backpropagation`` on each layer, from last to first.

        Layer 0 is given ``x`` as its input; layer ``i`` is given
        ``outs[i - 1]``. The last entry of ``outs`` is replaced by itself
        times the mean of the loss derivative before being passed on.
        ``loss`` is accepted but not used.

        NOTE(review): no gradient is passed from one layer to the previous
        one here -- confirm that this is the intended update scheme.
        """
        final_index = self.n_layers - 1
        for idx in range(final_index, -1, -1):
            layer_input = outs[idx - 1] if idx else x
            if idx == final_index:
                outs[idx] = outs[idx] * np.mean(self.loss_fcn.derivative())
            self.layers[idx].backpropagation(layer_input, outs[idx], learning_rate)

    def _train(self, x, y):
        """Run ``self.epochs`` rounds of forward pass, loss and layer updates."""
        for e in range(self.epochs):
            layer_outputs, final_output = self._forward(x)
            # The loss is computed on the value at the last time step only.
            loss = self.loss_fcn(final_output[:, -1, :], y)
            self._backpropagation(x, layer_outputs, loss, self.learning_rate)

            print(f'{e+1}번째 epoch의 loss는 {loss}')

In [39]:
# `train` is only an alias for the Train class. Calling it constructs a Train
# instance, and Train.__init__ runs the whole training loop before returning,
# so the value displayed by this cell is the instance itself.
train = Train
train(x,y, n_node=8)

0th layer input_shape((2, 3, 4),hidden dim 8)
1th layer input_shape((2, 3, 8),hidden dim 8)
2th layer input_shape((2, 3, 8),hidden dim 1)
0th timestamp, x[t] shape (2, 4), h[t] shape (2, 8), self.weight[t] shape (12, 8)
new_x shape (2, 12) bias shape (8,)
1th timestamp, x[t] shape (2, 4), h[t] shape (2, 8), self.weight[t] shape (12, 8)
new_x shape (2, 12) bias shape (8,)
2th timestamp, x[t] shape (2, 4), h[t] shape (2, 8), self.weight[t] shape (12, 8)
new_x shape (2, 12) bias shape (8,)
0th timestamp, x[t] shape (2, 8), h[t] shape (2, 8), self.weight[t] shape (16, 8)
new_x shape (2, 16) bias shape (8,)
1th timestamp, x[t] shape (2, 8), h[t] shape (2, 8), self.weight[t] shape (16, 8)
new_x shape (2, 16) bias shape (8,)
2th timestamp, x[t] shape (2, 8), h[t] shape (2, 8), self.weight[t] shape (16, 8)
new_x shape (2, 16) bias shape (8,)
0th timestamp, x[t] shape (2, 8), h[t] shape (2, 1), self.weight[t] shape (9, 1)
new_x shape (2, 9) bias shape (1,)
1th timestamp, x[t] shape (2, 8), h[t]

<__main__.Train at 0x7cafd4ae9ae0>