In [19]:
import numpy as np
from tqdm import tqdm

## 【問題1】SimpleRNNのフォワードプロパゲーション実装

In [173]:
class SimpleRNN():
    """Single recurrent cell: ``h_out = activator1(x @ Wx + h_in @ Wh + b)``.

    The cell output is ``activator2(h_out)``; there is no separate output
    weight matrix, so the output width equals ``n_nodes``.

    Parameters
    ----------
    n_features : int
        Width of one input time step.
    n_nodes : int
        Width of the hidden state.
    initializer : object, optional
        When given, ``initializer.W(n_in, n_out)`` and ``initializer.b(n_out)``
        are used to create ``Wx``, ``Wh`` and ``b``. When ``None`` the caller
        must assign ``Wx``, ``Wh`` and ``b`` before calling ``forward``.
    optimizer : object
        Must provide ``backward(layer, dZ) -> (dh_in, dWx, dWh, db)`` and
        ``update(layer, dWx, dWh, db)``.
    activator1 : object
        Hidden activation with ``forward(Z)`` and ``backward(dA, A)``.
    activator2 : object
        Output activation with ``forward(Z)`` and ``backward(y_pred, y_true)``.
    """

    def __init__(self, n_features, n_nodes, initializer=None, optimizer=None, activator1=None, activator2=None):
        self.optimizer = optimizer
        self.activator1 = activator1
        self.activator2 = activator2
        # Only build the parameters when an initializer is supplied, so the
        # existing "assign Wx/Wh/b by hand" workflow keeps working.
        if initializer is not None:
            self.Wx = initializer.W(n_features, n_nodes)
            self.b = initializer.b(n_nodes)
            self.Wh = initializer.W(n_nodes, n_nodes)

    def forward(self, x, h_in):
        """Run one time step and return ``(out, h_out)``.

        The step's inputs and activations are cached on the instance
        (``x``, ``h``, ``Z``, ``h_out``, ``out``) because ``backward`` and the
        optimizer read them back.
        """
        self.x = x
        self.h = h_in
        self.Z = np.dot(x, self.Wx) + np.dot(h_in, self.Wh) + self.b
        self.h_out = self.activator1.forward(self.Z)
        self.out = self.activator2.forward(self.h_out)
        return self.out, self.h_out

    def backward(self, y_true, y_pred, dh_out):
        """Back-propagate one time step.

        Uses the state currently cached on the instance (``x``, ``h``,
        ``h_out``). Only the most recent step is cached by ``forward``, so a
        caller unrolling several steps must restore those attributes for the
        step being differentiated before calling this method.

        Parameters
        ----------
        y_true, y_pred : array-like
            Target and prediction for this step, passed to
            ``activator2.backward(y_pred, y_true)``.
        dh_out : array-like
            Gradient flowing into ``h_out`` from the following time step.

        Returns
        -------
        tuple
            ``(dh_in, dWx, dWh, db)`` as produced by the optimizer.
        """
        dh_out = dh_out + self.activator2.backward(y_pred, y_true)
        # The hidden activation's backward needs its own output to form the derivative.
        dZ = self.activator1.backward(dh_out, self.h_out)
        dh_in, dWx, dWh, db = self.optimizer.backward(self, dZ)
        return dh_in, dWx, dWh, db

    def update(self, dWx, dWh, db):
        """Apply the given gradients to this layer through the optimizer."""
        self.optimizer.update(self, dWx, dWh, db)

In [164]:
class SGD:
    """Plain stochastic gradient descent for a recurrent layer.

    Parameters
    ----------
    lr : float
        Learning rate applied to every parameter.
    """

    def __init__(self, lr=0.001):
        self.lr = lr

    def backward(self, layer, dZ):
        """Turn the pre-activation gradient ``dZ`` into parameter/state gradients.

        Reads ``layer.x``, ``layer.h`` (the step's input and incoming hidden
        state) and ``layer.Wh``.

        Returns
        -------
        tuple
            ``(dh, dWx, dWh, db)`` where ``dh`` is the gradient with respect to
            the incoming hidden state.
        """
        db = np.sum(dZ, axis=0)
        dWx = np.dot(layer.x.T, dZ)
        dWh = np.dot(layer.h.T, dZ)
        dh = np.dot(dZ, layer.Wh.T)
        return dh, dWx, dWh, db

    def update(self, layer, dWx, dWh, db):
        """Take one gradient step on ``layer.Wx``, ``layer.Wh`` and ``layer.b``.

        The parameters are rebound to new arrays rather than modified in place:
        an in-place ``-=`` fails on integer-typed parameters and would also
        silently mutate any array the caller aliased into the layer.
        """
        layer.Wx = layer.Wx - self.lr * dWx
        layer.Wh = layer.Wh - self.lr * dWh
        layer.b = layer.b - self.lr * db

In [141]:
class Softmax():
    """Softmax activation paired with the cross-entropy gradient."""

    def __init__(self):
        pass

    def forward(self, Z):
        """Return the softmax of ``Z``.

        A 2-D input is normalised row by row; any other rank is normalised
        over all of its elements. The maximum is subtracted before
        exponentiating so large logits do not overflow to ``inf`` (which would
        turn the result into NaN); the shift cancels out in the ratio.
        """
        if Z.size == 0:
            # Nothing to normalise; keep the shape and return a float array.
            return np.exp(Z)
        if Z.ndim == 2:
            exp_z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
            return exp_z / np.sum(exp_z, axis=1, keepdims=True)
        exp_z = np.exp(Z - np.max(Z))
        return exp_z / np.sum(exp_z)

    def backward(self, y_pred, y_true):
        """Gradient of softmax + cross-entropy with respect to the logits."""
        return y_pred - y_true
    
    
class Tanh():
    """Hyperbolic-tangent activation."""

    def __init__(self):
        pass

    def forward(self, Z):
        """Return ``tanh(Z)`` element-wise."""
        return np.tanh(Z)

    def backward(self, dA, A):
        """Back-propagate ``dA`` through tanh.

        Parameters
        ----------
        dA : array-like
            Gradient with respect to the activation output.
        A : array-like
            The activation output ``tanh(Z)`` from the forward pass.

        Returns
        -------
        array-like
            Gradient with respect to ``Z``: d/dZ tanh(Z) = 1 - tanh(Z)**2.
        """
        return dA * (1.0 - np.square(A))

In [167]:
class ScratchSimpleRNNClassifier():
    """Full-batch trainer that unrolls one recurrent layer over a sequence.

    Parameters
    ----------
    layer : object
        Recurrent layer exposing ``forward(x, h_in) -> (out, h_out)``,
        ``backward(y_true, y_pred, dh_out) -> (dh_in, dWx, dWh, db)``,
        ``update(dWx, dWh, db)`` and a square ``Wh`` weight matrix.
    epoch : int
        Number of passes over the training data.

    Attributes
    ----------
    loss_train, loss_valid : list of float
        Per-epoch losses (cross-entropy summed over the time steps).
    out_list, hout_list : list of ndarray
        Layer outputs / hidden states of the most recent forward pass,
        one entry per time step.
    dh_list, dWx_list, dWh_list, db_list : list of ndarray
        Gradients of the most recent backward pass, ordered from the last
        time step to the first.
    """

    def __init__(self, layer, epoch=3):
        self.epoch = epoch
        self.layer = layer
        self.loss_train = []
        self.loss_valid = []

    def _forward_sequence(self, X):
        """Unroll the layer over axis 1 of ``X`` starting from a zero state.

        Returns three lists with one entry per time step: the layer outputs,
        the hidden state fed into each step, and the hidden state it produced.
        """
        batch_size, n_sequences = X.shape[0], X.shape[1]
        # Hidden width comes from the layer itself, not from a notebook global.
        n_nodes = self.layer.Wh.shape[0]
        h_prev = np.zeros((batch_size, n_nodes))
        outs, h_ins, h_outs = [], [], []
        for t in range(n_sequences):
            out, h_next = self.layer.forward(X[:, t, :], h_prev)
            outs.append(out)
            h_ins.append(h_prev)
            h_outs.append(h_next)
            h_prev = h_next
        return outs, h_ins, h_outs

    def _sequence_loss(self, y, outs):
        """Cross-entropy against ``y`` summed over every time step's output."""
        return sum(self.crossentropy(y, out) for out in outs)

    def train(self, X, y, X_val=None, y_val=None):
        """Fit the layer with back-propagation through time.

        Parameters
        ----------
        X : ndarray, shape (batch_size, n_sequences, n_features)
            Training sequences.
        y : ndarray or None
            Targets compared with the layer output at every time step (the
            layer's ``backward`` adds the output gradient on each call). When
            ``None`` only the forward pass runs, which is enough to inspect
            ``out_list`` / ``hout_list``.
        X_val, y_val : ndarray, optional
            When both are given, a validation loss is recorded per epoch.
        """
        # X shape: (batch_size, n_sequences, n_features)
        n_sequences = X.shape[1]
        self.out_list = []
        self.hout_list = []
        self.dh_list = []
        self.dWx_list = []
        self.dWh_list = []
        self.db_list = []

        for _ in tqdm(range(self.epoch)):

            # feedforward
            self.h = np.zeros((X.shape[0], self.layer.Wh.shape[0]))
            self.out_list, h_in_list, self.hout_list = self._forward_sequence(X)

            # Start every epoch with empty gradient buffers so entries from
            # earlier epochs are never mixed into this epoch's update.
            self.dh_list = []
            self.dWx_list = []
            self.dWh_list = []
            self.db_list = []

            if y is None:
                # Forward-only run: nothing to differentiate against.
                continue

            self.loss_train.append(self._sequence_loss(y, self.out_list))

            # backward (last time step first)
            dh = np.zeros_like(self.h)
            for t in reversed(range(n_sequences)):
                # The layer only remembers its latest forward call, so put
                # step t's inputs/activations back before differentiating it.
                self.layer.x = X[:, t, :]
                self.layer.h = h_in_list[t]
                self.layer.h_out = self.hout_list[t]
                dh, dWx, dWh, db = self.layer.backward(y, self.out_list[t], dh)
                self.dh_list.append(dh)
                self.dWx_list.append(dWx)
                self.dWh_list.append(dWh)
                self.db_list.append(db)

            # update weight: shared weights receive the sum of the per-step gradients
            self.layer.update(
                np.sum(self.dWx_list, axis=0),
                np.sum(self.dWh_list, axis=0),
                np.sum(self.db_list, axis=0),
            )

            if X_val is not None and y_val is not None:
                val_outs, _, _ = self._forward_sequence(X_val)
                self.loss_valid.append(self._sequence_loss(y_val, val_outs))

    def crossentropy(self, y, y_pred):
        """Mean over the batch of ``-sum(y * log(y_pred))`` along axis 1."""
        loss = -np.mean(np.sum(y*np.log(y_pred), axis=1))
        return loss

    def predict(self, X_test):
        """Run the sequence through the layer and return the final step's output.

        ``X_test`` has shape (batch_size, n_sequences, n_features); the hidden
        state starts at zero.
        """
        outs, _, _ = self._forward_sequence(X_test)
        return outs[-1]

## 【問題2】小さな配列でのフォワードプロパゲーションの実験

In [175]:
# Shared building blocks for the experiment: output activation, hidden
# activation and optimizer (default learning rate).
softmax, tanh, sgd = Softmax(), Tanh(), SGD()

In [169]:
# Toy inputs and weights for checking the forward pass by hand.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
h = np.zeros((batch_size, n_nodes))  # (batch_size, n_nodes)
# Float bias: an integer array cannot receive in-place float gradient updates.
b = np.ones(n_nodes)  # (n_nodes,)

In [179]:
# Build the layer from the sizes derived from the toy data rather than
# repeating them as literals.
rnn = SimpleRNN(n_features, n_nodes, None, sgd, tanh, softmax)
# Hand the layer float copies: the optimizer updates parameters with float
# gradients, and sharing the notebook-level arrays would let training modify
# w_x / w_h / b / h, making this cell non-repeatable.
rnn.Wx = w_x.astype(float)
rnn.Wh = w_h.astype(float)
rnn.b = b.astype(float)
rnn.h = h.astype(float)

In [180]:
# Wrap the layer in the trainer; a single epoch is enough for this check.
rnn_cls = ScratchSimpleRNNClassifier(layer=rnn, epoch=1)

In [181]:
# No labels are passed; the cell displays the hidden state recorded at each
# time step.
rnn_cls.train(X=x, y=None)
rnn_cls.hout_list

100%|██████████| 1/1 [00:00<00:00, 464.02it/s]


[array([[0.76188798, 0.76213958, 0.76239095, 0.76255841]]),
 array([[0.792209  , 0.8141834 , 0.83404912, 0.84977719]]),
 array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])]