In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Problem 1 Forward propagation implementation of SimpleRNN

In [3]:
class SimpleRNN:
    """
    Simple recurrent layer (forward propagation only).

    Starting from an all-zero hidden state, each time step t computes
        h_t = activation.forward(x_t @ w_x + h_{t-1} @ w_h + b)

    Parameters
    ----------
    n_nodes : int
        Number of hidden nodes.
    initializer : object
        Must provide W(n_in, n_out) and B(n_out); used to create weights and bias.
    optimizer : object
        Stored on the instance; not used by forward propagation.
    activation : object
        Must provide forward(a); applied to the pre-activation at every step.
    debug : bool
        If True, the fixed weights and bias of the small worked example
        (2 features, 4 nodes) are used instead of the initializer, and the
        full hidden-state history is printed by forward().
    """
    def __init__(self, n_nodes, initializer, optimizer, activation, debug=False):
        # Keep the optimization / initialization / activation instances
        self.optimizer = optimizer
        self.initializer = initializer
        self.activation = activation

        self.n_nodes = n_nodes
        self.debug = debug

        # Initialize node bias (debug mode always uses a fixed 4-element bias)
        if self.debug:
            self.b = np.array([1, 1, 1, 1])
        else:
            self.b = self.initializer.B(n_nodes)

        # Weights are created on the first forward() call, because the number
        # of input features is only known once data is seen.
        self.w_x = None
        self.w_h = None

    def _ensure_weights(self, n_features):
        """Create w_x / w_h once; rebuild only if the input feature count changes."""
        if self.w_x is not None and self.w_x.shape[0] == n_features:
            return

        if self.debug:
            self.w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100
            self.w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100
        else:
            self.w_x = self.initializer.W(n_features, self.n_nodes)
            self.w_h = self.initializer.W(self.n_nodes, self.n_nodes)

    def forward(self, X):
        """
        Run the recurrence over every time step.

        Parameters
        ----------
        X : ndarray, shape (batch_size, n_sequences, n_features)
            Input sequences.

        Returns
        -------
        ndarray, shape (batch_size, n_nodes)
            Hidden state after the last time step.
        """
        batch_size = X.shape[0]
        n_sequences = X.shape[1]
        n_features = X.shape[2]

        # Re-creating the weights on every call would discard them between
        # calls, so they are only created when missing.
        self._ensure_weights(n_features)

        # h[0] is the all-zero initial state; h[t + 1] is the state after step t.
        h = np.zeros((n_sequences + 1, batch_size, self.n_nodes))

        for t in range(n_sequences):
            a = X[:, t, :] @ self.w_x + h[t, :, :] @ self.w_h + self.b
            h[t + 1, :, :] = self.activation.forward(a)

        # Only dump the hidden-state history when debugging.
        if self.debug:
            print(h)
        return h[-1, :, :]

    def backward(self, dA):
        """
        Backpropagation is not implemented.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("SimpleRNN.backward is not implemented")


In [4]:
#Initialization class
class SimpleInitializer:
    """
    Gaussian initializer: standard-normal samples scaled by `sigma`.

    Parameters
    ----------
    sigma : float
        Multiplier applied to the samples drawn with np.random.randn.
    """
    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2):
        """Return a weight matrix of shape (n_nodes1, n_nodes2)."""
        samples = np.random.randn(n_nodes1, n_nodes2)
        return self.sigma * samples

    def B(self, n_nodes2):
        """Return a bias vector of shape (n_nodes2,)."""
        samples = np.random.randn(n_nodes2)
        return self.sigma * samples

In [5]:
#Optimization method
class SGD:
    """
    Plain stochastic gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate.
    """
    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """
        Step the layer's bias and weights against their gradients.

        Reads `layer.dB` / `layer.dW` and updates `layer.B` / `layer.W`
        with augmented assignment (bias first, then weights).
        """
        rate = self.lr
        layer.B -= rate * layer.dB
        layer.W -= rate * layer.dW

In [7]:
#Activation function
class Sigmoid:
    """
    Logistic sigmoid activation.

    forward() remembers its input in `self.A`; backward() uses that stored
    input to scale an incoming gradient by the sigmoid derivative.
    """
    def __init__(self):
        pass

    def forward(self, X):
        """Return 1 / (1 + exp(-X)) and store X for backward()."""
        self.A = X
        denominator = 1 + np.exp(-X)
        return 1 / denominator

    def backward(self, X):
        """Return X * (1 - s) * s, where s is the sigmoid of the stored input."""
        s = self.forward(self.A)
        return X * (1 - s) * s

In [9]:
class Tanh:
    """
    Hyperbolic tangent activation.

    forward() remembers its input in `self.A`; backward() uses that stored
    input to scale an incoming gradient by the tanh derivative.
    """
    def __init__(self):
        pass

    def forward(self, X):
        """Return tanh(X) and store X for backward()."""
        self.A = X
        return np.tanh(X)

    def backward(self, X):
        """Return X * (1 - tanh(A)**2), where A is the stored input."""
        activated = self.forward(self.A)
        local_grad = 1 - activated**2
        return X * local_grad

In [10]:
class Softmax:
    """
    Row-wise softmax with a cross-entropy loss computed in backward().
    """
    def __init__(self):
        pass

    def forward(self, X):
        """
        Apply softmax along axis 1.

        Parameters
        ----------
        X : ndarray, shape (batch_size, n_classes)

        Returns
        -------
        ndarray, same shape as X; each row sums to 1.
        """
        # Shift every row by its OWN maximum. Subtracting the global maximum
        # lets rows much smaller than the largest entry underflow to all-zero
        # exponentials, which turns the normalization into 0 / 0 = NaN.
        shifted = X - np.max(X, axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def backward(self, X, y):
        """
        Store the cross-entropy loss and return the difference X - y.

        Parameters
        ----------
        X : ndarray
            Values passed to the logarithm in the loss.
        y : ndarray
            Targets, multiplied element-wise with log(X + delta).

        Returns
        -------
        ndarray
            X - y. The loss is stored in `self.loss`.
        """
        batch_size = len(X)
        delta = 1e-7  # keeps log() away from log(0)

        self.loss = -np.sum(y * np.log(X + delta)) / batch_size
        return X - y

In [11]:
class ReLU:
    """
    Rectified linear unit activation.

    forward() remembers its input in `self.A`; backward() passes the incoming
    gradient through only where that stored input was strictly positive.
    """
    def forward(self, X):
        """Return the element-wise maximum of 0 and X, storing X for backward()."""
        self.A = X
        rectified = np.maximum(0, X)
        return rectified

    def backward(self, X):
        """Return X multiplied by the mask (stored input > 0)."""
        positive_mask = self.A > 0
        return X * positive_mask

# Problem 2 Experiment of forward propagation with a small sequence

In [12]:
# Toy input and fixed parameters for the hand-worked forward pass.
# Input, shape (batch_size, n_sequences, n_features)
x = np.array([[[1, 2],
               [2, 3],
               [3, 4]]]) / 100
# Input-to-hidden weights, shape (n_features, n_nodes)
w_x = np.array([[1, 3, 5, 7],
                [3, 5, 7, 8]]) / 100
# Hidden-to-hidden weights, shape (n_nodes, n_nodes)
w_h = np.array([[1, 3, 5, 7],
                [2, 4, 6, 8],
                [3, 5, 7, 8],
                [4, 6, 8, 10]]) / 100

# Sizes are read off the arrays: 1, 3, 2 and 4 respectively.
batch_size, n_sequences, n_features = x.shape
n_nodes = w_x.shape[1]

# Zero initial hidden state, shape (batch_size, n_nodes)
h = np.zeros((batch_size, n_nodes))
# Bias, shape (n_nodes,)
b = np.array([1, 1, 1, 1])

In [16]:
#Calculate only the forward part
activation = Tanh()

# Build the zero initial hidden state inside this cell instead of reading the
# global `h`: if `h` has been reassigned elsewhere in the session, the cell
# silently starts from the wrong state and re-running it changes the result.
h0 = np.zeros((x.shape[0], w_h.shape[0]))  # (batch_size, n_nodes)

#1st series
a1 = x[:, 0, :] @ w_x + h0 @ w_h + b
h1 = activation.forward(a1)
#2nd series
a2 = x[:, 1, :] @ w_x + h1 @ w_h + b
h2 = activation.forward(a2)
#3rd series
a3 = x[:, 2, :] @ w_x + h2 @ w_h + b
h3 = activation.forward(a3)

print(h1)
print(h2)
print(h3)

[[0.76188798 0.76213958 0.76239095 0.76255841]]
[[0.792209   0.8141834  0.83404912 0.84977719]]
[[0.79494228 0.81839002 0.83939649 0.85584174]]


In [18]:
# Collaborators handed to the layer: weight initializer, optimizer, activation
initializer = SimpleInitializer(sigma=0.1)
optimizer = SGD(lr=0.1)
activation = Tanh()

# Build the layer in debug mode and run the toy input through it;
# the last expression displays the final hidden state.
rnn = SimpleRNN(
    n_nodes,
    initializer=initializer,
    optimizer=optimizer,
    activation=activation,
    debug=True,
)
rnn.forward(x)

[[[0.         0.         0.         0.        ]]

 [[0.76188798 0.76213958 0.76239095 0.76255841]]

 [[0.792209   0.8141834  0.83404912 0.84977719]]

 [[0.79494228 0.81839002 0.83939649 0.85584174]]]


array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

# Problem 3 (Advanced task) Implementation of backpropagation



Implement backpropagation.

Since the inside of the RNN is a combination of fully connected layers, the update formula is the same as for fully connected layers.

Not implemented in this notebook.