In [1]:
import numpy as np

In [2]:
class Tanh(object):
    """Element-wise hyperbolic tangent activation and its local derivative."""

    @staticmethod
    def forward(X_in):
        """Return tanh(X_in), computed element-wise."""
        activated = np.tanh(X_in)
        return activated

    @staticmethod
    def backward(X_in):
        """Return dY/dX = 1 - tanh(X_in)^2 for the pre-activation X_in.

        Only the local derivative is returned; multiplying by an upstream
        gradient is left to the caller.
        """
        return Tanh.backward_calculated(Tanh.forward(X_in))

    @staticmethod
    def backward_calculated(tanh_x_in):
        """Return 1 - y^2, where y = tanh_x_in is an already computed tanh output."""
        squared = tanh_x_in ** 2
        return 1 - squared
    
class Sigmoid(object):
    """Element-wise logistic sigmoid activation and its local derivative."""

    @staticmethod
    def forward(x_in):
        """Return sigmoid(x_in) = 1 / (1 + exp(-x_in)), computed element-wise.

        Evaluated as exp(-log(1 + exp(-x_in))) through np.logaddexp, so that
        large negative inputs do not overflow inside np.exp (the direct
        formula computes exp(-x_in), which exceeds the float64 range for
        x_in below roughly -709).
        """
        return np.exp(-np.logaddexp(0, -x_in))

    @staticmethod
    def backward(x_in):
        """Return dY/dX = s * (1 - s) with s = sigmoid(x_in).

        Only the local derivative is returned; multiplying by an upstream
        gradient is left to the caller.
        """
        # forward is a static method, so no instance is needed to call it.
        return Sigmoid.backward_calculated(Sigmoid.forward(x_in))

    @staticmethod
    def backward_calculated(sigmoid_x):
        """Return s * (1 - s), where s = sigmoid_x is an already computed sigmoid output."""
        return sigmoid_x * (1 - sigmoid_x)

In [4]:
class GRULayer(object):
    """Gated recurrent layer over batch-first sequences.

    NOTE(review): despite the class name, the recurrence implemented by
    ``forward`` is the four-gate formulation with a separate cell state
    (LSTM-style):

        i_t = sigmoid(W_i.x_t + U_i.h_(t-1) + b_i)    # gate 0
        f_t = sigmoid(W_f.x_t + U_f.h_(t-1) + b_f)    # gate 1
        g_t = tanh(W_g.x_t + U_g.h_(t-1) + b_g)       # gate 2
        o_t = sigmoid(W_o.x_t + U_o.h_(t-1) + b_o)    # gate 3
        c_t = f_t * c_(t-1) + i_t * g_t
        h_t = o_t * tanh(c_t)

    The name, the ``c_0`` argument and the three-value return of ``forward``
    are kept unchanged so existing callers keep working. Converting this to a
    true GRU would change that contract and needs a separate decision.
    """

    # Gate order along axis 0 of the weights, bias and ``gates``:
    # input, forget, candidate, output.
    _NUM_GATES = 4

    def __init__(self, input_dim, hidden_dim, use_bias=True):
        """Create the layer parameters.

        Args:
            input_dim: size of the last axis of each input step.
            hidden_dim: size of the hidden state and of the cell state.
            use_bias: when True the bias is drawn from the same uniform
                distribution as the weights; otherwise it is all zeros.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.use_bias = use_bias

        bound = np.sqrt(1. / hidden_dim)
        # forward() reads one weight slice per gate, so every parameter
        # array needs _NUM_GATES slices (three is not enough: gate index 3
        # would be out of range).
        # input weights [W_i, W_f, W_g, W_o]
        self.input_weights = np.random.uniform(
            -bound, bound, (self._NUM_GATES, hidden_dim, input_dim))
        # hidden weights [U_i, U_f, U_g, U_o]
        self.hidden_weights = np.random.uniform(
            -bound, bound, (self._NUM_GATES, hidden_dim, hidden_dim))

        self.tanh = Tanh
        self.sigmoid = Sigmoid

        # Caches filled by forward().
        self.gates = None
        self.H = None
        self.C = None

        # bias = [in_bias, fgt_bias, g_bias, out_bias]
        if self.use_bias:
            self.bias = np.random.uniform(-bound, bound, (self._NUM_GATES, hidden_dim))
        else:
            self.bias = np.zeros((self._NUM_GATES, hidden_dim))

    def forward(self, X_in, h_0=None, c_0=None):
        """Run the recurrence over every time step of X_in.

        Args:
            X_in: array indexed as (batch, time step, feature); the feature
                axis is multiplied with the transposed input weights.
            h_0: optional initial hidden state, written to ``H[:, 0, :]``;
                zeros when omitted.
            c_0: optional initial cell state, written to ``C[:, 0, :]``;
                zeros when omitted.

        Returns:
            Tuple ``(H, h_last, c_last)``: ``H`` has shape
            (batch, seq_len + 1, hidden_dim) and holds the initial state at
            index 0 followed by one hidden state per step; ``h_last`` and
            ``c_last`` are the hidden and cell state after the final step.

        Side effects:
            Stores the per-step gate activations in ``self.gates`` and the
            state histories in ``self.H`` and ``self.C``.
        """
        batch_size, seq_len = X_in.shape[0], X_in.shape[1]

        self.H = np.zeros((batch_size, seq_len + 1, self.hidden_dim))
        if h_0 is not None:
            self.H[:, 0, :] = h_0

        self.C = np.zeros((batch_size, seq_len + 1, self.hidden_dim))
        if c_0 is not None:
            self.C[:, 0, :] = c_0

        self.gates = np.zeros((self._NUM_GATES, batch_size, seq_len, self.hidden_dim))

        # Activation applied to each gate, in gate order.
        activations = (self.sigmoid, self.sigmoid, self.tanh, self.sigmoid)

        for t in range(seq_len):
            x_t = X_in[:, t, :]
            h_prev = self.H[:, t, :]

            for g, activation in enumerate(activations):
                pre_activation = (np.dot(x_t, self.input_weights[g, :, :].T)
                                  + np.dot(h_prev, self.hidden_weights[g, :, :].T)
                                  + self.bias[g, :])
                self.gates[g, :, t, :] = activation.forward(pre_activation)

            in_gate, forget_gate, candidate, out_gate = self.gates[:, :, t, :]
            self.C[:, t + 1, :] = forget_gate * self.C[:, t, :] + in_gate * candidate
            self.H[:, t + 1, :] = out_gate * self.tanh.forward(self.C[:, t + 1, :])

        return self.H, self.H[:, seq_len, :], self.C[:, seq_len, :]