EPOCH_NUM = 50
BATCH_SIZE = 32
hid_state_size = 128
out_size = 6
T = 150

LR_arr = [0.07]#, 0.1]
hid_layer_size_arr = [64]#16, 32, 64, 128, 256]
alpha_arr = [0.85]#, 0.50 ,0.85]

In [1]:
import h5py as h5
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Import datasets
f = h5.File("assign3_data3.h5", "r")

# Convert them to np array
trainX = np.array(f['trX'])
testX = np.array(f['tstX'])
trainY = np.array(f['trY'])
testY = np.array(f['tstY'])
f.close()

train_sample = np.random.choice(3000, 2700, replace=False)
train_sample = train_sample.reshape((train_sample.shape[0]))
validation_sample = np.array(list(set(range(3000)) - set(train_sample.reshape((2700)))))
X_train = trainX.reshape((trainX.shape[0], trainX.shape[2], trainX.shape[1]))[train_sample]
X_val = trainX.reshape((trainX.shape[0], trainX.shape[2], trainX.shape[1]))[validation_sample]
y_train = trainY[train_sample]
y_val = trainY[validation_sample]
X_test = testX.reshape((testX.shape[0], testX.shape[2], testX.shape[1]))
y_test = testY
X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape
X_train = X_train/4
X_val = X_val/4
X_test = X_test/4

In [3]:
def sigmoid(X):
    return 1 / (1 + np.exp(-1 * X))

def sigmoid_backward(X):
    return np.multiply(X, 1 - X)

def softmax(Z):
    e = np.exp(Z)
    row_sum = np.sum(e, axis=1).reshape((e.shape[0], 1))
    return e / row_sum

def RELU(X):
    return np.multiply(X, X > 0)

def RELU_backward(X):
    return 1*(X > 0)

def tanh(X):
    return ( np.exp(2*X) - 1 ) / ( np.exp(2 * X) + 1)

def tanh_backward(X):
    return 1 - X**2

def unison_shuffled_copies(a, b):
    assert a.shape[0] == b.shape[0]
    p = np.random.permutation(a.shape[0])
    return a[p], b[p]

def initialize_weights(in_size, hid_state_size, hid_layer_size, out_size):
    #https://stackoverflow.com/questions/44883861/initial-bias-values-for-a-neural-network
    W = {}
    U = {}
    b = {}
    
    t_x = (6/(in_size+hid_state_size))**(1/2)
    t_h = (3/hid_state_size)**(1/2)
    W['z'] = np.random.uniform(low=-1*t_h, high=t_h, size=(hid_state_size, hid_state_size))
    U['z'] = np.random.uniform(low=-1*t_x, high=t_x, size=(in_size, hid_state_size))
    b['z'] = np.zeros((1, hid_state_size))
    #np.zeros((1, hid_state_size))
    W['r'] = np.random.uniform(low=-1*t_h, high=t_h, size=(hid_state_size, hid_state_size))
    U['r'] = np.random.uniform(low=-1*t_x, high=t_x, size=(in_size, hid_state_size))
    b['r'] = np.zeros((1, hid_state_size))
    #np.zeros((1, hid_state_size))
    W['h'] = np.random.uniform(low=-1*t_h, high=t_h, size=(hid_state_size, hid_state_size))
    U['h'] = np.random.uniform(low=-1*t_x, high=t_x, size=(in_size, hid_state_size))
    b['h'] = np.zeros((1, hid_state_size))
    
    t = (6/(hid_state_size + hid_layer_size))**(1/2)
    W['1'] = np.random.uniform(low=-1*t, high=t, size=(hid_state_size, hid_layer_size))
    b['1'] = np.zeros((1, hid_layer_size))

    t = (6/(hid_layer_size + out_size))**(1/2)
    W['2'] = np.random.uniform(low=-1*t, high=t, size=(hid_layer_size, out_size))
    b['2'] = np.zeros((1, out_size))
    
    return W, U, b

def predict(X_test, y_test, T, h_init, W, U, b): # Counts true if the true word is in the top ten predictions
    Z, R, H_, H, hidden, outputs = forward(X_test, h_init, W, U, b, T)
    correct = np.argmax(y_test, axis=1)
    predictions = np.argmax(outputs, axis=1)
    return np.sum(correct == predictions) / y_test.shape[0]

def calculate_errors(o, d):
    target_predictions = np.multiply(o, d) # Only target class probs
    target_predictions = np.sum(target_predictions, axis=1) 
    return -1 * np.sum(np.log(target_predictions)) # -yi.log(pi)

In [4]:
def cell_forward(h_t_1, x_t, W, U, b):
    z_t = sigmoid(x_t.dot(U['z']) + h_t_1.dot(W['z']) + b['z'])
    r_t = sigmoid(x_t.dot(U['r']) + h_t_1.dot(W['r']) + b['r'])
    h_t_ = tanh(x_t.dot(U['h']) + np.multiply(r_t, h_t_1.dot(W['h'])) + b['h'])
    h_t = np.multiply(1-z_t, h_t_1) + np.multiply(z_t, h_t_)
    return z_t, r_t, h_t_, h_t

<img src='https://image.slidesharecdn.com/dlsl2017d2l2recurrentneuralnetworksi-170125171004/95/recurrent-neural-networks-i-d2l2-deep-learning-for-speech-and-language-upc-2017-31-638.jpg?cb=1485365064'>

In [5]:
def cell_backward(delta_h_t, h_t_1, z_t, r_t, h_t_, x_t, W, U, b):
    U_grad = {}
    W_grad = {}
    b_grad = {}
    
    e = np.multiply(delta_h_t, z_t)
    delta_h_t_ = np.multiply(e, tanh_backward(h_t_))
    U_grad['h'] = x_t.transpose().dot(delta_h_t_)
    W_grad['h'] = h_t_1.transpose().dot(np.multiply(delta_h_t_, r_t))
    b_grad['h'] = np.ones((x_t.shape[0], 1)).transpose().dot(delta_h_t_)
    
    e = np.multiply(delta_h_t_, h_t_1.dot(W['h']))
    delta_r_t = np.multiply(e, sigmoid_backward(r_t))
    U_grad['r'] = x_t.transpose().dot(delta_r_t)
    W_grad['r'] = h_t_1.transpose().dot(delta_r_t)
    b_grad['r'] = np.ones((x_t.shape[0], 1)).transpose().dot(delta_r_t)
    
    e = np.multiply(delta_h_t, h_t_ - h_t_1)
    #e = np.multiply(delta_h_t, -1 * h_t_1)
    #e = -1 * delta_h_t
    delta_z_t = np.multiply(e, sigmoid_backward(z_t))
    U_grad['z'] = x_t.transpose().dot(delta_z_t)
    W_grad['z'] = h_t_1.transpose().dot(delta_z_t)
    b_grad['z'] = np.ones((x_t.shape[0], 1)).transpose().dot(delta_z_t)
    
    e = delta_z_t.dot(W['z'].transpose())
    e += np.multiply(r_t, delta_h_t_).dot(W['h'].transpose())
    e += np.multiply(delta_h_t, 1 - z_t)
    e += delta_r_t.dot(W['r'].transpose()) 
    delta_h_t_1 = e
    return delta_h_t_1, W_grad, U_grad, b_grad
                     
def backward(x, y, h_init, H, Z, R, H_, hidden, out, W, U, b, T):
    W_grad = {}
    U_grad = {}
    b_grad = {}
    delta_y = out - y
    W_grad['2'] = hidden.transpose().dot(delta_y)
    b_grad['2'] = np.ones((hidden.shape[0], 1)).transpose().dot(delta_y)
    
    e = delta_y.dot(W['2'].transpose())
    delta_hidden = np.multiply(e, RELU_backward(hidden))
    W_grad['1'] = H[:,:,-1].transpose().dot(delta_hidden)
    b_grad['1'] = np.ones((H[:,:,-1].shape[0], 1)).transpose().dot(delta_hidden)
    
    cur_delta_h = delta_hidden.dot(W['1'].transpose())
    
    W_grad['h'] = 0
    W_grad['r'] = 0
    W_grad['z'] = 0
    U_grad['h'] = 0
    U_grad['r'] = 0
    U_grad['z'] = 0
    b_grad['h'] = 0
    b_grad['r'] = 0
    b_grad['z'] = 0
    
    for t in range(T-1, 0, -1):
        cur_delta_h, cur_W_grad, cur_U_grad, cur_b_grad = cell_backward(cur_delta_h, H[:,:,t-1], Z[:,:,t], R[:,:,t], H_[:,:,t], x[:,:,t], W, U, b)
        W_grad['h'] += cur_W_grad['h']
        W_grad['r'] += cur_W_grad['r']
        W_grad['z'] += cur_W_grad['z']
        U_grad['h'] += cur_U_grad['h']
        U_grad['r'] += cur_U_grad['r']
        U_grad['z'] += cur_U_grad['z']
        b_grad['h'] += cur_b_grad['h']
        b_grad['r'] += cur_b_grad['r']
        b_grad['z'] += cur_b_grad['z']
    
    h_grad, cur_W_grad, cur_U_grad, cur_b_grad = cell_backward(cur_delta_h, np.ones((x.shape[0], 1)).dot(h_init), Z[:,:,0], R[:,:,0], H_[:,:,0], x[:,:,0], W, U, b)
    W_grad['h'] += cur_W_grad['h']
    W_grad['r'] += cur_W_grad['r']
    W_grad['z'] += cur_W_grad['z']
    U_grad['h'] += cur_U_grad['h']
    U_grad['r'] += cur_U_grad['r']
    U_grad['z'] += cur_U_grad['z']
    b_grad['h'] += cur_b_grad['h']
    b_grad['r'] += cur_b_grad['r']
    b_grad['z'] += cur_b_grad['z']
    
    h_grad = np.sum(h_grad, axis=0).reshape((1, h_init.shape[1]))
    
    n = x.shape[0]
    for key in W_grad.keys():
        W_grad[key] /= n
    for key in U_grad.keys():
        U_grad[key] /= n
    for key in b_grad.keys():
        b_grad[key] /= n

    h_grad /= n
                     
    return W_grad, U_grad, b_grad, h_grad

In [6]:
def forward(x, h_0, W, U, b, T):
    H = np.zeros((x.shape[0], h_0.shape[1], T))
    Z = np.zeros((x.shape[0], h_0.shape[1], T))
    R = np.zeros((x.shape[0], h_0.shape[1], T))
    H_ = np.zeros((x.shape[0], h_0.shape[1], T))
    
    cur_h = h_0
    for t in range(T):
        z_t, r_t, h_t_, cur_h = cell_forward(cur_h, x[:, :, t], W, U, b)
        H[:,:,t] = cur_h
        Z[:,:,t] = z_t
        R[:,:,t] = r_t
        H_[:,:,t] = h_t_
    
    hidden = RELU(cur_h.dot(W['1']) + b['1'])
    O = softmax(hidden.dot(W['2']) + b['2'])
    
    return Z, R, H_, H, hidden, O

In [7]:
def train(EPOCH_NUM, BATCH_SIZE, LR, alpha, hid_state_size, hid_layer_size, out_size, T, X_train, X_val, y_train, y_val):
    print(f'Train for epoch:{EPOCH_NUM}, batch size:{BATCH_SIZE}, lr: {LR}')

    W, U, b = initialize_weights(3, hid_state_size, hid_layer_size, out_size) # initialize the weights

    # CSE records
    train_CSE = []
    val_CSE = []
    val_acc = []
    delta_prev = None # For momentum
    batch_num = int(np.ceil(X_train.shape[0] / BATCH_SIZE))
    h_init = np.zeros((1, hid_state_size))
    
    patience = 2
    
    for epoch in range(EPOCH_NUM):
        if epoch > 0 and epoch % 1 == 0: # Report metrics
            # TODO
            train_accuracy = predict(X_train, y_train, T, h_init, W, U, b) # Counts true if the true word is in the top ten predictions
            val_accuracy = predict(X_val, y_val, T, h_init, W, U, b)
            print('Epoch:', epoch)
            print('Train CSE:', train_CSE[-1])
            print('Validation CSE:', val_CSE[-1])
            print('Train accuracy:', train_accuracy)
            print('Validation accuracy:', val_accuracy)
        # Shuffle dataset
        shuffled_X, shuffled_y = unison_shuffled_copies(X_train, y_train)

        totalCSE = 0

        beginning = 0

        for i in range(batch_num):
            if i < batch_num - 1:
                X = shuffled_X[beginning: beginning + BATCH_SIZE] 
                y = shuffled_y[beginning: beginning + BATCH_SIZE]
            else:
                X = shuffled_X[beginning: ] 
                y = shuffled_y[beginning: ]
            beginning += BATCH_SIZE
            
            # TODO
            ####### Calculate activations and errors ############
            Z, R, H_, H, hidden, O = forward(X, h_init, W, U, b, T) # Forward pass
            totalCSE += calculate_errors(O, y) # Loss calculation
            #####################################################
            W_grad, U_grad, b_grad, h_grad = backward(X, y, h_init, H, Z, R, H_, hidden, O, W, U, b, T) # Backpropagation algorithm
            ##### Delta calculations for momentum ######
            delta_W_h = -1 * LR * W_grad['h']
            delta_W_z = -1 * LR * W_grad['z']
            delta_W_r = -1 * LR * W_grad['r']
            delta_U_h = -1 * LR * U_grad['h']
            delta_U_z = -1 * LR * U_grad['z']
            delta_U_r = -1 * LR * U_grad['r']
            delta_b_h = -1 * LR * b_grad['h']
            delta_b_z = -1 * LR * b_grad['z']
            delta_b_r = -1 * LR * b_grad['r']
            delta_W_1 = -1 * LR * W_grad['1']
            delta_b_1 = -1 * LR * b_grad['1']
            delta_W_2 = -1 * LR * W_grad['2']
            delta_b_2 = -1 * LR * b_grad['2']
            delta_h = -1 * LR * h_grad
            
            if delta_prev != None:
                delta_W_h += alpha * delta_prev['W_h']
                delta_W_z += alpha * delta_prev['W_z']
                delta_W_r += alpha * delta_prev['W_r']
                delta_U_h += alpha * delta_prev['U_h']
                delta_U_z += alpha * delta_prev['U_z']
                delta_U_r += alpha * delta_prev['U_r']
                delta_b_h += alpha * delta_prev['b_h']
                delta_b_z += alpha * delta_prev['b_z']
                delta_b_r += alpha * delta_prev['b_r']
                delta_W_1 += alpha * delta_prev['W_1']
                delta_W_2 += alpha * delta_prev['W_2']
                delta_b_1 += alpha * delta_prev['b_1']
                delta_b_2 += alpha * delta_prev['b_2']
                delta_h += alpha * delta_prev['h_init']
            delta_prev = {'W_h': delta_W_h, 'W_z': delta_W_z, 'W_r': delta_W_r, 'U_h': delta_U_h, 'U_z': delta_U_z, 'U_r': delta_U_r, 'b_h': delta_b_h, 'b_z': delta_b_z, 'b_r': delta_b_r, 'W_1': delta_W_1, 'W_2': delta_W_2, 'b_1': delta_b_1, 'b_2': delta_b_2, 'h_init': delta_h}
            ###### Update weights ###############
            W['h'] += delta_W_h
            W['z'] += delta_W_z
            W['r'] += delta_W_r
            U['h'] += delta_U_h
            U['z'] += delta_U_z
            U['r'] += delta_U_r
            b['h'] += delta_b_h
            b['z'] += delta_b_z
            b['r'] += delta_b_r
            W['1'] += delta_W_1
            W['2'] += delta_W_2
            b['1'] += delta_b_1
            b['2'] += delta_b_2
            h_init += delta_h
            #####################################

        train_CSE.append(totalCSE / X_train.shape[0])
        Z, R, H_, H, hidden, O = forward(X_val, h_init, W, U, b, T) # Forward pass
        val_CSE.append(calculate_errors(O, y_val) / X_val.shape[0])
        val_acc.append(predict(X_val, y_val, T, h_init, W, U, b))
        """if len(val_CSE) > 1: # Early stopping if validation loss starts to increase or it is stationary
            validation_loss_change = (val_CSE[-1] - val_CSE[-2]) / val_CSE[-2]
            if validation_loss_change > -1*0.001:
                print('Finished at epoch', epoch)
                break"""

    return train_CSE, val_CSE, val_acc, W, U, b, h_init

In [9]:
EPOCH_NUM = 50
BATCH_SIZE = 32
hid_state_size = 128
out_size = 6
T = 150

LR_arr = [0.07]#, 0.1]
hid_layer_size_arr = [64]#16, 32, 64, 128, 256]
alpha_arr = [0.85]#, 0.50 ,0.85]

#results_train = {}
for LR in LR_arr:
    for hid_layer_size in hid_layer_size_arr:
        for alpha in alpha_arr:
            result = train(EPOCH_NUM, BATCH_SIZE, LR, alpha, hid_state_size, hid_layer_size, out_size, T, X_train, X_val, y_train, y_val)
            #results_train[f'{LR}-{hid_layer_size}-{alpha}'] = a

Train for epoch:50, batch size:32, lr: 0.07
Epoch: 1
Train CSE: 1.686240512438913
Validation CSE: 1.6229469420561702
Train accuracy: 0.32
Validation accuracy: 0.37333333333333335
Epoch: 2
Train CSE: 1.5086745616557065
Validation CSE: 1.419842476158575
Train accuracy: 0.3548148148148148
Validation accuracy: 0.39
Epoch: 3
Train CSE: 1.373836245838677
Validation CSE: 1.272731160421746
Train accuracy: 0.46814814814814815
Validation accuracy: 0.4866666666666667
Epoch: 4
Train CSE: 1.268378959328589
Validation CSE: 1.2213311130074709
Train accuracy: 0.5366666666666666
Validation accuracy: 0.5266666666666666
Epoch: 5
Train CSE: 1.1521649209031646
Validation CSE: 1.205376273051254
Train accuracy: 0.5466666666666666
Validation accuracy: 0.5366666666666666
Epoch: 6
Train CSE: 1.078956421413426
Validation CSE: 1.1345607736301127
Train accuracy: 0.585925925925926
Validation accuracy: 0.5333333333333333
Epoch: 7
Train CSE: 1.0981879422690792
Validation CSE: 1.0348406922983424
Train accuracy: 0.5988

In [10]:
import joblib
joblib.dump(result, 'gru_best.joblib')

['gru_best.joblib']

In [11]:
def plot_graph(funcs, labels, title, xlabel, ylabel):
    colors = ['black', 'blue', 'red', 'green']
    min_epoch = 50
    for func in funcs:
        if len(func) < min_epoch:
            min_epoch = len(func)
    for i, func in enumerate(funcs):
        plt.plot(range(min_epoch), func[:min_epoch], color=colors[i], label=labels[i])
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()

In [15]:
def confusion_matrix(X, y, T, h_init, W, U, b): # Counts true if the true word is in the top ten predictions
    Z, R, H_, H, hidden, outputs = forward(X, h_init, W, U, b, T)
    correct = np.argmax(y, axis=1)
    predictions = np.argmax(outputs, axis=1)
    conf_matrix = np.zeros((6,6))
    for i, prediction in enumerate(predictions):
        conf_matrix[prediction, correct[i]] += 1
    #print('Confusion matrix:\n')
    #print(conf_matrix)
    return conf_matrix, np.sum(correct == predictions) / y.shape[0]

In [19]:
plot_graph([result[1][:33]], ['Validation error'], 'Validation CSE over Epochs', 'Epoch', 'CSE Error')

In [20]:
cm,acc = confusion_matrix(X_train, y_train, 150, result[6], result[3], result[4], result[5])
print(cm)

[[416.   0.   1.   0.  29.   9.]
 [  4. 449.   0.   0.   1.   1.]
 [  0.   0. 421.   0.   0.   0.]
 [  0.   0.   3. 460.   0.   0.]
 [ 12.   0.  15.   2. 420.   4.]
 [ 14.   0.   0.   0.   5. 434.]]


In [21]:
cm,acc = confusion_matrix(X_test, y_test, 150, result[6], result[3], result[4], result[5])
print(cm)
print(acc)

[[ 69.   6.   0.   0.  31.  12.]
 [  1.  92.   0.   0.   0.   7.]
 [  0.   0. 100.   3.   0.   0.]
 [  0.   0.   0.  85.   3.   0.]
 [ 25.   0.   0.  12.  51.   5.]
 [  5.   2.   0.   0.  15.  76.]]
0.7883333333333333


In [8]:
EPOCH_NUM = 50
BATCH_SIZE = 32
hid_state_size = 128
out_size = 6
T = 150

LR_arr = [0.1]#, 0.1]
hid_layer_size_arr = [64]#16, 32, 64, 128, 256]
alpha_arr = [0.85]#, 0.50 ,0.85]

#results_train = {}
for LR in LR_arr:
    for hid_layer_size in hid_layer_size_arr:
        for alpha in alpha_arr:
            result = train(EPOCH_NUM, BATCH_SIZE, LR, alpha, hid_state_size, hid_layer_size, out_size, T, X_train, X_val, y_train, y_val)
            #results_train[f'{LR}-{hid_layer_size}-{alpha}'] = a

Train for epoch:50, batch size:32, lr: 0.1
Epoch: 1
Train CSE: 1.6664947396908045
Validation CSE: 1.5186891852452986
Train accuracy: 0.3322222222222222
Validation accuracy: 0.36333333333333334
Epoch: 2
Train CSE: 1.4540187800341275
Validation CSE: 1.362450600512562
Train accuracy: 0.3874074074074074
Validation accuracy: 0.4
Epoch: 3
Train CSE: 1.3249429978844256
Validation CSE: 1.2745676234673324
Train accuracy: 0.48518518518518516
Validation accuracy: 0.4633333333333333
Epoch: 4
Train CSE: 1.6558612297090964
Validation CSE: 1.8149323082314375
Train accuracy: 0.1711111111111111
Validation accuracy: 0.12666666666666668
Epoch: 5
Train CSE: 1.795034015897521
Validation CSE: 1.8028969412826934
Train accuracy: 0.1711111111111111
Validation accuracy: 0.12666666666666668
Epoch: 6
Train CSE: 1.7968226510479983
Validation CSE: 1.7961654728215157
Train accuracy: 0.16296296296296298
Validation accuracy: 0.2
Epoch: 7
Train CSE: 1.7970255099791839
Validation CSE: 1.7843186125312427
Train accuracy: 

KeyboardInterrupt: 

In [182]:
def check(EPOCH_NUM, BATCH_SIZE, LR, alpha, hid_state_size, hid_layer_size, out_size, T, X_train, X_val, y_train, y_val):
    W, U, b = initialize_weights(3, hid_state_size, hid_layer_size, out_size) # initialize the weights
    h_init = np.random.uniform(low=-6, high=6, size=(1, hid_state_size))
    #c_init = np.random.uniform(low=-6, high=6, size=(1, hid_state_size))
    
    X = X_train[:32,:,:T]
    y = y_train[:32]
    epsilon = 0.0000001
    
    W_epsilon = {}
    b_epsilon = {}
    b_plus = {}
    W_plus = {}
    U_epsilon = {}
    U_plus = {}
    
    for key in W.keys():
        #print(key)
        W_epsilon[key] = W[key]
        W_plus[key] = W[key]
    for key in U.keys():
        U_epsilon[key] = U[key]
        U_plus[key] = U[key]
    for key in b.keys():
        b_epsilon[key] = b[key]
        b_plus[key] = b[key]
        
    h_epsilon = h_init- epsilon
    h_plus = h_init + epsilon
    #print(c_epsilon)
    #print(c_plus)
    #print(c_init)
    #print(np.sum(W['1']))
    #cur_key = 'z'
    #b_epsilon[cur_key] = b_epsilon[cur_key] - epsilon
    #b_plus[cur_key]= b_plus[cur_key] + epsilon
    #print(epsilon)
    #print(np.sum(W_epsilon['1']), np.sum(W_plus['1']), np.sum(W['1']))
    Z, R, H_, H, hidden, O = forward(X, h_epsilon, W_epsilon, U_epsilon, b_epsilon, T) # Forward pass
    J_1 = calculate_errors(O, y) # Loss calculation
    Z, R, H_, H, hidden, O = forward(X, h_plus, W_plus, U_plus, b_plus, T) # Forward pass
    J_2 = calculate_errors(O, y) # Loss calculation
    Z, R, H_, H, hidden, O = forward(X, h_init, W, U, b, T) # Forward pass
    W_grad, U_grad, b_grad, h_grad = backward(X, y, h_init, H, Z, R, H_, hidden, O, W, U, b, T) # Backpropagation algorithm
    expected = np.sum(h_grad)
    real = (J_2/32 - J_1/32)/(2*epsilon)
    print((real-expected)/(real+expected))
    print(real)
    print(expected)

check(EPOCH_NUM, BATCH_SIZE, LR, alpha, hid_state_size, hid_layer_size, out_size,T, X_train, X_val, y_train, y_val)

-1.0
0.0
-4.240263848881398e-20


In [2]:
EPOCH_NUM = 100
BATCH_SIZE = 32
hid_state_size = 128
out_size = 6
T = 150

LR = 0.01
hid_layer_size = 16 #16, 32, 64, 128, 256]
alpha = 0#, 0.50 ,0.85]

In [None]:
import joblib
rnn_res = joblib.load('rnn_best.joblib')
lstm_res = joblib.load('lstm_best.joblib')
gru_res = joblib.load('gru_best.joblib')

plot_graph([rnn_res[1]], ['RNN'], 'Validation CSE over Epochs', 'Epoch', 'CSE Error')