In [146]:
import numpy as np
import pandas as pd
from scipy.special import softmax,logsumexp

In [147]:
np.random.seed(42)

In [148]:
with open('Train_Arabic_Digit.txt') as f:
    train_lines = f.readlines()
with open('Test_Arabic_Digit.txt') as f:
    test_lines = f.readlines()

In [149]:
def one_hot_encoding(data):
    num_count = set()
    for i in data:
        num_count.add(i)
    data = np.copy(data)
    zeros = np.zeros((data.shape[0],len(num_count)))
    for i in range(data.shape[0]):
        zeros[i,data[i]] = 1
    return zeros

In [150]:
def generate_sample_and_label(data, lens):
    X = list()
    y = [0]*lens
    for i in range(1,10):
        y += [i]*lens
    sample = list()
    for idx,i in enumerate(data):
        if ((i == data[0] and len(sample)>0) or (idx == len(data)-1)):
            X.append(sample)
        if (i == data[0]):
            sample = list()
        else:
            sample.append(i.strip('\n').split(' '))
    return X,y
train_x,train_y = generate_sample_and_label(train_lines,660)
test_x,test_y = generate_sample_and_label(test_lines,220)
train_y_not = np.copy(np.array(train_y))
train_y = one_hot_encoding(train_y)    
test_y = one_hot_encoding(test_y)

In [151]:
def f(input_vec,method):
    if method == 'softmax':
        return softmax(input_vec)
    if method == 'log_softmax':
        return np.exp(input_vec-logsumexp(input_vec))
    elif method == 'relu':
        return np.maximum(0,input_vec)
def f_back(output_vec,method):
    if method == 'softmax':
        return softmax(output_vec) * (1 - softmax(output_vec).T)
    elif method == 'relu':
        return output_vec > 0
def partial_error(pred, gt, method):
    if method == 'cross_entropy':
        return -gt/(pred)

In [152]:
def cross_entropy(y,y_pre):
  loss=-np.sum(y*np.log(y_pre))
  return loss/float(y_pre.shape[0])

In [153]:
def forward(m,b_h,b_o,W_i,W_h,W_o,test_X):
    pred = list()
    index = [i for i in range(len(test_X))]
    for k_index in index:
        h_t = np.zeros(m)
        X = np.copy(np.array(test_X[k_index]).astype(np.float64))
        t = X.shape[0]
        h_t_list = [h_t]
        o_t_list = [W_o.T@h_t_list[0]+b_o]
        for i in range(1,t+1):
            h_t_list.append(np.copy(f(W_i.T@X[i-1]+W_h.T@h_t_list[i-1]+b_h,'relu')))
            #modification for line 13: nevermind, we will just use the last one and ignore or other outputs!
            o_t_list.append(np.copy(f(W_o.T@h_t_list[-1]+b_o,'log_softmax')))
        y = o_t_list[-1]
        pred.append(y)
    return pred

In [154]:
np.random.seed(42)
m = 32
output_num = 10
input_size = 13
max_iter = 46
eta = 1e-3
model_type = 'single_output'
b_h = np.random.rand(m)/1000
b_o = np.random.rand(output_num)/1000
W_i = np.random.rand(input_size, m)/1000
W_h = np.random.rand(m,m)/1000
W_o = np.random.rand(m,output_num)/1000
r = 0
index = [i for i in range(len(train_x))]
while r <= max_iter:
    print(r)
    train_cross_entropy = 0
    right = 0
    np.random.shuffle(index)
    for k_index in index:
        h_t = np.zeros(m)
        X = np.copy(np.array(train_x[k_index]).astype(np.float64))
        y = np.copy(np.array(train_y[k_index]))
        t = X.shape[0]
        h_t_list = [h_t]
        for i in range(1,t+1):
            h_t_list.append(np.copy(f(W_i.T@X[i-1]+W_h.T@h_t_list[i-1]+b_h,'relu')))
        o_t = np.copy(f(W_o.T@h_t_list[-1]+b_o,'log_softmax'))
        train_cross_entropy += cross_entropy(y,o_t)
        right += (np.argmax(o_t) == np.argmax(y))
        if (model_type == 'single_output'):
            grad_o_t = o_t-y
        grad_h_t = np.multiply(f_back(h_t_list[-1],'relu').reshape(-1,1),(np.dot(W_o,grad_o_t)).reshape(-1,1))
        grad_h_t_list = [np.copy(grad_h_t)]
        for j in range(t-1,0,-1):
            grad_h_t_list.insert(0,np.minimum(1,np.multiply(f_back(h_t_list[j],'relu').reshape(-1,1),((W_h@grad_h_t_list[0]).reshape(-1,1)))))
        grad_b_o,grad_W_o = np.copy(grad_o_t),h_t_list[-1].reshape(-1,1)@grad_o_t.reshape(-1,1).T
        grad_b_h,grad_W_h,grad_W_i = np.sum(grad_h_t_list,axis = 0),np.sum([h_t_list[i-1].reshape(-1,1)@grad_h_t_list[i].reshape(-1,1).T for i in range(1,t)],axis = 0),\
        np.sum([X[i].reshape(-1,1)@grad_h_t_list[i].reshape(-1,1).T for i in range(1,t)],axis = 0)
        b_o = b_o-eta*grad_b_o
        W_o = W_o-eta*grad_W_o
        b_h = b_h-eta*grad_b_h.reshape(b_h.shape)
        W_h = W_h-eta*grad_W_h
        W_i = W_i-eta*grad_W_i
    print(f'The average Cross Entropy on training set is {train_cross_entropy/len(index)}')
    print(f'The average accuracy on training set is {right/len(index)}')
    # pred = forward(m,b_h,b_o,W_i,W_h,W_o,test_x)
    # print(f'The accuracy on testing set is {np.sum([np.argmax(pred[i]) == np.argmax(test_y[i]) for i in range(len(test_y))])/2200}')
    # pred = forward(m,b_h,b_o,W_i,W_h,W_o,train_x)
    # print(np.sum(np.array(pred) == np.array(train_y_not)))
    eta = eta*0.95
    r+=1

0
The average Cross Entropy on training set is 0.2302943453005961
The average accuracy on training set is 0.09757575757575758
1
The average Cross Entropy on training set is 0.23029594279246277
The average accuracy on training set is 0.0946969696969697
2
The average Cross Entropy on training set is 0.23027021256490862
The average accuracy on training set is 0.09803030303030304
3
The average Cross Entropy on training set is 0.22969861653702708
The average accuracy on training set is 0.11924242424242425
4
The average Cross Entropy on training set is 0.22234977460995617
The average accuracy on training set is 0.18742424242424244
5
The average Cross Entropy on training set is 0.1943706155121324
The average accuracy on training set is 0.22727272727272727
6
The average Cross Entropy on training set is 0.18197012018893102
The average accuracy on training set is 0.25666666666666665
7
The average Cross Entropy on training set is 0.1786116972374929
The average accuracy on training set is 0.274848

In [157]:
pred = forward(m,b_h,b_o,W_i,W_h,W_o,test_x)
print(f'The accuracy on testing set is {np.sum([np.argmax(pred[i]) == np.argmax(test_y[i]) for i in range(len(test_y))])/2200}')
print(f'The average cross entropy on testing set is {np.sum([cross_entropy(test_y[i],np.array(pred[i])) for i in range(len(test_y))])/len(test_y)}')

The accuracy on testing set is 0.8218181818181818
The average cross entropy on testing set is 0.06271353707666355
