In [None]:
import numpy as np
from sympy import Symbol
from collections import deque

In [None]:
# Hyper-parameters for the network trained in the cells below.
name = 1 # data pick: selects 'data/data<name>_train.csv'
node = [5, 5] # width of each hidden layer, first hidden layer first
hid_num = len(node) # number of hidden layers; derived so it cannot drift out of sync with `node`
iter_num = 1000 # number of training iterations
show_time = 50 # the loss is printed every `show_time` iterations
alpha = 1 # gradient extent (step size multiplying the weight gradient)

In [None]:
# Load the training data: columns 0-1 are the features, column 2 is the class label.
# ndmin = 2 keeps a file with a single row two-dimensional so the slicing below still works.
train = np.loadtxt('data/data' + str(name) + '_train.csv', ndmin = 2)
N = train.shape[0]
features = train[:, 0:2]
# One scalar mean / std taken over both feature columns together (no axis argument).
norm_mean = features.mean()
norm_std = features.std()
X_train = (features - norm_mean) / norm_std # normalization
X_train = np.concatenate((X_train, np.ones((N, 1))), axis = 1) # extra column of ones acts as the bias input
Y_temp = train[:, 2:3] # N x 1 column of raw labels
label = np.unique(Y_temp) # sorted distinct labels
n_class = len(label)
# Rebuild `node` from its hidden-layer part instead of appending in place:
# re-running this cell would otherwise add another output entry every time.
node = node[:hid_num] + [n_class]
# One-hot targets: comparing the N x 1 labels with the (n_class,) label vector
# broadcasts to N x n_class, with a 1.0 where sample n belongs to class k.
Y_train = (Y_temp == label).astype(float)

In [None]:
def softmax(X, axis = None):
    """Return the softmax of X, normalised over `axis`.

    Parameters
    ----------
    X : array_like
        Input scores.
    axis : None or int, optional
        None (default) normalises over every element, so the whole result sums to 1.
        An integer normalises along that axis: 0 makes every column of a 2-D
        array sum to 1, 1 makes every row sum to 1.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as X holding the normalised exponentials.

    Notes
    -----
    The maximum along `axis` is subtracted before exponentiating. Softmax is
    unchanged by such a shift, but it keeps np.exp from overflowing to inf
    (which would turn the result into NaN) for large scores.
    """
    X = np.asarray(X)
    # np.max raises on empty input, so only shift when there is something to shift.
    shift = np.max(X, axis = axis, keepdims = True) if X.size else 0
    X_e = np.exp(X - shift)
    # keepdims lets the sum broadcast against X_e for any axis, not just 2-D rows.
    return X_e / np.sum(X_e, axis = axis, keepdims = True)

In [None]:
def SMpCE(X, Y, axis = None):
    """Softmax plus cross-entropy: return the element-wise terms -(Y * log(softmax(X))).

    Parameters
    ----------
    X : array_like
        Input scores (the values fed to the softmax).
    Y : array_like or scalar
        Target weights, broadcast against X.
    axis : None or int, optional
        Axis over which the softmax is normalised; None normalises over
        every element of X.

    Returns
    -------
    numpy.ndarray
        The un-summed cross-entropy terms, broadcast shape of X and Y.

    Notes
    -----
    log(softmax(X)) is evaluated as (X - max) - log(sum(exp(X - max))).
    This avoids the overflow of exp(X) for large scores, and avoids taking
    log(0) (and then 0 * -inf = NaN) when a probability underflows to zero.
    """
    X = np.asarray(X)
    # np.max raises on empty input, so only shift when there is something to shift.
    shift = np.max(X, axis = axis, keepdims = True) if X.size else 0
    shifted = X - shift
    log_norm = np.log(np.sum(np.exp(shifted), axis = axis, keepdims = True))
    return -(Y * (shifted - log_norm))

In [None]:
def relu(X):
    """Rectified linear unit: element-wise maximum of 0 and X.

    Negative entries become 0; non-negative entries pass through unchanged.
    """
    rectified = np.maximum(0, X)
    return rectified

In [None]:
def deri_cal(func_aim, X, Y, axis = None, scale = 1000):
    """Central finite-difference estimate of the derivative of `func_aim` at X.

    `func_aim` is called as func_aim(X, Y, axis). The step is h = 1 / scale and
    the estimate is (f(X + h) - f(X - h)) / (2 * h).

    Every entry of X is shifted by the same h at once, so for a function that
    mixes entries this is the derivative along the all-ones direction, not a
    per-entry partial derivative.
    """
    step = 1 / scale
    ahead = func_aim(X + step, Y, axis)
    behind = func_aim(X - step, Y, axis)
    return (ahead - behind) * scale / 2

In [None]:
# train
# Full-batch gradient descent on a ReLU network with a softmax cross-entropy loss.
layer_num = hid_num + 1 # hidden+output
# Create every weight matrix up front (inputs-of-layer x node[i]) rather than lazily
# inside a bare try/except, which would also have swallowed unrelated errors.
W_all = []
fan_in = X_train.shape[1]
for i in range(layer_num):
    W_all.append(np.random.normal(0.0, 1 / np.sqrt(N), (fan_in, node[i])))
    fan_in = node[i]
for j in range(iter_num):
    a_all = [] # a_all[i] is the input fed to layer i; the softmax output is appended last
    z_all = [] # z_all[i] is the pre-activation of layer i
    # Forward Propagation
    x_in = X_train
    for i in range(layer_num):
        a_all.append(x_in)
        y_temp = np.dot(x_in, W_all[i]) # N x node[i]
        z_all.append(y_temp)
        y_out = relu(y_temp) # N x node[i]; ReLU is applied on the output layer as well
        x_in = y_out
    prob = softmax(y_out, 1) # row-wise class probabilities
    a_all.append(prob)
    # Cross-entropy against the one-hot targets, summed over samples and classes.
    loss = np.sum(SMpCE(y_out, Y_train, 1))
    # Back Propagation
    # For softmax + cross-entropy with one-hot targets, d(loss)/d(y_out) = prob - Y_train.
    # The (z > 0) mask is the ReLU derivative of the output layer.
    delta = (z_all[-1] > 0) * (prob - Y_train) # N x node[layer_num - 1]
    for i in range(layer_num - 1, -1, -1):
        grad = np.dot(np.transpose(a_all[i]), delta) # same shape as W_all[i]
        if i > 0:
            # Propagate through the weights used in the forward pass, before they are renewed.
            delta = (z_all[i - 1] > 0) * np.dot(delta, np.transpose(W_all[i])) # N x node[i - 1]
        # renew W
        # NOTE(review): grad is summed over all N samples (not averaged), so the
        # effective step grows with N; alpha may need to be well below 1 - confirm.
        W_all[i] = W_all[i] - alpha * grad
    if j % show_time == 0:
        print('iteration = ' + str(j) + ',' + 'loss = ' + str(loss) + '\n')