In [1]:
import pandas as pd
import numpy as np

In [2]:
path = "PIDD.xlsx"

In [3]:
def load_data(path):
    train_data_ori = pd.read_excel(path, sheet_name='traindata', header=None)
    train_label = np.array(train_data_ori.iloc[:, -1])
    train_data = np.array(train_data_ori)[:, :-1]
    test_data_ori = pd.read_excel(path, sheet_name='testdata', header=None)
    test_label = np.array(test_data_ori.iloc[:, -1])
    test_data = np.array(test_data_ori)[:, :-1]
    
#     train_mean = train_data.mean(axis=0)
#     test_mean = test_data.mean(axis=0)
#     train_var = train_data.var(axis=0)
#     test_var = test_data.var(axis=0)
#     train_data = (train_data - train_mean) / train_var
#     test_data = (test_data - test_mean) / test_var
    
    train_min = train_data.min(axis=0)
    test_min = test_data.min(axis=0)
    train_max = train_data.max(axis=0)
    test_max = test_data.max(axis=0)
    train_data = (train_data + train_min) / (train_max - train_min)
    test_data = (test_data + test_min) / (test_max - test_min)
    
    return train_data, train_label, test_data, test_label

In [4]:
train_data, train_label, test_data, test_label = load_data(path)

In [5]:
class FC:
    def __init__(self, W, b, alpha_0, decay, epoch_drop):
        self.W = W.copy()
        self.b = b.copy()
        self.alpha_0 = alpha_0
        self.decay = decay
        self.epoch_drop = epoch_drop
        self.count = 0

    def forward(self, x):
        self.x = x.copy()
        self.m, self.n = x.shape
        return np.dot(self.x, self.W) + self.b

    def backprop(self, back_grad):
        self.grad_W = np.dot(self.x.T, back_grad)
        self.grad_b = np.dot(np.ones(self.m), back_grad)
        self.grad = np.dot(back_grad, self.W.T)
        return self.grad

    def l_rate(self):
        lrate = self.alpha_0 * (self.decay ** (np.floor((1 + self.count) / self.epoch_drop)))
        self.count += 1
        return lrate
    
    def update(self):
        alpha = self.l_rate()
        self.W -= alpha * self.grad_W
        self.b -= alpha * self.grad_b

In [6]:
class Sigmoid:
    def forward(self, x):
        self.x = x.copy()
        self.sig_res = 1 / (1 + np.exp(-x))
        return self.sig_res
    
    def backprop(self, back_grad):
        grad = back_grad * self.sig_res * (1 - self.sig_res)
        return grad

In [7]:
class Relu:
    def forward(self, x):
        self.x = x.copy()
        return np.maximum(x, 0)

    def backprop(self, back_grad):
        grad = back_grad.copy()
        grad[self.x < 0] = 0
        return grad

In [8]:
class Leaky_Relu:
    def forward(self, x):
        self.x = x.copy()
        return np.maximum(x, self.x * 0.01)

    def backprop(self, back_grad):
        grad = back_grad.copy()
        grad[self.x < 0] = grad[self.x < 0] * 0.01
        return grad

In [9]:
class Tanh:
    def forward(self, x):
        self.x = x.copy()
        self.tanh = np.tanh(x)
        return self.tanh
    
    def backprop(self, back_grad):
        grad = back_grad * (1 - self.tanh ** 2)
        return grad

In [10]:
class Arctan:
    def forward(self, x):
        self.x = x.copy()
        return np.arctan(self.x)
    
    def backprop(self, back_grad):
        grad = back_grad / (1 + self.x ** 2)
        return grad

In [11]:
class SoftPlus:
    def forward(self, x):
        self.x = x.copy()
        return np.log(1 + np.exp(self.x))
    
    def backprop(self, back_grad):
        grad = back_grad / (1 + np.exp(-self.x))
        return grad

In [12]:
class SoftSign:
    def forward(self, x):
        self.x = x.copy()
        return self.x / (1 + np.abs(self.x))
    
    def backprop(self, back_grad):
        grad = back_grad / (1 + np.abs(self.x) ** 2)
        return grad

In [13]:
class Softmax:
    def forward(self, x, y):
        self.x = (x.copy() - x.max(axis=1).reshape(-1, 1))
        self.y = y.copy()
        self.m, self.n = self.x.shape
        self.denom = np.sum(np.exp(x), axis=1).reshape((-1, 1))
        self.softmax = np.exp(x) / self.denom
        loss = 0
        for i in range(self.m):
            loss -= np.log(self.softmax[i, y[i]])
        return loss / self.m

    def dirac(self, a, b):
        return 1 if a == b else 0
    
    def backprop(self):
        grad = np.zeros([self.m, self.n])
        for i in range(self.m):
            for j in range(self.n):
                grad[i, j] = (self.softmax[i, j] - self.dirac(j, self.y[i])) / self.m
        return grad

In [14]:
def bp(train_data, 
       train_label, 
       test_data, 
       test_label, 
       epochs, 
       activation, 
       alpha_0, 
       decay, 
       epochs_drop, 
       result_require=False):
    
    W1 = np.random.randn(8, 17) / np.sqrt(6)
    b1 = np.zeros(17)
    W2 = np.random.randn(17, 7) / np.sqrt(6)
    b2 = np.zeros(7)
    W3 = np.random.randn(7, 2) / np.sqrt(6)
    b3 = np.random.randn(2)
    
    activation_function_dict = {
        "arctan"   : Arctan,
        "l_relu"   : Leaky_Relu, 
        "relu"     : Relu, 
        "sigmoid"  : Sigmoid, 
        "tanh"     : Tanh, 
        "softplus" : SoftPlus,
        "softsign" : SoftSign
    }
    
    fc1 = FC(W1, b1, alpha_0, decay, epochs_drop)
    act_f1 = activation_function_dict[activation]()
    fc2 = FC(W2, b2, alpha_0, decay, epochs_drop)
    act_f2 = activation_function_dict[activation]()
    fc3 = FC(W3, b3, alpha_0, decay, epochs_drop)
    softmax = Softmax()

    # TRAINING BEGIN
    for i in range(1, epochs + 1):
        h1 = fc1.forward(train_data)
        h2 = act_f1.forward(h1)
        h3 = fc2.forward(h2)
        h4 = act_f2.forward(h3)
        h5 = fc3.forward(h4)
        loss = softmax.forward(h5, train_label)

        if i % (epochs / 5) == 0:
            print("After %d/%d epochs, loss : %f" % (i, epochs, loss))

        h5_grad = softmax.backprop()
        h4_grad = fc3.backprop(h5_grad)
        fc3.update()
        h3_grad = act_f2.backprop(h4_grad)
        h2_grad = fc2.backprop(h3_grad)
        fc2.update()
        h1_grad = act_f1.backprop(h2_grad)
        x_grad = fc1.backprop(h1_grad)  # x_grad is useless in this supervised learning
        fc1.update()
    # TRAINING FINISH
    
    train_h1 = fc1.forward(train_data)
    train_h2 = act_f1.forward(train_h1)
    train_h3 = fc2.forward(train_h2)
    train_h4 = act_f2.forward(train_h3)
    train_h5 = fc3.forward(train_h4)
    train_pred = np.argmax(train_h5, 1)
    train_acc = np.mean(train_pred == train_label)
    print('train acc: ', train_acc)
    
    test_h1 = fc1.forward(test_data)
    test_h2 = act_f1.forward(test_h1)
    test_h3 = fc2.forward(test_h2)
    test_h4 = act_f2.forward(test_h3)
    test_h5 = fc3.forward(test_h4)
    test_pred = np.argmax(test_h5, 1)
    test_acc = np.mean(test_pred == test_label)
    print('test acc: ', test_acc)
    
    if result_require == True:
        return test_pred

In [15]:
activation_function_list = [
    "sigmoid", 
    "tanh", 
    "arctan", 
    "relu", 
    "l_relu", 
    "softplus",
    "softsign"
]

for method in activation_function_list:
    print("Method : %s" % method)
    bp(train_data, 
       train_label, 
       test_data, 
       test_label, 
       epochs=1000,
       activation=method, 
       alpha_0=0.05, 
       decay=0.99, 
       epochs_drop=100)
    print("------------------------------------")

Method : sigmoid
After 200/1000 epochs, loss : 0.643664
After 400/1000 epochs, loss : 0.643231
After 600/1000 epochs, loss : 0.642791
After 800/1000 epochs, loss : 0.642332
After 1000/1000 epochs, loss : 0.641844
train acc:  0.65625
test acc:  0.625
------------------------------------
Method : tanh
After 200/1000 epochs, loss : 0.580397
After 400/1000 epochs, loss : 0.503426
After 600/1000 epochs, loss : 0.468613
After 800/1000 epochs, loss : 0.457285
After 1000/1000 epochs, loss : 0.453795
train acc:  0.784375
test acc:  0.6875
------------------------------------
Method : arctan
After 200/1000 epochs, loss : 0.579688
After 400/1000 epochs, loss : 0.493039
After 600/1000 epochs, loss : 0.461288
After 800/1000 epochs, loss : 0.455138
After 1000/1000 epochs, loss : 0.453355
train acc:  0.78125
test acc:  0.6640625
------------------------------------
Method : relu
After 200/1000 epochs, loss : 0.613596
After 400/1000 epochs, loss : 0.550365
After 600/1000 epochs, loss : 0.507724
After 

In [16]:
res = bp(train_data, 
         train_label, 
         test_data, 
         test_label, 
         epochs=1000,
         activation="l_relu", 
         alpha_0=0.1, 
         decay=0.99, 
         epochs_drop=1000, 
         result_require=True)

After 200/1000 epochs, loss : 0.491370
After 400/1000 epochs, loss : 0.473943
After 600/1000 epochs, loss : 0.462637
After 800/1000 epochs, loss : 0.457157
After 1000/1000 epochs, loss : 0.456410
train acc:  0.7828125
test acc:  0.75
