In [2]:
import pandas as pd
import numpy as np

In [3]:
# Workbook handed to load_file(); it must provide the "Training_Data" and
# "Test_Data" sheets that load_file() reads.
path = "student.xls"

In [4]:
def load_file(file):
    """Load the training and test sheets of an Excel workbook as NumPy arrays.

    The workbook must contain two sheets named ``Training_Data`` and
    ``Test_Data``. In each sheet the last column holds the textual class label
    and every preceding column is converted to a float feature.

    Parameters
    ----------
    file : str or path-like
        Workbook location, passed unchanged to ``pandas.read_excel``.

    Returns
    -------
    train_data : numpy.ndarray of float, shape (n_train, n_features)
    train_label : numpy.ndarray of int, shape (n_train,)
    test_data : numpy.ndarray of float, shape (n_test, n_features)
    test_label : numpy.ndarray of int, shape (n_test,)
    lvl_map : dict
        Mapping from label text to integer class id.

    Raises
    ------
    ValueError
        If a sheet contains a label that is not a key of ``lvl_map``.
    """
    # Two spellings of the lowest level are accepted and share the same id.
    lvl_map = {
        "High": 0,
        "Middle": 1,
        "Low": 2,
        "very_low": 3,
        "Very Low": 3,
    }

    def read_split(sheet_name):
        """Return (features, labels) for one sheet of the workbook."""
        frame = pd.read_excel(file, sheet_name=sheet_name)
        raw_labels = frame.iloc[:, -1]
        codes = raw_labels.map(lvl_map)
        # Unmapped labels come back as NaN; casting NaN to int would silently
        # produce meaningless class ids, so fail loudly instead.
        missing = codes.isna()
        if missing.any():
            unknown = sorted(set(raw_labels[missing].astype(str)))
            raise ValueError(
                "Unknown label(s) in sheet %r: %s" % (sheet_name, ", ".join(unknown))
            )
        # Built-in int/float replace the np.int/np.float aliases, which no
        # longer exist in current NumPy releases.
        labels = codes.to_numpy().astype(int).reshape((-1,))
        features = frame.iloc[:, :-1].to_numpy().astype(float)
        return features, labels

    train_data, train_label = read_split("Training_Data")
    test_data, test_label = read_split("Test_Data")
    return train_data, train_label, test_data, test_label, lvl_map

In [5]:
# Read both sheets once; the training/evaluation cells further down use these names.
# pandas needs the optional `xlrd` package for this workbook (the ImportError
# recorded under this cell is what appears when it is not installed).
train_data, train_label, test_data, test_label, lvl_map = load_file(path)

ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 1.0.0 for Excel support Use pip or conda to install xlrd.

In [6]:
class FC:
    """Fully connected (affine) layer trained with step-decayed gradient descent.

    The layer computes ``x @ W + b``. Each call to ``update`` takes one
    gradient step whose size is ``alpha_0 * decay ** floor((1 + k) / epoch_drop)``,
    where ``k`` is the number of learning rates handed out so far.
    """

    def __init__(self, W, b, alpha_0, decay, epoch_drop):
        # Private copies: the caller's initial parameters are never modified.
        self.W, self.b = W.copy(), b.copy()
        self.alpha_0, self.decay, self.epoch_drop = alpha_0, decay, epoch_drop
        # Number of learning rates produced so far.
        self.count = 0

    def forward(self, x):
        """Cache a copy of the 2-D batch ``x`` and return ``x @ W + b``."""
        self.x = x.copy()
        self.m, self.n = x.shape
        return self.x @ self.W + self.b

    def backprop(self, back_grad):
        """Store the parameter gradients and return the gradient w.r.t. the input.

        ``back_grad`` is the gradient of the loss with respect to this layer's
        output for the batch cached by the last ``forward`` call.
        """
        batch_ones = np.ones(self.m)
        self.grad_W = self.x.T @ back_grad
        # Multiplying by a vector of ones adds up the per-sample rows.
        self.grad_b = batch_ones @ back_grad
        self.grad = back_grad @ self.W.T
        return self.grad

    def l_rate(self):
        """Return the current learning rate and advance the step counter."""
        completed_drops = np.floor((self.count + 1) / self.epoch_drop)
        self.count += 1
        return self.alpha_0 * (self.decay ** completed_drops)

    def update(self):
        """Apply one gradient-descent step using the gradients from ``backprop``."""
        step_size = self.l_rate()
        self.W -= step_size * self.grad_W
        self.b -= step_size * self.grad_b

In [7]:
class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def forward(self, x):
        """Return the sigmoid of ``x`` and cache it for the backward pass."""
        self.x = x.copy()
        exp_neg = np.exp(-x)
        self.sig_res = 1 / (1 + exp_neg)
        return self.sig_res

    def backprop(self, back_grad):
        """Scale ``back_grad`` by the local derivative ``s * (1 - s)``."""
        s = self.sig_res
        return back_grad * s * (1 - s)

In [8]:
class Relu:
    """Rectified linear unit, ``max(x, 0)``."""

    def forward(self, x):
        """Cache a copy of ``x`` and return it with negative entries set to 0."""
        self.x = x.copy()
        return np.maximum(self.x, 0)

    def backprop(self, back_grad):
        """Return a copy of ``back_grad`` zeroed where the cached input was negative.

        Entries whose input was exactly 0 let the gradient through unchanged.
        """
        blocked = self.x < 0
        passed = back_grad.copy()
        passed[blocked] = 0
        return passed

In [9]:
class Leaky_Relu:
    """Leaky rectified linear unit.

    Non-negative inputs pass through unchanged; negative inputs are multiplied
    by ``slope``.

    Parameters
    ----------
    slope : float, optional
        Multiplier applied to negative inputs (and to their gradients).
        Defaults to 0.01.
    """

    def __init__(self, slope=0.01):
        self.slope = slope

    def forward(self, x):
        """Cache a copy of ``x`` and return the activation."""
        self.x = x.copy()
        # An explicit sign test (rather than max(x, slope * x)) keeps the
        # result correct for any slope, including values above 1.
        return np.where(self.x < 0, self.x * self.slope, self.x)

    def backprop(self, back_grad):
        """Return ``back_grad`` scaled by ``slope`` where the cached input was negative."""
        return np.where(self.x < 0, back_grad * self.slope, back_grad)

In [10]:
class Tanh:
    """Hyperbolic tangent activation."""

    def forward(self, x):
        """Return ``tanh(x)`` and cache it for the backward pass."""
        self.x = x.copy()
        self.tanh = np.tanh(x)
        return self.tanh

    def backprop(self, back_grad):
        """Scale ``back_grad`` by the local derivative ``1 - tanh(x)^2``."""
        local_slope = 1 - self.tanh ** 2
        return back_grad * local_slope

In [11]:
class Arctan:
    """Inverse-tangent activation."""

    def forward(self, x):
        """Cache a copy of ``x`` and return ``arctan(x)``."""
        cached = x.copy()
        self.x = cached
        return np.arctan(cached)

    def backprop(self, back_grad):
        """Scale ``back_grad`` by the local derivative ``1 / (1 + x^2)``."""
        denominator = 1 + self.x ** 2
        return back_grad / denominator

In [12]:
class SoftPlus:
    """Softplus activation, ``log(1 + exp(x))``, evaluated without overflow."""

    def forward(self, x):
        """Cache a copy of ``x`` and return ``log(1 + exp(x))``.

        ``np.logaddexp(0, x)`` is the same quantity, ``log(exp(0) + exp(x))``,
        but stays finite for large ``x`` where ``exp(x)`` alone would overflow
        to ``inf``.
        """
        self.x = x.copy()
        return np.logaddexp(0, self.x)

    def backprop(self, back_grad):
        """Scale ``back_grad`` by the local derivative ``1 / (1 + exp(-x))``.

        The derivative is computed as ``exp(-log(1 + exp(-x)))`` so that very
        negative inputs do not trigger an overflow in ``exp(-x)``.
        """
        sigmoid = np.exp(-np.logaddexp(0, -self.x))
        return back_grad * sigmoid

In [13]:
class SoftSign:
    """Softsign activation, ``x / (1 + |x|)``."""

    def forward(self, x):
        """Cache a copy of ``x`` and return ``x / (1 + |x|)``."""
        self.x = x.copy()
        return self.x / (1 + np.abs(self.x))

    def backprop(self, back_grad):
        """Scale ``back_grad`` by the local derivative ``1 / (1 + |x|)^2``.

        The parentheses matter: ``1 + np.abs(x) ** 2`` would evaluate to
        ``1 + x^2`` because ``**`` binds tighter than ``+``, which is not the
        derivative of softsign.
        """
        return back_grad / (1 + np.abs(self.x)) ** 2

In [14]:
class Softmax:
    """Softmax output layer combined with mean cross-entropy loss."""

    def forward(self, x, y):
        """Return the mean cross-entropy of logits ``x`` against labels ``y``.

        Parameters
        ----------
        x : numpy.ndarray, shape (m, n)
            One row of class scores per sample.
        y : sequence of int, length m
            Index of the true class of each sample.

        Returns
        -------
        float
            Cross-entropy averaged over the ``m`` samples.
        """
        # Subtracting each row's maximum leaves the softmax unchanged but keeps
        # every exponent <= 0, so exp() cannot overflow.
        self.x = x - x.max(axis=1).reshape(-1, 1)
        self.y = np.array(y)
        self.m, self.n = self.x.shape
        exp_shifted = np.exp(self.x)
        self.denom = np.sum(exp_shifted, axis=1).reshape((-1, 1))
        self.softmax = exp_shifted / self.denom
        # log-softmax of the true class, taken directly from the shifted logits
        # so that a probability which underflows to 0 never reaches log().
        rows = np.arange(self.m)
        log_prob = self.x[rows, self.y] - np.log(self.denom[:, 0])
        return -np.sum(log_prob) / self.m

    def dirac(self, a, b):
        """Return 1 if ``a == b`` and 0 otherwise."""
        return 1 if a == b else 0

    def backprop(self):
        """Return the gradient of the mean loss with respect to the logits.

        The result has shape (m, n) and equals ``(softmax - one_hot(y)) / m``.
        """
        grad = self.softmax.copy()
        grad[np.arange(self.m), self.y] -= 1
        return grad / self.m

In [15]:
def bp(train_data,
       train_label,
       test_data,
       test_label,
       epochs,
       activation,
       alpha_0,
       decay,
       epochs_drop,
       result_require=False):
    """Train a one-hidden-layer classifier with full-batch backprop and test it.

    The network is ``FC -> activation -> FC -> Softmax`` with 16 hidden units
    and 4 output classes. Training progress is printed about five times, and
    the accuracy on the test split is printed at the end.

    Parameters
    ----------
    train_data, test_data : numpy.ndarray, shape (n_samples, n_features)
        Feature matrices.
    train_label, test_label : numpy.ndarray of int, shape (n_samples,)
        Class index of each sample.
    epochs : int
        Number of full-batch gradient steps.
    activation : str
        One of "arctan", "l_relu", "relu", "sigmoid", "tanh", "softplus",
        "softsign".
    alpha_0 : float
        Initial learning rate.
    decay : float
        Factor the learning rate is multiplied by at every drop.
    epochs_drop : int
        Number of steps between learning-rate drops.
    result_require : bool, optional
        When true, return the predicted test labels.

    Returns
    -------
    numpy.ndarray or None
        Predicted class index per test sample if ``result_require`` is true,
        otherwise ``None``.

    Raises
    ------
    KeyError
        If ``activation`` is not one of the supported names.
    """
    hidden_units = 16
    n_classes = 4
    # Take the input width from the data instead of hard-coding it.
    n_features = train_data.shape[1]

    W1 = np.random.randn(n_features, hidden_units) / np.sqrt(6)
    b1 = np.zeros(hidden_units)
    W2 = np.random.randn(hidden_units, n_classes) / np.sqrt(6)
    b2 = np.zeros(n_classes)

    activation_function_dict = {
        "arctan"   : Arctan,
        "l_relu"   : Leaky_Relu,
        "relu"     : Relu,
        "sigmoid"  : Sigmoid,
        "tanh"     : Tanh,
        "softplus" : SoftPlus,
        "softsign" : SoftSign
    }

    fc1 = FC(W1, b1, alpha_0, decay, epochs_drop)
    act_f1 = activation_function_dict[activation]()
    fc2 = FC(W2, b2, alpha_0, decay, epochs_drop)
    softmax = Softmax()

    # Integer interval between progress lines. A float interval (epochs / 5)
    # never divides the epoch index cleanly unless epochs is a multiple of 5.
    report_every = max(1, epochs // 5)

    # TRAINING BEGIN
    for i in range(1, epochs + 1):
        h1 = fc1.forward(train_data)
        h2 = act_f1.forward(h1)
        h3 = fc2.forward(h2)
        loss = softmax.forward(h3, train_label)

        if i % report_every == 0:
            print("After %d/%d epochs, loss : %f" % (i, epochs, loss))

        h3_grad = softmax.backprop()
        # Each layer's input gradient is computed before its weights change.
        h2_grad = fc2.backprop(h3_grad)
        fc2.update()
        h1_grad = act_f1.backprop(h2_grad)
        # The returned input gradient is not needed, but the call stores the
        # parameter gradients that update() consumes.
        fc1.backprop(h1_grad)
        fc1.update()
    # TRAINING FINISH

    test_h1 = fc1.forward(test_data)
    test_h2 = act_f1.forward(test_h1)
    test_h3 = fc2.forward(test_h2)
    # The arg-max of the logits equals the arg-max of the softmax probabilities.
    pred = np.argmax(test_h3, 1)

    acc = np.mean(pred == test_label)

    print('test acc: ', acc)

    if result_require:
        return pred

In [16]:
# Activation names to compare; each one is a key of bp()'s activation lookup.
activation_function_list = [
    "sigmoid", "tanh", "arctan",
    "relu", "l_relu",
    "softplus", "softsign",
]

# Train and evaluate one network per activation with identical hyper-parameters.
for method in activation_function_list:
    print("Method : %s" % method)
    bp(
        train_data, train_label, test_data, test_label,
        epochs=5000, activation=method,
        alpha_0=0.05, decay=0.99, epochs_drop=1000,
    )
    print("------------------------------------")

Method : sigmoid


NameError: name 'train_data' is not defined

In [17]:
# Longer softplus run; result_require=True makes bp() return the predicted test labels.
res = bp(
    train_data, train_label, test_data, test_label,
    epochs=8000, activation="softplus",
    alpha_0=0.1, decay=0.99, epochs_drop=1000,
    result_require=True,
)

NameError: name 'train_data' is not defined

In [18]:
# Invert the label encoding (class id -> label text). When two spellings share
# an id, the one listed later in lvl_map is the one kept.
rev_lvl_map = {code: name for name, code in lvl_map.items()}
# Translate every predicted class id back to its label text.
np.vectorize(rev_lvl_map.get)(res)

NameError: name 'lvl_map' is not defined