# Introduction

* author : Dan Lim
* language : python
* required library : numpy
* purpose : a simple example of a neural network (multilayer perceptron)

# load dataset

In [1]:
import numpy as np

# Both files are whitespace-delimited numeric tables; in each row the last
# column is the target label and every preceding column is a feature.
train = np.loadtxt('trn.txt')
test = np.loadtxt('tst.txt')

# Separate the feature matrix from the label vector for each split.
X_train, y_train = train[:, :-1], train[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]

# Sanity check: one shape per line, in the order train X, train y, test X, test y.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(60290, 13)
(60290,)
(18490, 13)
(18490,)


# MultiLayer Perceptron

In [2]:
class MLP(object):
    """Fully connected feed-forward network for binary classification.

    Every layer, including the single output unit, uses the logistic sigmoid
    activation. The output is interpreted as P(y = 1) and the network is
    trained by mini-batch gradient descent with momentum on the log loss.

    Parameters
    ----------
    hidden_layer_sizes : sequence of int
        Number of units in each hidden layer, input side first. An empty
        sequence yields a network with no hidden layer (logistic regression).
    learning_rate : float
        Step size applied to the batch-averaged gradient.
    momentum : float
        Fraction of the previous parameter change added to the current one.
    batch_size : int
        Number of consecutive samples per weight update.
    n_epoch : int
        Number of full passes over the training data.
    seed : int or None
        Seed of the private random generator used to draw the initial
        weights and biases from N(0, 1).
    verbose : bool
        If True, print the training log loss after every epoch.
    """

    def __init__(self, hidden_layer_sizes=(100,), learning_rate=0.01, momentum=0.9, batch_size=200,
                 n_epoch=100, seed=24, verbose=False):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.n_hidden_layer = len(hidden_layer_sizes)
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.batch_size = batch_size
        self.n_epoch = n_epoch
        self.seed = seed
        self.verbose = verbose

    def __initialize(self, X):
        """Allocate the per-layer buffers and draw the initial parameters.

        Layer ``i`` maps ``sizes[i]`` inputs to ``sizes[i + 1]`` outputs, where
        ``sizes = [n_features, *hidden_layer_sizes, 1]``.
        """
        # A private RandomState yields the same stream as seeding the global
        # generator with the same seed, without clobbering the caller's
        # global random state.
        rng = np.random.RandomState(self.seed)
        n_layers = self.n_hidden_layer + 1
        sizes = [X.shape[1]] + list(self.hidden_layer_sizes) + [1]

        self.DT = [None] * n_layers        # delta (error signal) per layer
        self.Z = [None] * (n_layers + 1)   # weighted sums; Z[0] is the input
        self.O = [None] * (n_layers + 1)   # neuron outputs; O[0] is the input
        self.WM = [0] * n_layers           # weight momentum (previous change)
        self.bM = [0] * n_layers           # bias momentum (previous change)

        # All weights are drawn first, then all biases, layer by layer.
        self.W = [rng.normal(0, 1, (sizes[i + 1], sizes[i])) for i in range(n_layers)]  # weight
        self.b = [rng.normal(0, 1, (sizes[i + 1], 1)) for i in range(n_layers)]         # bias

    def __activate(self, z):
        """Return the element-wise logistic sigmoid of ``z``."""
        # For very negative z, exp(-z) overflows to inf and the quotient is
        # exactly 0.0, which is the correct limit; only the warning is silenced.
        with np.errstate(over='ignore'):
            return 1 / (1 + np.exp(-z))

    def __forward_propagate(self, X):
        """Run ``X`` (one sample per row) through the network.

        Caches the weighted sums in ``self.Z`` and the activations in
        ``self.O`` (one sample per column) and returns the output layer
        activations, shape ``(1, n_samples)``.
        """
        self.Z[0] = X.transpose().copy()
        self.O[0] = self.Z[0].copy()
        for i in range(self.n_hidden_layer + 1):
            self.Z[i + 1] = np.dot(self.W[i], self.O[i]) + self.b[i]
            self.O[i + 1] = self.__activate(self.Z[i + 1])
        return self.O[-1]

    def __weight_update(self, X, y):
        """Perform one momentum gradient step on the mini-batch ``(X, y)``."""
        outputs = self.__forward_propagate(X)
        y = y.reshape(1, -1)
        n_samples = X.shape[0]

        # Output delta: (prediction - target), the log-loss gradient with
        # respect to the pre-activation of a sigmoid output unit.
        self.DT[-1] = outputs - y
        # Hidden deltas: back-propagate through the next layer's weights and
        # multiply by the sigmoid derivative O * (1 - O).
        for i in reversed(range(self.n_hidden_layer)):
            self.DT[i] = self.O[i + 1] * (1 - self.O[i + 1]) * np.dot(self.W[i + 1].transpose(), self.DT[i + 1])

        ones = np.ones((n_samples, 1))  # sums each delta row over the batch
        for i in range(self.n_hidden_layer + 1):
            # The update expressions build new arrays, so the old parameters
            # can be kept by reference instead of being copied.
            W_prev = self.W[i]
            b_prev = self.b[i]
            self.W[i] = self.W[i] - self.learning_rate * (np.dot(self.DT[i], self.O[i].transpose())
                                                          / n_samples) + self.momentum * self.WM[i]
            self.b[i] = self.b[i] - self.learning_rate * (np.dot(self.DT[i], ones)
                                                          / n_samples) + self.momentum * self.bM[i]
            # The momentum term for the next step is the change just applied.
            self.WM[i] = self.W[i] - W_prev
            self.bM[i] = self.b[i] - b_prev

    def __log_loss(self, X, y):
        """Return the mean binary cross-entropy of the network on ``(X, y)``."""
        outputs = self.__forward_propagate(X).reshape(-1)
        # Keep probabilities away from 0 and 1 so the logarithms stay finite.
        outputs = np.clip(outputs, 1e-10, 1 - 1e-10)
        y = np.asarray(y).reshape(-1)  # accept (n,) as well as (n, 1) labels
        return -np.mean(y * np.log(outputs) + (1 - y) * np.log(1 - outputs))

    def fit(self, X, y):
        """Train the network on ``X`` (n_samples, n_features) and 0/1 labels ``y``.

        Parameters are re-initialised on every call. Mini-batches are taken
        as consecutive, unshuffled slices of ``X``.

        Returns
        -------
        self : MLP
            The fitted model.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1)
        if X.shape[0] != y.shape[0]:
            raise ValueError('X and y have inconsistent numbers of samples: %d != %d'
                             % (X.shape[0], y.shape[0]))

        self.__initialize(X)
        for epoch in range(self.n_epoch):
            for i in range(0, X.shape[0], self.batch_size):
                self.__weight_update(X[i:(i + self.batch_size), :], y[i:(i + self.batch_size)])
            if self.verbose:
                print('epoch[%d/%d], loss = %f' % (epoch+1, self.n_epoch, self.__log_loss(X, y)))
        return self

    def predict(self, X):
        """Return the predicted class (0.0 or 1.0) for every row of ``X``."""
        outputs = self.__forward_propagate(np.asarray(X)).reshape(-1)
        # Build a new array: ``outputs`` is a view of the cached activations
        # in self.O[-1], which must not be overwritten by the thresholding.
        return np.where(outputs >= 0.5, 1.0, 0.0)

    def score(self, X, y):
        """Return the classification accuracy of the network on ``(X, y)``."""
        predicted = self.predict(X)
        # Flatten so an (n, 1) label array is compared element-wise instead of
        # being broadcast against the (n,) predictions into an (n, n) matrix.
        y = np.asarray(y).reshape(-1)
        return np.mean(y == predicted)

# selected hyper parameters

* initial weights : randomly drawn from gaussian distribution ~ N(0,1)
* number of hidden layers : 2
* number of nodes in each layer : (13, 60, 30, 1) := (input, hidden1, hidden2, output)
* learning rate : 0.1
* momentum : 0.9
* number of epochs : 100

# train mlp

In [3]:
mlp = MLP(hidden_layer_sizes=(60,30), learning_rate=0.1, momentum=0.9, batch_size=200, n_epoch=100, verbose=False)
mlp.fit(X_train, y_train)

# error rate on test set

In [4]:
score = mlp.score(X_test, y_test)
print('%.2f%%' % ((1 - score) * 100))

16.39%
