# Neural Network

In [239]:
import sklearn as sk
import numpy as np
import pandas as pd
import time
from sklearn.cross_validation import KFold
from sklearn import tree
from sklearn import ensemble
from sklearn import datasets
import theano.tensor as th
from scipy import misc
import copy
from numpy.random import uniform
from numpy.random import normal
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_classification
from math import copysign

import matplotlib.pylab as plt
%matplotlib inline

In [240]:
class network:
    """Fully connected feed-forward neural network trained one sample at a time.

    Every layer receives its input vector with a constant ``-1`` prepended as
    a bias input, so a layer with ``n`` inputs and ``m`` outputs is stored as
    an ``(n + 1, m)`` weight matrix.  ``__init__`` creates the matrices between
    consecutive entries of ``layers_list``; the matrix from the raw features to
    ``layers_list[0]`` is created by the first call to ``fit``, once the number
    of features is known.

    Recognised names:
      * activations: ``'sigmoid'``, ``'hyp_tg'`` (tanh), ``'x'`` (identity);
      * cost functions: ``'logistic'``, ``'square'``;
      * modes: ``'class'`` (integer labels, the prediction is the index of the
        largest output) and ``'reg'`` (the raw output vector is returned);
      * regularization: ``'l1'``, ``'l2'``; any other value disables the penalty.
    """

    def __init__(self, layers_list, learning_rate, alpha, activation_functions, cost_func, mode,
                 cou_iter, early_stop, regularization, reg_param, batch_size=1):
        """Store the hyper-parameters and initialise the inter-layer weights.

        Parameters
        ----------
        layers_list : list of int
            Number of neurons in each layer; the last entry is the output size.
        learning_rate : float
            Step size of the gradient update.
        alpha : float
            Slope factor applied inside ``sigmoid`` and ``hyp_tg``.
        activation_functions : list of str
            One activation name per entry of ``layers_list``.
        cost_func : str
            ``'logistic'`` or ``'square'``.
        mode : str
            ``'class'`` or ``'reg'``.
        cou_iter : int
            Maximum number of passes over the training data.
        early_stop : float
            Training stops once the mean per-sample cost of the previous pass
            is not greater than this value.
        regularization : str or None
            ``'l1'``, ``'l2'`` or anything else for no penalty.
        reg_param : float
            Weight of the regularization term.
        batch_size : int, optional
            Only stored on the instance; ``fit`` updates after every sample.
        """
        self.layers_count = len(layers_list)
        self.weight_list = []
        self.learning_rate = learning_rate
        self.layers_list = layers_list
        self.alpha = alpha
        self.cost_func = cost_func
        self.activation_functions = activation_functions
        self.mode = mode
        self.batch_size = batch_size
        self.cou_iter = cou_iter
        self.early_stop = early_stop
        self.regularization = regularization
        self.reg_param = reg_param
        for i in range(1, self.layers_count):
            # "+ 1" row holds the weights of the -1 bias input.
            m = np.asarray(normal(0, 0.15, (self.layers_list[i - 1] + 1, self.layers_list[i])))
            self.weight_list.append(m)

    # ------------------------------------------------------------------
    # Activation functions and their derivatives
    # ------------------------------------------------------------------
    def sigmoid(self, x):
        """Logistic function ``1 / (1 + exp(-alpha * x))`` clamped to [1e-5, 1 - 1e-5].

        The clamp keeps ``log`` and the divisions in the logistic cost finite
        when the unit saturates.
        """
        x = np.asarray(x, dtype=float)
        # exp() may overflow to inf for very negative inputs; the quotient is
        # then exactly 0 and is clamped below, so the warning is just noise.
        with np.errstate(over='ignore'):
            val = 1.0 / (1.0 + np.exp(-x * self.alpha))
        return np.clip(val, 0.00001, 0.99999)

    def der_sigmoid(self, x):
        """Derivative of ``sigmoid`` with respect to ``x``."""
        s = self.sigmoid(x)
        return (1.0 - s) * s * self.alpha

    def x(self, x):
        """Identity activation."""
        return x

    def der_x(self, x):
        """Derivative of the identity activation: a vector of ones."""
        return np.ones(len(x))

    def hyp_tg(self, x):
        """Hyperbolic tangent ``tanh(alpha * x)``.

        ``np.tanh`` is used instead of the explicit exponential quotient, which
        evaluates to ``inf / inf = nan`` for large ``|alpha * x|``.
        """
        return np.tanh(self.alpha * np.asarray(x, dtype=float))

    def der_hyp_tg(self, x):
        """Derivative of ``hyp_tg`` with respect to ``x``."""
        return self.alpha * (1 - (self.hyp_tg(x)) ** 2)

    def _activate(self, name, v):
        """Apply the activation called ``name`` to the pre-activation vector ``v``."""
        if name == 'sigmoid':
            return self.sigmoid(v)
        if name == 'hyp_tg':
            return self.hyp_tg(v)
        if name == 'x':
            return self.x(v)
        raise ValueError("unknown activation function: %r" % (name,))

    def _activation_derivative(self, name, v):
        """Derivative of the activation called ``name`` evaluated at ``v``."""
        if name == 'sigmoid':
            return self.der_sigmoid(v)
        if name == 'hyp_tg':
            return self.der_hyp_tg(v)
        if name == 'x':
            return self.der_x(v)
        raise ValueError("unknown activation function: %r" % (name,))

    # ------------------------------------------------------------------
    # Cost functions and their derivatives with respect to the prediction
    # ------------------------------------------------------------------
    def _one_hot(self, label, size):
        """Return a length-``size`` vector of zeros with a 1 at index ``label``."""
        encoded = np.zeros(size)
        encoded[label] = 1
        return encoded

    def _square_target(self, y_true, y_pred):
        """Target vector for the square cost.

        In 'class' mode a scalar label is one-hot encoded so that it is
        compared against the matching output only; anything else is used as is.
        """
        if self.mode == 'class' and np.ndim(y_true) == 0 and np.ndim(y_pred) == 1:
            return self._one_hot(int(y_true), len(y_pred))
        return y_true

    def logistic_cost(self, y_true, y_pred):
        """Cross-entropy cost.

        'class' mode: ``y_true`` is a class index, the result is the summed
        cross-entropy over all outputs.  'reg' mode: element-wise cross-entropy.
        Any other mode yields 0.
        """
        val = 0
        if (self.mode == 'class'):
            target = self._one_hot(y_true, len(y_pred))
            val = -np.sum(target * np.log(y_pred) + (1.0 - target) * np.log(1.0 - y_pred))
        elif (self.mode == 'reg'):
            val = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return val

    def der_logistic_cost(self, y_true, y_pred):
        """Derivative of ``logistic_cost`` with respect to ``y_pred`` (0 for unknown modes)."""
        val = 0
        if (self.mode == 'class'):
            target = self._one_hot(y_true, len(y_pred))
            val = (1.0 - target) / (1.0 - y_pred * 1.0) - target / y_pred
        elif (self.mode == 'reg'):
            val = ((1.0 - y_true) / (1.0 - y_pred) - y_true / y_pred)
        return val

    def square_cost(self, y_true, y_pred):
        """Element-wise half squared error ``0.5 * (y_true - y_pred) ** 2``."""
        y_true = self._square_target(y_true, y_pred)
        return 0.5 * ((y_true - y_pred) ** 2.0)

    def der_square_cost(self, y_true, y_pred):
        """Derivative of ``square_cost`` with respect to ``y_pred``."""
        y_true = self._square_target(y_true, y_pred)
        return (y_pred - y_true)

    # ------------------------------------------------------------------
    # Forward pass, prediction and training
    # ------------------------------------------------------------------
    def add_first_layer(self, x_shape):
        """Prepend the weight matrix mapping ``x_shape`` inputs (bias included) to the first layer."""
        self.weight_list.insert(0, np.asarray(normal(0, 0.15, (x_shape, self.layers_list[0]))))

    def _forward(self, sample):
        """Propagate one bias-augmented sample through all layers.

        Returns ``(neuron_sum, neuron_out)``: ``neuron_sum[i]`` is the
        pre-activation of layer ``i`` and ``neuron_out[i]`` is the input that
        was fed to layer ``i`` (``neuron_out[-1]`` is the network output).
        """
        v = np.asarray(sample, dtype=float)
        neuron_sum = []
        neuron_out = [v]
        for num, layer in enumerate(self.weight_list):
            v = np.dot(layer.T, v)
            neuron_sum.append(v)
            v = self._activate(self.activation_functions[num], v)
            if (num != self.layers_count - 1):
                # Hidden layers pass a -1 bias input on to the next layer.
                v = np.insert(v, 0, [-1])
            neuron_out.append(v)
        return neuron_sum, neuron_out

    def predict(self, x):
        """Predict for every row of the 2-D array-like ``x``.

        Returns a list with one entry per sample: the index of the largest
        output in 'class' mode, the output vector in 'reg' mode.
        """
        x = np.asarray(x)
        x = np.insert(x, 0, [-1], axis = 1)
        ans = []
        for sample in x:
            output = self._forward(sample)[1][-1]
            if (self.mode == 'class'):
                ans.append(np.argmax(output))
            elif (self.mode == 'reg'):
                ans.append(output)
        return ans

    def fit(self, x, y):
        """Train the network with per-sample gradient descent.

        ``x`` is a 2-D array-like of samples and ``y`` the matching targets
        (class indices in 'class' mode).  The data is reshuffled before every
        pass.  Before each pass the pass number and the mean cost of the
        previous pass are printed.  Training ends after ``cou_iter`` passes or
        as soon as the mean cost is no longer greater than ``early_stop``.

        Raises ``ValueError`` for an unknown cost function or activation name.
        """
        y = np.asarray(y)
        x = np.insert(np.asarray(x), 0, -1, axis = 1)
        # Create the input layer only once: calling fit() again continues
        # training instead of stacking another matrix in front of the network.
        if len(self.weight_list) < self.layers_count:
            self.add_first_layer(x.shape[1])

        if (self.cost_func == "logistic"):
            cost, der_cost = self.logistic_cost, self.der_logistic_cost
        elif (self.cost_func == "square"):
            cost, der_cost = self.square_cost, self.der_square_cost
        else:
            raise ValueError("unknown cost function: %r" % (self.cost_func,))

        last = self.layers_count - 1
        k = 0
        total_err = np.inf
        x_size = len(x)
        while (k < self.cou_iter and (total_err / x_size) > self.early_stop):
            # Same text as the Python 2 statement `print k, total_err / len(x)`.
            print("%s %s" % (k, total_err / x_size))
            total_err = 0.0
            x, y = shuffle(x, y)
            for s_num, sample in enumerate(x):
                neuron_sum, neuron_out = self._forward(sample)
                output = neuron_out[-1]
                # Reduce to a plain float so the loop condition is never an array.
                total_err += float(np.sum(cost(y[s_num], output)))

                # Back-propagate: der_neuron[i] is dCost/d(neuron_sum[i]).
                der_neuron = [der_cost(y[s_num], output) *
                              self._activation_derivative(self.activation_functions[last], neuron_sum[last])]
                for idx in range(last - 1, -1, -1):
                    # Row 0 of the next matrix belongs to the bias input, which
                    # has no incoming connections, hence the [1:] slice.
                    back = np.dot(self.weight_list[idx + 1][1:], der_neuron[0])
                    der_neuron.insert(0, back *
                                      self._activation_derivative(self.activation_functions[idx], neuron_sum[idx]))

                # All deltas were computed with the old weights, so the update
                # order does not matter.
                for idx in range(self.layers_count):
                    weights = self.weight_list[idx]
                    step = self.learning_rate * np.dot(neuron_out[idx].reshape(-1, 1),
                                                       np.asarray(der_neuron[idx]).reshape(1, -1))
                    if (self.regularization == 'l1'):
                        step = step + self.reg_param * np.sign(weights)
                    elif (self.regularization == 'l2'):
                        step = step + self.reg_param * weights
                    # Without a recognised regularization the plain gradient
                    # step is still applied.
                    self.weight_list[idx] -= step

            k += 1

            
            
                    

# Test

In [245]:
# Sanity check on the iris data: train on 100 shuffled samples and
# evaluate on the remaining 50.
iris = load_iris()
x = iris.data
y = iris.target

x, y = shuffle(x, y)

xtest = x[100:]
ytest = y[100:]
x = x[:100]
y = y[:100]

# batch_size is passed explicitly so the call supplies every constructor argument.
nt = network([4, 3], 0.05, 1.0, ['sigmoid', 'sigmoid'], "logistic", 'class', 80, early_stop=0.0,
             regularization='l2', reg_param=0.0001, batch_size=1)
nt.fit(x, y)
ypred = nt.predict(xtest)
# Formatted so the line reads the same under Python 2 and Python 3.
print("%s %s" % (ypred, ytest))
print(accuracy_score(ytest, ypred))

0 inf
1 1.95702077974
2 1.8502977456
3 1.64231446045
4 1.35483462012
5 1.17016997601
6 1.06944915984
7 1.01256850445
8 0.991419063763
9 0.951496789614
10 0.944787312325
11 0.912723261623
12 0.900291129918
13 0.858676313468
14 0.859748492182
15 0.828222201933
16 0.813894557096
17 0.793053377703
18 0.749793930714
19 0.715163983958
20 0.663819470042
21 0.639873875553
22 0.681074250077
23 0.577762579758
24 0.62680603369
25 0.543553970065
26 0.540371118647
27 0.585805296481
28 0.51759451564
29 0.526848106402
30 0.494043430206
31 0.490791252456
32 0.497564258519
33 0.448646820876
34 0.459715749229
35 0.475461785909
36 0.431817660657
37 0.446850483883
38 0.499651464386
39 0.42365224046
40 0.492762003774
41 0.330922026811
42 0.446006873848
43 0.430481576834
44 0.385879346101
45 0.403416806144
46 0.345628146415
47 0.367719214491
48 0.324810633659
49 0.392820301072
50 0.430649036295
51 0.455071564366
52 0.365080237777
53 0.366712654
54 0.351826715111
55 0.366310804779
56 0.372704382971
57 0.3662

In [249]:
# Synthetic 4-class problem: 450 samples for training, 50 held out.
x, y = make_classification(n_samples=500, n_features=20, n_informative=10, n_classes=4)
xtest = x[450:]
ytest = y[450:]
x = x[:450]
y = y[:450]
# batch_size is passed explicitly so the call supplies every constructor argument.
nt = network([35, 4], 0.1, 1.0, ['sigmoid', 'sigmoid'], "logistic", 'class', 50, early_stop=0.0,
             regularization='l2', reg_param=0.001, batch_size=1)
nt.fit(x, y)

ypred = nt.predict(xtest)
# accuracy_score expects (y_true, y_pred); accuracy is symmetric, so the value is unchanged.
print(accuracy_score(ytest, ypred))

0 inf
1 1.92145529992
2 1.81168815265
3 1.72375462834
4 1.68614000415
5 1.64731402645
6 1.6708523839
7 1.60123567287
8 1.59328521091
9 1.53129508097
10 1.5737240934
11 1.57158593011
12 1.56490795902
13 1.57202521308
14 1.55676504571
15 1.55648045632
16 1.5539984295
17 1.53274492044
18 1.48680449782
19 1.5714621444
20 1.52672902506
21 1.51611052873
22 1.53867607624
23 1.53428500727
24 1.53639165945
25 1.51738589156
26 1.53705665368
27 1.57563606697
28 1.54895920731
29 1.57481665791
30 1.54717906365
31 1.53923310733
32 1.53634314475
33 1.51338633908
34 1.53865759821
35 1.52604302711
36 1.50671125132
37 1.51454112846
38 1.53907060538
39 1.53379944575
40 1.5545107563
41 1.55847176115
42 1.54246939833
43 1.54140731388
44 1.50902548136
45 1.50053150963
46 1.52303802434
47 1.53624328595
48 1.5616741874
49 1.52246462643
0.7


# Read Data

In [250]:
# Load one sample image as a check that the data set is present; `flatten`
# is a boolean switch that collapses the colour layers into one grey-scale layer.
im = misc.imread('data/big_alphabet_29x29/mutant-0-0-0.bmp', flatten=True)
alphabet_size = 26  # number of letter classes
im_size = 29        # images are im_size x im_size pixels

In [251]:
x = []
y = []
x_test = []

# Training set: nine "mutant" variants of every letter, flattened to a
# vector and scaled by 1/255.
for letter in range(alphabet_size):
    for i in range(9):
        path = "data/big_alphabet_29x29/mutant-" + str(letter) + "-" + str(i) + "-0.bmp"
        im = misc.imread(path, flatten=True)
        x.append(im.reshape(im_size * im_size) / 255)
        y.append(letter)

# Test set: the single "class" image of every letter.
for letter in range(alphabet_size):
    path = "data/big_alphabet_29x29/class-" + str(letter) + ".bmp"
    im = misc.imread(path, flatten=True)
    x_test.append(im.reshape(im_size * im_size) / 255)

# Test images are loaded in letter order, so the labels are 0 .. alphabet_size - 1.
ytest = np.arange(alphabet_size)

In [259]:
# Train on the distorted letters and evaluate on the clean class images.
# batch_size is passed explicitly so the call supplies every constructor argument.
nt = network(layers_list=[43, 26], learning_rate=0.03, alpha=1.0,
             activation_functions=['sigmoid', 'sigmoid'], cost_func="logistic",
             mode='class', cou_iter=100, early_stop=1.5, regularization='l2', reg_param=0.0001,
             batch_size=1)
nt.fit(x, y)

ypred = nt.predict(x_test)
print(ypred)
print(accuracy_score(ytest, ypred))

0 inf
1 4.67422143208
2 4.3179979001
3 4.01570014272
4 3.67543148333
5 3.26670565283
6 2.95915974769
7 2.66005716986
8 2.37449560188
9 2.13987030762
10 1.91283366665
11 1.74171563305
12 1.57693752938
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
1.0
