# Neural Network

In [1]:
import sklearn as sk
import numpy as np
import pandas as pd
import time
from sklearn.cross_validation import KFold
from sklearn import tree
from sklearn import ensemble
from sklearn import datasets
import theano.tensor as th
from scipy import misc
import copy
from numpy.random import uniform
from numpy.random import normal
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_classification
from math import copysign

import matplotlib.pylab as plt
%matplotlib inline

In [46]:
class network:
    def __init__(self, layers_list, learning_rate, alpha, activation_functions, cost_func, mode, \
                 cou_iter, early_stop, regularization, reg_param, batch_size):
        self.layers_count = len(layers_list)
        self.weight_list = []
        self.learning_rate = learning_rate
        self.layers_list = layers_list
        self.alpha = alpha
        self.cost_func = cost_func
        self.activation_functions = activation_functions
        self.mode = mode
        self.batch_size = batch_size
        self.cou_iter = cou_iter
        self.early_stop = early_stop
        self.regularization = regularization
        self.reg_param = reg_param
        for i in range(1, self.layers_count):
            m = np.asarray(normal(0, 0.15, (self.layers_list[i - 1] + 1, self.layers_list[i])))
            self.weight_list.append(m)
            
    
    def sigmoid(self, x):
        one = np.ones(len(x))
        val = one / (one + np.exp(-x * self.alpha))
        for i in range(len(val)):
            if (val[i] == 0):
                val[i] = 0.00001
            elif (val[i] == 1):
                val[i] = 0.99999
        return one / (one + np.exp(-x * self.alpha))
    
    def der_sigmoid(self, x):
        one = np.ones(len(x))
        return (one - self.sigmoid(x)) * self.sigmoid(x) * self.alpha
    
    def x(self, x):
        return x
    
    def der_x(self, x):
        return np.ones(len(x))
    
    def hyp_tg(self, x):
        return (np.exp(self.alpha * x) - np.exp(-self.alpha * x)) / (np.exp(self.alpha * x) + np.exp(-self.alpha * x))
    
    def der_hyp_tg(self, x):
        return self.alpha * (1 - (self.hyp_tg(x)) ** 2)
    
    def logistic_cost(self, y_true, y_pred):
        val = 0
        if (self.mode == 'class'):
            z = np.zeros(len(y_pred))
            z[y_true] = 1
            one = np.ones(len(y_pred))
            y_true = z.copy()
            val = -np.sum(y_true * np.log(y_pred) + (one - y_true) * np.log(one - y_pred))
        elif (self.mode == 'reg'):
            val = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return val
    
    def der_logistic_cost(self, y_true, y_pred):
        val = 0
        if (self.mode == 'class'):
            z = np.zeros(len(y_pred))
            z[y_true] = 1
            one = np.ones(len(y_pred))
            y_true = z
            val = ((one*1.0 - y_true*1.0) / (one - y_pred*1.0) - (y_true*1.0 / y_pred))
        elif (self.mode == 'reg'):
            val = ((1.0 - y_true) / (1.0 - y_pred) - y_true / y_pred)
        return val
    
    def add_two_lists(self, l1, l2):
        for pos in range(len(l1)):
            l1[pos] += l2[pos]
        return l1
    
    def div_list(self, l1, m):
        for pos in range(len(l1)):
            l1[pos] /= m
        return l1
    
    def square_cost(self, y_true, y_pred):
        return  0.5*((y_true - y_pred) ** 2.0)
    
    def der_square_cost(self, y_true, y_pred):
        return (y_pred - y_true)
    
    def add_first_layer(self, x_shape):
        self.weight_list.insert(0, np.asarray(normal(0, 0.15, (x_shape, self.layers_list[0]))))
    
    def predict(self, x):
        x = np.asarray(x)
        x = np.insert(x, 0, [-1], axis = 1)
        ans = []
        for s_num, sample in enumerate(x):
            v = sample
            for num, layer in enumerate(self.weight_list):
                v = np.dot(layer.T, v)
                if (self.activation_functions[num] == 'sigmoid'):
                    v = self.sigmoid(v)
                elif (self.activation_functions[num] == 'hyp_tg'):
                    v = self.hyp_tg(v)
                elif (self.activation_functions[num] == 'x'):
                    v = self.x(v)
                if (num != self.layers_count - 1):
                    v = np.insert(v, 0, [-1])
            if (self.mode == 'class'):
                ans.append(np.argmax(v))
            elif (self.mode == 'reg'):
                ans.append(v)
        return ans
    
    
    def fit(self, x, y):
        y = np.asarray(y)
        x = np.insert(np.asarray(x), 0, -1, axis = 1)
        self.add_first_layer(x.shape[1])
        k = 0
        total_err = np.inf
        x_size = len(x)
        ind_frag = x_size * 0.9
        x, y = shuffle(x, y)
        test_x = x[ind_frag:]
        test_y = y[ind_frag:]
        x_new = x[:ind_frag]
        y_new = y[:ind_frag]
        err_arr = []
        result_matrix = []
        while (k < self.cou_iter and (len(err_arr) < 10 or err_arr[-1] - err_arr[len(err_arr) - 9] >= self.early_stop)):
            print k, total_err
            total_err = 0.0
            x_new, y_new = shuffle(x_new, y_new)
            balance = self.batch_size
            for s_num, sample in enumerate(x_new):
                now_matrix = []
                if (balance == 0):
                    #print 'I here!!!'
                    #return 0
                    balance = self.batch_size
                    self.weight_list = self.add_two_lists(self.weight_list, self.div_list(result_matrix, self.batch_size))
                    result_matrix = []
                neuron_sum = []
                neuron_out = []
                neuron_out.append(np.asarray(sample, dtype = np.float))
                v = sample
                for num, layer in enumerate(self.weight_list):
                    v = np.dot(layer.T, v)
                    neuron_sum.append(v)
                    if (self.activation_functions[num] == 'sigmoid'):
                        v = self.sigmoid(v)
                    elif (self.activation_functions[num] == 'hyp_tg'):
                        v = self.hyp_tg(v)
                    elif(self.activation_functions[num] == 'x'):
                        v = self.x(v) 
                    if (num != self.layers_count - 1):
                        v = np.insert(v, 0, [-1])
                    neuron_out.append(v)
                
                if (neuron_out[-1].shape[0] == 1):
                    neuron_out[-1] = neuron_out[-1][0]

                if (self.cost_func == "logistic" and self.activation_functions[-1] == "sigmoid"):
                    der_neuron = [(self.der_logistic_cost(y_new[s_num], neuron_out[-1]) * self.der_sigmoid(neuron_sum[-1]))]
                elif (self.cost_func == "logistic" and self.activation_functions[-1] == "hyp_tg"):
                    der_neuron = [(self.der_logistic_cost(y_new[s_num], neuron_out[-1]) * self.der_hyp_tg(neuron_sum[-1]))]
                elif (self.cost_func == 'logistic' and self.activation_functions[-1] == "x"):
                    der_neuron = [(self.der_logistic_cost(y_new[s_num], neuron_out[-1]) * self.der_x(neuron_sum[-1]))]
                elif (self.cost_func == 'square' and self.activation_functions[-1] == "x"):
                    der_neuron = [(self.der_square_cost(y_new[s_num], neuron_out[-1]) * self.der_x(neuron_sum[-1]))]
                
                num = 1
                for layer in reversed(self.weight_list[1:]):
                    if (self.activation_functions[self.layers_count - num - 1] == 'sigmoid'):
                        der_neuron.insert(0, np.dot(layer[1:], der_neuron[0]) * self.der_sigmoid(neuron_sum[len(neuron_sum) - num - 1]))
                    elif (self.activation_functions[self.layers_count - num - 1] == 'hyp_tg'):
                        der_neuron.insert(0, np.dot(layer[1:], der_neuron[0]) * self.der_hyp_tg(neuron_sum[len(neuron_sum) - num - 1]))
                    elif (self.activation_functions[self.layers_count - num - 1] == 'x'):
                        der_neuron.insert(0, np.dot(layer[1:], der_neuron[0]) * self.der_x(neuron_sum[len(neuron_sum) - num - 1]))
                    num += 1

                for layer_num in range(self.layers_count):
                    #if (self.regularization == 'l1'):
                    #    self.weight_list[self.layers_count - layer_num - 1] -= self.learning_rate *( \
                    #    np.dot(neuron_out[len(neuron_out) - layer_num - 2].T.reshape(-1, 1), der_neuron[len(der_neuron) - layer_num - 1].reshape(1, -1)) +\
                    #    self.reg_param * np.sign(self.weight_list[self.layers_count - layer_num - 1]))
                        
                    #elif (self.regularization == 'l2'):
                    #    self.weight_list[self.layers_count - layer_num - 1] -= self.learning_rate *(\
                    #    np.dot(neuron_out[len(neuron_out) - layer_num - 2].T.reshape(-1, 1), der_neuron[len(der_neuron) - layer_num - 1].reshape(1, -1)) +\
                    #    self.reg_param * self.weight_list[self.layers_count - layer_num - 1])
                    #if (balance == self.batch_size):
                    now_matrix.insert(0, -1.0*self.learning_rate* np.dot(neuron_out[len(neuron_out) - layer_num - 2].T.reshape(-1, 1), der_neuron[len(der_neuron) - layer_num - 1].reshape(1, -1)))
                if (len(result_matrix) == 0):
                    result_matrix = list(now_matrix)
                else:
                    result_matrix = self.add_two_lists(result_matrix, now_matrix)
                balance -= 1
                #print len(result_matrix)
                
            k += 1
            y_pred = self.predict(test_x[:, 1:])
            if (self.mode == 'class'):
                total_err = accuracy_score(test_y, y_pred)
            elif (self.mode == 'reg'):
                total_err = mean_squared_error(test_y, y_pred)
            err_arr.append(total_err)
            #print result_matrix
            #return 0
            
            
            
                    

# Test

In [47]:
iris = load_iris()
x = iris.data
y = iris.target

x, y = shuffle(x, y)

xtest = x[110:]
ytest = y[110:]
x = x[:110]
y = y[:110]

nt = network([10, 3], 0.05, 1.0, ['sigmoid', 'sigmoid'], "logistic", 'class', 50, early_stop=-1.0, \
             regularization='l2', reg_param = 0.0, batch_size = 5)
nt.fit(x, y)
ypred = nt.predict(xtest)
print ypred, ytest
print accuracy_score(ytest, ypred)



0 inf
1 0.454545454545
2 0.454545454545
3 0.454545454545
4 0.454545454545
5 0.454545454545
6 0.909090909091
7 0.909090909091
8 0.909090909091
9 0.909090909091
10 0.909090909091
11 0.454545454545
12 0.909090909091
13 0.909090909091
14 1.0
15 0.909090909091
16 1.0
17 1.0
18 0.909090909091
19 1.0
20 1.0
21 1.0
22 1.0
23 1.0
24 1.0
25 1.0
26 1.0
27 1.0
28 1.0
29 1.0
30 1.0
31 1.0
32 1.0
33 1.0
34 1.0
35 1.0
36 1.0
37 1.0
38 1.0
39 1.0
40 1.0
41 1.0
42 1.0
43 1.0
44 1.0
45 1.0
46 1.0
47 1.0
48 1.0
49 1.0
[1, 2, 0, 2, 2, 2, 1, 0, 1, 2, 2, 0, 1, 0, 2, 2, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 2, 1, 1, 0, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1] [1 2 0 2 1 2 1 0 1 2 1 0 1 0 2 2 0 1 1 0 2 1 2 0 2 1 1 1 1 0 2 2 1 1 1 2 1
 2 2 1]
0.875


In [50]:
x, y = make_classification(n_samples=500, n_features=20, n_informative=10, n_classes=4)
xtest = x[450:]
ytest = y[450:]
x = x[:450]
y = y[:450]
nt = network([20, 4], 0.01, 1.0, ['sigmoid', 'sigmoid'], "logistic", 'class', 80, early_stop=-1, \
             regularization='l2', reg_param = 0.0, batch_size = 10)
nt.fit(x, y)

ypred = nt.predict(xtest)
print accuracy_score(ypred, ytest)



0 inf
1 0.355555555556
2 0.444444444444
3 0.444444444444
4 0.422222222222
5 0.466666666667
6 0.466666666667
7 0.466666666667
8 0.466666666667
9 0.466666666667
10 0.466666666667
11 0.488888888889
12 0.488888888889
13 0.511111111111
14 0.488888888889
15 0.488888888889
16 0.511111111111
17 0.511111111111
18 0.511111111111
19 0.488888888889
20 0.488888888889
21 0.511111111111
22 0.488888888889
23 0.511111111111
24 0.511111111111
25 0.533333333333
26 0.533333333333
27 0.533333333333
28 0.555555555556
29 0.577777777778
30 0.6
31 0.622222222222
32 0.622222222222
33 0.622222222222
34 0.644444444444
35 0.622222222222
36 0.622222222222
37 0.622222222222
38 0.622222222222
39 0.622222222222
40 0.622222222222
41 0.622222222222
42 0.6
43 0.577777777778
44 0.577777777778
45 0.6
46 0.6
47 0.6
48 0.6
49 0.6
50 0.6
51 0.6
52 0.6
53 0.6
54 0.6
55 0.6
56 0.577777777778
57 0.577777777778
58 0.577777777778
59 0.577777777778
60 0.577777777778
61 0.577777777778
62 0.577777777778
63 0.577777777778
64 0.5777777

# Read Data

In [51]:
im = misc.imread('data/big_alphabet_29x29/mutant-0-0-0.bmp', flatten='grey')
alphabet_size = 26
im_size = 29

In [52]:
x = []
y = []
x_test = []
y_test = []
for letter in range(alphabet_size):
    for i in range(9):
        path = "data/big_alphabet_29x29/mutant-" + str(letter) + "-" + str(i) + "-0.bmp"
        im = misc.imread(path, flatten='grey')
        if (i == 0 or i == 1):
            x_test.append(im.reshape(im_size * im_size))
            x_test[-1] /= 255
            y_test.append(letter)
        else:
            x.append(im.reshape(im_size * im_size))
            x[-1] /= 255
            y.append(letter)
for letter in range(alphabet_size):
    path = "data/big_alphabet_29x29/class-" + str(letter) + ".bmp"
    im = misc.imread(path, flatten='grey')
    x_test.append(im.reshape(im_size * im_size))
    x_test[-1] /= 255
    y_test.append(letter)
x = np.asarray(x)
y = np.asarray(y)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)
#ytest = np.arange(26)

In [53]:
print x.shape, y.shape, x_test.shape, y_test.shape

(182, 841) (182,) (78, 841) (78,)


In [60]:
nt = network(layers_list=[25, alphabet_size, x.shape[1]], learning_rate=0.01, alpha=1.0, \
             activation_functions=['sigmoid', 'sigmoid','x'], cost_func="logistic", \
             mode='class', cou_iter=800, early_stop=-1, regularization = 'l2', reg_param = 0.05, batch_size = 10)
nt.fit(x, y)

ypred = nt.predict(x_test)
print ypred
print accuracy_score(y_test, ypred)



0 inf
1 0.0
2 0.0526315789474
3 0.0526315789474
4 0.0
5 0.0
6 0.0
7 0.0
8 0.0
9 0.0
10 0.0
11 0.0
12 0.0
13 0.0
14 0.0
15 0.0
16 0.0
17 0.0
18 0.0
19 0.0
20 0.0
21 0.0
22 0.0
23 0.0
24 0.0
25 0.0
26 0.0
27 0.0
28 0.0
29 0.0
30 0.0
31 0.0
32 0.0
33 0.0
34 0.0
35 0.0
36 0.0
37 0.0
38 0.0
39 0.0
40 0.0
41 0.0
42 0.0
43 0.0
44 0.0
45 0.0
46 0.0
47 0.0
48 0.0
49 0.0
50 0.0
51 0.0
52 0.0
53 0.0
54 0.0
55 0.0
56 0.0
57 0.0
58 0.0
59 0.0
60 0.0
61 0.0
62 0.0
63 0.0
64 0.0
65 0.0
66 0.0
67 0.0
68 0.0
69 0.0
70 0.0
71 0.0
72 0.0
73 0.0
74 0.0
75 0.0
76 0.0
77 0.0
78 0.0
79 0.0
80 0.0
81 0.0
82 0.0
83 0.0
84 0.0
85 0.0
86 0.0
87 0.0
88 0.0
89 0.0
90 0.0
91 0.0
92 0.0
93 0.0
94 0.0
95 0.0
96 0.0
97 0.0
98 0.0
99 0.0
100 0.0
101 0.0
102 0.0
103 0.0
104 0.0
105 0.0
106 0.0
107 0.0526315789474
108 0.0
109 0.0
110 0.0526315789474
111 0.0526315789474
112 0.0526315789474
113 0.0526315789474
114 0.0526315789474
115 0.0526315789474
116 0.0526315789474
117 0.0526315789474
118 0.0526315789474
119 0.05263157

In [61]:
print y_test

[ 0  0  1  1  2  2  3  3  4  4  5  5  6  6  7  7  8  8  9  9 10 10 11 11 12
 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24 24
 25 25  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23 24 25]
