In [1]:
import numpy as np
from tqdm import tqdm

data_train = np.genfromtxt('data/train.csv', delimiter=',')
data_test = np.genfromtxt('data/test.csv', delimiter=',')

X_train, y_train = data_train[:,:-1]/255, data_train[:,-1]
X_test, y_test = data_test[:,:-1]/255, data_test[:,-1]

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


class NeuralNetwork:
    def __init__(self,
                 R=26,
                 eta=0.1,
                 hidden_layers=[50]):
        self.R = R
        self.eta = eta
        self.n_neurons = hidden_layers + [self.R]
        self.n_layers = len(self.n_neurons)
        
    def loss_func(self, y_, O_):
        y_enc = self.one_hot_encoding(y_)
        error = ( y_enc - O_ )
        return np.square( np.linalg.norm(error) )
        
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))

    def sigmoid_derivative(self, x):
        return x*(1-x)

    def activation(self, data, func='sig'):
        if func == 'sig':
            return self.sigmoid(data)
        return data

    def one_hot_encoding(self, y_):
        '''
            Expects a numpy array
        '''
        enc = np.zeros((y_.shape[0], self.R), dtype=np.int8)
        for i, y in enumerate(y_):
            enc[i, int(y)] = 1
        return enc
    
    
    def feed_forward(self, X_):
        self.outputs = []
        for i in range(self.n_layers):
            if i == 0:
                output = self.activation(np.dot(X_, self.weights[i])+self.biases[i])
            else :
                output = self.activation(np.dot(self.outputs[-1], self.weights[i])+self.biases[i])
            self.outputs.append(output)
        
    def calculate_delta(self, y_):
        self.deltas = [] # Careful delta is reversed
        for i in reversed(range(self.n_layers)):
            if i == self.n_layers-1 : # Output Layer
                y_enc = self.one_hot_encoding(y_)
                error = y_enc - self.outputs[i]
                delta = -1*error*self.sigmoid_derivative(self.outputs[i])
            else:
                error = np.dot( self.deltas[-1], self.weights[i+1].T)
                delta = (error*self.sigmoid_derivative(self.outputs[i]))
            self.deltas.append(delta)

        self.deltas = self.deltas[::-1]
        
    def update_theta(self, X_):
        for i in range(self.n_layers):
            if i == 0 : # First layer
                update = np.dot(X_.T, self.deltas[i])
            else :
                update = np.dot(self.outputs[i-1].T, self.deltas[i])
            self.weights[i] -= 0.1*update/100
            self.biases[i] -= 0.1*np.sum(self.deltas[i], axis=0)/100
    
    def fit(self, X_, y_):
        
        self.N = X_.shape[1]
        self.M = X_.shape[0]
        
        self._intialize_weights(type='glorot-normal')
        
        max_itr = 1000
        b_size = 100
        
        indices = np.random.choice(X_.shape[0], size=X_.shape[0])
        X_ = X_[indices]
        y_ = y_[indices]

        for i in tqdm(range(max_itr)):
            for idx in range(0, self.M, b_size):
                self.feed_forward(X_[idx:idx+b_size,:])
                self.calculate_delta(y_[idx:idx+b_size])
                self.update_theta(X_[idx:idx+b_size,:])
                
    def predict(self, X_):
        self.feed_forward(X_)
        return self.outputs[-1]
    
    def score(self, X_, y_):
        y_pred = np.argmax(self.predict(X_), axis=1)
        print(y_pred)
        return 100*(y_pred == y_).sum()/y_.shape[0]
    
    def _intialize_weights(self, type='glorot-normal'):
        self.weights=[]
        self.biases=[]
        self.layers=[784,50,26]
        
        for i in range(self.n_layers):
            fan_in = self.layers[i]
            fan_out = self.layers[i+1]
            #Glorot/Xevier Normal initialization
            if type=='glorot-normal':
                self.weights.append(np.random.normal(scale=(2/(fan_in+fan_out)), size=(fan_in, fan_out)))
                self.biases.append(np.random.normal(scale=2/(fan_in+fan_out), size=fan_out))
            #He-Normal intilization
            elif type=='he-normal':
                self.weights.append(np.random.normal(scale=np.sqrt(2/fan_in), size=(fan_in, fan_out)))
                self.biases.append(np.random.normal(scale=np.sqrt(2/fan_in), size=fan_out))
            #Glorot/Xevier uniform initialization
            elif type=='glorot-uniform':
                self.weights.append(np.random.uniform(low=-2/(fan_in+fan_out),\
                                                      high=2/(fan_in+fan_out),\
                                                      size=(fan_in, fan_out)))
                self.biases.append(np.random.uniform(low=-2/(fan_in+fan_out),\
                                                         high=2/(fan_in+fan_out),\
                                                         size=fan_out))
            #He-uniform intilization
            elif type=='he-uniform':
                self.weights.append(np.random.uniform(low=-np.sqrt(2/fan_in),\
                                                      high=np.sqrt(2/fan_in),\
                                                      size=(fan_in, fan_out)))
                self.biases.append(np.random.uniform(low=-np.sqrt(2/fan_in),\
                                                         high=np.sqrt(2/fan_in),\
                                                         size=fan_out))
            elif type=='uniform':
                self.weights.append(np.random.uniform(low=-0.025,\
                                                      high=0.025,\
                                                      size=(fan_in, fan_out)))
                self.biases.append(np.random.uniform(low=-0.025,\
                                                         high=0.025,\
                                                         size=fan_out))
    


nn = NeuralNetwork(eta=0.1, hidden_layers=[50])
nn.fit(X_train, y_train)
nn.score(X_test, y_test)

  0%|          | 2/1000 [00:00<01:03, 15.70it/s]

(13000, 784) (13000,) (6500, 784) (6500,)


100%|██████████| 1000/1000 [01:10<00:00, 14.09it/s]

[25 13  6 ...  3  4 12]





86.61538461538461

In [2]:
nn.score(X_train, y_train)

[24  5 24 ...  7 15 25]


91.44615384615385

In [6]:
data_train = np.genfromtxt('data/train.csv', delimiter=',')
data_test = np.genfromtxt('data/test.csv', delimiter=',')

X_train, y_train = data_train[:,:-1]/255, data_train[:,-1]
X_test, y_test = data_test[:,:-1]/255, data_test[:,-1]

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)


class NeuralNetwork:
    def __init__(self,
                 R=26,
                 eta=0.1,
                 hidden_layers=[50]):
#         np.random.seed(25)
        
        self.R = R
        self.eta = eta
        self.n_neurons = hidden_layers + [self.R]
        self.n_layers = len(self.n_neurons)
        
    def loss_func(self, y_, O_):
        y_enc = self.one_hot_encoding(y_)
        error = ( y_enc - O_ )
        return np.square( np.linalg.norm(error) )
        
    def sigmoid(self, x):
        return 1/(1.0+np.exp(-x))

    def sigmoid_derivative(self, x):
        return x*(1.0-x)

    def activation(self, data, func='sig'):
        if func == 'sig':
            return self.sigmoid(data)
        return data

    def one_hot_encoding(self, y_):
        '''
            Expects a numpy array
        '''
        enc = np.zeros((y_.shape[0], self.R))
        for i, y in enumerate(y_):
            enc[i, int(y)] = 1.0
        return enc
    
    
    def feed_forward(self, X_):
        self.outputs = []
        m = X_.shape[0]

        for i in range(self.n_layers):
            if i == 0 :
                output = self.activation( np.dot( X_, self.weights[i]) + self.biases[i] )
           
            else :
                output = self.activation( np.dot( self.outputs[-1], self.weights[i]) + self.biases[i] )
                
            self.outputs.append(output)
#             print(output.shape)

        
    def calculate_delta(self, y_):
        self.deltas = [] # Careful delta is reversed
        for i in reversed(range(self.n_layers)):
            
            if i == self.n_layers-1 : # Output Layer
                y_enc = self.one_hot_encoding(y_)
                error = y_enc - self.outputs[i]
                delta = error*self.sigmoid_derivative(self.outputs[i])

            else:
                error = np.dot( self.deltas[-1], self.weights[i+1].T )
                delta = (error*self.sigmoid_derivative(self.outputs[i]))
                
            self.deltas.append(delta)
#             print(delta.shape)

        self.deltas = self.deltas[::-1]
        
    def update_theta(self, X_):
        for i in range(self.n_layers):
            if i == 0 : # First layer
                update = np.dot(X_.T, self.deltas[i])
            else :
                update = np.dot(self.outputs[i-1].T, self.deltas[i])
            self.weights[i] += self.eta*update/self.deltas[i].shape[0]
            self.biases[i] += self.eta*np.sum(self.deltas[i], axis=0)/self.deltas[i].shape[0]
    
    def fit(self, X_, y_):
        self.N = X_.shape[1]
        self.M = X_.shape[0]
        
        self.weights = []
        self.biases = []
        for neuron in self.n_neurons:
            if self.weights == []:
                self.weights.append( np.random.uniform(-0.1, 0.1,(self.N, neuron)) )
            else:
                self.weights.append( np.random.uniform(-0.1, 0.1,(self.weights[-1].shape[1], neuron)) )
            self.biases.append( np.random.uniform(-0.1, 0.1,(1, neuron)) )
            print(self.weights[-1].shape)
            print(self.biases[-1].shape)
        
        prev_avg = np.inf
        sum_loss = 0
        total_itr = 0
        max_itr = 2000*130
        itr = 0 # For relooping over dataset
        ITERATION_TO_EVAL_AT = 100000000
        tol = 1e-40
        b_size = 100
        
        while True:
            for idx in range(0, self.M, b_size): # Possibly loss of last few training eg if not a multiple
                if total_itr > max_itr:
                    break
                if itr == ITERATION_TO_EVAL_AT:
                    if abs(sum_loss/itr - prev_avg) < tol:
                        break
                    prev_avg = sum_loss/itr
                    itr = 0
                    sum_loss = 0

                self.feed_forward(X_[idx:idx+b_size,:])
                self.calculate_delta(y_[idx:idx+b_size])
                self.update_theta(X_[idx:idx+b_size,:])
                
#                 loss = self.loss_func(y_[idx:idx+b_size], self.outputs[-1])
#                 print(loss)
                sum_loss += 0
                itr += 1
                total_itr += 1

            if total_itr > max_itr or abs(sum_loss/itr - prev_avg) < tol:
                break
            
        
    def predict(self, X_):
        self.feed_forward(X_)
        return self.outputs[-1]
    
    def score(self, X_, y_):
        y_pred = np.argmax(self.predict(X_), axis=1)
        print(y_pred)
        return 100*(y_pred == y_).sum()/y_.shape[0]


nn = NeuralNetwork(eta=0.1, hidden_layers=[50])
from time import time
t0=time()
nn.fit(X_train, y_train)
print(time()-t0)
nn.score(X_test, y_test)

(13000, 784) (13000,) (6500, 784) (6500,)
(784, 50)
(1, 50)
(50, 26)
(1, 26)
136.64449405670166
[25 13  6 ...  3  4 12]


88.70769230769231

In [7]:
nn.score(X_train, y_train)

[24  5 24 ...  7 15 25]


95.58461538461539