In [15]:
import numpy as np
import random

# Input/output file locations, relative to the notebook's working directory.
training_data_path = "../data/devnagri_train.csv"
testing_data_path = "../data/devnagri_test_public.csv"
output_path = "../data/out.txt"
# Number of output units of the network (one per predicted class index).
output_size = 46
# Width of each hidden layer, listed from the input side to the output side.
hidden_layers_sizes = [ 50]
# Hidden-layer activation: 'relu' or 'tanh'; any other value selects sigmoid.
activation = 'sigmoid'
# Placeholder only; overwritten with the real feature count after the data is loaded.
input_size = -1
# Number of examples per mini-batch.
batch_size = 32
# Initial learning rate; train() decays it as n0 / sqrt(iteration + 1).
n0 = 0.001
# Number of mini-batch updates (not epochs) performed by train().
max_iterations = 10000

In [2]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Uses ``np.maximum`` rather than ``(x > 0) * x`` so that ``-inf`` maps to
    0 (the product form gives ``0 * -inf = nan``) and negative inputs map to
    ``+0.0`` instead of ``-0.0``.

    Args:
        x: scalar or array-like of numbers.

    Returns:
        Value(s) of the same shape as ``x`` with negatives replaced by 0.
    """
    return np.maximum(x, 0)

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise via NumPy."""
    squashed = np.tanh(x)
    return squashed

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, computed without overflow.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. Here only ``exp(-|x|)`` is
    evaluated, which always lies in [0, 1], and the algebraically equivalent
    form ``exp(x) / (1 + exp(x))`` is used for negative inputs.

    Args:
        x: scalar or array-like of numbers.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    result = np.where(np.greater_equal(x, 0), 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # real arrays unchanged, matching what the plain formula returns.
    return result[()]

def reluPrime(x):
    """Derivative of ReLU as an integer mask: 1 where ``x > 0``, else 0.

    NeuralNetwork.train passes the layer's activation output here; for ReLU
    the output is positive exactly where the pre-activation is positive.
    """
    is_positive = x > 0
    # Adding 0 promotes the boolean mask to integers.
    return is_positive + 0

def tanhPrime(x):
    """Derivative of tanh expressed through its output: ``1 - x**2``.

    ``x`` is expected to be the tanh *output* (that is what
    NeuralNetwork.train passes), not the pre-activation value.
    """
    squared = np.power(x, 2)
    return 1 - squared

def sigmoidPrime(x):
    """Derivative of the sigmoid expressed through its output: ``x * (1 - x)``.

    ``x`` is expected to be the sigmoid *output* (that is what
    NeuralNetwork.train passes), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

def exp_normalize(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating so the largest
    exponent is 0; the result of every row sums to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

In [19]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by mini-batch gradient descent.

    Hidden layers use the activation chosen at construction time; the output
    layer always uses ``sigmoid``. Training minimises the summed squared error
    between the output layer and one-hot labels, averaged over the batch.

    Attributes:
        weights: one 2-D array per layer, shaped (layer inputs, layer outputs).
        biases: one (1, layer outputs) array per layer, aligned with ``weights``.
        activation: hidden-layer activation function.
        activationPrime: its derivative, evaluated on the activation's
            *output* (that is how ``train`` calls it).
        input_size, output_size, hidden_layers_sizes: the constructor arguments.
        hiddent_layers_sizes: misspelled alias of ``hidden_layers_sizes``,
            kept so existing code that reads it keeps working.
    """

    def __init__(self,input_size,output_size,hidden_layers_sizes, activation):
        """Create the layers with small random positive weights and biases.

        Args:
            input_size: number of input features.
            output_size: number of output units.
            hidden_layers_sizes: sequence with the width of each hidden layer.
            activation: 'relu' or 'tanh'; any other value selects sigmoid.
        """
        self.weights = []
        self.biases = []

        if activation == 'relu':
            self.activation = relu
            self.activationPrime = reluPrime
        elif activation == 'tanh':
            self.activation = tanh
            self.activationPrime = tanhPrime
        else:
            self.activation = sigmoid
            self.activationPrime = sigmoidPrime

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers_sizes = hidden_layers_sizes
        # Original (misspelled) attribute name, kept for backward compatibility.
        self.hiddent_layers_sizes = hidden_layers_sizes

        # Consecutive pairs of this list are the (inputs, outputs) of each layer.
        layer_sizes = [input_size] + list(hidden_layers_sizes) + [output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Uniform values in [0, 0.01).
            self.weights.append(np.random.rand(fan_in, fan_out)/100)
            self.biases.append(np.random.rand(1, fan_out)/100)

    def _layer_output(self, index, layer_input):
        """Return the activated output of layer ``index`` for ``layer_input``."""
        pre_activation = layer_input.dot(self.weights[index]) + self.biases[index]
        if index == len(self.weights) - 1:
            # The output layer is always sigmoid, whatever the hidden activation.
            return sigmoid(pre_activation)
        return self.activation(pre_activation)

    def train(self,inpX,inpY,batch_size,n0,max_iterations):
        """Run ``max_iterations`` mini-batch gradient-descent updates in place.

        Batches are consecutive slices of the data taken in order (no
        shuffling); the last batch of each pass absorbs the remainder. Every
        100 iterations the loss and accuracy of the current batch are printed.

        Args:
            inpX: 2-D array of examples, one row per example.
            inpY: 1-D integer array of class indices, one per row of ``inpX``.
            batch_size: number of examples per mini-batch.
            n0: initial learning rate; iteration ``n`` uses ``n0 / sqrt(n + 1)``.
            max_iterations: number of mini-batch updates to perform.

        Raises:
            ValueError: if ``inpX`` has no rows or ``batch_size`` is not positive.
        """
        max_examples = inpX.shape[0]
        if max_examples == 0:
            raise ValueError("inpX contains no training examples")
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")

        # Rounded to the nearest whole number of batches, but never zero:
        # with fewer examples than half a batch the rounding used to give 0
        # and the modulo below raised ZeroDivisionError.
        num_batches = max(1, int(0.5 + max_examples / batch_size))
        num_layers = len(self.weights)

        for n in range(max_iterations):
            # Forming Mini Batches
            batch_index = n % num_batches
            start = batch_index * batch_size
            # The final batch runs to the end of the data to pick up the remainder.
            stop = None if batch_index == num_batches - 1 else start + batch_size
            X = inpX[start:stop]
            Y = inpY[start:stop]

            # Updating Learning Rate
            lr = n0 / np.sqrt(n+1)

            # Neural Network Forward Propagation
            # outputs[0] is the input; outputs[k] is the output of layer k-1.
            outputs = [X]
            for i in range(num_layers):
                outputs.append(self._layer_output(i, outputs[-1]))

            # Backpropagation
            y_onehot = np.zeros((Y.shape[0],self.output_size))
            y_onehot[range(Y.shape[0]),Y] = 1

            dWs = []
            dbs = []
            for i in range(num_layers,0,-1):
                if i == num_layers:
                    # Gradient of the batch-averaged squared error through the output sigmoid.
                    delta = (outputs[i] - y_onehot) * (2/Y.shape[0]) * sigmoidPrime(outputs[i])
                else:
                    # weights[i] maps outputs[i] to outputs[i+1], so it carries delta back.
                    delta = delta.dot(self.weights[i].T) * self.activationPrime(outputs[i])
                dWs.append((outputs[i-1].T).dot(delta))
                dbs.append(np.sum(delta,axis=0,keepdims=True))

            if (n%100 == 0):
                loss = np.sum(np.power(outputs[-1] - y_onehot,2) )/Y.shape[0]
                labels = np.argmax(outputs[-1],axis = 1)
                accuracy = 100 * np.sum(labels == Y)/Y.shape[0]
                print("Iteration ",n,"\tLoss = ",loss,"\tAccuracy = ",accuracy,"%")

            # Gradients were collected from the last layer to the first.
            dWs.reverse()
            dbs.reverse()

            # Gradient Descent Parameter Update
            for i in range(num_layers):
                self.weights[i] -= lr * dWs[i]
                self.biases[i] -= lr * dbs[i]

    def predict(self,X):
        """Return the predicted class index for every row of ``X``."""
        return self.forward_run(X)

    def forward_run(self,X):
        """Propagate ``X`` through the network and return the argmax of the output layer."""
        layer_output = X
        for i in range(len(self.weights)):
            layer_output = self._layer_output(i, layer_output)
        return np.argmax(layer_output,axis = 1)

In [4]:
def load_data(path,avg,std):
    """Load a comma-separated numeric file and standardise its feature columns.

    The first column is split off from the features (it is returned as the
    labels in training mode and discarded otherwise); every remaining column
    is a feature.

    Training mode (``avg is None``): the per-column mean and standard
    deviation are computed from the file, columns with zero deviation get a
    deviation of 1 so they are not divided by zero, and the statistics are
    returned alongside the data.

    Reuse mode (``avg`` given): the supplied ``avg`` and ``std`` are applied,
    so ``std`` must be provided as well.

    Args:
        path: location of the CSV file.
        avg: per-feature means to reuse, or None to compute them.
        std: per-feature standard deviations to reuse (ignored when avg is None).

    Returns:
        ``(X, Y, avg, std)`` in training mode, otherwise just ``X``.
    """
    # Passing the path (instead of an open handle) lets np.loadtxt close the
    # file itself; ndmin=2 keeps a single-row file two-dimensional.
    input_data = np.loadtxt(path, delimiter=",", ndmin=2)
    features = input_data[:,1:]

    if avg is not None:
        return (features - avg)/std

    # Copy so the labels do not keep the whole loaded table alive.
    labels = input_data[:,0].copy()
    avg = np.average(features,axis=0)
    centered = features - avg
    std = np.std(centered,axis=0)
    std[(std == 0)] = 1
    return centered / std,labels,avg,std

In [5]:
# Load the training set; passing None for avg/std makes load_data compute the
# per-feature mean and standard deviation from this file and return them.
inpX,Y,avg,std = load_data(training_data_path,None,None)

In [20]:
# Work on a copy so the loaded inpX array is left untouched.
X = inpX.copy()
# One input unit per feature column.
input_size = X.shape[1]
network = NeuralNetwork(input_size,output_size,hidden_layers_sizes,activation)
# Labels are cast to int because train() uses them as column indices when
# building the one-hot targets.
network.train(X,Y.astype(int),batch_size,n0,max_iterations)

Iteration  0 	Loss =  13.052155417035252 	Accuracy =  0.0 %
Iteration  100 	Loss =  12.291752180886219 	Accuracy =  6.25 %
Iteration  200 	Loss =  11.940904700552725 	Accuracy =  3.125 %
Iteration  300 	Loss =  11.738440489795808 	Accuracy =  0.0 %
Iteration  400 	Loss =  11.52352440508204 	Accuracy =  0.0 %
Iteration  500 	Loss =  11.361664815758777 	Accuracy =  0.0 %
Iteration  600 	Loss =  11.208653478672904 	Accuracy =  3.125 %
Iteration  700 	Loss =  11.06072975065171 	Accuracy =  3.125 %
Iteration  800 	Loss =  10.971640980379405 	Accuracy =  0.0 %
Iteration  900 	Loss =  10.865334320170124 	Accuracy =  3.125 %
Iteration  1000 	Loss =  10.80541397488632 	Accuracy =  0.0 %
Iteration  1100 	Loss =  10.529237415230677 	Accuracy =  0.0 %
Iteration  1200 	Loss =  10.430819930038659 	Accuracy =  0.0 %
Iteration  1300 	Loss =  10.545250147597244 	Accuracy =  0.0 %
Iteration  1400 	Loss =  10.299883073837652 	Accuracy =  0.0 %
Iteration  1500 	Loss =  10.216055690039742 	Accuracy =  0.0 

In [None]:
# Predict on the training data and print the percentage of labels matched.
predictions = network.predict(X.copy())
print(100 * np.sum(predictions == Y)/Y.shape[0])
# Mean predicted class index — presumably a quick sanity check on the
# spread of predictions; confirm intent.
print(np.average(predictions))

In [None]:
# Load the test set and standardise it with the training-set avg/std.
testX = load_data(testing_data_path,avg,std)

In [None]:
# Predict a class index for every test row and write them to output_path
# formatted as integers.
predictions = network.predict(testX)
np.savetxt(output_path,predictions,fmt="%i")