In [55]:
import numpy as np
import random
import sys
from scipy.special import expit as sigmoid

# Command-line variant of the settings below (currently disabled; the
# hard-coded values that follow are used instead):
# training_data_path = sys.argv[1]
# testing_data_path = sys.argv[2]
# output_path = sys.argv[3]
# batch_size = int(sys.argv[4])
# activation = sys.argv[5]
# hidden_layers_sizes = []
# for i in range(6,len(sys.argv)):
#     hidden_layers_sizes.append(int(sys.argv[i]))

# Input CSVs and the file the test-set predictions are written to.
training_data_path = "../data/devnagri_train.csv"
testing_data_path = "../data/devnagri_test_public.csv"
output_path = "../data/nn/b/cs1160328.txt"

# Mini-batch size and network architecture.
batch_size = 128
activation = 'relu'           # 'relu', 'tanh', anything else selects sigmoid
hidden_layers_sizes = [200]   # one entry per hidden layer

# Base learning rate; training divides it by sqrt(iteration + 1).
n0 = 0.01

# The network draws its initial weights from NumPy's global RNG, so seed it
# here to make a Restart & Run All reproduce the same training run.
random_seed = 0
np.random.seed(random_seed)

In [2]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Uses ``np.maximum`` rather than masking by multiplication, because
    ``False * -inf`` evaluates to NaN (and ``False * negative`` to ``-0.0``);
    clamping yields a clean ``0`` for every non-positive input.

    Args:
        x: NumPy array (or scalar) of pre-activations.

    Returns:
        Array of the same shape with negative entries replaced by zero.
    """
    return np.maximum(x, 0)

def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via NumPy."""
    squashed = np.tanh(x)
    return squashed

def reluPrime(x):
    """Derivative of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere.

    The boolean mask is promoted to integers by adding zero.
    """
    positive_mask = x > 0
    return positive_mask + 0

def tanhPrime(x):
    """Return ``1 - x**2`` element-wise.

    This equals the derivative of tanh when ``x`` is already a tanh output.
    """
    squared = np.power(x, 2)
    return 1 - squared

def sigmoidPrime(x):
    """Return ``x * (1 - x)`` element-wise.

    This equals the derivative of the logistic sigmoid when ``x`` is already
    a sigmoid output.
    """
    complement = 1 - x
    return x * complement

def exp_normalize(x):
    """Row-wise softmax of a 2-D array.

    Each row has its maximum subtracted before exponentiation so that large
    scores do not overflow; the exponentials are then divided by their row
    sum, giving rows that add up to one.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

In [10]:
class NeuralNetwork:
    """Fully connected feed-forward classifier with a softmax output layer.

    Hidden layers use the activation chosen at construction time; the output
    layer is a row-wise softmax. Training is mini-batch gradient descent on
    the cross-entropy loss with a learning rate that decays as
    ``n0 / sqrt(iteration + 1)``.

    Attributes:
        weights: list of 2-D arrays, one per layer, shaped (fan_in, fan_out).
        biases: list of (1, fan_out) arrays, one per layer.
        activation: hidden-layer activation function.
        activationPrime: its derivative, evaluated on the layer's *activated
            output* (not on the pre-activation).
        input_size: number of input features.
        output_size: number of classes.
        hidden_layers_sizes: hidden-layer widths as passed to the constructor.
        hiddent_layers_sizes: misspelled alias of ``hidden_layers_sizes``,
            kept so existing code that reads the old name keeps working.
    """

    def __init__(self,input_size,output_size,hidden_layers_sizes, activation):
        """Create the layers and draw their initial parameters.

        Args:
            input_size: number of features per example.
            output_size: number of classes.
            hidden_layers_sizes: sequence with one width per hidden layer
                (may be empty).
            activation: ``'relu'`` or ``'tanh'``; any other value selects the
                logistic sigmoid.
        """
        self.weights = []
        self.biases = []

        if activation == 'relu':
            self.activation = relu
            self.activationPrime = reluPrime
        elif activation == 'tanh':
            self.activation = tanh
            self.activationPrime = tanhPrime
        else:
            self.activation = sigmoid
            self.activationPrime = sigmoidPrime

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers_sizes = hidden_layers_sizes
        # Original (misspelled) attribute name, preserved for compatibility.
        self.hiddent_layers_sizes = hidden_layers_sizes

        # Consecutive pairs of layer widths give each weight matrix's shape.
        # Parameters are uniform in [0, 0.01); weights are drawn before the
        # biases of the same layer, so a seeded RNG reproduces the same net.
        layer_sizes = [input_size] + list(hidden_layers_sizes) + [output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.rand(fan_in, fan_out)/100)
            self.biases.append(np.random.rand(1, fan_out)/100)

    def _forward(self, X):
        """Run a forward pass and return every layer's output.

        Returns:
            A list whose first element is ``X`` itself, followed by the
            activated output of each layer; the last element holds the
            softmax class probabilities.
        """
        outputs = [X]
        last_layer = len(self.weights) - 1
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            pre_activation = outputs[-1].dot(weight) + bias
            if i == last_layer:
                outputs.append(exp_normalize(pre_activation))
            else:
                outputs.append(self.activation(pre_activation))
        return outputs

    def train(self,inpX,inpY,batch_size,n0,max_iterations):
        """Fit the network with mini-batch gradient descent.

        Batches are taken in order from ``inpX`` / ``inpY`` and cycled; the
        final batch of each pass absorbs whatever examples remain. Every
        100th iteration the loss and accuracy of the current batch (measured
        before that iteration's update) are printed.

        Args:
            inpX: 2-D array of examples, one row per example.
            inpY: 1-D integer array of class indices, aligned with ``inpX``.
            batch_size: nominal number of examples per mini-batch.
            n0: base learning rate.
            max_iterations: number of parameter updates to perform.
        """
        max_examples = inpX.shape[0]
        # Rounded number of batches per pass. Clamp to at least one: a data
        # set smaller than half a batch would otherwise give zero here and
        # make the modulo below raise ZeroDivisionError.
        batches_per_pass = max(1, int(0.5 + max_examples / batch_size))
        num_layers = len(self.weights)
        last_layer = num_layers - 1

        for n in range(max_iterations):
            # Forming mini batches: the last one in a pass runs to the end.
            i_eff = n % batches_per_pass
            start = i_eff * batch_size
            stop = None if i_eff == batches_per_pass - 1 else start + batch_size
            X = inpX[start:stop]
            Y = inpY[start:stop]
            rows = np.arange(Y.shape[0])

            # Updating learning rate
            lr = n0 / np.sqrt(n + 1)

            # Forward propagation
            outputs = self._forward(X)

            # Backpropagation. For softmax + cross-entropy the output-layer
            # error is (probabilities - one_hot(Y)). Gradients are summed
            # over the batch, not averaged.
            dWs = [None] * num_layers
            dbs = [None] * num_layers
            delta = outputs[-1].copy()
            delta[rows, Y] -= 1
            for k in range(last_layer, -1, -1):
                if k != last_layer:
                    # Push the error back through layer k+1's weights and
                    # the activation that produced outputs[k+1].
                    delta = delta.dot(self.weights[k + 1].T) * self.activationPrime(outputs[k + 1])
                dWs[k] = outputs[k].T.dot(delta)
                dbs[k] = np.sum(delta, axis=0, keepdims=True)

            if n % 100 == 0:
                probabilities = outputs[-1]
                loss = np.sum(-1*np.log(probabilities[rows, Y])) / Y.shape[0]
                labels = np.argmax(probabilities, axis=1)
                accuracy = 100 * np.sum(labels == Y)/Y.shape[0]
                print("Iteration ",n," ,Loss = ",loss," ,Accuracy = ",accuracy,"%")

            # Gradient descent parameter update (in place)
            for k in range(num_layers):
                self.weights[k] -= lr * dWs[k]
                self.biases[k] -= lr * dbs[k]

    def predict(self,X):
        """Return the predicted class index for every row of ``X``."""
        return self.forward_run(X)

    def forward_run(self,X):
        """Forward pass that returns the arg-max class index per row."""
        probabilities = self._forward(X)[-1]
        return np.argmax(probabilities, axis=1)

In [4]:
def load_data(path,avg,std):
    """Load a comma-separated numeric file and standardise its features.

    The first column of the file is treated as the label column and every
    remaining column as a feature.

    Args:
        path: location of the CSV file.
        avg: per-feature means to subtract, or ``None`` to compute them from
            this file.
        std: per-feature scales to divide by; ignored (and recomputed) when
            ``avg`` is ``None``.

    Returns:
        When ``avg`` is ``None``: the tuple ``(X, Y, avg, std)`` holding the
        standardised features, the label column, and the statistics used.
        Otherwise: only the feature matrix standardised with the supplied
        ``avg`` and ``std`` (the first column is dropped).
    """
    # Handing the path to NumPy lets it open and close the file itself
    # instead of leaking a handle; ndmin=2 keeps a one-row file
    # two-dimensional so the column slicing below still works.
    input_data = np.loadtxt(path, delimiter=",", ndmin=2)
    X = input_data[:, 1:]

    if avg is not None:
        return (X - avg) / std

    Y = input_data[:, 0].copy()
    avg = np.average(X, axis=0)
    X = X - avg
    std = np.std(X, axis=0)
    # Constant features have zero spread; divide them by 1 instead of 0.
    std[std == 0] = 1
    return X / std, Y, avg, std

In [5]:
# Passing no statistics makes load_data compute the mean/std from this file
# and hand them back for reuse on other data.
inpX, Y, avg, std = load_data(training_data_path, avg=None, std=None)

In [56]:
# Train on a copy so the array returned by load_data stays untouched.
X = inpX.copy()

num_examples = X.shape[0]
input_size = X.shape[1]
# Labels are used as class indices, so the output layer needs max label + 1 units.
output_size = 1 + int(np.amax(Y))
# Enough updates for roughly 40 passes over the training examples.
max_iterations = int(40 * (num_examples / batch_size))

network = NeuralNetwork(input_size, output_size, hidden_layers_sizes, activation)
network.train(X, Y.astype(int), batch_size, n0, max_iterations)

Iteration  0  ,Loss =  3.8317014969139285  ,Accuracy =  0.0 %
Iteration  100  ,Loss =  1.7088075370765061  ,Accuracy =  54.6875 %
Iteration  200  ,Loss =  1.2941586624640817  ,Accuracy =  69.53125 %
Iteration  300  ,Loss =  1.1486513990162552  ,Accuracy =  69.53125 %
Iteration  400  ,Loss =  0.895254466733158  ,Accuracy =  78.125 %
Iteration  500  ,Loss =  0.93088707206551  ,Accuracy =  72.65625 %
Iteration  600  ,Loss =  0.9785850908233098  ,Accuracy =  75.0 %
Iteration  700  ,Loss =  0.7400555974919976  ,Accuracy =  80.46875 %
Iteration  800  ,Loss =  0.5688956905952327  ,Accuracy =  83.59375 %
Iteration  900  ,Loss =  0.6146634781413804  ,Accuracy =  83.59375 %
Iteration  1000  ,Loss =  0.6240115101126972  ,Accuracy =  83.59375 %
Iteration  1100  ,Loss =  0.7217665907078903  ,Accuracy =  79.6875 %
Iteration  1200  ,Loss =  0.706501395694505  ,Accuracy =  79.6875 %
Iteration  1300  ,Loss =  0.5942898130527431  ,Accuracy =  89.84375 %
Iteration  1400  ,Loss =  0.7089700976085631  ,Acc

Iteration  11800  ,Loss =  0.2207025791533423  ,Accuracy =  96.09375 %
Iteration  11900  ,Loss =  0.2261633004560764  ,Accuracy =  94.53125 %
Iteration  12000  ,Loss =  0.19893212156898799  ,Accuracy =  96.09375 %
Iteration  12100  ,Loss =  0.2708385605270903  ,Accuracy =  93.75 %
Iteration  12200  ,Loss =  0.24657388602657018  ,Accuracy =  91.40625 %
Iteration  12300  ,Loss =  0.42185500663790476  ,Accuracy =  91.40625 %
Iteration  12400  ,Loss =  0.25577859669939884  ,Accuracy =  93.75 %
Iteration  12500  ,Loss =  0.1402055396571771  ,Accuracy =  96.875 %
Iteration  12600  ,Loss =  0.19733588668792051  ,Accuracy =  94.53125 %
Iteration  12700  ,Loss =  0.23445521972210562  ,Accuracy =  95.3125 %
Iteration  12800  ,Loss =  0.2659165436912198  ,Accuracy =  93.75 %
Iteration  12900  ,Loss =  0.2987084940127783  ,Accuracy =  92.96875 %
Iteration  13000  ,Loss =  0.22681368739378818  ,Accuracy =  93.75 %
Iteration  13100  ,Loss =  0.24408965320231948  ,Accuracy =  94.53125 %
Iteration  13

Iteration  23400  ,Loss =  0.10727268224142844  ,Accuracy =  98.4375 %
Iteration  23500  ,Loss =  0.20870787841079408  ,Accuracy =  94.53125 %
Iteration  23600  ,Loss =  0.12351537030597429  ,Accuracy =  96.09375 %
Iteration  23700  ,Loss =  0.15918679419087156  ,Accuracy =  96.09375 %
Iteration  23800  ,Loss =  0.22527906181869267  ,Accuracy =  93.75 %
Iteration  23900  ,Loss =  0.140011271398698  ,Accuracy =  96.875 %
Iteration  24000  ,Loss =  0.2156645879463161  ,Accuracy =  95.3125 %
Iteration  24100  ,Loss =  0.09859288654405096  ,Accuracy =  96.875 %
Iteration  24200  ,Loss =  0.15381842359436582  ,Accuracy =  97.65625 %
Iteration  24300  ,Loss =  0.14837208517838263  ,Accuracy =  95.3125 %
Iteration  24400  ,Loss =  0.13930672597916094  ,Accuracy =  96.875 %


In [57]:
# Accuracy, in percent, on the same examples the network was trained on.
predictions = network.predict(X.copy())
num_correct = np.sum(predictions == Y)
print(100 * num_correct / Y.shape[0])

96.64066496163683


In [58]:
# Standardise the test features with the statistics computed on the training set.
testX = load_data(testing_data_path, avg=avg, std=std)

In [59]:
# Predict a class index for every test row and write them out, one integer per line.
predictions = network.predict(testX)
np.savetxt(output_path, predictions, fmt="%i")