In [17]:
import numpy as np
import random
import sys
from scipy.special import expit as sigmoid

# Positional command-line arguments:
#   1 -> training CSV path      2 -> test CSV path       3 -> output file path
#   4 -> mini-batch size (int)  5 -> base learning rate  6 -> activation name
#   7.. -> one integer per hidden layer, giving that layer's width
cli_args = sys.argv
training_data_path = cli_args[1]
testing_data_path = cli_args[2]
output_path = cli_args[3]
batch_size = int(cli_args[4])
n0 = float(cli_args[5])
activation = cli_args[6]
# Every remaining argument is a hidden-layer width; none at all means no hidden layers.
hidden_layers_sizes = [int(layer_width) for layer_width in cli_args[7:]]

# training_data_path = "../data/devnagri_train.csv"
# testing_data_path = "../data/devnagri_test_public.csv"
# output_path = "../data/nn/a/cs1160328.txt"
# batch_size = 512
# n0 = 0.01
# activation = 'sigmoid'
# hidden_layers_sizes = [100]

In [2]:
def relu(x):
    """Rectified linear unit: multiply ``x`` element-wise by the mask ``x > 0``."""
    positive_mask = x > 0
    return positive_mask * x

def tanh(x):
    """Element-wise hyperbolic tangent of ``x`` (delegates to ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def reluPrime(x):
    """Derivative of ReLU as integers: 1 where ``x > 0`` and 0 elsewhere.

    Adding 0 converts the boolean mask into an integer result.
    """
    is_positive = x > 0
    return is_positive + 0

def tanhPrime(x):
    """Return ``1 - x**2`` element-wise.

    This is the derivative of tanh expressed in terms of its *output*, so ``x``
    is expected to already be a tanh activation rather than the pre-activation.
    """
    squared = np.power(x, 2)
    return 1 - squared

def sigmoidPrime(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the derivative of the sigmoid expressed in terms of its *output*,
    so ``x`` is expected to already be a sigmoid activation.
    """
    complement = 1 - x
    return x * complement

def exp_normalize(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating so that large
    inputs do not overflow; the result rows each sum to 1.
    """
    row_max = np.amax(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_totals = shifted_exp.sum(axis=1, keepdims=True)
    return shifted_exp / row_totals

In [19]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained with mini-batch gradient descent.

    Hidden layers use the activation selected at construction time. The output
    layer always applies a sigmoid, and training minimises the squared error
    between those sigmoid outputs and one-hot encoded labels.
    """

    def __init__(self,input_size,output_size,hidden_layers_sizes, activation):
        """Build randomly initialised weights and biases for every layer.

        Args:
            input_size: number of features per example.
            output_size: number of output units (one per class).
            hidden_layers_sizes: sequence of hidden-layer widths; may be empty.
            activation: 'relu' or 'tanh'; any other value selects the sigmoid.
        """
        self.weights = []
        self.biases = []

        if activation == 'relu':
            self.activation = relu
            self.activationPrime = reluPrime
        elif activation == 'tanh':
            self.activation = tanh
            self.activationPrime = tanhPrime
        else:
            self.activation = sigmoid
            self.activationPrime = sigmoidPrime

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers_sizes = hidden_layers_sizes
        # Historical misspelling, kept so existing code reading it keeps working.
        self.hiddent_layers_sizes = hidden_layers_sizes

        # Consecutive pairs of this list are the (fan_in, fan_out) of each layer.
        layer_sizes = [input_size] + list(hidden_layers_sizes) + [output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Uniform values in [0, 0.01); weights are drawn before biases per layer.
            self.weights.append(np.random.rand(fan_in, fan_out)/100)
            self.biases.append(np.random.rand(1, fan_out)/100)

    def train(self,inpX,inpY,batch_size,n0,max_iterations):
        """Fit the network in place with mini-batch gradient descent.

        Args:
            inpX: 2-D array of training examples (one row per example).
            inpY: 1-D array of integer class labels in ``[0, output_size)``.
            batch_size: number of examples per mini-batch (must be positive).
            n0: base learning rate.
            max_iterations: upper bound on the number of mini-batch updates.

        One "epoch" is ``round(num_examples / batch_size)`` consecutive batches
        (at least one); the last batch of an epoch absorbs any remainder. At each
        epoch boundary the average batch loss is compared with the previous one:
        if it did not decrease the learning rate becomes ``n0 / sqrt(k + 1)``
        where ``k`` counts such epochs, and training stops early once the
        relative change stays below 1e-4 for two consecutive epochs.

        Raises:
            ValueError: if ``batch_size`` is not positive or ``inpX`` is empty.
        """
        max_examples = inpX.shape[0]
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if max_examples == 0:
            raise ValueError("training set is empty")

        # Clamp to 1: a batch size above twice the data size would otherwise
        # round to 0 batches per epoch and make the modulo below divide by zero.
        max_possible_iterations = max(1, int(0.5 + max_examples / batch_size))
        num_layers = len(self.weights)

        count = 0          # epochs whose average loss failed to decrease
        lr = n0
        totLoss = 0
        prevAvgLoss = sys.float_info.max
        epoch = 0
        stopCount = 0      # consecutive epochs with a negligible loss change

        for n in range(max_iterations):
            # Index of the mini-batch inside the current epoch.
            i_eff = n % max_possible_iterations

            # Epoch boundary: early-stopping check and learning-rate update.
            if i_eff == 0 and n != 0:
                avgLoss = totLoss / max_possible_iterations

                if np.absolute(avgLoss - prevAvgLoss) < 0.0001 * prevAvgLoss:
                    stopCount += 1
                    if stopCount > 1:
                        break
                else:
                    stopCount = 0
                if avgLoss >= prevAvgLoss:
                    count += 1
                    lr = n0 / np.sqrt(count + 1)
                print("Epoch = ",epoch," Average Loss = ",avgLoss," New Learning Rate = ",lr)
                epoch += 1
                prevAvgLoss = avgLoss
                totLoss = 0

            # Slice out the mini-batch; the final one takes everything that is left.
            start = i_eff * batch_size
            if i_eff != max_possible_iterations - 1:
                X = inpX[start:start + batch_size]
                Y = inpY[start:start + batch_size]
            else:
                X = inpX[start:]
                Y = inpY[start:]

            # Forward propagation: outputs[0] is X, outputs[i] is layer i's activation.
            outputs = self._forward(X)

            num_in_batch = Y.shape[0]
            y_onehot = np.zeros((num_in_batch, self.output_size))
            y_onehot[np.arange(num_in_batch), Y] = 1

            # Backpropagation, walking from the output layer back to the first layer.
            dWs = []
            dbs = []
            for i in range(num_layers, 0, -1):
                if i == num_layers:
                    # Gradient of the mean squared error through the output sigmoid.
                    delta = (outputs[i] - y_onehot) * (2 / num_in_batch) * sigmoidPrime(outputs[i])
                else:
                    delta = delta.dot(self.weights[i].T) * self.activationPrime(outputs[i])
                dWs.append(outputs[i-1].T.dot(delta))
                dbs.append(np.sum(delta, axis=0, keepdims=True))
            dWs.reverse()
            dbs.reverse()

            # Batch loss is taken from the pre-update outputs, so compute it once.
            loss = np.sum(np.power(outputs[-1] - y_onehot, 2)) / num_in_batch

            if n % 100 == 0:
                labels_ = np.argmax(outputs[-1], axis=1)
                accuracy_ = 100 * np.sum(labels_ == Y) / num_in_batch
                print("Iteration ",n,"\tLoss = ",loss,"\tAccuracy = ",accuracy_,"%")

            # Gradient descent parameter update (in place).
            for i in range(num_layers):
                self.weights[i] -= lr * dWs[i]
                self.biases[i] -= lr * dbs[i]

            totLoss += loss

    def predict(self,X):
        """Return the predicted class index for each row of ``X``."""
        return self.forward_run(X)

    def forward_run(self,X):
        """Run a forward pass and return the arg-max output unit for each row of ``X``."""
        scores = self._forward(X)[-1]
        return np.argmax(scores, axis=1)

    def _forward(self, X):
        """Return ``[X, a_1, ..., a_L]``: the input followed by every layer's activation."""
        outputs = [X]
        last_layer = len(self.weights) - 1
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            pre_activation = outputs[-1].dot(weight) + bias
            if i == last_layer:
                # The output layer is always a sigmoid, regardless of the hidden activation.
                outputs.append(sigmoid(pre_activation))
            else:
                outputs.append(self.activation(pre_activation))
        return outputs

In [4]:
def load_data(path,avg,std):
    """Load a comma-separated data file and standardise its feature columns.

    The first column of the file is the label; the remaining columns are features.

    Args:
        path: path of the CSV file to read.
        avg: per-feature means to subtract, or None to compute them from this file.
        std: per-feature standard deviations to divide by; ignored (recomputed)
            when ``avg`` is None.

    Returns:
        When ``avg`` is None: ``(X, Y, avg, std)`` where ``X`` is the standardised
        feature matrix, ``Y`` the label column, and ``avg``/``std`` the statistics
        computed from this file (zero deviations are replaced by 1).
        Otherwise: only ``X``, standardised with the supplied ``avg`` and ``std``.
    """
    # Handing the path to NumPy lets it open *and close* the file itself, and
    # ndmin=2 keeps a single-row file two-dimensional so column slicing works.
    input_data = np.loadtxt(path, delimiter=",", ndmin=2)
    X = input_data[:,1:].copy()

    if avg is None:
        Y = input_data[:,0].copy()
        avg = np.average(X,axis=0)
        X = X - avg
        std = np.std(X,axis=0)
        # Constant features have zero deviation; divide by 1 instead of 0.
        std[(std == 0)] = 1
        X = X / std
        return X,Y,avg,std

    return (X - avg)/std

In [5]:
# Load and standardise the training set. avg/std are the per-feature statistics
# computed from it; they are reused later to standardise the test set identically.
inpX,Y,avg,std = load_data(training_data_path,None,None)

In [20]:
# Train on a copy so the matrix returned by load_data stays untouched.
X = inpX.copy()

num_examples, input_size = X.shape[0], X.shape[1]
# Labels are used as 0-based class indices, so the largest label fixes the output width.
output_size = int(np.amax(Y)) + 1

# Budget roughly 40 passes over the data, but never fewer than 25000 updates.
max_iterations = max(25000, int(40 * (num_examples / batch_size)))

network = NeuralNetwork(input_size, output_size, hidden_layers_sizes, activation)
network.train(X, Y.astype(int), batch_size, n0, max_iterations)

Iteration  0 	Loss =  32.14825318333342 	Accuracy =  2.34375 %
Iteration  100 	Loss =  32.53796718342765 	Accuracy =  1.3671875 %
Epoch =  0  Average Loss =  32.03512367346666  New Learning Rate =  0.01
Iteration  200 	Loss =  30.906488228664266 	Accuracy =  1.5625 %
Iteration  300 	Loss =  30.2690481252686 	Accuracy =  3.125 %
Epoch =  1  Average Loss =  30.96630908246529  New Learning Rate =  0.01
Iteration  400 	Loss =  29.26547757561292 	Accuracy =  1.5625 %
Epoch =  2  Average Loss =  30.000979333473232  New Learning Rate =  0.01
Iteration  500 	Loss =  29.876958827903483 	Accuracy =  1.171875 %
Iteration  600 	Loss =  29.36768140903809 	Accuracy =  1.5625 %
Epoch =  3  Average Loss =  29.157723298814044  New Learning Rate =  0.01
Iteration  700 	Loss =  27.47424872461346 	Accuracy =  1.953125 %
Epoch =  4  Average Loss =  28.439324679264715  New Learning Rate =  0.01
Iteration  800 	Loss =  27.121728266458874 	Accuracy =  1.953125 %
Iteration  900 	Loss =  27.557161457289208 	Acc

Iteration  7100 	Loss =  25.135895192552248 	Accuracy =  3.90625 %
Epoch =  46  Average Loss =  23.857518226825842  New Learning Rate =  0.01
Iteration  7200 	Loss =  22.47239208684573 	Accuracy =  1.5625 %
Iteration  7300 	Loss =  25.300112831627818 	Accuracy =  1.7578125 %
Epoch =  47  Average Loss =  23.837899205602085  New Learning Rate =  0.01
Iteration  7400 	Loss =  24.018893552691456 	Accuracy =  1.3671875 %
Epoch =  48  Average Loss =  23.81880695399596  New Learning Rate =  0.01
Iteration  7500 	Loss =  23.27858026505802 	Accuracy =  3.3203125 %
Iteration  7600 	Loss =  22.436052800270716 	Accuracy =  3.7109375 %
Epoch =  49  Average Loss =  23.800212303005107  New Learning Rate =  0.01
Iteration  7700 	Loss =  23.566575145355547 	Accuracy =  2.34375 %
Iteration  7800 	Loss =  22.723771322000825 	Accuracy =  2.1484375 %
Epoch =  50  Average Loss =  23.782089143711186  New Learning Rate =  0.01
Iteration  7900 	Loss =  23.30911408919691 	Accuracy =  3.90625 %
Epoch =  51  Aver

Iteration  14100 	Loss =  22.318556031526512 	Accuracy =  2.5390625 %
Iteration  14200 	Loss =  24.007385643664218 	Accuracy =  3.90625 %
Epoch =  92  Average Loss =  23.26116292710654  New Learning Rate =  0.01
Iteration  14300 	Loss =  23.425326557741386 	Accuracy =  3.125 %
Epoch =  93  Average Loss =  23.25188607886828  New Learning Rate =  0.01
Iteration  14400 	Loss =  22.44001997848064 	Accuracy =  2.9296875 %
Iteration  14500 	Loss =  24.478083427976475 	Accuracy =  0.9765625 %
Epoch =  94  Average Loss =  23.242695878555626  New Learning Rate =  0.01
Iteration  14600 	Loss =  23.443098244319867 	Accuracy =  2.34375 %
Epoch =  95  Average Loss =  23.23359299915998  New Learning Rate =  0.01
Iteration  14700 	Loss =  22.13950428142427 	Accuracy =  1.953125 %
Iteration  14800 	Loss =  24.182682060565664 	Accuracy =  2.5390625 %
Epoch =  96  Average Loss =  23.224578263207825  New Learning Rate =  0.01
Iteration  14900 	Loss =  21.837084947725906 	Accuracy =  2.5390625 %
Epoch =  

Iteration  21100 	Loss =  23.396424175976424 	Accuracy =  2.34375 %
Epoch =  137  Average Loss =  22.941069800543332  New Learning Rate =  0.01
Iteration  21200 	Loss =  23.25852805530915 	Accuracy =  1.953125 %
Epoch =  138  Average Loss =  22.93567628993503  New Learning Rate =  0.01
Iteration  21300 	Loss =  23.089763997239363 	Accuracy =  1.953125 %
Iteration  21400 	Loss =  24.00337204654811 	Accuracy =  1.953125 %
Epoch =  139  Average Loss =  22.930322869460515  New Learning Rate =  0.01
Iteration  21500 	Loss =  24.28788030075738 	Accuracy =  3.3203125 %
Epoch =  140  Average Loss =  22.925009390907483  New Learning Rate =  0.01
Iteration  21600 	Loss =  22.042893010832515 	Accuracy =  3.3203125 %
Iteration  21700 	Loss =  22.515462899476205 	Accuracy =  2.734375 %
Epoch =  141  Average Loss =  22.91973576556713  New Learning Rate =  0.01
Iteration  21800 	Loss =  22.77315121911805 	Accuracy =  2.9296875 %
Epoch =  142  Average Loss =  22.91450191769277  New Learning Rate =  0.

In [14]:
# Score the trained network on the data it was fitted to.
predictions = network.predict(X.copy())
correct_count = np.sum(predictions == Y)
print("Accuraccy on Training Data = ", 100 * correct_count / Y.shape[0])
# print("Average of predictions on Training Data = ",np.average(predictions))

Accuraccy on Training Data =  82.48337595907928


In [8]:
# Standardise the test features with the mean/deviation computed from the training set.
testX = load_data(testing_data_path,avg,std)

In [9]:
# Predict a class index per test row and write one integer per line to the output file.
predictions = network.predict(testX)
np.savetxt(fname=output_path, X=predictions, fmt="%i")