In [32]:
import numpy as np
import random

# Input/output locations, relative to the notebook's working directory.
training_data_path = "../data/devnagri_train.csv"
testing_data_path = "../data/devnagri_test_public.csv"
output_path = "../data/out.txt"
# Width of the softmax output layer (passed to NeuralNetwork as output_size).
output_size = 46
# One entry per hidden layer, giving that layer's unit count.
hidden_layers_sizes = [ 25]
# 'relu' or 'tanh'; NeuralNetwork treats any other value as sigmoid.
activation = 'sigmoid'
# Placeholder only: overwritten with the real feature count once the data is loaded.
input_size = -1
# Number of examples per mini-batch.
batch_size = 32
# Base learning rate; train() decays it as n0 / sqrt(n + 1) at update n.
n0 = 0.01
# Total number of mini-batch updates (not epochs) performed by train().
max_iterations = 100000

In [2]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Uses ``np.maximum`` instead of multiplying by a boolean mask, because
    ``False * -inf`` evaluates to ``nan`` and ``False * negative`` to ``-0.0``.
    NaN inputs still propagate as NaN.
    """
    return np.maximum(x, 0)

def tanh(x):
    """Hyperbolic-tangent activation; delegates directly to ``np.tanh``."""
    return np.tanh(x)

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The naive formula computes ``exp(-x)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) for large negative ``x``. Here the exponential is
    only ever taken of a non-positive number, so it stays within (0, 1].

    Returns an ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same value.
    return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

def reluPrime(x):
    """Derivative of ReLU as an integer mask: 1 where ``x`` is positive, 0 elsewhere."""
    return np.where(x > 0, 1, 0)

def tanhPrime(x):
    """Derivative of tanh written in terms of its output: ``1 - x**2``.

    ``x`` is expected to be the already-activated value (the network passes
    layer outputs here), not the pre-activation.
    """
    squared = np.square(x)
    return 1 - squared

def sigmoidPrime(x):
    """Derivative of the sigmoid written in terms of its output: ``x * (1 - x)``.

    ``x`` is expected to be the already-activated value (the network passes
    layer outputs here), not the pre-activation.
    """
    complement = 1 - x
    return x * complement

def exp_normalize(x):
    """Row-wise softmax of a 2-D array.

    Each row is shifted by its own maximum before exponentiating so the
    largest exponent is ``exp(0) == 1``; the shift cancels in the ratio.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    shifted_exp = np.exp(x - row_max)
    row_total = np.sum(shifted_exp, axis=1, keepdims=True)
    return shifted_exp / row_total

In [10]:
class NeuralNetwork:
    """Fully connected classifier trained with mini-batch gradient descent.

    Hidden layers share one activation function; the output layer is a
    softmax (``exp_normalize``) and training minimises cross-entropy against
    integer class labels.

    Attributes:
        weights: list of 2-D arrays, ``weights[i]`` has shape (fan_in, fan_out).
        biases: list of arrays of shape (1, fan_out), aligned with ``weights``.
        activation / activationPrime: hidden-layer activation and its
            derivative (the derivative is evaluated on layer *outputs*).
        hidden_layers_sizes: the hidden layer sizes passed to the constructor.
        hiddent_layers_sizes: misspelled alias of the above, kept so existing
            code that reads it continues to work.
    """

    def __init__(self,input_size,output_size,hidden_layers_sizes, activation):
        """Create the layers and initialise their parameters.

        Args:
            input_size: number of input features.
            output_size: number of classes (width of the softmax layer).
            hidden_layers_sizes: sequence of hidden-layer widths (may be empty).
            activation: 'relu' or 'tanh'; any other value selects sigmoid.
        """
        self.weights = []
        self.biases = []

        if(activation == 'relu'):
            self.activation = relu
            self.activationPrime = reluPrime
        elif(activation == 'tanh'):
            self.activation = tanh
            self.activationPrime = tanhPrime
        else:
            # Every other value, including 'sigmoid', falls back to sigmoid.
            self.activation = sigmoid
            self.activationPrime = sigmoidPrime

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers_sizes = hidden_layers_sizes
        # Original (misspelled) attribute name retained for backward compatibility.
        self.hiddent_layers_sizes = hidden_layers_sizes

        # NOTE(review): parameters are drawn uniformly from [0, 0.01), i.e. all
        # positive. A zero-centred scheme may train faster -- left unchanged here
        # so seeded runs keep producing the same initial parameters.
        layer_sizes = [input_size] + list(hidden_layers_sizes) + [output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.rand(fan_in, fan_out)/100)
            self.biases.append(np.random.rand(1, fan_out)/100)

    def _forward(self,X):
        """Propagate X through all layers.

        Returns a list ``[X, a_1, ..., a_k, probabilities]`` holding the input,
        every hidden activation and the softmax output, in layer order.
        """
        outputs = [X]
        last_layer = len(self.weights) - 1
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            pre_activation = outputs[-1].dot(weight) + bias
            if i == last_layer:
                outputs.append(exp_normalize(pre_activation))
            else:
                outputs.append(self.activation(pre_activation))
        return outputs

    def train(self,inpX,inpY,batch_size,n0,max_iterations):
        """Run ``max_iterations`` mini-batch gradient-descent updates in place.

        Batches are taken in file order and cycled; the final batch of each
        cycle absorbs any remainder. The learning rate at update ``n`` is
        ``n0 / sqrt(n + 1)``. Loss and accuracy of the current batch are
        printed every 100 updates.

        Args:
            inpX: 2-D array, one example per row.
            inpY: 1-D integer array of class indices (used for array indexing).
            batch_size: positive number of examples per mini-batch.
            n0: base learning rate.
            max_iterations: number of parameter updates to perform.

        Raises:
            ValueError: if ``inpX`` has no rows or ``batch_size`` is not positive.
        """
        max_examples = inpX.shape[0]
        if max_examples == 0:
            raise ValueError("cannot train on an empty dataset")
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        # Rounding can give 0 when there are fewer than batch_size / 2 examples,
        # which used to crash on `n % 0`; always keep at least one batch.
        batches_per_cycle = max(1, int(0.5 + max_examples / batch_size))
        num_layers = len(self.weights)

        for n in range(max_iterations):
            # Forming Mini Batches
            i_eff = n % batches_per_cycle
            start = i_eff * batch_size
            # The last batch of a cycle runs to the end of the data.
            stop = None if i_eff == batches_per_cycle - 1 else start + batch_size
            X = inpX[start:stop]
            Y = inpY[start:stop]
            rows = np.arange(Y.shape[0])

            # Updating Learning Rate
            lr = n0 / np.sqrt(n+1)

            # Neural Network Forward Propagation
            outputs = self._forward(X)

            # Backpropagation
            dWs = [None] * num_layers
            dbs = [None] * num_layers

            # Softmax + cross-entropy gradient w.r.t. the logits: p - one_hot(Y).
            delta = outputs[-1].copy()
            delta[rows, Y] -= 1
            for layer in range(num_layers - 1, -1, -1):
                if layer != num_layers - 1:
                    # Push the error back through the next layer's weights and
                    # this layer's activation (derivative taken on its output).
                    delta = delta.dot(self.weights[layer + 1].T) * self.activationPrime(outputs[layer + 1])
                dWs[layer] = (outputs[layer].T).dot(delta)
                dbs[layer] = np.sum(delta,axis=0,keepdims=True)

            if (n%100 == 0):
                probabilities = outputs[-1]
                loss = np.sum(-1*np.log(probabilities[rows,Y])) / Y.shape[0]
                labels = np.argmax(probabilities,axis = 1)
                accuracy = 100 * np.sum(labels == Y)/Y.shape[0]
                print("Iteration ",n," ,Loss = ",loss," ,Accuracy = ",accuracy,"%")

            # Gradient Descent Parameter Update
            for i in range(num_layers):
                self.weights[i] -= lr * dWs[i]
                self.biases[i] -= lr * dbs[i]

    def predict(self,X):
        """Return the predicted class index for each row of X."""
        return self.forward_run(X)

    def forward_run(self,X):
        """Forward pass only: return the arg-max class index for each row of X."""
        probabilities = self._forward(X)[-1]
        return np.argmax(probabilities,axis = 1)

In [4]:
def load_data(path,avg,std):
    """Load a comma-separated numeric file and standardise its feature columns.

    Column 0 is treated as the label column and the remaining columns as
    features.

    Args:
        path: location of the CSV file.
        avg: per-feature mean to subtract, or None to compute it from this file.
        std: per-feature standard deviation to divide by; ignored (recomputed)
            when ``avg`` is None.

    Returns:
        When ``avg`` is None: ``(X, Y, avg, std)`` where ``X`` is the
        standardised feature matrix, ``Y`` the label column, and ``avg``/``std``
        the statistics that were applied (zero deviations are replaced by 1 so
        constant features do not divide by zero).
        Otherwise: only ``X``, standardised with the supplied ``avg``/``std``.
    """
    # Passing the path lets NumPy open *and close* the file; ndmin=2 keeps a
    # single-row file two-dimensional so the column slicing below still works.
    input_data = np.loadtxt(path, delimiter=",", ndmin=2)
    X = input_data[:,1:].copy()

    if avg is not None:
        return (X - avg)/std

    Y = input_data[:,0].copy()
    avg = np.average(X,axis=0)
    X = X - avg
    std = np.std(X,axis=0)
    std[(std == 0)] = 1
    X = X / std
    return X,Y,avg,std

In [5]:
# Passing avg=None makes load_data compute the per-feature mean/std from the
# training file and return them, so the same statistics can be reused later.
inpX,Y,avg,std = load_data(training_data_path,None,None)

In [33]:
# Work on a copy so the standardised training matrix `inpX` is left untouched.
X = inpX.copy()
# Feature count is the number of columns; replaces the -1 placeholder.
input_size = X.shape[1]
network = NeuralNetwork(input_size,output_size,hidden_layers_sizes,activation)
# Labels come out of np.loadtxt as floats; train() uses them as array indices,
# so they are cast to int here.
network.train(X,Y.astype(int),batch_size,n0,max_iterations)

Iteration  0  ,Loss =  3.8294714914595644  ,Accuracy =  3.125 %
Iteration  100  ,Loss =  3.829067446237522  ,Accuracy =  0.0 %
Iteration  200  ,Loss =  3.8109589705095104  ,Accuracy =  6.25 %
Iteration  300  ,Loss =  3.84049332515187  ,Accuracy =  6.25 %
Iteration  400  ,Loss =  3.8314430764763063  ,Accuracy =  0.0 %
Iteration  500  ,Loss =  3.8115281315985956  ,Accuracy =  3.125 %
Iteration  600  ,Loss =  3.8319203749229853  ,Accuracy =  0.0 %
Iteration  700  ,Loss =  3.7939466368457944  ,Accuracy =  0.0 %
Iteration  800  ,Loss =  3.7960121609716326  ,Accuracy =  3.125 %
Iteration  900  ,Loss =  3.8092776249288693  ,Accuracy =  6.25 %
Iteration  1000  ,Loss =  3.812653351231339  ,Accuracy =  0.0 %
Iteration  1100  ,Loss =  3.8026999431167363  ,Accuracy =  0.0 %
Iteration  1200  ,Loss =  3.824351363726045  ,Accuracy =  0.0 %
Iteration  1300  ,Loss =  3.792066183257571  ,Accuracy =  0.0 %
Iteration  1400  ,Loss =  3.7892575881569086  ,Accuracy =  0.0 %
Iteration  1500  ,Loss =  3.765918

Iteration  12600  ,Loss =  3.146636403159217  ,Accuracy =  28.125 %
Iteration  12700  ,Loss =  3.037052057801586  ,Accuracy =  43.75 %
Iteration  12800  ,Loss =  2.93543501570677  ,Accuracy =  34.375 %
Iteration  12900  ,Loss =  2.924296007787217  ,Accuracy =  37.5 %
Iteration  13000  ,Loss =  3.039575338018289  ,Accuracy =  34.375 %
Iteration  13100  ,Loss =  2.9326143325718026  ,Accuracy =  37.5 %
Iteration  13200  ,Loss =  2.8490071121097644  ,Accuracy =  46.875 %
Iteration  13300  ,Loss =  2.921202440611854  ,Accuracy =  34.375 %
Iteration  13400  ,Loss =  3.0460815681077076  ,Accuracy =  18.75 %
Iteration  13500  ,Loss =  2.820456088104151  ,Accuracy =  31.25 %
Iteration  13600  ,Loss =  3.0964507396970413  ,Accuracy =  21.875 %
Iteration  13700  ,Loss =  2.9903568010043733  ,Accuracy =  21.875 %
Iteration  13800  ,Loss =  2.9786620917561066  ,Accuracy =  37.5 %
Iteration  13900  ,Loss =  2.959975487791561  ,Accuracy =  37.5 %
Iteration  14000  ,Loss =  3.0099530320576795  ,Accura

Iteration  24800  ,Loss =  2.6044620000282785  ,Accuracy =  43.75 %
Iteration  24900  ,Loss =  2.655375661223488  ,Accuracy =  34.375 %
Iteration  25000  ,Loss =  2.6326937343482766  ,Accuracy =  43.75 %
Iteration  25100  ,Loss =  2.8300738522644844  ,Accuracy =  25.0 %
Iteration  25200  ,Loss =  2.3921021754899012  ,Accuracy =  34.375 %
Iteration  25300  ,Loss =  2.594832898000181  ,Accuracy =  43.75 %
Iteration  25400  ,Loss =  2.6729376336445636  ,Accuracy =  34.375 %
Iteration  25500  ,Loss =  2.7051896359019  ,Accuracy =  34.375 %
Iteration  25600  ,Loss =  2.6327223979883048  ,Accuracy =  43.75 %
Iteration  25700  ,Loss =  2.4602479804357618  ,Accuracy =  56.25 %
Iteration  25800  ,Loss =  2.4323281721992585  ,Accuracy =  53.125 %
Iteration  25900  ,Loss =  2.478511514518059  ,Accuracy =  56.25 %
Iteration  26000  ,Loss =  2.7177576470950844  ,Accuracy =  34.375 %
Iteration  26100  ,Loss =  2.468117326832413  ,Accuracy =  56.25 %
Iteration  26200  ,Loss =  2.671469993217869  ,Acc

Iteration  37100  ,Loss =  2.0808785051378473  ,Accuracy =  56.25 %
Iteration  37200  ,Loss =  2.3884178150036663  ,Accuracy =  53.125 %
Iteration  37300  ,Loss =  2.46159203780119  ,Accuracy =  50.0 %
Iteration  37400  ,Loss =  2.2904745770215635  ,Accuracy =  56.25 %
Iteration  37500  ,Loss =  2.3797697333771906  ,Accuracy =  40.625 %
Iteration  37600  ,Loss =  2.393680353716056  ,Accuracy =  43.75 %
Iteration  37700  ,Loss =  2.2838485336194596  ,Accuracy =  50.0 %
Iteration  37800  ,Loss =  2.2342982572868983  ,Accuracy =  46.875 %
Iteration  37900  ,Loss =  2.577964696831901  ,Accuracy =  40.625 %
Iteration  38000  ,Loss =  2.436514161621097  ,Accuracy =  37.5 %
Iteration  38100  ,Loss =  2.2931501420167333  ,Accuracy =  34.375 %
Iteration  38200  ,Loss =  2.3140170385259955  ,Accuracy =  34.375 %
Iteration  38300  ,Loss =  2.448199929838344  ,Accuracy =  31.25 %
Iteration  38400  ,Loss =  2.2205644737157906  ,Accuracy =  50.0 %
Iteration  38500  ,Loss =  2.3627850707691938  ,Accu

Iteration  49400  ,Loss =  2.5561541456516608  ,Accuracy =  37.5 %
Iteration  49500  ,Loss =  2.2812247121828793  ,Accuracy =  46.875 %
Iteration  49600  ,Loss =  2.275167537844287  ,Accuracy =  40.625 %
Iteration  49700  ,Loss =  2.2038027249672822  ,Accuracy =  53.125 %
Iteration  49800  ,Loss =  2.299129814208697  ,Accuracy =  43.75 %
Iteration  49900  ,Loss =  2.1943296776101047  ,Accuracy =  53.125 %
Iteration  50000  ,Loss =  2.238341890915075  ,Accuracy =  46.875 %
Iteration  50100  ,Loss =  2.1555669354965703  ,Accuracy =  46.875 %
Iteration  50200  ,Loss =  2.4860199825094753  ,Accuracy =  37.5 %
Iteration  50300  ,Loss =  2.2360279239073746  ,Accuracy =  50.0 %
Iteration  50400  ,Loss =  2.253161838606832  ,Accuracy =  46.875 %
Iteration  50500  ,Loss =  2.405044884607335  ,Accuracy =  37.5 %
Iteration  50600  ,Loss =  2.0498095603833417  ,Accuracy =  59.375 %
Iteration  50700  ,Loss =  2.203200598419995  ,Accuracy =  56.25 %
Iteration  50800  ,Loss =  2.060482104197549  ,Acc

Iteration  61700  ,Loss =  2.1590790155316966  ,Accuracy =  43.75 %
Iteration  61800  ,Loss =  1.8439661954546704  ,Accuracy =  65.625 %
Iteration  61900  ,Loss =  1.946528019789154  ,Accuracy =  62.5 %
Iteration  62000  ,Loss =  2.3802781194508635  ,Accuracy =  46.875 %
Iteration  62100  ,Loss =  2.2225584021657827  ,Accuracy =  46.875 %
Iteration  62200  ,Loss =  1.9795628107599514  ,Accuracy =  53.125 %
Iteration  62300  ,Loss =  2.1225083262668454  ,Accuracy =  50.0 %
Iteration  62400  ,Loss =  2.026966258692748  ,Accuracy =  53.125 %
Iteration  62500  ,Loss =  2.147007709657678  ,Accuracy =  46.875 %
Iteration  62600  ,Loss =  2.129989848091879  ,Accuracy =  65.625 %
Iteration  62700  ,Loss =  1.8217393140127882  ,Accuracy =  59.375 %
Iteration  62800  ,Loss =  1.936801384346978  ,Accuracy =  46.875 %
Iteration  62900  ,Loss =  2.2152128487254825  ,Accuracy =  43.75 %
Iteration  63000  ,Loss =  2.275069710938336  ,Accuracy =  37.5 %
Iteration  63100  ,Loss =  1.9023047608197974  ,

Iteration  74000  ,Loss =  1.9749697484785542  ,Accuracy =  53.125 %
Iteration  74100  ,Loss =  2.21401766117848  ,Accuracy =  50.0 %
Iteration  74200  ,Loss =  1.8899076305976197  ,Accuracy =  59.375 %
Iteration  74300  ,Loss =  1.7433767979390367  ,Accuracy =  75.0 %
Iteration  74400  ,Loss =  1.9283797046664095  ,Accuracy =  59.375 %
Iteration  74500  ,Loss =  2.013808545316735  ,Accuracy =  59.375 %
Iteration  74600  ,Loss =  1.7333100827646404  ,Accuracy =  68.75 %
Iteration  74700  ,Loss =  2.250147215104179  ,Accuracy =  31.25 %
Iteration  74800  ,Loss =  2.0759731197287534  ,Accuracy =  50.0 %
Iteration  74900  ,Loss =  2.2266637912500182  ,Accuracy =  43.75 %
Iteration  75000  ,Loss =  2.001892925602575  ,Accuracy =  50.0 %
Iteration  75100  ,Loss =  2.063083946235892  ,Accuracy =  53.125 %
Iteration  75200  ,Loss =  1.82117453948903  ,Accuracy =  65.625 %
Iteration  75300  ,Loss =  2.0549461814773142  ,Accuracy =  43.75 %
Iteration  75400  ,Loss =  1.9425834687743762  ,Accura

Iteration  86200  ,Loss =  2.297231832210828  ,Accuracy =  43.75 %
Iteration  86300  ,Loss =  1.6306996635434374  ,Accuracy =  59.375 %
Iteration  86400  ,Loss =  1.8400779458366472  ,Accuracy =  62.5 %
Iteration  86500  ,Loss =  2.078708364205564  ,Accuracy =  37.5 %
Iteration  86600  ,Loss =  1.9889037534586604  ,Accuracy =  59.375 %
Iteration  86700  ,Loss =  1.9150743070690912  ,Accuracy =  56.25 %
Iteration  86800  ,Loss =  1.7458069282923088  ,Accuracy =  59.375 %
Iteration  86900  ,Loss =  1.8136463329493606  ,Accuracy =  59.375 %
Iteration  87000  ,Loss =  1.7318106172782242  ,Accuracy =  68.75 %
Iteration  87100  ,Loss =  2.0714379549960737  ,Accuracy =  56.25 %
Iteration  87200  ,Loss =  1.7799463657878598  ,Accuracy =  65.625 %
Iteration  87300  ,Loss =  1.9242676286313407  ,Accuracy =  43.75 %
Iteration  87400  ,Loss =  1.9870398695507527  ,Accuracy =  56.25 %
Iteration  87500  ,Loss =  1.8088913708699834  ,Accuracy =  62.5 %
Iteration  87600  ,Loss =  1.7394822770074803  ,

Iteration  98500  ,Loss =  1.7153788142907715  ,Accuracy =  68.75 %
Iteration  98600  ,Loss =  1.9260813885982764  ,Accuracy =  56.25 %
Iteration  98700  ,Loss =  1.8466662704604655  ,Accuracy =  65.625 %
Iteration  98800  ,Loss =  1.8144465181054241  ,Accuracy =  53.125 %
Iteration  98900  ,Loss =  1.6510289857585863  ,Accuracy =  62.5 %
Iteration  99000  ,Loss =  2.064759182702223  ,Accuracy =  56.25 %
Iteration  99100  ,Loss =  1.9549442777836665  ,Accuracy =  50.0 %
Iteration  99200  ,Loss =  1.7695924349868142  ,Accuracy =  56.25 %
Iteration  99300  ,Loss =  1.8036197109494991  ,Accuracy =  50.0 %
Iteration  99400  ,Loss =  2.011529458809699  ,Accuracy =  43.75 %
Iteration  99500  ,Loss =  1.7077200031354658  ,Accuracy =  59.375 %
Iteration  99600  ,Loss =  1.8921379085019279  ,Accuracy =  53.125 %
Iteration  99700  ,Loss =  1.7976874317740785  ,Accuracy =  56.25 %
Iteration  99800  ,Loss =  1.7143867033583624  ,Accuracy =  62.5 %
Iteration  99900  ,Loss =  1.8483364818903831  ,Ac

In [34]:
# Accuracy on the training set itself: percentage of rows whose predicted
# class index equals the label.
predictions = network.predict(X.copy())
print(100 * np.sum(predictions == Y)/Y.shape[0])

58.28644501278772


In [35]:
# Standardise the test features with the mean/std computed from the training
# file; with avg supplied, load_data returns only the feature matrix.
testX = load_data(testing_data_path,avg,std)

In [36]:
# Predict a class index for every test row and write them to output_path,
# formatted as integers.
predictions = network.predict(testX)
np.savetxt(output_path,predictions,fmt="%i")