In [15]:
import numpy as np
import sys
from scipy.special import expit as sigmoid
from scipy.fftpack import dctn

# Input/output locations are taken from the command line:
#   argv[1] = training data file, argv[2] = test data file, argv[3] = file the predictions are written to.
# NOTE(review): inside a live Jupyter kernel sys.argv holds the kernel's own
# launch arguments, so this presumably targets the exported-script use case — confirm.
training_data_path = sys.argv[1]
testing_data_path = sys.argv[2]
output_path = sys.argv[3]

# Hard-coded alternatives for interactive runs (uncomment these and disable the argv lines above).
# training_data_path = "../data/devnagri_train.csv"
# testing_data_path = "../data/devnagri_test_public.csv"
# output_path = "../data/nn/b/cs1160328.txt"

# Hyperparameters passed to NeuralNetwork / NeuralNetwork.train further down:
#   batch_size          - mini-batch size used by train()
#   n0                  - initial learning rate (train() starts with lr = n0)
#   activation          - hidden-layer activation name ('relu', 'tanh', anything else selects sigmoid)
#   hidden_layers_sizes - one entry per hidden layer giving its width
batch_size = 128
n0 = 0.01
activation = 'relu'
hidden_layers_sizes = [200]

In [16]:
def relu(x):
    """Rectified linear unit: element-wise max(x, 0).

    Uses ``np.maximum`` instead of the mask-multiply form ``(x > 0) * x``,
    because multiplying a ``False`` mask by ``-inf`` yields NaN; with
    ``np.maximum`` a ``-inf`` input maps cleanly to 0. NaN inputs still
    propagate as NaN.

    Parameters
    ----------
    x : array_like or scalar
        Pre-activation values.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``x`` with every negative entry replaced by 0.
    """
    return np.maximum(x, 0)

def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via NumPy."""
    activated = np.tanh(x)
    return activated

def reluPrime(x):
    """Derivative of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere.

    The boolean mask is promoted to an integer by adding 0.
    """
    positive_mask = x > 0
    return positive_mask + 0

def tanhPrime(x):
    """Derivative of tanh expressed through the activation value.

    ``x`` is the tanh *output* (the network passes stored layer outputs),
    so the derivative is ``1 - x**2``.
    """
    squared = np.power(x, 2)
    return 1 - squared

def sigmoidPrime(x):
    """Derivative of the logistic sigmoid expressed through the activation value.

    ``x`` is the sigmoid *output*, so the derivative is ``x * (1 - x)``.
    """
    complement = 1 - x
    return x * complement

def exp_normalize(x):
    """Row-wise softmax of a 2-D array.

    Each row is shifted by its own maximum before exponentiation so that
    large inputs do not overflow; the shift does not change the result.
    """
    row_max = np.amax(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_max)
    row_totals = exponentials.sum(axis=1, keepdims=True)
    return exponentials / row_totals

def fft(X):
    """Replace every row of ``X`` by the 2-D FFT of that row viewed as a 32x32 image.

    ``X`` is modified in place and also returned. Each row must hold
    exactly 1024 values.

    NOTE(review): the FFT result is complex. If ``X`` has a real dtype the
    values are cast on assignment (NumPy drops the imaginary part and emits
    a warning) — confirm this is intended.
    """
    row_count = X.shape[0]
    for row_index in range(row_count):
        spectrum = np.fft.fft2(X[row_index, :].reshape(32, 32))
        X[row_index, :] = spectrum.reshape(1, 1024)
    return X

def dct(X):
    """Replace every row of ``X`` by the N-D DCT (``dctn``) of that row viewed as a 32x32 image.

    ``X`` is modified in place and also returned. Each row must hold
    exactly 1024 values.
    """
    row_count = X.shape[0]
    for row_index in range(row_count):
        coefficients = dctn(X[row_index, :].reshape(32, 32))
        X[row_index, :] = coefficients.reshape(1, 1024)
    return X

In [3]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained with mini-batch gradient descent.

    Hidden layers use the activation chosen at construction time. During
    training the output layer is a sigmoid and the loss is the squared
    error against one-hot targets. Prediction takes the arg-max of a
    softmax over the same output logits.
    """

    def __init__(self,input_size,output_size,hidden_layers_sizes, activation):
        """Create the weight matrices and bias rows for every layer.

        Parameters
        ----------
        input_size : int
            Number of input features.
        output_size : int
            Number of output units (classes).
        hidden_layers_sizes : sequence of int
            Width of each hidden layer, in order.
        activation : str
            'relu' or 'tanh'; any other value selects the sigmoid.

        Weights and biases are drawn from ``np.random.rand(...) / 100``,
        i.e. uniform in [0, 0.01).
        """
        self.weights = []
        self.biases = []

        if activation == 'relu':
            self.activation = relu
            self.activationPrime = reluPrime
        elif activation == 'tanh':
            self.activation = tanh
            self.activationPrime = tanhPrime
        else:
            self.activation = sigmoid
            self.activationPrime = sigmoidPrime

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers_sizes = hidden_layers_sizes
        # The original (misspelled) attribute name is kept as an alias so
        # existing code that reads it keeps working.
        self.hiddent_layers_sizes = hidden_layers_sizes

        # Consecutive pairs of layer widths give the shape of each weight
        # matrix. Weights are drawn before biases for every layer, which
        # keeps the random-number consumption order stable.
        layer_sizes = [input_size] + list(hidden_layers_sizes) + [output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.rand(fan_in, fan_out)/100)
            self.biases.append(np.random.rand(1, fan_out)/100)

    def _forward_pass(self, X, output_activation):
        """Run X through all layers and return the list of layer outputs.

        Index 0 of the returned list is X itself; index i (i >= 1) is the
        activation of layer i. ``output_activation`` is applied on the last
        layer, ``self.activation`` on all others.
        """
        outputs = [X]
        last_layer = len(self.weights) - 1
        layer_output = X
        for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
            pre_activation = layer_output.dot(weight) + bias
            if i == last_layer:
                layer_output = output_activation(pre_activation)
            else:
                layer_output = self.activation(pre_activation)
            outputs.append(layer_output)
        return outputs

    def train(self,inpX,inpY,batch_size,n0,max_iterations):
        """Train with mini-batch gradient descent on a squared-error loss.

        Parameters
        ----------
        inpX : numpy.ndarray
            Training inputs, one example per row.
        inpY : numpy.ndarray of int
            Class index of every example (used to index the one-hot targets).
        batch_size : int
            Number of examples per mini-batch.
        n0 : float
            Initial learning rate.
        max_iterations : int
            Total number of mini-batch updates to perform.

        Batches are taken in order, without shuffling. At every epoch
        boundary the average batch loss is compared with the previous
        epoch's; whenever it did not decrease, the learning rate becomes
        ``n0 / sqrt(k + 1)`` where ``k`` counts such epochs so far.
        Progress is printed every 100 iterations and at each epoch boundary.
        """
        max_examples = inpX.shape[0]
        # Number of batches per epoch, rounded to nearest. Clamped to at
        # least 1: a data set smaller than half a batch would otherwise
        # give 0 and make the modulo below raise ZeroDivisionError.
        batches_per_epoch = max(1, int(0.5 + max_examples / batch_size))
        num_layers = len(self.weights)

        count = 0
        lr = n0
        totLoss = 0
        prevAvgLoss = sys.float_info.max
        epoch = 0

        for n in range(max_iterations):
            # Position of this mini-batch inside the current epoch
            i_eff = n % batches_per_epoch

            # Updating Learning Rate (at the start of every epoch but the first)
            if i_eff == 0 and n != 0:
                avgLoss = totLoss/batches_per_epoch
                if avgLoss >= prevAvgLoss:
                    count += 1
                    lr = n0 / np.sqrt(count+1)
                print("Epoch = ",epoch," Average Loss = ",avgLoss," New Learning Rate = ",lr)
                epoch += 1
                prevAvgLoss = avgLoss
                totLoss = 0

            # Forming Mini Batches: the last batch of an epoch absorbs
            # whatever examples remain.
            start = i_eff*batch_size
            stop = None if i_eff == batches_per_epoch - 1 else start + batch_size
            X = inpX[start:stop]
            Y = inpY[start:stop]

            # Neural Network Forward Propagation (MSE)
            outputs = self._forward_pass(X, sigmoid)

            y_onehot = np.zeros((Y.shape[0],self.output_size))
            y_onehot[range(Y.shape[0]),Y] = 1

            # Backpropagation: all gradients are computed before any weight
            # changes, because the delta of layer i needs the not-yet-updated
            # weights of the layer above it.
            dWs = [None] * num_layers
            dbs = [None] * num_layers
            for i in range(num_layers,0,-1):
                if i == num_layers:
                    delta = (outputs[i] - y_onehot) * (2/Y.shape[0]) * sigmoidPrime(outputs[i])
                else:
                    delta = delta.dot(self.weights[i].T) * self.activationPrime(outputs[i])
                dWs[i-1] = (outputs[i-1].T).dot(delta)
                dbs[i-1] = np.sum(delta,axis=0,keepdims=True)

            # Batch loss (computed once, from the pre-update outputs)
            loss = np.sum(np.power(outputs[-1] - y_onehot,2) )/Y.shape[0]

            if n%100 == 0:
                labels = np.argmax(outputs[-1],axis = 1)
                accuracy = 100 * np.sum(labels == Y)/Y.shape[0]
                print("Iteration ",n,"\tMSE Loss = ",loss,"\tAccuracy = ",accuracy,"%")

            # Gradient Descent Parameter Update
            for i in range(num_layers):
                self.weights[i] -= lr * dWs[i]
                self.biases[i] -= lr * dbs[i]

            totLoss += loss

    def predict(self,X):
        """Return the predicted class index for every row of X."""
        return self.forward_run(X)

    def forward_run(self,X):
        """Forward pass for inference; returns the arg-max class per row.

        The output layer applies ``exp_normalize`` (softmax) rather than the
        sigmoid used while training. Both preserve the ordering of the
        output logits, so the arg-max is the same.
        """
        prev_layer_output = X
        num_hidden_layers = len(self.weights) - 1
        for i in range(num_hidden_layers + 1):
            pre_activation = prev_layer_output.dot(self.weights[i]) + self.biases[i]
            if i == num_hidden_layers:
                probabilities = exp_normalize(pre_activation)
                return np.argmax(probabilities,axis = 1)
            prev_layer_output = self.activation(pre_activation)

In [4]:
def load_data(path,avg,std):
    """Load a comma-separated data file and standardize its feature columns.

    The first column of the file is the label; all remaining columns are
    features.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    avg : numpy.ndarray or None
        Per-feature mean to subtract. Pass ``None`` to compute the
        statistics from this file (training mode).
    std : numpy.ndarray or None
        Per-feature standard deviation to divide by. Ignored (recomputed)
        when ``avg`` is ``None``.

    Returns
    -------
    (X, Y, avg, std) when ``avg`` is ``None``: standardized features,
    labels, and the statistics that were used. Features with zero standard
    deviation are divided by 1 instead, to avoid division by zero.

    X alone when ``avg`` is given: features standardized with the supplied
    statistics.
    """
    # The context manager closes the file handle once parsing is done.
    # ndmin=2 keeps a single-row file two-dimensional so the column
    # slicing below still works.
    with open(path, "rb") as handle:
        input_data = np.loadtxt(handle, delimiter=",", ndmin=2)

    X = input_data[:,1:].copy()
    if avg is None:
        Y = input_data[:,0].copy()
        avg = np.average(X,axis=0)
        X = X - avg
        std = np.std(X,axis=0)
        std[(std == 0)] = 1
        X = X / std
        return X,Y,avg,std

    return (X - avg)/std

In [5]:
# Load the training file. Passing None for avg/std makes load_data compute the
# per-feature mean and standard deviation from this file and return them, so
# the same statistics can be applied to the test data later.
inpX,Y,avg,std = load_data(training_data_path,None,None)

In [17]:
# Work on a copy so inpX keeps the standardized features untouched
# (the optional dct() transform below modifies its argument in place).
X = inpX.copy()
# Optional DCT feature transform, currently disabled:
# print(np.sum(X))
# X = dct(X)
# print(np.sum(X))

# Network dimensions are derived from the data.
input_size = X.shape[1]
# Class count = largest label + 1, i.e. labels are assumed to be 0-based integers — TODO confirm.
output_size = int(np.amax(Y))+1
num_examples = X.shape[0]
# Total number of mini-batch updates: about 40 passes over the training set.
max_iterations = int(40*(num_examples/batch_size))

# NOTE(review): no NumPy random seed is set in the visible cells, so the random
# weight initialization (and therefore the results) differ between runs.
network = NeuralNetwork(input_size,output_size,hidden_layers_sizes,activation)
# Labels are cast to int because train() uses them as array indices.
network.train(X,Y.astype(int),batch_size,n0,max_iterations)

3.637978807091713e-12
6.048139766789973e-10
Iteration  0 	MSE Loss =  35.18026208546236 	Accuracy =  3.125 %
Iteration  100 	MSE Loss =  0.9904652300968961 	Accuracy =  9.375 %
Iteration  200 	MSE Loss =  0.9660095532643762 	Accuracy =  9.375 %
Iteration  300 	MSE Loss =  0.9418715750823361 	Accuracy =  15.625 %
Iteration  400 	MSE Loss =  0.8664990720727512 	Accuracy =  21.875 %
Iteration  500 	MSE Loss =  0.8805841604357059 	Accuracy =  22.65625 %
Iteration  600 	MSE Loss =  0.8158299945986209 	Accuracy =  26.5625 %
Epoch =  0  Average Loss =  1.0659298038492888  New Learning Rate =  0.01
Iteration  700 	MSE Loss =  0.8525702436544783 	Accuracy =  25.78125 %
Iteration  800 	MSE Loss =  0.7161159984971204 	Accuracy =  40.625 %
Iteration  900 	MSE Loss =  0.6923964973078903 	Accuracy =  43.75 %
Iteration  1000 	MSE Loss =  0.732105764222156 	Accuracy =  40.625 %
Iteration  1100 	MSE Loss =  0.726699320863179 	Accuracy =  39.0625 %
Iteration  1200 	MSE Loss =  0.6228903019999272 	Accura

Iteration  9900 	MSE Loss =  0.1890706370258891 	Accuracy =  86.71875 %
Iteration  10000 	MSE Loss =  0.2584372831358984 	Accuracy =  83.59375 %
Iteration  10100 	MSE Loss =  0.23770271502321272 	Accuracy =  85.9375 %
Iteration  10200 	MSE Loss =  0.2026681277472574 	Accuracy =  84.375 %
Iteration  10300 	MSE Loss =  0.24610242039931077 	Accuracy =  82.8125 %
Epoch =  16  Average Loss =  0.24409834069770373  New Learning Rate =  0.01
Iteration  10400 	MSE Loss =  0.3351515861048274 	Accuracy =  74.21875 %
Iteration  10500 	MSE Loss =  0.26063606285271307 	Accuracy =  82.8125 %
Iteration  10600 	MSE Loss =  0.28033755750260747 	Accuracy =  78.125 %
Iteration  10700 	MSE Loss =  0.16988231826453215 	Accuracy =  87.5 %
Iteration  10800 	MSE Loss =  0.18984429229854785 	Accuracy =  85.15625 %
Iteration  10900 	MSE Loss =  0.221668712766736 	Accuracy =  85.9375 %
Epoch =  17  Average Loss =  0.2397974360257414  New Learning Rate =  0.01
Iteration  11000 	MSE Loss =  0.222995886550115 	Accur

Iteration  19600 	MSE Loss =  0.21511596244991524 	Accuracy =  82.8125 %
Iteration  19700 	MSE Loss =  0.1850705823420265 	Accuracy =  85.15625 %
Iteration  19800 	MSE Loss =  0.16075510185342468 	Accuracy =  86.71875 %
Iteration  19900 	MSE Loss =  0.23245338617097605 	Accuracy =  80.46875 %
Iteration  20000 	MSE Loss =  0.12335081700937764 	Accuracy =  89.84375 %
Iteration  20100 	MSE Loss =  0.15302669753966253 	Accuracy =  86.71875 %
Epoch =  32  Average Loss =  0.18331790802649245  New Learning Rate =  0.0070710678118654745
Iteration  20200 	MSE Loss =  0.1953764159441327 	Accuracy =  85.9375 %
Iteration  20300 	MSE Loss =  0.14960570817858115 	Accuracy =  89.0625 %
Iteration  20400 	MSE Loss =  0.20390393104304627 	Accuracy =  82.03125 %
Iteration  20500 	MSE Loss =  0.1730751883948521 	Accuracy =  85.9375 %
Iteration  20600 	MSE Loss =  0.2576095819017131 	Accuracy =  78.125 %
Iteration  20700 	MSE Loss =  0.1571849490711074 	Accuracy =  86.71875 %
Epoch =  33  Average Loss =  0

In [18]:
# Accuracy (in percent) on the training data itself — X and Y are the training
# features and labels, so this is not a held-out estimate.
predictions = network.predict(X.copy())
print(100 * np.sum(predictions == Y)/Y.shape[0])
# print(np.average(predictions))

85.54219948849105


In [19]:
# Load the test file and standardize it with the mean/std computed from the
# training data. In this mode load_data returns only the features; the first
# column of the file is skipped.
testX = load_data(testing_data_path,avg,std)
# Must be enabled together with the dct() call in the training cell, if that is used:
# testX = dct(testX)

In [20]:
# Predict a class index for every test example and write one integer per line
# to the output file.
predictions = network.predict(testX)
np.savetxt(output_path,predictions,fmt="%i")