#Neural Networks Classifier - Assignment 3

> IMT2019092





# Imports: No other imports allowed

In [1]:
import numpy as np #No using automatic differentiation allowed from here!
import pandas as pd 
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
import sklearn.metrics as metrics

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Neural Network Class

In [3]:
class NeuralNetworkClassifier: 
    def __init__(self, layers): 
        self.layers = layers
        self.weights = []
        self.bias = []
        self.z_list = []
        self.A_list= []
        
        for i in range(len(layers)):
            self.weights.append(np.random.rand(layers[i][1],layers[i][0])-0.5)
            self.bias.append(np.random.rand(layers[i][1],1)-0.5)

    def predict(self, X): 
        self.forward_propogation(X)
        return self.A_list[-1]

    def fit_once(self, X, y, alpha): 
        self.forward_propogation(X)
        dw, db = self.backward_propogation(X.shape[1], self.A_list[-1], y, X)
        for i in range(len(self.weights)):
            self.weights[i] -= alpha * dw[i]
            self.bias[i] -= alpha * db[i]
        result = self.A_list[-1]       
        self.z_list = []
        self.A_list = []  
        return result         

    def categorical_cross_entropy_loss(self, y, yhat):
        epsilon = 1e-7
        yhat = np.clip(yhat, epsilon, 1.0 - epsilon)
        N = yhat.shape[0]
        ce = -np.sum(y*np.log(yhat))/N 
        return ce        
    
    def linear_prop(self, A, w, b):
        z = w.dot(A) + b
        return z
  
    def activation(self, A, w, b, activation):
        z = self.linear_prop(A,w,b)
        self.z_list.append(z)

        if activation == 'linear':
            return z;
        elif activation == 'softmax':
            return self.softmax(z)
        elif activation == 'relu':
            return self.relu(z)

    def forward_propogation(self, X):
        self.A_list.append(X)
        self.z_list.append(X)

        for i in range(len(self.weights)):
            A_p = X
            X = self.activation(A_p, self.weights[i], self.bias[i], self.layers[i][2])
            self.A_list.append(X)
    
    def backward_propogation(self, m, X, y, A):
        dw = []
        db = []
        L = len(self.weights)
        dz = self.A_list[-1] - self.one_hot_encode(y).T
        for i in range(L,0,-1):
          dw.append(1/m * dz.dot(self.A_list[i-1].T))
          db.append(1/m * np.sum(dz))
          if(self.layers[i-1][2]=='relu'):
            dz = np.multiply(self.weights[i-1].T.dot(dz),self.relu_prime(self.z_list[i-1]))
          else:
            dz = self.weights[i-1].T.dot(dz)
        dw.reverse()
        db.reverse()
        return dw,db

    def softmax(self, X):
        return np.exp(X) / sum(np.exp(X))
    
    def relu(self, X):
        return np.maximum(X, 0)
    
    def relu_prime(self, X):
        return np.where(X>0, 1, 0)

    def one_hot_encode(self, Y):
        T = np.zeros((len(Y),len(set(Y))))
        T[np.arange(Y.size), Y] = 1
        return T   
    
    def get_predictions(self,Y):
        return np.argmax(Y, 0)

    def get_accuracy(self,predictions, Y):
        print(predictions, Y)
        return np.sum(predictions == Y) / Y.size

# Main Code

## Getting data

In [8]:
df = pd.read_csv('/content/drive/MyDrive/train.csv')

In [9]:
m, n = df.shape
print(m,n)

42000 785


In [10]:
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,pixel11,pixel12,pixel13,pixel14,pixel15,pixel16,pixel17,pixel18,pixel19,pixel20,pixel21,pixel22,pixel23,pixel24,pixel25,pixel26,pixel27,pixel28,pixel29,pixel30,pixel31,pixel32,pixel33,pixel34,pixel35,pixel36,pixel37,pixel38,...,pixel744,pixel745,pixel746,pixel747,pixel748,pixel749,pixel750,pixel751,pixel752,pixel753,pixel754,pixel755,pixel756,pixel757,pixel758,pixel759,pixel760,pixel761,pixel762,pixel763,pixel764,pixel765,pixel766,pixel767,pixel768,pixel769,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
data = np.array(df)
data_val = data[0:1000].T
Y_val = data_val[0]
X_val = data_val[1:n]
X_val = X_val / 255.

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

## Train and Validation

In [None]:
classifier = NeuralNetworkClassifier([(784,256,"relu"),(256,128,"relu"),(128,10,"softmax")])
# classifier = NeuralNetworkClassifier([(784,256,"relu"),(256,10,"softmax")])
for i in range(1000):
    result = classifier.fit_once(X_train,Y_train,0.1)
    if i%20==0:
        predictions = classifier.get_predictions(result)
        print(classifier.get_accuracy(predictions,Y_train))

[2 2 2 ... 6 3 3] [1 5 1 ... 7 6 9]
0.06953658536585366
[1 8 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.48153658536585364
[1 0 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.7304634146341463
[1 0 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.822560975609756
[1 0 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.8475853658536585
[1 0 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.8596585365853658
[1 0 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.873780487804878
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.8822926829268293
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.8885121951219512
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.8954634146341464
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9004146341463415
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9057560975609756
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.909219512195122
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9119756097560976
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9150731707317074
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9179024390243903
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9201219512195122
[1 5 1 ... 7 6 9] [1 5 1 ... 7 6 9]
0.9221951219512196
[1 5 1 ... 

In [None]:
result = classifier.predict(X_val)
predictions = classifier.get_predictions(result)
print(classifier.get_accuracy(predictions,Y_val))

[1 0 1 4 0 0 7 3 5 3 8 9 1 3 3 1 2 0 7 6 8 0 2 0 2 3 6 9 9 7 8 9 4 9 2 1 3
 1 1 4 9 1 4 9 2 6 3 7 7 4 7 5 1 9 0 2 2 3 9 1 6 1 5 0 6 3 4 8 1 0 3 9 6 3
 6 4 7 1 4 1 5 4 8 9 2 9 9 8 9 4 3 6 4 6 7 9 1 2 0 5 9 2 7 7 2 8 8 5 0 6 0
 0 2 9 0 4 7 7 1 5 7 9 4 6 1 6 7 6 5 0 4 7 7 6 1 7 7 3 7 2 1 0 3 4 5 4 0 5
 4 0 3 5 1 0 8 3 7 0 9 6 6 9 5 9 8 9 3 5 4 2 4 8 7 7 5 8 8 8 3 6 9 3 1 0 4
 1 5 9 0 6 2 1 7 0 6 0 0 8 3 2 0 0 6 0 0 4 7 2 7 1 9 9 3 9 8 4 6 6 1 3 8 1
 8 7 1 3 7 6 3 6 3 6 3 2 3 2 5 7 9 2 3 2 7 5 5 8 8 2 0 1 4 0 6 3 7 1 1 1 4
 7 0 2 9 2 0 5 6 0 8 9 6 2 0 0 7 2 0 4 2 0 9 1 6 9 3 0 0 2 0 4 8 4 0 7 2 1
 9 5 2 4 8 5 2 9 7 9 2 9 7 4 9 3 2 7 3 6 3 6 8 8 3 7 0 9 2 7 9 0 5 4 5 8 4
 3 3 1 7 8 9 7 6 2 1 7 0 5 6 5 2 9 5 4 6 2 2 2 5 0 7 7 2 2 6 3 4 2 0 5 9 6
 2 1 9 0 6 0 4 6 4 3 1 5 4 2 9 5 7 3 1 5 4 5 3 7 3 8 6 2 4 6 1 1 4 0 0 5 8
 6 7 4 2 8 0 2 5 9 8 3 0 6 4 8 6 4 1 8 1 5 4 9 4 3 2 0 6 0 7 9 2 9 8 9 6 5
 2 4 4 6 4 8 4 1 7 5 8 9 5 9 3 2 3 8 2 2 7 2 8 9 1 9 3 6 0 2 2 9 1 2 7 2 1
 3 4 9 1 8 0 2 2 3 4 1 3 

##Test and Kaggle Submission

In [None]:
test_df = pd.read_csv('/content/drive/MyDrive/test.csv')
test_df = np.array(test_df)
test_df = test_df.T
test_df = test_df / 255.

In [None]:
test_df.shape

(784, 28000)

In [None]:
result_test= classifier.predict(test_df)
pred = classifier.get_predictions(result_test)

In [None]:
pred.shape

(28000,)

In [None]:
pred.reshape(-1,1)
pred = np.array(pred.T)
print(pred)

[2 0 9 ... 3 9 2]


In [None]:
output = pd.DataFrame(columns = ['ImageId','Label'])
output['ImageId'] = np.arange(1,28001)
output['Label'] = pred
output.head()
output.to_csv('Output.csv', index=None)

**Validation Score : 0.946**

**Kaggle Score : 0.94246**

Observing Categorical Cross Entropy Losses 

In [14]:
classifier = NeuralNetworkClassifier([(784,256,"relu"),(256,128,"relu"),(128,10,"softmax")])
losses = [] 
NUM_ITERS = 10
for i in range(NUM_ITERS):
    y_train_hat = classifier.predict(X_train)
    loss = classifier.categorical_cross_entropy_loss(Y_train, y_train_hat)
    losses.append(loss)
    classifier.fit_once(X_train, Y_train, 0.1)

In [15]:
losses

[2316260.2484814143,
 2304041.1189536913,
 1862229.9397029174,
 520160.21420261636,
 540319.5258545919,
 573746.7254157942,
 617943.4391349718,
 664502.4907901841,
 709350.6664983971,
 759014.8233179951]

We can clearly see the loss is gradually decreasing after each iteration.