In [17]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
import sklearn.metrics as metrics

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [18]:
# Path of the training CSV inside the Kaggle input directory.
TRAIN_CSV = '/kaggle/input/digit-recognizer/train.csv'
train_df = pd.read_csv(filepath_or_buffer=TRAIN_CSV)

# Preview the first five rows of the loaded frame.
train_df.head(n=5)

# Preprocessing

In [19]:
# Number of missing entries in each column (isnull is the alias of isna).
train_df.isnull().sum()

Hence, there are no missing (NaN) values in any column.

In [20]:
# Count the columns whose dtype is not an integer type.
# `dtype != int` compares against the platform's default integer width, so it
# can misreport int64 columns where that default is 32-bit and it treats
# unsigned integer columns as non-int. `is_integer_dtype` accepts every
# signed/unsigned integer width.
countNonInts = sum(
    not pd.api.types.is_integer_dtype(dtype) for dtype in train_df.dtypes
)

print("No. of non int columns:", countNonInts)

Hence, every column is already integer-typed, so no type conversion or encoding is required.

In [21]:
# Only a small prefix of the data is used for training and evaluation.
N_SAMPLES = 1000

# Scale the pixel columns to [0, 1]: feeding raw intensities into randomly
# initialised weights produces huge pre-activations and unstable updates.
# NOTE(review): assumes the pixel columns are 8-bit intensities (0-255), as in
# the Kaggle digit-recognizer data -- confirm against the dataset description.
features = train_df.drop('label', axis = 1)[:N_SAMPLES] / 255.0
labels = train_df['label'][:N_SAMPLES]

In [22]:
# Hold out 33% of the samples for testing; stratifying on the labels keeps the
# class proportions the same in both parts, and the fixed seed makes the split
# repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.33,
    random_state=42,
    stratify=labels,
)

In [23]:
# Convert the training split from pandas containers to plain NumPy arrays.
x_train, y_train = np.array(x_train), np.array(y_train)

In [24]:
from math import exp
from decimal import Decimal

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(x, floor)

def softmax(x):
    """Numerically stable softmax along axis 0.

    Parameters
    ----------
    x : array-like
        Logits. For a 1-D input the whole vector is normalised; for a 2-D
        input each column is normalised independently (axis 0).

    Returns
    -------
    numpy.ndarray
        Non-negative values of the same shape as ``x`` that sum to 1 along
        axis 0.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to normalise; keep the (empty) shape like the plain formula.
        return np.exp(x)

    # softmax(x) == softmax(x - k) for any constant k. Shifting by the maximum
    # keeps every exponent <= 0, so np.exp cannot overflow to inf (which would
    # otherwise turn the result into NaN via inf / inf).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def linear(x, m, c):
    """Apply the affine map ``m * v + c`` to every entry ``v`` of ``x``.

    Returns the transformed entries collected into a new NumPy array.
    """
    transformed = []
    for value in x:
        transformed.append(m * value + c)
    return np.array(transformed)

def derivative_relu(x):
    """Derivative of ReLU: 1 where ``x > 0``, otherwise 0.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    int or numpy.ndarray
        A plain ``int`` for scalar input (unchanged behaviour), or an integer
        array of the same shape for array input. The previous implementation
        could not handle arrays with more than one element because ``int()``
        of such a comparison raises.
    """
    if np.ndim(x) == 0:
        return int(x > 0)
    return (np.asarray(x) > 0).astype(int)

def derivative_softmax(x):
    """Jacobian of softmax evaluated at ``x``, as a nested list.

    Entry ``[i][j]`` is ``s[i] * (delta_ij - s[j])`` where ``s = softmax(x)``
    and ``delta_ij`` is 1 when ``i == j`` and 0 otherwise.
    """
    probs = softmax(x)
    size = len(x)
    jacobian = []
    for i in range(size):
        row = []
        for j in range(size):
            kronecker = int(i == j)
            row.append(probs[i] * (kronecker - probs[j]))
        jacobian.append(row)
    return jacobian

def derivative_linear(m):
    """Derivative of the affine map ``m * z + c`` with respect to ``z``.

    The slope is constant, so the given ``m`` is returned unchanged.
    """
    slope = m
    return slope

In [38]:
from math import log

class NeuralNetworkClassifier:
    """Fully connected feed-forward classifier trained by per-sample SGD.

    Parameters
    ----------
    layers : sequence of (n_inputs, n_outputs, activation)
        One entry per weight layer. ``activation`` is a key of ``ACTIVATION``
        ('relu', 'softmax' or 'linear'). ``n_inputs`` of every layer after the
        first must equal ``n_outputs`` of the layer before it.

    Attributes
    ----------
    wts : list
        ``wts[l]`` (1-based) is the ``(n_outputs, n_inputs + 1)`` weight matrix
        of layer ``l``; the last column multiplies the bias unit. ``wts[0]`` is
        an unused placeholder so indices line up with ``act`` and ``z``.
    z, act : list
        Pre-activations and activations cached by the latest forward pass.
        ``act[0]`` is the input with the bias unit appended; hidden activations
        also carry the bias unit; the output activation does not.
    lin_coeffs : dict
        ``(m, c)`` of every 'linear' layer, keyed by the layer's 0-based
        position in ``layers``. These coefficients are not trained.

    Notes
    -----
    The method names keep the original "propogation" spelling so existing
    callers keep working.
    """
    BIAS = 1
    ACTIVATION = {'relu': relu, 'softmax': softmax, 'linear': linear}
    DERIVATIVE = {'relu': derivative_relu, 'softmax': derivative_softmax, 
                  'linear': derivative_linear}
    CORRECTION = 1e-6

    def __init__(self, layers):
        if len(layers) == 0:
            raise ValueError("At least one layer is required.")

        self.layers = layers
        self.L = len(layers)

        self.lin_coeffs = {}
        expected_inputs = 0
        for l, (n_in, n_out, activation) in enumerate(layers):
            if l > 0 and expected_inputs != n_in:
                raise Exception("The no. of layers in this layer is inconsistent."
                                + "\nThe last layer mentioned no. of layers in " 
                                + str(l) + "th layer is " + str(expected_inputs) 
                                + ".\nBut this layer mentions the no. of layers to be " 
                                + str(n_in) + ".")

            if activation == 'linear':
                self.lin_coeffs[l] = (np.random.rand(), np.random.rand())

            expected_inputs = n_out

        # Scale the standard-normal draws by sqrt(2 / fan_in). Unscaled weights
        # make the pre-activations grow with the layer width, which saturates
        # the softmax output and stalls learning.
        self.wts = [[]]  # placeholder so that wts[l] belongs to layer l (1-based)
        for n_in, n_out, _activation in layers:
            scale = np.sqrt(2.0 / (n_in + 1))
            self.wts.append(np.random.randn(n_out, n_in + 1) * scale)

        self.act = [[] for _ in range(self.L + 1)]
        self.z = [[] for _ in range(self.L + 1)]

    def _activate(self, l):
        """Apply the activation of layer ``l`` (1-based) to the cached ``z[l]``."""
        activation = self.layers[l - 1][2]
        z = self.z[l]

        if activation == 'linear':
            # lin_coeffs is keyed by the 0-based position used in __init__.
            m, c = self.lin_coeffs[l - 1]
            return self.ACTIVATION[activation](z, m, c)

        if activation == 'softmax':
            # Softmax is invariant to a constant shift; removing the maximum
            # keeps the exponentials finite whatever softmax implementation
            # is registered in ACTIVATION.
            z = z - np.max(z)

        return self.ACTIVATION[activation](z)

    def _forward_propogation(self, x):
        """Run one sample through the network and cache ``z`` / ``act``.

        Returns the output-layer activation, one value per output neuron.
        """
        self.act[0] = np.append(x, self.BIAS)

        for l in range(1, self.L + 1):
            self.z[l] = self.wts[l] @ self.act[l - 1]
            out = self._activate(l)
            # Only hidden layers feed another weight matrix, so only they get
            # the bias unit. Appending it to the output layer would add a
            # constant 1 that always wins argmax.
            self.act[l] = np.append(out, self.BIAS) if l < self.L else out

        return self.act[self.L]

    def _output_delta(self, y):
        """Gradient of the cross-entropy loss w.r.t. the output pre-activation."""
        out = self.act[self.L]

        if self.layers[-1][2] == 'softmax':
            # For softmax + cross-entropy the gradient is (probabilities - one_hot).
            delta = np.array(out, dtype=float)
            delta[y] -= 1.0
            return delta

        # General case: d(-log(a_y + eps)) / da is non-zero only at index y.
        grad_act = np.zeros(len(out))
        grad_act[y] = -1.0 / (out[y] + self.CORRECTION)
        return self._activation_backward(self.L, grad_act)

    def _activation_backward(self, l, grad_act):
        """Turn dLoss/d(activation) of layer ``l`` into dLoss/d(pre-activation)."""
        activation = self.layers[l - 1][2]
        z = self.z[l]

        if activation == 'relu':
            # Vectorised form of DERIVATIVE['relu'], applied to the whole layer.
            return grad_act * (z > 0)

        if activation == 'linear':
            slope = self.DERIVATIVE[activation](self.lin_coeffs[l - 1][0])
            return grad_act * slope

        if activation == 'softmax':
            # Vector-Jacobian product with J[i][j] = s_i * (delta_ij - s_j),
            # computed without materialising the full Jacobian.
            s = np.asarray(self.act[l][:len(z)], dtype=float)  # drop bias unit if present
            return s * (grad_act - s @ grad_act)

        raise Exception("Unsupported activation: " + str(activation))

    def _backward_propogation(self, y):
        """Take one gradient-descent step on every layer for the cached sample.

        Must be called right after ``_forward_propogation`` for the same
        sample; ``y`` is that sample's integer class label and ``self.alpha``
        the learning rate.
        """
        grad_act = None
        for l in range(self.L, 0, -1):
            if l == self.L:
                delta = self._output_delta(y)
            else:
                delta = self._activation_backward(l, grad_act)

            # Propagate through the pre-update weights; the bias column has no
            # upstream neuron, so it is left out.
            grad_act = self.wts[l][:, :-1].T @ delta
            self.wts[l] -= self.alpha * np.outer(delta, self.act[l - 1])

    def _check_n_columns(self, X):
        """Raise if ``X`` does not have one column per input neuron."""
        n_cols = X.shape[1]

        if n_cols != self.layers[0][0]:
            raise Exception("The no. of neurons in the first layer should be the same as the no. of columns in X " 
                            + "\nNo. of columns in X " + str(n_cols) 
                            + "\nNo. of neurons in first layer " + str(self.layers[0][0]))  

    def _check_labels(self, Y):
        """Raise if a label cannot be represented by the output layer.

        Only labels that exceed the output size are rejected; a batch that
        happens not to contain the highest class is valid.
        """
        if len(Y) > 0 and max(Y) + 1 > self.layers[-1][1]:
            raise Exception("The no. of neurons in the last layer should be the same as the no. of classes in Y"
                            + "\nNo. of classes in Y " + str(max(Y) + 1)
                            + "\nNo. of neurons in the last layer " + str(self.layers[-1][1]))      

    def fit_once(self, X, Y, alpha):
        """Run one pass of per-sample gradient descent over ``(X, Y)``.

        Parameters
        ----------
        X : 2-D array-like, one row per sample.
        Y : 1-D array-like of integer class labels, aligned with ``X``.
        alpha : float
            Learning rate; stored on the instance as ``self.alpha``.

        Raises
        ------
        Exception
            If the column count of ``X`` or the labels in ``Y`` do not fit
            the network's input / output sizes.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        self._check_n_columns(X)
        self._check_labels(Y)
        self.alpha = alpha

        for x, y in zip(X, Y):
            self._forward_propogation(x)
            self._backward_propogation(y)

    def predict(self, x):
        """Return the output activations for every row of ``x`` as a list.

        Raises ``Exception`` if the column count of ``x`` does not match the
        first layer.
        """
        x = np.asarray(x)
        self._check_n_columns(x)

        return [self._forward_propogation(x_vec) for x_vec in x]

    def categorical_cross_entropy_loss(self, y, yhat):
        """Summed cross-entropy of the predictions ``yhat`` against labels ``y``.

        ``CORRECTION`` is added inside the logarithm so that a predicted
        probability of exactly zero does not raise.
        """
        self._check_labels(y)
        return sum(-log(yhat[i][y_label] + self.CORRECTION)
                   for i, y_label in enumerate(y))

In [26]:
# Largest class label present in the training labels.
y_train.max()

In [27]:
def accuracy(y_true, y_pred):
    """Fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; both must have the same shape.

    Returns
    -------
    float
        Number of matching entries divided by ``len(y_true)``.

    Raises
    ------
    ValueError
        If the two inputs differ in shape. Without this check NumPy would
        broadcast (for example a length-1 prediction against many labels) and
        silently return a meaningless score.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got "
            + str(y_true.shape) + " and " + str(y_pred.shape)
        )
    return np.sum(y_true == y_pred) / float(len(y_true))

In [28]:
# Number of samples in the training split.
x_train.shape[0]

In [39]:
# Network and training configuration.
NUM_COLUMNS_X = 784
NUM_CLASSES = 10
HIDDEN_UNITS = 1000
LEARNING_RATE = 0.1
NUM_ITERS = 200
SEED = 42

# The classifier draws its initial weights from NumPy's global RNG; seeding it
# makes a fresh "Restart & Run All" reproduce the same numbers.
np.random.seed(SEED)
model = NeuralNetworkClassifier([(NUM_COLUMNS_X, HIDDEN_UNITS, "relu"),
                                 (HIDDEN_UNITS, NUM_CLASSES, "softmax")])

losses = []
x_test = np.array(x_test)
y_test = np.array(y_test)

# A named loop variable is used instead of `_`, which IPython reserves for the
# last cell output.
for epoch in range(NUM_ITERS):
    # Evaluate before updating, so the first entry is the untrained model.
    yhat = model.predict(x_train)
    loss = model.categorical_cross_entropy_loss(y_train, yhat)
    print("Curr loss:", loss)

    test_preds = [np.argmax(preds) for preds in model.predict(x_test)]
    test_accuracy = accuracy(y_test, test_preds)
    print(" Test Accuracy: ", test_accuracy)

    train_preds = [np.argmax(preds) for preds in yhat]
    train_accuracy = accuracy(y_train, train_preds)
    print(" Train Accuracy: ", train_accuracy)

    losses.append(loss)
    model.fit_once(x_train, y_train, LEARNING_RATE)
    print("Iter no:", epoch)

# Printed once at the end; printing the growing list on every iteration made
# the output quadratic in the number of iterations.
print("List of losses:", losses)