In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv('train.csv')
train, validation = train_test_split(
    train, test_size = 1 / 7,              
    stratify = train['label'],
    random_state = 42
)
y_train = train['label'].to_numpy()                  
X_train = train.drop(columns = ['label']).to_numpy() 
X_val = validation.drop(columns = ['label']).to_numpy() 
y_val = validation['label'].to_numpy()

print(f'train : X = {X_train.shape} ; y = {y_train.shape}')
print(f'val   : X = {X_val.shape} ; y = {y_val.shape}')

train : X = (36000, 784) ; y = (36000,)
val   : X = (6000, 784) ; y = (6000,)


In [3]:
class ModelForMNISTdigits:
    
    def __init__(self, learning_rate, iterations, num_of_layers, num_of_neurons, input_size):
        if num_of_layers < 2:
            raise ValueError("Number of layers (nol) must be at least 2 (hidden + output layer).")
        self.nol = num_of_layers
        self.non = num_of_neurons
        self.w, self.b = [np.random.randn(self.non[0], input_size)], [np.random.randn(self.non[0], 1)]
        self.z, self.a = [None], [None]
        for i in range(1, self.nol):
            self.w.append(np.random.randn(self.non[i], self.non[i-1])*0.1)
            self.b.append(np.random.randn(self.non[i], 1))
            self.z.append(None)
            self.a.append(None)
        self.alpha = learning_rate
        self.iterations = iterations
        
    def relu(self, z):
        return np.maximum(0, z)
        
    def relu_dash(self, z):
        return (z > 0).astype(float)
        
    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis = 0, keepdims = True))
        return exp_z / np.sum(exp_z, axis = 0, keepdims = True)
    
    def forward_prop(self, X):
        self.z[0] = self.w[0].dot(X.T) + self.b[0]
        self.a[0] = self.relu(self.z[0])
        for i in range(1, self.nol - 1):
            self.z[i] = self.w[i].dot(self.a[i-1]) + self.b[i]
            self.a[i] = self.relu(self.z[i])
        self.z[self.nol - 1] = self.w[self.nol - 1].dot(self.a[self.nol - 2]) + self.b[self.nol - 1]
        self.a[self.nol - 1] = self.softmax(self.z[self.nol - 1])

    def one_hot(self, y):
        one_hot_y = np.zeros((y.size, 10))
        one_hot_y[np.arange(y.size), y] = 1
        return one_hot_y.T

    def back_prop(self, X, y):
        m = y.size
        onehot_y = self.one_hot(y)
        dw, db = [None]*self.nol, [None]*self.nol
        dz = self.a[-1] - onehot_y
        dw[-1] = (1/m)*dz.dot(self.a[-2].T)
        db[-1] = (1/m)*np.sum(dz, axis = 1, keepdims = True)
        for i in reversed(range(1, self.nol - 1)):
            dz = self.w[i+1].T.dot(dz)*self.relu_dash(self.z[i])
            dw[i] = (1/m)*dz.dot(self.a[i-1].T)
            db[i] = (1/m)*np.sum(dz, axis = 1, keepdims = True)
        dz = self.w[1].T.dot(dz)*self.relu_dash(self.z[0])
        dw[0] = (1/m)*dz.dot(X)
        db[0] = (1/m)*np.sum(dz, axis = 1, keepdims = True)
        for i in range(self.nol):
            self.w[i] -= self.alpha*dw[i]
            self.b[i] -= self.alpha*db[i]

    def predict(self, X, y):
        self.forward_prop(X)
        predictions = np.argmax(self.a[-1], 0)
        accuracy = np.sum(predictions == y) / y.size
        print(f'Accuracy : {accuracy}')
        
    def accuracy(self, y, predictions):
        return np.sum(predictions == y) / y.size

    def train(self, X, y):
        for i in range(self.iterations):
            self.forward_prop(X)     
            self.back_prop(X, y)
            if i%20 == 0:
                accuracy = self.accuracy(y, np.argmax(self.a[-1], 0))
                print(f'{i} iterations done:')
                print(f'    Accuracy: {accuracy}')
        accuracy = self.accuracy(y, np.argmax(self.a[-1], 0))
        print(f'{self.iterations} iterations done:')
        print(f'    Accuracy: {accuracy}')

In [4]:
model = ModelForMNISTdigits(learning_rate = 0.05, iterations = 3000, num_of_layers = 3, num_of_neurons = [128, 64, 10], input_size = 784)
model.train(X_train / 255, y_train)

0 iterations done:
    Accuracy: 0.13897222222222222
20 iterations done:
    Accuracy: 0.6515833333333333
40 iterations done:
    Accuracy: 0.7496666666666667
60 iterations done:
    Accuracy: 0.7799722222222222
80 iterations done:
    Accuracy: 0.8094166666666667
100 iterations done:
    Accuracy: 0.82725
120 iterations done:
    Accuracy: 0.8398888888888889
140 iterations done:
    Accuracy: 0.8496666666666667
160 iterations done:
    Accuracy: 0.8570277777777778
180 iterations done:
    Accuracy: 0.864
200 iterations done:
    Accuracy: 0.86925
220 iterations done:
    Accuracy: 0.8741666666666666
240 iterations done:
    Accuracy: 0.8784722222222222
260 iterations done:
    Accuracy: 0.8828055555555555
280 iterations done:
    Accuracy: 0.8853611111111112
300 iterations done:
    Accuracy: 0.8875277777777778
320 iterations done:
    Accuracy: 0.8899444444444444
340 iterations done:
    Accuracy: 0.8925
360 iterations done:
    Accuracy: 0.8942777777777777
380 iterations done:
    A

In [7]:
model.predict(X_val, y_val)

Accuracy : 0.923
