# Handwritten Digit Classifier 
#### Properties: Stochastic Gradient Descent (SGD), ReLU Activation, Categorical Cross-Entropy Loss

In [387]:
import numpy as np

In [388]:
# Read the image and label arrays for both splits from .npy files located in
# the current working directory (training pair first, then testing pair).
train_X, train_Y = (
    np.load('training_images.npy'),
    np.load('training_labels.npy'),
)
test_X, test_Y = (
    np.load('testing_images.npy'),
    np.load('testing_labels.npy'),
)

In [522]:
class DigitClassifier:
    """Fully connected network that classifies flattened 28x28 digit images.

    Architecture: 784 inputs -> 64 (ReLU) -> 64 (ReLU) -> 10 (softmax).
    Training uses per-sample gradient descent on the categorical
    cross-entropy loss.

    Instance attributes (created in ``__init__``):
        WI_H1, BI_H1:   weights (784, 64) / biases (64,), input -> hidden 1.
        WH1_H2, BH1_H2: weights (64, 64) / biases (64,), hidden 1 -> hidden 2.
        WH2_O, BH2_O:   weights (64, 10) / biases (10,), hidden 2 -> output.
    """

    INPUT_SIZE = 28 * 28
    HIDDEN_SIZE = 64
    NUM_CLASSES = 10

    def __init__(self, seed=1000):
        """Create a network with freshly initialised parameters.

        The parameters are drawn *after* seeding and stored on the instance,
        so the seed actually determines the initial weights and separate
        instances never share (or overwrite) each other's parameters.

        Args:
            seed: value passed to ``np.random.seed`` before the weights are
                drawn. Note that this seeds NumPy's global random state.
        """
        np.random.seed(seed)
        # Draw order (input, hidden, output) is fixed so a given seed always
        # yields the same network.
        self.WI_H1 = np.random.randn(
            self.INPUT_SIZE, self.HIDDEN_SIZE).astype(np.float64)
        self.BI_H1 = np.zeros(self.HIDDEN_SIZE)
        self.WH1_H2 = np.random.randn(
            self.HIDDEN_SIZE, self.HIDDEN_SIZE).astype(np.float64)
        self.BH1_H2 = np.zeros(self.HIDDEN_SIZE)
        self.WH2_O = np.random.randn(
            self.HIDDEN_SIZE, self.NUM_CLASSES).astype(np.float64)
        self.BH2_O = np.zeros(self.NUM_CLASSES)

    def test(self, data, labels):
        """Print and return the classification accuracy on ``data``.

        Args:
            data: array-like whose first axis indexes samples; every sample
                is flattened and must contain 784 values.
            labels: array-like with one class index per sample.

        Returns:
            Fraction of samples whose most probable class equals the label.

        Raises:
            ValueError: if ``data`` is empty or the number of labels does
                not match the number of samples.
        """
        flat_data = self.__flatten(data)
        labels = np.asarray(labels).reshape(-1)
        if len(labels) != len(flat_data):
            raise ValueError("data and labels must have the same length")

        # One batched forward pass instead of a Python loop over samples.
        probabilities = self.__forward(flat_data)[-1]
        predictions = np.argmax(probabilities, axis=1)
        correctPredictions = int(np.count_nonzero(predictions == labels))

        accuracy = correctPredictions / len(flat_data)
        print(f"accuracy: {accuracy}")
        return accuracy

    def train(self, data, labels, epoch=100, learning_rate=0.1):
        """Fit the network with per-sample gradient descent.

        Samples are visited in their given order on every pass.

        Args:
            data: array-like whose first axis indexes samples; every sample
                is flattened and must contain 784 values.
            labels: array-like with one integer class index (0-9) per sample.
            epoch: number of full passes over ``data``.
            learning_rate: step size applied to every gradient.

        Returns:
            List with the summed cross-entropy loss of each epoch.

        Raises:
            ValueError: if ``data`` is empty or the number of labels does
                not match the number of samples.
        """
        flat_data = self.__flatten(data)
        # Flatten so column-vector labels do not broadcast into the wrong
        # one-hot cells; cast so they are usable as indices.
        labels = np.asarray(labels, dtype=np.intp).reshape(-1)
        if len(labels) != len(flat_data):
            raise ValueError("data and labels must have the same length")

        one_hot_labels = np.zeros((len(labels), self.NUM_CLASSES))
        one_hot_labels[np.arange(len(labels)), labels] = 1

        history = []
        for i in range(epoch):
            loss = 0
            for sample, target in zip(flat_data, one_hot_labels):
                loss += self.__gradientDescent(sample, target, learning_rate)
            print(f"epoch: {i+1}, loss: {loss}")
            history.append(float(loss))
        return history

    def __flatten(self, data):
        """Return ``data`` as a scaled 2-D array of shape (samples, features)."""
        data = np.asarray(data)
        if len(data) == 0:
            raise ValueError("data must contain at least one sample")
        return self.__preprocess(data.reshape(len(data), -1))

    def __forward(self, inputs):
        """Run the forward pass for one sample (1-D) or a batch (2-D).

        Returns:
            Tuple ``(ZI_H1, AI_H1, ZH1_H2, AH1_H2, SH2_O)`` holding the
            pre-activations and activations of both hidden layers followed
            by the softmax output.
        """
        ZI_H1 = np.matmul(inputs, self.WI_H1) + self.BI_H1
        AI_H1 = self.__relu(ZI_H1)
        ZH1_H2 = np.matmul(AI_H1, self.WH1_H2) + self.BH1_H2
        AH1_H2 = self.__relu(ZH1_H2)
        ZH2_O = np.matmul(AH1_H2, self.WH2_O) + self.BH2_O
        SH2_O = self.__softmax(ZH2_O)
        return ZI_H1, AI_H1, ZH1_H2, AH1_H2, SH2_O

    def __gradientDescent(self, sample, label, learning_rate):
        """Apply one gradient step for a single sample and return its loss.

        Args:
            sample: 1-D preprocessed input vector.
            label: 1-D one-hot target vector.
            learning_rate: step size applied to every gradient.

        Returns:
            Cross-entropy loss of the prediction made before the update.
        """
        ZI_H1, AI_H1, ZH1_H2, AH1_H2, SH2_O = self.__forward(sample)

        # For softmax followed by cross-entropy, the gradient with respect
        # to the output pre-activation is simply (prediction - target).
        dL_dZH2_O = SH2_O - label
        dL_dWH2_O = np.outer(AH1_H2, dL_dZH2_O)
        dL_dBH2_O = dL_dZH2_O

        # The ReLU derivative is 1 where the pre-activation was positive.
        dL_dAH1_H2 = np.dot(dL_dZH2_O, self.WH2_O.T)
        dL_dZH1_H2 = dL_dAH1_H2 * (ZH1_H2 > 0)
        dL_dWH1_H2 = np.outer(AI_H1, dL_dZH1_H2)
        dL_dBH1_H2 = dL_dZH1_H2

        dL_dAI_H1 = np.dot(dL_dZH1_H2, self.WH1_H2.T)
        dL_dZI_H1 = dL_dAI_H1 * (ZI_H1 > 0)
        dL_dWI_H1 = np.outer(sample, dL_dZI_H1)
        dL_dBI_H1 = dL_dZI_H1

        # All gradients were computed above from the pre-update weights, so
        # the order of these in-place updates does not matter.
        self.WI_H1 -= learning_rate * dL_dWI_H1
        self.BI_H1 -= learning_rate * dL_dBI_H1
        self.WH1_H2 -= learning_rate * dL_dWH1_H2
        self.BH1_H2 -= learning_rate * dL_dBH1_H2
        self.WH2_O -= learning_rate * dL_dWH2_O
        self.BH2_O -= learning_rate * dL_dBH2_O

        # The small epsilon keeps log() finite when a probability is 0.
        loss = -np.sum(np.log(SH2_O + 1e-7) * label)
        return loss

    def __relu(self, data):
        """Element-wise rectified linear unit: max(0, x)."""
        return np.maximum(0, data)

    def __softmax(self, data):
        """Softmax along the last axis (works for a sample or a batch)."""
        # Subtracting the maximum leaves the result unchanged but prevents
        # overflow in exp().
        shift_data = data - np.max(data, axis=-1, keepdims=True)
        exps = np.exp(shift_data)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def __normalize(self, data):
        """Min-max scale ``data`` to [0, 1]; constant input maps to zeros."""
        lowest = np.min(data)
        span = np.max(data) - lowest
        if span == 0:
            # Avoid 0/0 when every value is identical.
            return np.zeros_like(data, dtype=np.float64)
        return (data - lowest) / span

    def __preprocess(self, data):
        """Scale raw values by 1/255."""
        return (data/255)

# Build the network, fit it on the training split for 20 passes, then report
# its accuracy on the testing split.
classifier = DigitClassifier()
classifier.train(
    data=train_X,
    labels=train_Y,
    epoch=20,
    learning_rate=0.006,
)
classifier.test(data=test_X, labels=test_Y)

epoch: 1, loss: 73743.62168695552
epoch: 2, loss: 40973.5320430564
epoch: 3, loss: 31397.60438505518
epoch: 4, loss: 26205.660774157328
epoch: 5, loss: 23209.196579430765
epoch: 6, loss: 21206.623315531244
epoch: 7, loss: 19508.72801876251
epoch: 8, loss: 18224.6648673307
epoch: 9, loss: 17286.389622760864
epoch: 10, loss: 16314.538599723845
epoch: 11, loss: 15428.29305991524
epoch: 12, loss: 14780.439227924171
epoch: 13, loss: 14314.744197942493
epoch: 14, loss: 13773.150603276263
epoch: 15, loss: 13168.212265172026
epoch: 16, loss: 12762.814011000206
epoch: 17, loss: 12335.535859362031
epoch: 18, loss: 11950.529073805796
epoch: 19, loss: 11596.133420447099
epoch: 20, loss: 11274.763463558342
accuracy: 0.9238
