In [1]:
import numpy as np
import matplotlib.pyplot
%matplotlib inline
import time

In [2]:
def convert(imgs, labels, outfile, n):
  imgf = open(imgs, "rb")
  labelf = open(labels, "rb")
  csvf = open(outfile, "w")

  imgf.read(16)
  labelf.read(8)
  images = []

  for i in range(n):
    image = [ord(labelf.read(1))]
    for j in range(28 * 28):
      image.append(ord(imgf.read(1)))
    images.append(image)

  for image in images:
    csvf.write(",".join(str(pix) for pix in image) + "\n")
  imgf.close()
  labelf.close()
  csvf.close()

In [3]:
mnist_train_x = "/content/drive/MyDrive/digitRecognizer/train-images.idx3-ubyte"
mnist_train_y = "/content/drive/MyDrive/digitRecognizer/train-labels.idx1-ubyte"
mnist_test_x = "/content/drive/MyDrive/digitRecognizer/t10k-images.idx3-ubyte"
mnist_test_y = "/content/drive/MyDrive/digitRecognizer/t10k-labels.idx1-ubyte"

In [4]:
convert(mnist_train_x, mnist_train_y, "/content/drive/MyDrive/digitRecognizer/train.csv", 60000)
convert(mnist_test_x, mnist_test_y, "/content/drive/MyDrive/digitRecognizer/test.csv", 10000)

In [None]:
train_file = open("/content/drive/MyDrive/digitRecognizer/train.csv", "r")
train_list = train_file.readlines()
train_file.close()
print(len(train_list))

In [None]:
train_list[100]

In [None]:
values = train_list[100].split(",")
image_array = np.asfarray(values[1:]).reshape((28, 28))
matplotlib.pyplot.imshow(image_array, cmap = "Greys", interpolation = "None")

In [None]:
test_file = open("/content/drive/MyDrive/digitRecognizer/test.csv", "r")
test_list = test_file.readlines()
test_file.close()
print(len(test_list))

In [23]:
class DNN:
  def __init__(self, sizes, epochs, lr):
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    input_layer = self.sizes[0]
    hidden_1 = self.sizes[1]
    hidden_2 = self.sizes[2]
    output_layer = self.sizes[3]

    self.params = {
        'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1./hidden_1),     #128 * 784
        'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1./hidden_2),        #64 * 128
        'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1./output_layer) #10 * 64
    }

  def sigmoid(self, x, derivative = False):
    if derivative:
      return (np.exp(-x)) / ((np.exp(-x) + 1) ** 2)
    return 1 / (1 + np.exp(-x))

  def softmax(self, x, derivative = False):
    exps = np.exp(x - x.max())
    if derivative:
      return exps / np.sum(exps, axis = 0) * (1 - exps / np.sum(exps, axis = 0))
    return exps/np.sum(exps, axis = 0)

  def forward_pass(self, x_train):
    params = self.params

    params['A0'] = x_train #784 * 1
    #input_layer to hidden _1
    params['Z1'] = np.dot(params['W1'], params['A0'])  #128 * 1
    params['A1'] = self.sigmoid(params['Z1'])

    #hidden_1 to hidden_2
    params['Z2'] = np.dot(params['W2'], params['A1'])  #64 * 1
    params['A2'] = self.sigmoid(params['Z2'])

    #hidden_2 to output_layer
    params['Z3'] = np.dot(params['W3'], params['A2'])  #10 * 1
    params['A3'] = self.softmax(params['Z3'])

    return params['A3']

  def backward_pass(self, y_train, output):
    params = self.params

    change_w = {}

    #Calculate W3 update
    error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative = True)
    change_w['W3'] = np.outer(error, params['A2'])

    #calculate W2 update
    error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative = True)
    change_w['W2'] = np.outer(error, params['A1'])

    #calculate W1 update
    error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative = True)
    change_w['W1'] = np.outer (error, params['A0'])

    return change_w

  def update_weights(self, change_w):
    for key, val in change_w.items():
      self.params[key] -= self.lr * val # W_t + 1 = W_t - lr*Delta_W_T

  def compute_accuracy(self, test_data, output_nodes):
    predictions = []
    for x in train_list:
      values = x.split(",")
      inputs = (np.asfarray(values[1:]) / 255.0 * 0.99) + 0.01
      targets = np.zeros(output_nodes) + 0.01
      targets[int(values[0])] = 0.99
      output = self.forward_pass(inputs)
      pred = np.argmax(output)
      predictions.append(pred == np.argmax(targets))

    return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
    for i in range(self.epochs):
      start_time = time.time()
      for x in train_list:
        values = x.split(",")
        inputs = (np.asfarray(values[1:]) / 255.0 * 0.99) + 0.01
        targets = np.zeros(output_nodes) + 0.01
        targets[int(values[0])] = 0.99
        output = self.forward_pass(inputs)
        change_w = self.backward_pass(targets, output)
        self.update_weights(change_w)
      accuracy = self.compute_accuracy(test_list, output_nodes)
      print("Epoch: {0}, Time Spent: {1:.02f}s, Accuracy: {2:.2f}%".format(i + 1, time.time() - start_time, accuracy*100))


In [24]:
dnn = DNN(sizes = [784, 128, 64, 10], epochs = 10, lr = 0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 101.74s, Accuracy: 25.09%
Epoch: 2, Time Spent: 98.62s, Accuracy: 32.14%
Epoch: 3, Time Spent: 101.33s, Accuracy: 35.38%
Epoch: 4, Time Spent: 101.77s, Accuracy: 37.84%
Epoch: 5, Time Spent: 96.09s, Accuracy: 39.88%
Epoch: 6, Time Spent: 98.32s, Accuracy: 41.69%
Epoch: 7, Time Spent: 92.44s, Accuracy: 43.54%
Epoch: 8, Time Spent: 92.00s, Accuracy: 45.82%
Epoch: 9, Time Spent: 95.28s, Accuracy: 48.55%
Epoch: 10, Time Spent: 92.73s, Accuracy: 51.36%
