In [1]:
pip install wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9672 sha256=d33f09c7e28941ee6b43c766efe8ac7753823e89a870ce40e854daf8a7cdb389
  Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [2]:
import wget
wget.download('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 'train-images-idx3-ubyte.gz')
wget.download('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 'train-labels-idx1-ubyte.gz')
wget.download('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 't10k-images-idx3-ubyte.gz')
wget.download('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 't10k-labels-idx1-ubyte.gz')

't10k-labels-idx1-ubyte.gz'

In [3]:
import gzip
import numpy as np
from datetime import datetime

def loadImages(file):
  with gzip.open(file, 'r') as f:
    magicNumber = int.from_bytes(f.read(4), 'big')
    imageCount = int.from_bytes(f.read(4), 'big')
    rowCount = int.from_bytes(f.read(4), 'big')
    columnCount = int.from_bytes(f.read(4), 'big')
    imageData = f.read()
    images = np.frombuffer(imageData, dtype=np.uint8).reshape((imageCount, rowCount, columnCount))
    return images

def loadLabels(file):
  with gzip.open(file, 'r') as f:
    magicNumber = int.from_bytes(f.read(4), 'big')
    labelCount = int.from_bytes(f.read(4), 'big')
    labelData = f.read()
    labels = np.frombuffer(labelData, dtype=np.uint8)
    return labels

imagesTrain = loadImages('train-images-idx3-ubyte.gz')
labelsTrain = loadLabels('train-labels-idx1-ubyte.gz')
imagesTest = loadImages('t10k-images-idx3-ubyte.gz')
labelsTest = loadLabels('t10k-labels-idx1-ubyte.gz')

imagesTrain = imagesTrain / 255.0
imagesTrain = imagesTrain.reshape(imagesTrain.shape[0], 28 * 28)
imagesTest = imagesTest / 255.0
imagesTest = imagesTest.reshape(imagesTest.shape[0], 28 * 28)
labelsTrain = np.eye(10)[labelsTrain]
labelsTest = np.eye(10)[labelsTest]

In [4]:
def relu(x):
  return np.maximum(x, 0)

def softmax(x):
  exp = np.exp(x)
  return exp / np.sum(exp, axis = 1, keepdims = True)

def crossEntropyLoss(x1, x2):
  return np.mean(-np.sum(x1 * np.log(x2), axis=1))

def accuracy(x1, x2):
  return np.mean(np.argmax(x1, axis=1) == np.argmax(x2, axis=1))

def derivativeRelu(x):
  return np.where(x > 0.0, 1, 0)

In [31]:
class Network(object):
  def __init__(self, inputLayer, hiddenLayer, outputLayer):
    self.inputLayer = inputLayer
    self.hiddenLayer = hiddenLayer
    self.outputLayer = outputLayer
    self.w = [np.random.normal(0, np.sqrt(2 / inputLayer), [inputLayer, hiddenLayer]),
              np.random.normal(0, np.sqrt(2 / (inputLayer + outputLayer)), [hiddenLayer, outputLayer])]
    self.b = [np.zeros((1, hiddenLayer)),
              np.zeros((1, outputLayer))]

  def forward(self, x):
    self.z1 = np.dot(x, self.w[0]) + self.b[0]
    self.a1 = relu(self.z1)
    self.z2 = np.dot(self.a1, self.w[1]) + self.b[1]
    self.a2 = softmax(self.z2)

  def backward(self, xTrain, yTrain, learningRate):
    dz2 = (self.a2 - yTrain) / self.a2.shape[0]
    dw1 = (self.a1.T).dot(dz2)
    db1 = np.sum(dz2, axis=0, keepdims=True)
    dz1 = dz2.dot(self.w[1].T) * derivativeRelu(self.z1)
    dw0 = np.dot(xTrain.T, dz1)
    db0 = np.sum(dz1, axis=0, keepdims=True)

    self.w[1] -= learningRate * dw1
    self.b[1] -= learningRate * db1
    self.w[0] -= learningRate * dw0
    self.b[0] -= learningRate * db0

  def train(self, xTrain, yTrain, epochs, learningRate, batchSize):
    startTrain = datetime.now()
    for epoch in range(epochs):
      startEpoch = datetime.now()
      iteration = 0
      while iteration < len(xTrain):
        xBatch = xTrain[iteration:iteration + batchSize]
        yBatch = yTrain[iteration:iteration + batchSize]

        self.forward(xBatch)
        self.backward(xBatch, yBatch, learningRate)
        iteration += batchSize

      timeEpoch = (datetime.now() - startEpoch).total_seconds()
      self.forward(xTrain)
      crossEntropyValue = crossEntropyLoss(yTrain, self.a2)
      accuracyValue = accuracy(yTrain, self.a2)

      print('Epoch', epoch)
      print('Time epoch', timeEpoch)
      print('Loss', crossEntropyValue)
      print('Accuracy', accuracyValue)
    timeTrain = (datetime.now() - startTrain).total_seconds()
    print('Total time', timeTrain)

  def test(self, xTest, yTest):
    self.forward(xTest)
    crossEntropyValue = crossEntropyLoss(yTest, self.a2)
    accuracyValue = accuracy(yTest, self.a2)

    print('Loss', crossEntropyValue)
    print('Accuracy', accuracyValue)

In [32]:
network = Network(28*28, 300, 10)

In [33]:
print('Train')
network.train(imagesTrain, labelsTrain, 20, 0.1, 16)

Train
Epoch 0
Time epoch 8.60184
Loss 0.131042807118646
Accuracy 0.95955
Epoch 1
Time epoch 8.545444
Loss 0.07899982233623773
Accuracy 0.97625
Epoch 2
Time epoch 8.55831
Loss 0.05697253458055288
Accuracy 0.9823833333333334
Epoch 3
Time epoch 8.509837
Loss 0.042378011409795024
Accuracy 0.9869333333333333
Epoch 4
Time epoch 8.503092
Loss 0.03460268380061981
Accuracy 0.9892333333333333
Epoch 5
Time epoch 8.563248
Loss 0.028139366844658495
Accuracy 0.99125
Epoch 6
Time epoch 8.471364
Loss 0.022343426730257306
Accuracy 0.9933333333333333
Epoch 7
Time epoch 8.483773
Loss 0.01935850740837974
Accuracy 0.9942166666666666
Epoch 8
Time epoch 8.564465
Loss 0.01725801182320215
Accuracy 0.9947833333333334
Epoch 9
Time epoch 8.456696
Loss 0.01425302552149714
Accuracy 0.9959833333333333
Epoch 10
Time epoch 8.518659
Loss 0.013590778150206512
Accuracy 0.99625
Epoch 11
Time epoch 8.588828
Loss 0.011272207960964837
Accuracy 0.9969166666666667
Epoch 12
Time epoch 8.456255
Loss 0.008264992279001128
Accuracy

In [35]:
print('Test')
network.test(imagesTest, labelsTest)

Test
Loss 0.06990405762963263
Accuracy 0.9821
