Author: Ethan Herron 3-4-2020

This is my Jupyter notebook for the book *Deep Learning from Scratch* by Seth Weidman. I will follow along with all of the code, adding insights and questions along the way in these markdown cells.

In [1]:
import numpy as np

import lincoln
from lincoln.layers import Dense
from lincoln.losses import SoftmaxCrossEntropy, MeanSquaredError
from lincoln.optimizers import Optimizer, SGD, SGDMomentum
from lincoln.activations import Sigmoid, Tanh, Linear, ReLU
from lincoln.network import NeuralNetwork
from lincoln.train import Trainer
from lincoln.layers import Conv2D

In [2]:
'''

Credit: https://github.com/hsjeong5

This was pulled from Seth Weidman's github
'''
import numpy as np
from urllib import request
import gzip
import pickle

# Each entry pairs the key used in the pickled MNIST dictionary with the name
# of the gzip archive it is read from.
# Order matters: save_mnist() treats the first two entries as image archives
# and the last two entries as label archives.
filename = [
    ["training_images", "train-images-idx3-ubyte.gz"],
    ["test_images", "t10k-images-idx3-ubyte.gz"],
    ["training_labels", "train-labels-idx1-ubyte.gz"],
    ["test_labels", "t10k-labels-idx1-ubyte.gz"],
]


def download_mnist(base_url="http://yann.lecun.com/exdb/mnist/"):
    """Download the four gzipped MNIST archives into the working directory.

    Every archive name listed in the module-level ``filename`` table is
    fetched from ``base_url`` and stored under the same name locally.

    Parameters
    ----------
    base_url : str, optional
        URL prefix that the archive names are appended to. Defaults to the
        original host; pass a mirror URL if that host is unreachable. A
        missing trailing slash is tolerated.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urlretrieve`` when a download fails.
    """
    # Normalise so that both "host/path" and "host/path/" work.
    prefix = base_url.rstrip("/") + "/"
    for _, archive in filename:
        print("Downloading "+archive+"...")
        request.urlretrieve(prefix+archive, archive)
    print("Download complete.")


def save_mnist():
    """Convert the downloaded gzip archives into a single ``mnist.pkl`` file.

    The first two entries of ``filename`` are read as image archives and the
    last two as label archives. The resulting dictionary maps each entry's
    key to a ``uint8`` array and is pickled to ``mnist.pkl`` in the working
    directory.
    """
    arrays = {}
    image_entries, label_entries = filename[:2], filename[-2:]

    for key, archive in image_entries:
        with gzip.open(archive, 'rb') as gz:
            raw = gz.read()
        # Skip the leading 16 bytes (presumably the IDX header) and lay the
        # remaining bytes out as one flat 28*28 row per image.
        pixels = np.frombuffer(raw, np.uint8, offset=16)
        arrays[key] = pixels.reshape(-1, 28*28)

    for key, archive in label_entries:
        with gzip.open(archive, 'rb') as gz:
            raw = gz.read()
        # Label archives skip only the leading 8 bytes; one byte per label follows.
        arrays[key] = np.frombuffer(raw, np.uint8, offset=8)

    with open("mnist.pkl", 'wb') as out:
        pickle.dump(arrays, out)
    print("Save complete.")


def init():
    """Fetch the MNIST archives and build ``mnist.pkl`` from them.

    Runs ``download_mnist`` followed by ``save_mnist``; needs to be executed
    before ``load`` can find the pickle file.
    """
    for step in (download_mnist, save_mnist):
        step()


def load():
    """Return the MNIST arrays stored in ``mnist.pkl``.

    If ``mnist.pkl`` does not exist yet (for example on a fresh checkout or
    after Restart & Run All on a clean directory), ``init()`` is called once
    to download the archives and build the pickle, and the load is retried.

    Returns
    -------
    tuple
        ``(training_images, training_labels, test_images, test_labels)`` as
        stored in the pickled dictionary.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only use this with the
    ``mnist.pkl`` produced by ``save_mnist()``, never with a file from an
    untrusted source.
    """
    try:
        with open("mnist.pkl",'rb') as f:
            mnist = pickle.load(f)
    except FileNotFoundError:
        # Cache is missing: build it, then read it back.
        init()
        with open("mnist.pkl",'rb') as f:
            mnist = pickle.load(f)
    return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]

In [3]:
# Unpack the four arrays returned by load(): training images and labels,
# followed by test images and labels.
X_train, y_train, X_test, y_test = load()

In [4]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as `lincoln`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Standardize the pixel values using statistics of the training set only, and
# apply the same shift and scale to the test set so both share one scale.
# Each statistic is computed once and kept under a name, instead of being
# recomputed over the whole training array for each side of the assignment.
train_mean = np.mean(X_train)
X_train, X_test = X_train - train_mean, X_test - train_mean

# Standard deviation of the already-centered training data.
train_std = np.std(X_train)
X_train, X_test = X_train / train_std, X_test / train_std

In [6]:
# Turn each flat 28*28 row back into a single-channel 28x28 image, giving
# arrays of shape (num_images, 1, 28, 28) for the convolutional model below.
X_train_conv = X_train.reshape(-1, 1, 28, 28)
X_test_conv = X_test.reshape(-1, 1, 28, 28)

In [7]:
def one_hot(labels, num_classes=10):
    """One-hot encode a 1-D sequence of integer class labels.

    Parameters
    ----------
    labels : array-like of int
        Class index for every sample; each value must be in
        ``[0, num_classes)``.
    num_classes : int, optional
        Width of the encoding (10 digit classes by default).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), num_classes)`` containing a 1 in
        the column given by each sample's label and 0 elsewhere.
    """
    encoded = np.zeros((len(labels), num_classes))
    # Vectorized replacement for the per-sample loop: set (row i, labels[i]).
    encoded[np.arange(len(labels)), labels] = 1
    return encoded


train_labels = one_hot(y_train)
test_labels = one_hot(y_test)

# Kept so the notebook namespace matches the previous version of this cell.
# Despite its name it holds the number of test samples, not the class count.
num_labels = len(y_test)

In [8]:
def calc_accuracy_model(model, test_set, labels=None):
    """Print the classification accuracy of ``model`` on ``test_set``.

    Parameters
    ----------
    model : object
        Must provide ``forward(batch, inference=True)`` returning one row of
        class scores per sample.
    test_set : numpy.ndarray
        Inputs to evaluate; ``test_set.shape[0]`` is used as the sample count.
    labels : array-like of int, optional
        True class index for every row of ``test_set``. When omitted, the
        notebook-level ``y_test`` is used, which is only correct if
        ``test_set`` is the matching test split.

    Returns
    -------
    None
        The accuracy is printed as a percentage with two decimals.
    """
    if labels is None:
        # Backward-compatible default: score against the global test labels.
        labels = y_test
    predictions = np.argmax(model.forward(test_set, inference=True), axis=1)
    accuracy = np.equal(predictions, labels).sum() * 100.0 / test_set.shape[0]
    return print(f'''The model validation accuracy is: {accuracy:.2f}%''')

# CNN from scratch

In [9]:
# Two-layer network: a 16-channel convolution with 5-wide filters and Tanh
# activation, followed by a 10-unit linear Dense layer, trained with
# SoftmaxCrossEntropy loss.
# NOTE(review): flatten=True presumably flattens the convolution output for
# the Dense layer, and the meaning of dropout=0.8 (keep vs. drop probability)
# is not visible here; confirm both in lincoln.layers.
model = NeuralNetwork(
    layers=[
        Conv2D(
            out_channels=16,
            param_size=5,
            dropout=0.8,
            weight_init="glorot",
            flatten=True,
            activation=Tanh(),
        ),
        Dense(
            neurons=10,
            activation=Linear(),
        ),
    ],
    loss=SoftmaxCrossEntropy(),
    seed=20190402,
)

# One epoch of SGD with momentum in batches of 60. The same seed is passed to
# the network and to fit(). The trailing semicolon suppresses the cell's
# return-value display.
trainer = Trainer(model, SGDMomentum(lr=0.1, momentum=0.9))
trainer.fit(
    X_train_conv,
    train_labels,
    X_test_conv,
    test_labels,
    epochs=1,
    eval_every=1,
    seed=20190402,
    batch_size=60,
    conv_testing=True,
);

batch 0 loss 31.19150189374252
batch 10 loss 14.15039049051834
batch 20 loss 8.507022911132601
batch 30 loss 9.816084583306068
batch 40 loss 2.7069455082764815
batch 50 loss 5.039141447327366
batch 60 loss 3.841297578567434
batch 70 loss 8.477920654330847
batch 80 loss 5.387018093832927
batch 90 loss 2.3019086461589677
batch 100 loss 3.423740645405914
Validation accuracy after 100 batches is 87.18%
batch 110 loss 7.381830774292775
batch 120 loss 5.261394665297685
batch 130 loss 6.315186399919332
batch 140 loss 3.6748038938448295
batch 150 loss 7.088352791457682
batch 160 loss 6.2534749079451935
batch 170 loss 3.1862586434251368
batch 180 loss 4.019203364837613
batch 190 loss 3.2785645561076895
batch 200 loss 4.158550946114209
Validation accuracy after 200 batches is 86.57%
batch 210 loss 4.358141880979221
batch 220 loss 6.623616594144251
batch 230 loss 4.462456032105261
batch 240 loss 1.9196046008614966
batch 250 loss 6.006217122053406
batch 260 loss 7.594060711628687
batch 270 loss 4.

In [10]:
# Report the trained model's accuracy on the conv-shaped test images.
calc_accuracy_model(model, X_test_conv)

The model validation accuracy is: 
    91.68%
