In [1]:
import tensorflow as tf
import requests
import os
import gzip
import numpy as np
from PIL import Image
from IPython.display import display

In [2]:
# For multi-GPU setups, restrict TensorFlow to the secondary GPU (index 1).
# When no such device is visible nothing is changed and TensorFlow keeps its
# default device selection.
devices = tf.config.get_visible_devices()
# Match on device_type as well as the name suffix: a name such as
# '...:XLA_GPU:1' also ends in 'GPU:1' but is not a 'GPU'-type device.
secondary_gpus = [d for d in devices
                  if d.device_type == 'GPU' and d.name.endswith('GPU:1')]
if secondary_gpus:
    gpu_1 = secondary_gpus[0]
    try:
        tf.config.set_visible_devices(gpu_1, 'GPU')
    except (RuntimeError, ValueError) as err:
        # RuntimeError: the TensorFlow runtime is already initialized (e.g. this
        # cell was re-run after other TF work). ValueError: the device was
        # rejected. Continue with the current device setup, but report it
        # instead of hiding the failure.
        print("Could not select secondary GPU: {}".format(err))

In [3]:
# Download MNIST digits dataset.

# The four gzip-compressed IDX archives, in the order:
# training images, training labels, test images, test labels.
urls = [
    'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
]

# Individual names for each archive URL (same order as the list above).
train_images_url, train_labels_url, test_images_url, test_labels_url = urls

def dl_file(url, destdir='.'):
    """Download `url` into `destdir` unless the file is already there.

    The local file name is the last path component of `url`. An existing
    file is never re-downloaded or overwritten.

    Args:
        url: Address of the file to fetch.
        destdir: Directory to store the file in. It is created, including any
            missing parent directories, when absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # makedirs copes with nested paths and with the directory already existing.
    os.makedirs(destdir, exist_ok=True)

    dest_path = os.path.join(destdir, os.path.basename(url))
    if os.path.exists(dest_path):
        return

    # A timeout keeps the notebook from hanging forever on an unresponsive host.
    r = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx instead of caching an error page as the data file.
    r.raise_for_status()

    # Write under a temporary name first so an interrupted write never leaves a
    # truncated file that later runs would mistake for a finished download.
    tmp_path = dest_path + '.part'
    with open(tmp_path, 'wb') as outfile:
        outfile.write(r.content)
    os.replace(tmp_path, dest_path)

# Fetch every archive listed in `urls` into the local 'mnist-data' directory.
for url in urls:
    dl_file(url, destdir='mnist-data')

In [4]:
# IDX file reader

class IDX_File(object):
    """Reader for a gzip-compressed IDX file.

    Layout consumed by ``__init__``: two zero bytes, one byte holding the
    element type code, one byte holding the number of dimensions, one 4-byte
    big-endian size per dimension, then the elements themselves. Multi-byte
    elements are decoded as big-endian, matching the header fields.

    Attributes set by ``__init__``:
        data_type: Integer type code read from the header (a key of
            ``bytecode_type_map``).
        num_dimensions: Number of dimensions declared in the header.
        data: NumPy array of the elements, shaped according to the header
            and stored in the machine's native byte order.
    """
    # class constants
    # Human-readable description of each element type code.
    bytecode_type_map_names = {
        0x08: 'unsigned byte',
        0x09: 'signed byte',
        0x0B: 'short (2 bytes)',
        0x0C: 'int (4 bytes)',
        0x0D: 'float (4 bytes)',
        0x0E: 'double (8 bytes)'
    }
    # NumPy scalar type used to decode each element type code.
    bytecode_type_map = {
        0x08: np.ubyte,
        0x09: np.byte,
        0x0B: np.short,
        0x0C: np.intc,
        0x0D: np.single,
        0x0E: np.double,
    }

    def __init__(self, filepath):
        """Parse the gzip-compressed IDX file at `filepath`.

        Args:
            filepath: Path of the ``.gz`` file to read.

        Raises:
            RuntimeError: If the first two bytes are not zero.
            KeyError: If the element type code is not in ``bytecode_type_map``.
            ValueError: If the file holds fewer elements than its header declares.
        """
        with gzip.open(filepath, 'rb') as infile:
            if int.from_bytes(infile.read(2), 'big') != 0:
                raise RuntimeError("Improperly formatted IDX file. First two bytes should be 0.")

            self.data_type = int.from_bytes(infile.read(1), 'big')
            self.num_dimensions = int.from_bytes(infile.read(1), 'big')
            dimensions = [int.from_bytes(infile.read(4), 'big')
                          for _ in range(self.num_dimensions)]

            total_len = 1
            for dim_len in dimensions:
                total_len *= dim_len

            native_dtype = np.dtype(IDX_File.bytecode_type_map[self.data_type])
            # Decode explicitly as big-endian; the native-order dtype would
            # byte-swap every multi-byte element on little-endian machines.
            file_dtype = native_dtype.newbyteorder('>')
            expected_bytes = file_dtype.itemsize * total_len
            payload = infile.read(expected_bytes)

        if len(payload) != expected_bytes:
            raise ValueError(
                "Truncated IDX file: expected {} data bytes, found {}.".format(
                    expected_bytes, len(payload)))

        decoded = np.frombuffer(payload, dtype=file_dtype).reshape(dimensions)
        # Hand back native byte order; copy=False makes this a no-op when the
        # file order already equals the native one (always so for 1-byte types).
        self.data = decoded.astype(native_dtype, copy=False)

In [5]:
# Read MNIST digits
train_images_idx_file = IDX_File('mnist-data/train-images-idx3-ubyte.gz')
train_labels_idx_file = IDX_File('mnist-data/train-labels-idx1-ubyte.gz')

test_images_idx_file = IDX_File('mnist-data/t10k-images-idx3-ubyte.gz')
test_labels_idx_file = IDX_File('mnist-data/t10k-labels-idx1-ubyte.gz')

# Unpack mnist digits and make one-hot labels
# np.float64 is used instead of the `np.float` alias, which newer NumPy
# releases no longer provide (it was only ever an alias for Python's float).
train_X = train_images_idx_file.data.astype(np.float64)
# Scale pixel values down (assumes 8-bit pixel data, giving values in [0, 1)).
train_X = train_X/256.
# One row per label, one column per class; the class count is taken from the
# largest label present in this split.
train_Y = np.zeros((train_labels_idx_file.data.size, train_labels_idx_file.data.max()+1))
train_Y[np.arange(train_labels_idx_file.data.size), train_labels_idx_file.data] = 1

test_X = test_images_idx_file.data.astype(np.float64)
test_X = test_X/256.
test_Y = np.zeros((test_labels_idx_file.data.size, test_labels_idx_file.data.max()+1))
test_Y[np.arange(test_labels_idx_file.data.size), test_labels_idx_file.data] = 1

In [6]:
# NOTE(review): the value of this expression is not assigned to any name; it
# looks like leftover scratch work — consider removing this cell.
5./6.

0.8333333333333334

In [7]:
def train_and_test_model(name, model, val_split, train_X, train_Y, test_X, test_Y, epochs=10, callbacks=None):
    """Fit `model`, then print its classification error rate on the test set.

    Args:
        name: Label used in the printed result line.
        model: Object exposing Keras-style ``fit`` and ``predict`` methods.
        val_split: Fraction of the training data used for fitting. Despite the
            name, the first ``int(len(train_X) * val_split)`` samples are the
            ones trained on; the remainder is passed as validation data.
        train_X: Training inputs, indexable along the first axis.
        train_Y: One-hot training targets aligned with `train_X`.
        test_X: Test inputs.
        test_Y: One-hot test targets aligned with `test_X`.
        epochs: Maximum number of epochs passed to ``model.fit``.
        callbacks: Optional list of callbacks passed to ``model.fit``;
            ``None`` means no callbacks.
    """
    # A fresh list per call: a mutable default would be shared between calls.
    if callbacks is None:
        callbacks = []

    num_train_total = train_X.shape[0]
    num_train = int(num_train_total*val_split)

    model.fit(
        train_X[:num_train], train_Y[:num_train],
        validation_data=(train_X[num_train:], train_Y[num_train:]),
        epochs=epochs,
        callbacks=callbacks,
    )

    # Compare the most probable predicted class with the one-hot target class.
    predicted_classes = model.predict(test_X).argmax(axis=1)
    true_classes = test_Y.argmax(axis=1)
    error_rate = np.mean(predicted_classes != true_classes)

    print("{} Error Rate: {:.2f}%".format(name, error_rate*100))

In [8]:
# Build tensorflow model.
# Baseline: flatten each 28x28 image and pass it through one 1000-unit ReLU
# hidden layer into a 10-way softmax.
one_layer_simple_model = tf.keras.Sequential()
one_layer_simple_model.add(tf.keras.layers.Reshape((28*28,), input_shape=(28,28)))
one_layer_simple_model.add(tf.keras.layers.Dense(1000, activation='relu'))
one_layer_simple_model.add(tf.keras.layers.Dense(10, activation='softmax'))

one_layer_simple_model.summary()
one_layer_simple_model.compile(loss='categorical_crossentropy', optimizer='Adam')

# Stop once validation loss has failed to improve for 3 epochs and roll back
# to the best weights seen.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=3,
        restore_best_weights=True,
    )
]

train_and_test_model(
    "One Layer Simple Model",
    one_layer_simple_model,
    0.8,
    train_X, train_Y,
    test_X, test_Y,
    epochs=20,
    callbacks=callbacks,
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 1000)              785000    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                10010     
Total params: 795,010
Trainable params: 795,010
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
One Layer Simple Model Error Rate: 2.25%


In [9]:
# Build tensorflow model.
# Same as the one-layer baseline, but with a second 1000-unit ReLU hidden layer.
two_layer_simple_model = tf.keras.Sequential()
two_layer_simple_model.add(tf.keras.layers.Reshape((28*28,), input_shape=(28,28)))
two_layer_simple_model.add(tf.keras.layers.Dense(1000, activation='relu'))
two_layer_simple_model.add(tf.keras.layers.Dense(1000, activation='relu'))
two_layer_simple_model.add(tf.keras.layers.Dense(10, activation='softmax'))

two_layer_simple_model.summary()
two_layer_simple_model.compile(loss='categorical_crossentropy', optimizer='Adam')

# Stop once validation loss has failed to improve for 3 epochs and roll back
# to the best weights seen.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=3,
        restore_best_weights=True,
    )
]

train_and_test_model(
    "Two Layer Simple Model",
    two_layer_simple_model,
    0.8,
    train_X, train_Y,
    test_X, test_Y,
    epochs=20,
    callbacks=callbacks,
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 784)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1000)              785000    
_________________________________________________________________
dense_3 (Dense)              (None, 1000)              1001000   
_________________________________________________________________
dense_4 (Dense)              (None, 10)                10010     
Total params: 1,796,010
Trainable params: 1,796,010
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Two Layer Simple Model Error Rate: 2.42%


In [10]:
# Single 3x3 convolution (32 filters) followed directly by the softmax classifier.
one_layer_cnn_model = tf.keras.Sequential()
# Add a trailing channel axis so the images fit Conv2D's expected input.
one_layer_cnn_model.add(tf.keras.layers.Reshape((28,28,1), input_shape=(28,28)))
one_layer_cnn_model.add(tf.keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'))
one_layer_cnn_model.add(tf.keras.layers.Flatten())
one_layer_cnn_model.add(tf.keras.layers.Dense(10, activation='softmax'))

one_layer_cnn_model.summary()
one_layer_cnn_model.compile(loss='categorical_crossentropy', optimizer='Adam')

# Stop once validation loss has failed to improve for 3 epochs and roll back
# to the best weights seen.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=3,
        restore_best_weights=True,
    )
]

train_and_test_model(
    "One Layer CNN Model",
    one_layer_cnn_model,
    0.8,
    train_X, train_Y,
    test_X, test_Y,
    epochs=20,
    callbacks=callbacks,
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_2 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
flatten (Flatten)            (None, 21632)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                216330    
Total params: 216,650
Trainable params: 216,650
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
One Layer CNN Model Error Rate: 2.13%


In [11]:
# Model from https://towardsdatascience.com/a-simple-2d-cnn-for-mnist-digit-recognition-a998dbc1e79a
# Two stacked 3x3 convolutions, max-pooling, then a small dense head, with
# dropout after the pooling stage and after the dense layer.
three_layer_cnn_model = tf.keras.Sequential()
# Add a trailing channel axis so the images fit Conv2D's expected input.
three_layer_cnn_model.add(tf.keras.layers.Reshape((28,28,1), input_shape=(28,28)))
three_layer_cnn_model.add(tf.keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'))
three_layer_cnn_model.add(tf.keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'))
three_layer_cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
three_layer_cnn_model.add(tf.keras.layers.Dropout(0.25))
three_layer_cnn_model.add(tf.keras.layers.Flatten())
three_layer_cnn_model.add(tf.keras.layers.Dense(125, activation='relu'))
three_layer_cnn_model.add(tf.keras.layers.Dropout(0.5))
three_layer_cnn_model.add(tf.keras.layers.Dense(10, activation='softmax'))

three_layer_cnn_model.summary()
three_layer_cnn_model.compile(loss='categorical_crossentropy', optimizer='Adam')

# Stop once validation loss has failed to improve for 3 epochs and roll back
# to the best weights seen.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=3,
        restore_best_weights=True,
    )
]

train_and_test_model(
    "Three Layer CNN Model",
    three_layer_cnn_model,
    0.8,
    train_X, train_Y,
    test_X, test_Y,
    epochs=20,
    callbacks=callbacks,
)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_3 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 125)              