In [13]:
import os
import pathlib
import itertools

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
import sklearn

%load_ext autoreload
%autoreload 2

import main

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Dataset roots: the large set is used for training, the small one for
# validation/testing.
train_path = pathlib.Path('../blobs/notMNIST_large')
test_path = pathlib.Path('../blobs/notMNIST_small')

# One sub-directory per class. glob() yields entries in arbitrary,
# filesystem-dependent order, so sort them to keep the label -> index mapping
# stable between runs/machines, and skip stray non-directory entries that
# would otherwise be treated as classes.
labels = np.array(sorted(item.name for item in train_path.glob('*') if item.is_dir()))
print(labels)

['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J']


In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the subsample, the shuffle and the test/validation split are
# the same on every Restart & Run All.
SEED = 42
# Only 1/TRAIN_SUBSAMPLE of each class's training files is kept.
TRAIN_SUBSAMPLE = 10

x_train_files, y_train = [], []
x_test_files, y_test = [], []
# enumerate(tqdm(labels)) lets tqdm see len(labels), so the bar shows a total
# (a bare zip() has no length).
for index, label in enumerate(tqdm(labels)):
    train_dir = train_path / label
    # os.listdir() order is arbitrary; sort so the seeded sample is reproducible.
    files = pd.Series([train_dir / name for name in sorted(os.listdir(train_dir))])
    files = files.sample(len(files) // TRAIN_SUBSAMPLE, random_state=SEED)
    x_train_files.extend(files)
    y_train.extend([index] * len(files))

    test_dir = test_path / label
    test_files = [test_dir / name for name in sorted(os.listdir(test_dir))]
    x_test_files.extend(test_files)
    y_test.extend([index] * len(test_files))

x_train_files, y_train = sklearn.utils.shuffle(
    np.array(x_train_files), np.array(y_train), random_state=SEED
)
# Half of the small dataset becomes the validation split, the other half stays
# as the test split. Labels are passed as an ndarray so that y_test / y_val
# come back as arrays rather than Python lists, which Keras may refuse.
x_test_files, x_val_files, y_test, y_val = train_test_split(
    x_test_files, np.array(y_test), test_size=0.5, shuffle=True, random_state=SEED
)

In [45]:
def load_img(path):
    """Load the image stored at `path` in grayscale mode and return it as an array."""
    image_utils = keras.preprocessing.image
    grayscale = image_utils.load_img(path, color_mode='grayscale')
    return image_utils.img_to_array(grayscale)

# Decode every listed file eagerly; each split ends up as one stacked array.
x_train, x_val, x_test = (
    np.array([load_img(p) for p in split_files])
    for split_files in (x_train_files, x_val_files, x_test_files)
)

In [46]:
# Divide every split by 255.0.
# NOTE: this rebinds the same names, so re-running the cell divides the
# already-divided arrays again.
x_train, x_val, x_test = (
    pixels / 255.0 for pixels in (x_train, x_val, x_test)
)

In [35]:
# TODO: add regularization?
# Baseline CNN: two stacked convolutions feeding a softmax classifier, no pooling.
model = keras.Sequential()
model.add(keras.layers.Conv2D(6, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(keras.layers.Conv2D(8, (4, 4), activation='relu'))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10, activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy.
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print('# of parameters:', np.sum(list(map(keras.backend.count_params, model.trainable_weights))))

# of parameters: 43166


In [63]:
# Report validation metrics after every epoch so over-fitting shows up in
# `history` without having to look at the test split.
# np.asarray: y_val may be a plain Python list coming out of train_test_split.
history = model.fit(
    x_train, y_train,
    epochs=10,
    validation_data=(x_val, np.asarray(y_val)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [65]:
print('Test prediction:')
# The model was compiled with one loss and metrics=['accuracy'], so evaluate()
# returns [loss, accuracy]; `score` is therefore the loss value.
# np.asarray: y_test may be a plain Python list coming out of train_test_split.
score, accuracy = model.evaluate(x_test, np.asarray(y_test))
print('loss:', score, 'accuracy:', accuracy)

Test prediction:


Okay, after 30 epochs we've overfitted: accuracy on the test dataset used to be around **92.5%**. Not great, but the architecture was a wild guess.

In [55]:
# Single convolution followed by 2x2 max pooling, then the softmax classifier.
pooling_model = keras.Sequential()
pooling_model.add(keras.layers.Conv2D(6, (3, 3), activation='relu', input_shape=(28, 28, 1)))
pooling_model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
pooling_model.add(keras.layers.Flatten())
pooling_model.add(keras.layers.Dense(10, activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy.
pooling_model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print('# of parameters:', np.sum(list(map(keras.backend.count_params, pooling_model.trainable_weights))))

# of parameters: 10210


In [59]:
# Report validation metrics after every epoch so over-fitting shows up in
# `history` without having to look at the test split.
# np.asarray: y_val may be a plain Python list coming out of train_test_split.
history = pooling_model.fit(
    x_train, y_train,
    epochs=30,
    validation_data=(x_val, np.asarray(y_val)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [60]:
print('Test pooling prediction:')
# The model was compiled with one loss and metrics=['accuracy'], so evaluate()
# returns [loss, accuracy]; `score` is therefore the loss value.
# np.asarray: y_test may be a plain Python list coming out of train_test_split.
score, accuracy = pooling_model.evaluate(x_test, np.asarray(y_test))
print('loss:', score, 'accuracy:', accuracy)

Test prediction:


Pooling reduced the number of parameters by a factor of 4 while achieving greater accuracy! **93.4%** is decent.
While pooling is a useful construct, this might be just because fewer parameters are easier to tune.

In [62]:
# LeNet-style stack: conv -> average pool -> conv -> average pool -> conv,
# then two dense layers; tanh activations everywhere except the softmax output.
lenet = keras.Sequential()
lenet.add(keras.layers.Conv2D(6, (5, 5), activation='tanh', input_shape=(28, 28, 1)))
lenet.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))
lenet.add(keras.layers.Conv2D(16, (5, 5), activation='tanh'))
lenet.add(keras.layers.AveragePooling2D(pool_size=(2, 2)))
lenet.add(keras.layers.Conv2D(120, (3, 3), activation='tanh'))
lenet.add(keras.layers.Flatten())
lenet.add(keras.layers.Dense(84, activation='tanh'))
lenet.add(keras.layers.Dense(10, activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy.
lenet.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print('# of parameters:', np.sum(list(map(keras.backend.count_params, lenet.trainable_weights))))

# of parameters: 61226


In [66]:
# Report validation metrics after every epoch so over-fitting shows up in
# `history` without having to look at the test split.
# np.asarray: y_val may be a plain Python list coming out of train_test_split.
history = lenet.fit(
    x_train, y_train,
    epochs=20,
    validation_data=(x_val, np.asarray(y_val)),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [67]:
print('Test LeNet prediction:')
# The model was compiled with one loss and metrics=['accuracy'], so evaluate()
# returns [loss, accuracy]; `score` is therefore the loss value.
# np.asarray: y_test may be a plain Python list coming out of train_test_split.
score, accuracy = lenet.evaluate(x_test, np.asarray(y_test))
print('loss:', score, 'accuracy:', accuracy)

Test LeNet prediction:


LeNet wins! After training for just 20 epochs we have accuracy above **94%**.

So, the showdown:

1. LeNet is the obvious winner. This architecture is canonical for a reason
2. One pooling layer performed better, presumably because it regularized the model
3. Plain CNN wasn't bad, but still 3rd

Note that while overall performance is not vastly greater than that of the feed-forward NN from assignment 2, we did not optimize the architecture much. Even with LeNet we could have at least trained for more epochs.

Also, we still used just 1/10th of the dataset.