In [6]:
%%time
import mnist
import numpy as np
from conv import Conv3x3
from maxpool import MaxPool2
from softmax import Softmax

# Only a small slice of each MNIST split is used in the interest of time:
# the first NUM_TRAIN training examples and the first NUM_TEST test examples.
# Feel free to change these sizes if you want.
NUM_TRAIN = 500
NUM_TEST = 100

train_images = mnist.train_images()[:NUM_TRAIN]
train_labels = mnist.train_labels()[:NUM_TRAIN]
test_images = mnist.test_images()[:NUM_TEST]
test_labels = mnist.test_labels()[:NUM_TEST]

# Layer stack; the shape notes describe the tensor going in and coming out.
conv = Conv3x3(8)                  # 28x28x1 -> 26x26x8
pool = MaxPool2()                  # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10) # 13x13x8 -> 10

def forward(image, label):
  '''
  Runs a single image through the network (conv -> pool -> softmax) and
  scores the result against the expected label.
  - image is a 2d numpy array
  - label is a digit

  Returns a tuple (out, loss, acc):
  - out is whatever softmax.forward() produced for this image
  - loss is the cross-entropy loss, -ln(out[label])
  - acc is 1 when the highest-scoring entry of out is the label, else 0
  '''
  # Shift pixel values from [0, 255] to [-0.5, 0.5] before feeding the
  # network; this is standard practice and makes them easier to work with.
  normalized = (image / 255) - 0.5
  probs = softmax.forward(pool.forward(conv.forward(normalized)))

  # Cross-entropy loss uses the natural log (np.log).
  loss = -np.log(probs[label])

  # A prediction counts as correct when the arg-max entry matches the label.
  if np.argmax(probs) == label:
    acc = 1
  else:
    acc = 0

  return probs, loss, acc

def train(im, label, lr=.005):
  '''
  Performs one complete training step (forward pass followed by backprop)
  on a single example and reports how the network did on it.
  - im is a 2d numpy array
  - label is a digit
  - lr is the learning rate handed to the softmax and conv layers

  Returns the tuple (loss, acc) computed by forward() for this example.
  '''
  # Forward pass
  out, loss, acc = forward(im, label)

  # Seed gradient of the loss w.r.t. the network output: since
  # loss = -ln(out[label]), only the entry at the true label is non-zero.
  grad = np.zeros(10)
  grad[label] = -1 / out[label]

  # Push the gradient back through the layers in reverse order. The softmax
  # and conv layers also receive lr (presumably to update their own weights
  # - see their backprop implementations); the pooling layer does not.
  grad = softmax.backprop(grad, lr)
  grad = pool.backprop(grad)
  conv.backprop(grad, lr)

  return loss, acc

print('MNIST CNN initialized!')

# Train the CNN for 7 epochs (7 full passes over the training subset).
for epoch in range(7):
  print('--- Epoch %d ---' % (epoch + 1))

  # Shuffle the training data; images and labels are indexed with the same
  # permutation so each image stays paired with its label.
  permutation = np.random.permutation(len(train_images))
  train_images = train_images[permutation]
  train_labels = train_labels[permutation]

  # Train! loss / num_correct accumulate over the current 100-step window.
  loss = 0
  num_correct = 0
  for i, (im, label) in enumerate(zip(train_images, train_labels)):
    l, acc = train(im, label)
    loss += l
    num_correct += acc

    # Report only AFTER the 100th step of the window has been trained, so the
    # window really contains 100 samples. That makes `loss / 100` a true
    # average and lets the raw `num_correct` count double as a percentage.
    if i % 100 == 99:
      print(
        '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
        (i + 1, loss / 100, num_correct)
      )
      loss = 0
      num_correct = 0

# Evaluate the CNN on the held-out test examples (forward pass only, so no
# backprop is run here).
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for test_image, test_label in zip(test_images, test_labels):
  # The network output itself is not needed here, only the loss and hit flag.
  _, sample_loss, hit = forward(test_image, test_label)
  loss = loss + sample_loss
  num_correct = num_correct + hit

# Report the mean loss and the fraction of correct predictions.
num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)

MNIST CNN initialized!
--- Epoch 1 ---
[Step 100] Past 100 steps: Average Loss 2.176 | Accuracy: 26%
[Step 200] Past 100 steps: Average Loss 1.902 | Accuracy: 47%
[Step 300] Past 100 steps: Average Loss 1.383 | Accuracy: 61%
[Step 400] Past 100 steps: Average Loss 0.925 | Accuracy: 71%
[Step 500] Past 100 steps: Average Loss 0.790 | Accuracy: 74%
--- Epoch 2 ---
[Step 100] Past 100 steps: Average Loss 0.766 | Accuracy: 76%
[Step 200] Past 100 steps: Average Loss 0.573 | Accuracy: 84%
[Step 300] Past 100 steps: Average Loss 0.587 | Accuracy: 83%
[Step 400] Past 100 steps: Average Loss 0.546 | Accuracy: 85%
[Step 500] Past 100 steps: Average Loss 0.483 | Accuracy: 87%
--- Epoch 3 ---
[Step 100] Past 100 steps: Average Loss 0.518 | Accuracy: 81%
[Step 200] Past 100 steps: Average Loss 0.390 | Accuracy: 87%
[Step 300] Past 100 steps: Average Loss 0.526 | Accuracy: 88%
[Step 400] Past 100 steps: Average Loss 0.387 | Accuracy: 87%
[Step 500] Past 100 steps: Average Loss 0.429 | Accuracy: 87%

Run with Keras library

In [7]:
import numpy as np
import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.utils import to_categorical
from keras.optimizers import SGD

# Load the full MNIST splits. Each image array is rescaled with
# (x / 255) - 0.5 and given a trailing channel axis (axis=3) so a sample
# matches the (28, 28, 1) input_shape declared on the Conv2D layer below.
train_images = np.expand_dims((mnist.train_images() / 255) - 0.5, axis=3)
train_labels = mnist.train_labels()
test_images = np.expand_dims((mnist.test_images() / 255) - 0.5, axis=3)
test_labels = mnist.test_labels()

# Build the network layer by layer: 8 bias-free 3x3 conv filters, 2x2 max
# pooling, flatten, then a 10-way softmax classifier.
model = Sequential()
model.add(Conv2D(8, 3, input_shape=(28, 28, 1), use_bias=False))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))

# Plain SGD with a 0.005 learning rate, optimising categorical cross-entropy.
model.compile(
  optimizer=SGD(lr=.005),
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Labels are one-hot encoded for the categorical cross-entropy loss.
# batch_size=1 means one weight update per training sample; the test split
# is passed as validation data.
model.fit(
  x=train_images,
  y=to_categorical(train_labels),
  batch_size=1,
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)


Using TensorFlow backend.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 60000 samples, validate on 10000 samples
Epoch 1/3





Epoch 2/3
 3296/60000 [>.............................] - ETA: 4:18 - loss: 0.1211 - acc: 0.9612

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-packages\IPython\core\interactiveshell.py", line 3319, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-10bf3fd7f453>", line 33, in <module>
    validation_data=(test_images, to_categorical(test_labels)),
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-packages\keras\engine\training.py", line 1039, in fit
    validation_steps=validation_steps)
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-packages\keras\engine\training_arrays.py", line 199, in fit_loop
    outs = f(ins_batch)
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\berkc\Miniconda3\envs\vandy\lib\site-

KeyboardInterrupt: 