In [1]:
import keras
from keras.datasets import mnist
import numpy as np

Using TensorFlow backend.


### Configure Tensorflow

In [2]:
def limit_mem():
    cfg = keras.backend.tf.ConfigProto()
    cfg.gpu_options.allow_growth = True
    keras.backend.set_session(keras.backend.tf.Session(config=cfg))

limit_mem()

### Load data

In [6]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [7]:
# Target Y
print(y_train.shape)
print(y_train[:10])

(60000,)
[5 0 4 1 9 2 1 3 1 4]


In [8]:
# Input X
print(X_train.shape)
print (X_train[:2])
print ("Single Image - Number looks like")
print (X_train[0])
print (X_train[0].shape)

(60000, 28, 28)
[[[0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  ..., 
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]]

 [[0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  ..., 
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]
  [0 0 0 ..., 0 0 0]]]
Single Image - Number looks like
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   3  18  18  18 126 136
  175  26 166 255 24

In [9]:
# We are adding the channel (black and white so 1 channel) dimension to the matrix 
# Since CNNs and Keras need input volumes which are 3D. TF needs the last dimensions to 
# be channel
#https://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.expand_dims.html
X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)

In [10]:
#Why do we do this?
X_train.shape

(60000, 28, 28, 1)

### One-hot encoding

In [11]:
from keras.utils.np_utils import to_categorical

def onehot(x):
    return to_categorical(x)

In [12]:
y_train = onehot(y_train)
y_test = onehot(y_test)
print(y_train[:5])

[[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]


### Normalize Inputs

In [24]:
x_mean = X_train.mean().astype(np.float32)
x_std = X_train.std().astype(np.float32)

print (x_mean)
print (x_std)
# He passes this into a Keras Lambda layer with the x_mean and x_std hard-coded
def normalize_input(x):
    return (x-x_mean)/x_std

33.3184
78.5675


# Linear Model

We always start with the simplest model first :)

In [25]:
from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.optimizers import Adam

#https://keras.io/models/sequential/
def get_linear_model():
    model = Sequential([
        # Lambda lets you pass in a custom function as a Layer
        # normalize_input is a function
        # https://keras.io/layers/core/#lambda
        Lambda(normalize_input, input_shape=(28,28,1)), 
        #re: input_shape(28,28,1), in TF, the last is channel 
        #i.e. (how deep is the input -- typically images are 3-4 channels)
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [26]:
lm = get_linear_model()

In [16]:
# What does this do?
from keras.preprocessing import image
gen = image.ImageDataGenerator()

In [17]:
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

### Train

In [18]:
lm.fit_generator(batches, batches.n,
         nb_epoch=1, validation_data=test_batches,
         nb_val_samples=test_batches.n)

Epoch 1/1


<keras.callbacks.History at 0x7fa3e1ddda58>

In [21]:
# Set learning rate
lm.optimizer.lr=0.1
lm.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2)

Epoch 1/4
6s - loss: 0.2678 - acc: 0.9254 - val_loss: 0.2820 - val_acc: 0.9207
Epoch 2/4
6s - loss: 0.2675 - acc: 0.9259 - val_loss: 0.2750 - val_acc: 0.9262
Epoch 3/4
6s - loss: 0.2638 - acc: 0.9270 - val_loss: 0.2765 - val_acc: 0.9234
Epoch 4/4
6s - loss: 0.2606 - acc: 0.9270 - val_loss: 0.2917 - val_acc: 0.9205


<keras.callbacks.History at 0x7fa3f00c4748>

# Neural Network Model

Let's try adding a fully-connected (Dense) layer

In [30]:
def get_fc_model():
    model = Sequential([
        Lambda(normalize_input, input_shape=(28,28,1)),
        Flatten(),
        Dense(512, activation='softmax'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [31]:
fc = get_fc_model()

In [33]:
fc.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/1
6s - loss: 0.7477 - acc: 0.9179 - val_loss: 0.5452 - val_acc: 0.9268


<keras.callbacks.History at 0x7fa3b452bf28>

In [34]:
#Increase the learning rate from the Adam() default of .001
#https://keras.io/optimizers/
fc.optimizer.lr=0.1

In [35]:
fc.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/4
6s - loss: 0.4661 - acc: 0.9253 - val_loss: 0.4084 - val_acc: 0.9240
Epoch 2/4
6s - loss: 0.3687 - acc: 0.9283 - val_loss: 0.3487 - val_acc: 0.9260
Epoch 3/4
6s - loss: 0.3221 - acc: 0.9316 - val_loss: 0.3239 - val_acc: 0.9289
Epoch 4/4
6s - loss: 0.2964 - acc: 0.9338 - val_loss: 0.3035 - val_acc: 0.9323


<keras.callbacks.History at 0x7fa3b4540438>

In [38]:
#Decrease Learning Rate A Bit
fc.optimizer.lr=0.01

In [39]:
fc.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/4
6s - loss: 0.2349 - acc: 0.9425 - val_loss: 0.2708 - val_acc: 0.9335
Epoch 2/4
6s - loss: 0.2323 - acc: 0.9426 - val_loss: 0.2701 - val_acc: 0.9316
Epoch 3/4
6s - loss: 0.2214 - acc: 0.9456 - val_loss: 0.2573 - val_acc: 0.9374
Epoch 4/4
6s - loss: 0.2186 - acc: 0.9464 - val_loss: 0.2682 - val_acc: 0.9318


<keras.callbacks.History at 0x7fa3b4540198>

# CNN

In [43]:
from keras.layers.convolutional import Convolution2D, MaxPooling2D

def get_cnn_model():
    model = Sequential([
        Lambda(normalize_input, input_shape=(28,28,1)), #image is 28x28, 1 channel
        #https://keras.io/layers/convolutional/
        Convolution2D(32,3,3, activation='relu'), #filter size = 3x3, 32 filters
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(), #https://keras.io/layers/pooling/
        Convolution2D(64,3,3, activation='relu'), #filter size = 3x3, 64 filters
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(), #what does this do???? 3D to 2D? Does it sum across depth layer?
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [45]:
cnn = get_cnn_model()

In [48]:
cnn.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/4
6s - loss: 0.0104 - acc: 0.9966 - val_loss: 0.0294 - val_acc: 0.9929
Epoch 2/4
6s - loss: 0.0065 - acc: 0.9979 - val_loss: 0.0270 - val_acc: 0.9933
Epoch 3/4
6s - loss: 0.0078 - acc: 0.9974 - val_loss: 0.0378 - val_acc: 0.9921
Epoch 4/4
6s - loss: 0.0074 - acc: 0.9978 - val_loss: 0.0294 - val_acc: 0.9931


<keras.callbacks.History at 0x7fa39f53f6a0>

# Overfitting

If you look at the above, it looks like we might be overfitting
Our loss is going down, but our validation loss isn't...

### Data Augmentation

In [50]:
cnn = get_cnn_model()

In [52]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                                height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [53]:
cnn.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/1
6s - loss: 0.1954 - acc: 0.9370 - val_loss: 0.0843 - val_acc: 0.9712


<keras.callbacks.History at 0x7fa39d1f4940>

In [56]:
cnn.optimizer.lr=0.1
cnn.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/4
6s - loss: 0.0548 - acc: 0.9826 - val_loss: 0.0454 - val_acc: 0.9857
Epoch 2/4
6s - loss: 0.0486 - acc: 0.9845 - val_loss: 0.0489 - val_acc: 0.9858
Epoch 3/4
6s - loss: 0.0435 - acc: 0.9868 - val_loss: 0.0301 - val_acc: 0.9900
Epoch 4/4
6s - loss: 0.0392 - acc: 0.9879 - val_loss: 0.0304 - val_acc: 0.9899


<keras.callbacks.History at 0x7fa39d04f860>

In [58]:
cnn.optimizer.lr=0.01
cnn.fit_generator(batches, batches.n, nb_epoch=8, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/8
6s - loss: 0.0368 - acc: 0.9888 - val_loss: 0.0278 - val_acc: 0.9904
Epoch 2/8
6s - loss: 0.0357 - acc: 0.9892 - val_loss: 0.0286 - val_acc: 0.9903
Epoch 3/8
7s - loss: 0.0321 - acc: 0.9898 - val_loss: 0.0413 - val_acc: 0.9871
Epoch 4/8
6s - loss: 0.0298 - acc: 0.9905 - val_loss: 0.0270 - val_acc: 0.9917
Epoch 5/8
6s - loss: 0.0302 - acc: 0.9910 - val_loss: 0.0275 - val_acc: 0.9918
Epoch 6/8
6s - loss: 0.0280 - acc: 0.9912 - val_loss: 0.0352 - val_acc: 0.9886
Epoch 7/8
6s - loss: 0.0281 - acc: 0.9913 - val_loss: 0.0308 - val_acc: 0.9913
Epoch 8/8
6s - loss: 0.0261 - acc: 0.9917 - val_loss: 0.0283 - val_acc: 0.9914


<keras.callbacks.History at 0x7fa39d04fba8>

As you can see, the validation loss isn't going down, which suggests we're overfitting and also that we're not converging. This could be because our learning rate is too high. We're jumping around too far each time to find the correct global minimum of the loss function. Let's try decreasing it to see if we can be more gentle with our updates

In [59]:
cnn.optimizer.lr=0.001
cnn.fit_generator(batches, batches.n, nb_epoch=8, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/8
6s - loss: 0.0250 - acc: 0.9918 - val_loss: 0.0274 - val_acc: 0.9905
Epoch 2/8
6s - loss: 0.0251 - acc: 0.9919 - val_loss: 0.0318 - val_acc: 0.9912
Epoch 3/8
6s - loss: 0.0239 - acc: 0.9924 - val_loss: 0.0212 - val_acc: 0.9929
Epoch 4/8
6s - loss: 0.0226 - acc: 0.9930 - val_loss: 0.0275 - val_acc: 0.9912
Epoch 5/8
6s - loss: 0.0233 - acc: 0.9928 - val_loss: 0.0278 - val_acc: 0.9925
Epoch 6/8
6s - loss: 0.0226 - acc: 0.9931 - val_loss: 0.0273 - val_acc: 0.9910
Epoch 7/8
6s - loss: 0.0203 - acc: 0.9935 - val_loss: 0.0295 - val_acc: 0.9915
Epoch 8/8
6s - loss: 0.0225 - acc: 0.9928 - val_loss: 0.0279 - val_acc: 0.9910


<keras.callbacks.History at 0x7fa39d04fe80>

Validation loss is still bouncing around / not going down. Let's lower it again

In [60]:
model.optimizer.lr=0.0001
cnn.fit_generator(batches, batches.n, nb_epoch=10, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/10
6s - loss: 0.0216 - acc: 0.9936 - val_loss: 0.0283 - val_acc: 0.9918
Epoch 2/10
6s - loss: 0.0201 - acc: 0.9936 - val_loss: 0.0275 - val_acc: 0.9924
Epoch 3/10
6s - loss: 0.0202 - acc: 0.9936 - val_loss: 0.0301 - val_acc: 0.9907
Epoch 4/10
6s - loss: 0.0196 - acc: 0.9942 - val_loss: 0.0270 - val_acc: 0.9925
Epoch 5/10
6s - loss: 0.0211 - acc: 0.9935 - val_loss: 0.0277 - val_acc: 0.9913
Epoch 6/10
6s - loss: 0.0193 - acc: 0.9940 - val_loss: 0.0277 - val_acc: 0.9914
Epoch 7/10
6s - loss: 0.0195 - acc: 0.9940 - val_loss: 0.0257 - val_acc: 0.9929
Epoch 8/10
6s - loss: 0.0184 - acc: 0.9942 - val_loss: 0.0283 - val_acc: 0.9916
Epoch 9/10
6s - loss: 0.0183 - acc: 0.9941 - val_loss: 0.0246 - val_acc: 0.9927
Epoch 10/10
6s - loss: 0.0189 - acc: 0.9941 - val_loss: 0.0307 - val_acc: 0.9910


<keras.callbacks.History at 0x7fa39d04fbe0>

Okay playing with the learning rate doesn't appear to be helping us. We're overfitting now.

# Batchnorm

So now we've tried Data Augmentation, let's improve our CNN with Batchnormalization

In [87]:
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

#https://keras.io/layers/normalization/
def get_bn_model():
    model = Sequential([
        Lambda(normalize_input, input_shape=(28,28,1)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [88]:
bn_model = get_bn_model()

In [89]:
bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/1
50s - loss: 0.1628 - acc: 0.9499 - val_loss: 0.0761 - val_acc: 0.9750


<keras.callbacks.History at 0x7fa3c0d06e80>

In [91]:
model.optimizer.lr=0.1
bn_model.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/4
50s - loss: 0.0725 - acc: 0.9781 - val_loss: 0.0473 - val_acc: 0.9853
Epoch 2/4
50s - loss: 0.0593 - acc: 0.9812 - val_loss: 0.0409 - val_acc: 0.9870
Epoch 3/4
51s - loss: 0.0499 - acc: 0.9845 - val_loss: 0.0430 - val_acc: 0.9872
Epoch 4/4
50s - loss: 0.0466 - acc: 0.9859 - val_loss: 0.0532 - val_acc: 0.9843


<keras.callbacks.History at 0x7fa3c0a05da0>

In [92]:
model.optimizer.lr=0.01
bn_model.fit_generator(batches, batches.n, nb_epoch=12, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/12
50s - loss: 0.0406 - acc: 0.9874 - val_loss: 0.0372 - val_acc: 0.9880
Epoch 2/12
50s - loss: 0.0403 - acc: 0.9879 - val_loss: 0.0457 - val_acc: 0.9860
Epoch 3/12
50s - loss: 0.0361 - acc: 0.9891 - val_loss: 0.0418 - val_acc: 0.9879
Epoch 4/12
50s - loss: 0.0341 - acc: 0.9897 - val_loss: 0.0386 - val_acc: 0.9891
Epoch 5/12
50s - loss: 0.0317 - acc: 0.9902 - val_loss: 0.0276 - val_acc: 0.9916
Epoch 6/12
50s - loss: 0.0318 - acc: 0.9903 - val_loss: 0.0285 - val_acc: 0.9916
Epoch 7/12
50s - loss: 0.0295 - acc: 0.9908 - val_loss: 0.0319 - val_acc: 0.9890
Epoch 8/12
50s - loss: 0.0253 - acc: 0.9924 - val_loss: 0.0315 - val_acc: 0.9902
Epoch 9/12
50s - loss: 0.0257 - acc: 0.9921 - val_loss: 0.0269 - val_acc: 0.9911
Epoch 10/12
50s - loss: 0.0242 - acc: 0.9925 - val_loss: 0.0290 - val_acc: 0.9911
Epoch 11/12
50s - loss: 0.0270 - acc: 0.9914 - val_loss: 0.0285 - val_acc: 0.9912
Epoch 12/12
50s - loss: 0.0229 - acc: 0.9926 - val_loss: 0.0219 - val_acc: 0.9929


<keras.callbacks.History at 0x7fa3c1b59eb8>

In [93]:
model.optimizer.lr=0.001
bn_model.fit_generator(batches, batches.n, nb_epoch=12, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/12
50s - loss: 0.0242 - acc: 0.9929 - val_loss: 0.0203 - val_acc: 0.9936
Epoch 2/12
51s - loss: 0.0218 - acc: 0.9930 - val_loss: 0.0311 - val_acc: 0.9910
Epoch 3/12
50s - loss: 0.0210 - acc: 0.9934 - val_loss: 0.0204 - val_acc: 0.9933
Epoch 4/12
50s - loss: 0.0208 - acc: 0.9932 - val_loss: 0.0278 - val_acc: 0.9914
Epoch 5/12
50s - loss: 0.0187 - acc: 0.9941 - val_loss: 0.0278 - val_acc: 0.9917
Epoch 6/12
50s - loss: 0.0205 - acc: 0.9936 - val_loss: 0.0232 - val_acc: 0.9931
Epoch 7/12
50s - loss: 0.0208 - acc: 0.9933 - val_loss: 0.0239 - val_acc: 0.9926
Epoch 8/12
50s - loss: 0.0195 - acc: 0.9938 - val_loss: 0.0226 - val_acc: 0.9919
Epoch 9/12
50s - loss: 0.0185 - acc: 0.9940 - val_loss: 0.0244 - val_acc: 0.9920
Epoch 10/12
50s - loss: 0.0187 - acc: 0.9942 - val_loss: 0.0208 - val_acc: 0.9936
Epoch 11/12
50s - loss: 0.0171 - acc: 0.9944 - val_loss: 0.0222 - val_acc: 0.9933
Epoch 12/12
50s - loss: 0.0185 - acc: 0.9940 - val_loss: 0.0201 - val_acc: 0.9936


<keras.callbacks.History at 0x7fa3c17a4080>

# Dropout


In [119]:
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dropout

#https://keras.io/layers/normalization/
def get_do_model():
    model = Sequential([
        Lambda(normalize_input, input_shape=(28,28,1)),
        Convolution2D(32,3,3, activation='relu'),
#http://forums.fast.ai/t/batchnormalization-axis-1-when-used-on-convolutional-layers/214
#https://github.com/fchollet/keras/issues/1921
        BatchNormalization(), #for tensorflow, use -1 (channels which are in the last index (28,28,1)) 
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5), #https://keras.io/layers/core/
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [120]:
model = get_do_model()

In [121]:
bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) #2 means just log the epochs

Epoch 1/1
51s - loss: 0.0062 - acc: 0.9977 - val_loss: 0.0258 - val_acc: 0.9930


<keras.callbacks.History at 0x7fa3bdef5358>

In [100]:
model.optimizer.lr=0.1
bn_model.fit_generator(batches, batches.n, nb_epoch=4, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) 

Epoch 1/4
50s - loss: 0.0167 - acc: 0.9946 - val_loss: 0.0225 - val_acc: 0.9918
Epoch 2/4
50s - loss: 0.0188 - acc: 0.9941 - val_loss: 0.0209 - val_acc: 0.9938
Epoch 3/4
50s - loss: 0.0149 - acc: 0.9954 - val_loss: 0.0188 - val_acc: 0.9939
Epoch 4/4
50s - loss: 0.0160 - acc: 0.9951 - val_loss: 0.0266 - val_acc: 0.9924


<keras.callbacks.History at 0x7fa3c02183c8>

In [101]:
model.optimizer.lr=0.01
bn_model.fit_generator(batches, batches.n, nb_epoch=12, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) 

Epoch 1/12
50s - loss: 0.0146 - acc: 0.9955 - val_loss: 0.0174 - val_acc: 0.9939
Epoch 2/12
50s - loss: 0.0137 - acc: 0.9957 - val_loss: 0.0191 - val_acc: 0.9944
Epoch 3/12
50s - loss: 0.0155 - acc: 0.9950 - val_loss: 0.0244 - val_acc: 0.9923
Epoch 4/12
50s - loss: 0.0155 - acc: 0.9952 - val_loss: 0.0206 - val_acc: 0.9933
Epoch 5/12
50s - loss: 0.0147 - acc: 0.9954 - val_loss: 0.0178 - val_acc: 0.9952
Epoch 6/12
50s - loss: 0.0139 - acc: 0.9955 - val_loss: 0.0285 - val_acc: 0.9912
Epoch 7/12
50s - loss: 0.0142 - acc: 0.9956 - val_loss: 0.0174 - val_acc: 0.9949
Epoch 8/12
50s - loss: 0.0131 - acc: 0.9957 - val_loss: 0.0257 - val_acc: 0.9921
Epoch 9/12
50s - loss: 0.0160 - acc: 0.9947 - val_loss: 0.0180 - val_acc: 0.9943
Epoch 10/12
50s - loss: 0.0125 - acc: 0.9960 - val_loss: 0.0219 - val_acc: 0.9920
Epoch 11/12
50s - loss: 0.0128 - acc: 0.9959 - val_loss: 0.0214 - val_acc: 0.9935
Epoch 12/12
50s - loss: 0.0128 - acc: 0.9959 - val_loss: 0.0218 - val_acc: 0.9937


<keras.callbacks.History at 0x7fa3c0218860>

In [102]:
model.optimizer.lr=0.001
bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2) 

Epoch 1/1
50s - loss: 0.0122 - acc: 0.9960 - val_loss: 0.0203 - val_acc: 0.9931


<keras.callbacks.History at 0x7fa3c02185f8>

# Ensembling

So now we've added data augmentation, batch normalization, and dropout. Let's add one final component -- Ensembling!

In [None]:
# We're going to "ensemble" on the same model architecture, 
# Just train 4 different models using 4 different sets of
# randomized weights -- the randomness in the weights is the only 
# difference

In [127]:
def fit_model():
    model = get_do_model()
    bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
                nb_val_samples=test_batches.n, verbose=2)
#     model.optimizer.lr=0.1
#     bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
#                 nb_val_samples=test_batches.n, verbose=2) 
#     model.optimizer.lr=0.01
#     bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
#                 nb_val_samples=test_batches.n, verbose=2) 
#     model.optimizer.lr=0.001
#     bn_model.fit_generator(batches, batches.n, nb_epoch=1, validation_data=test_batches,
#                 nb_val_samples=test_batches.n, verbose=2) 

In [128]:
models = [fit_model() for i in range(6)]
model_path = "data/mnist/models/"
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

Epoch 1/1
52s - loss: 0.0060 - acc: 0.9980 - val_loss: 0.0238 - val_acc: 0.9943
Epoch 1/1
52s - loss: 0.0063 - acc: 0.9977 - val_loss: 0.0227 - val_acc: 0.9943
Epoch 1/1
52s - loss: 0.0064 - acc: 0.9977 - val_loss: 0.0225 - val_acc: 0.9942
Epoch 1/1
52s - loss: 0.0066 - acc: 0.9978 - val_loss: 0.0227 - val_acc: 0.9941
Epoch 1/1
53s - loss: 0.0043 - acc: 0.9987 - val_loss: 0.0249 - val_acc: 0.9943
Epoch 1/1
51s - loss: 0.0057 - acc: 0.9979 - val_loss: 0.0202 - val_acc: 0.9939


AttributeError: 'NoneType' object has no attribute 'save_weights'

In [108]:
evals = np.array([m.evaluate(X_test,y_test,batch_size=256) for m in models])

NameError: name 'models' is not defined

In [None]:
evals.mean(axis=0)

In [None]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [None]:
all_preds.shape

In [None]:
avg_preds = all_preds.mean(axis=0)

In [None]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()