Based on 'Keras CNN with over 99% accuracy' kernel

In [1]:
import os

import numpy as np

from keras.utils import np_utils
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization

Using TensorFlow backend.


In [2]:
# Input CSVs (read relative to the working directory) and the
# submission file this notebook writes at the end.
train_file, test_file = "input/train.csv", "input/test.csv"
output_file = "submission.csv"

In [3]:
# Read the whole training CSV as integers, skipping the header row.
mnist_dataset = np.loadtxt(
    train_file,
    delimiter=',',
    skiprows=1,
    dtype=int,
)

In [4]:
# Hold out a fixed fraction of the labelled rows for validation.
val_split = 0.125
split_seed = 42  # fixed so the same rows are held out after a kernel restart
n_raw = mnist_dataset.shape[0]
n_val = int(n_raw * val_split + 0.5)  # round to the nearest whole row
n_train = n_raw - n_val

# Shuffle via a seeded index permutation rather than np.random.shuffle():
# the loaded array is left untouched, so re-running this cell, or resuming
# training from a checkpoint after a restart, reproduces the same split and
# rows the model has already trained on cannot drift into the validation set.
order = np.random.RandomState(split_seed).permutation(n_raw)
val_rows = mnist_dataset[order[:n_val]]
train_rows = mnist_dataset[order[n_val:]]

# Column 0 is used as the label; the remaining columns are the pixels.
x_val, x_train = val_rows[:, 1:], train_rows[:, 1:]
y_val, y_train = val_rows[:, 0], train_rows[:, 0]

# Scale pixel values by 1/255 and store them as float32.
x_train = x_train.astype("float32")/255.0
x_val = x_val.astype("float32")/255.0

# Derive the class count once from all labels and pass it explicitly, so the
# two one-hot matrices always have the same width even if one subset happens
# to lack the highest label.
n_classes = int(mnist_dataset[:, 0].max()) + 1
y_train = np_utils.to_categorical(y_train, num_classes=n_classes)
y_val = np_utils.to_categorical(y_val, num_classes=n_classes)

# Conv2D expects (rows, height, width, channels); each row is a 28x28
# single-channel image.
x_train = x_train.reshape(n_train, 28, 28, 1)
x_val = x_val.reshape(n_val, 28, 28, 1)

Feel free to modify the layers.

In [27]:
# Settings shared by every convolution layer.
conv_opts = dict(kernel_size=(3, 3), padding='same', kernel_initializer='he_normal')

model = Sequential([
    # Conv stage 1: takes the 28x28x1 input, no pooling.
    Conv2D(filters=32, input_shape=(28, 28, 1), **conv_opts),
    Activation('elu'),
    BatchNormalization(),

    # Conv stage 2, followed by 2x2 max-pooling.
    Conv2D(filters=64, **conv_opts),
    Activation('elu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Conv stage 3, followed by 2x2 max-pooling.
    Conv2D(filters=128, **conv_opts),
    Activation('elu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Conv stage 4, pooled and then regularised with dropout.
    Conv2D(filters=128, **conv_opts),
    Activation('elu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Fully connected classifier head.
    Flatten(),
    Dense(1024, kernel_initializer='he_normal'),
    Activation('elu'),
    Dropout(0.5),

    Dense(256, kernel_initializer='he_normal'),
    Activation('elu'),
    Dropout(0.5),

    # Ten softmax outputs, one per class.
    Dense(10, kernel_initializer='he_normal'),
    Activation('softmax'),
])

In [16]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_8 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
activation_9 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 32)        0         
__________

Keras's `ImageDataGenerator` simplifies augmentation, i.e. it randomly modifies the training inputs to prevent overfitting. It can also normalize the input; in that case, remember to apply the same normalization to the validation and test sets.

In [28]:
# Random augmentation applied to training batches only: small rotations,
# horizontal/vertical shifts and zooms.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)
datagen = ImageDataGenerator(**augmentation)

The parameters here were chosen for the Kaggle kernel. For better performance, try reducing the learning rate and increasing the number of epochs. I was able to reach 99.5% accuracy, which put me at place 75 on the leaderboard. Even then, training takes less than an hour on a GPU.

In [33]:
# Multi-class cross-entropy on the one-hot labels, optimised with Adam at a
# learning rate of 5e-4; accuracy is reported alongside the loss.
model.compile(
    optimizer=Adam(lr=5e-4),
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

EarlyStopping only matters if you plan to run many epochs. ModelCheckpoint is more useful here, as it lets you resume training after a kernel restart: just run
> model.load_weights('mnist.h5')

In [34]:
# Stop training once val_loss has gone `patience` epochs without improving.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=2,
    verbose=2,
    mode='auto',
)
# Overwrite mnist.h5 only when val_loss improves, keeping the best snapshot.
checkpoint = ModelCheckpoint(
    'mnist.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
callbacks = [early_stopping, checkpoint]

Model training. Note that the training loss is initially much larger than the validation loss, because Dropout is only active during training.

In [None]:
# Optional shortcut: set read_model = True to replace `model` with the
# network previously written to output/model.h5 by the model.save(...) cell.
# `os` and `load_model` are imported in the import cell at the top of the
# notebook, so this cell also works on a freshly restarted kernel.
read_model = False
if read_model and os.path.isfile("output/model.h5"):
    model = load_model("output/model.h5")

In [None]:
# Train on augmented mini-batches; the un-augmented validation set is
# evaluated after each epoch and drives the callbacks.
batch_size = 2048
# Ceiling division gives a whole number of steps that still covers the
# final, partial batch (n_train is generally not a multiple of batch_size).
steps_per_epoch = -(-n_train // batch_size)

hist = model.fit_generator(datagen.flow(x_train, y_train, batch_size = batch_size),
                           steps_per_epoch = steps_per_epoch,
                           epochs = 10,
                           verbose = 1,
                           validation_data = (x_val, y_val),
                           callbacks = callbacks)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
 1/17 [>.............................] - ETA: 11s - loss: 0.0406 - acc: 0.9878

In [46]:
# Persist the trained network under output/. `os` (and `load_model`, which
# used to be imported here) now come from the import cell at the top.
# Create the directory first so that open() and model.save() do not fail
# when it does not exist yet.
if not os.path.isdir("output"):
    os.makedirs("output")

# Model description as JSON.
json_model = model.to_json()
with open("output/model.json", "w") as json_file:
    json_file.write(json_model)
# Whole model in one HDF5 file; this is the file the optional
# `read_model` cell loads.
model.save("output/model.h5")

In [47]:
# Load the unlabelled test CSV (header skipped) and give it the same
# preprocessing as the training images: scale by 1/255 as float32, then
# reshape each row to a 28x28 single-channel image.
mnist_testset = np.loadtxt(test_file, delimiter=',', skiprows=1, dtype=int)
n_test = mnist_testset.shape[0]
x_test = (mnist_testset.astype("float32") / 255.0).reshape(n_test, 28, 28, 1)

In [48]:
# Run the network over the test images, 128 at a time.
y_test = model.predict(
    x_test,
    batch_size=128,
)

y_test consists of class probabilities; I now select the class with the highest probability.

In [49]:
# Index of the largest value along axis 1, i.e. the predicted class per row.
y_index = y_test.argmax(axis=1)

In [50]:
# Write the submission CSV: a header line, then one "ImageId,Label" row per
# test image, with ImageId counting from 1.
with open(output_file, 'w') as f:
    f.write('ImageId,Label\n')
    f.writelines(
        "%d,%s\n" % (row + 1, y_index[row])
        for row in range(n_test)
    )