Based on 'Keras CNN with over 99% accuracy' kernel

In [19]:
import numpy as np

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization

In [6]:
# Locations of the input CSV files and of the submission file written at the end.
train_file, test_file = "input/train.csv", "input/test.csv"
output_file = "submission.csv"

In [7]:
# Parse the training CSV into an integer array, skipping the header row.
mnist_dataset = np.loadtxt(
    train_file,
    dtype=int,
    delimiter=',',
    skiprows=1,
)

In [8]:
# Hold out a fraction of the labelled rows for validation.
val_split = 0.125
n_raw = mnist_dataset.shape[0]
n_val = int(n_raw * val_split + 0.5)  # round to the nearest whole sample
n_train = n_raw - n_val

# Column 0 is the label; the remaining columns are the pixel values.
# Derive the class count once from the full label column so that both splits
# are one-hot encoded to the same width, even if one of them happens to lack
# the highest label.
n_classes = int(mnist_dataset[:, 0].max()) + 1

# Shuffle the rows in place so the validation slice is a random sample.
np.random.shuffle(mnist_dataset)
x_val, x_train = mnist_dataset[:n_val, 1:], mnist_dataset[n_val:, 1:]
y_val, y_train = mnist_dataset[:n_val, 0], mnist_dataset[n_val:, 0]

# Rescale the pixel values by 1/255 (assumes 8-bit intensities in 0..255).
x_train = x_train.astype("float32") / 255.0
x_val = x_val.astype("float32") / 255.0

# One-hot encode the labels; the class count is passed positionally.
y_train = np_utils.to_categorical(y_train, n_classes)
y_val = np_utils.to_categorical(y_val, n_classes)

# Conv2D expects (samples, height, width, channels).
x_train = x_train.reshape(n_train, 28, 28, 1)
x_val = x_val.reshape(n_val, 28, 28, 1)

Feel free to modify the layers.

In [32]:
def _add_conv_block(net, filters, **conv_kwargs):
    """Append a 3x3 Conv2D -> ELU -> BatchNormalization stack to *net*.

    Extra keyword arguments (e.g. ``input_shape``, ``padding``) are forwarded
    to the Conv2D layer.
    """
    net.add(Conv2D(filters=filters, kernel_size=(3, 3),
                   kernel_initializer='he_normal', **conv_kwargs))
    net.add(Activation('elu'))
    net.add(BatchNormalization())


model = Sequential()

# Stage 1: two 32-filter conv blocks followed by 2x2 max pooling.
# Only the first convolution uses 'same' padding and declares the input shape.
_add_conv_block(model, 32, input_shape=(28, 28, 1), padding='same')
_add_conv_block(model, 32)
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: two 64-filter conv blocks, 2x2 max pooling, then dropout.
_add_conv_block(model, 64)
_add_conv_block(model, 64)
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head: one hidden dense layer with dropout, then softmax.
model.add(Flatten())
model.add(Dense(128, kernel_initializer='he_normal'))
model.add(Activation('elu'))
model.add(Dropout(0.5))
# One output unit per class; n_classes is the width of the one-hot labels,
# so the output layer always matches the targets instead of a hard-coded 10.
model.add(Dense(n_classes, kernel_initializer='he_normal'))

model.add(Activation('softmax'))

This Keras function simplifies augmentation, i.e. it randomly modifies the input for training to prevent overfitting. You can also normalize input with this function, just remember to adjust the validation and test sets accordingly in that case.

In [33]:
# Random augmentation for the training batches: rotations of up to 20 degrees,
# plus width/height shifts and zooms with a range of 0.1.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)

The parameters here were chosen for the Kaggle kernel. For better performance, try reducing the learning rate and increasing the number of epochs. I was able to reach 99.5%, which put me at place 75 on the leaderboard. With a GPU, it still takes less than an hour to train.

In [37]:
# Categorical cross-entropy on the softmax output, optimised with Adam at a
# learning rate of 1e-3; accuracy is reported alongside the loss.
model.compile(
    optimizer=Adam(lr=1e-3),
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

EarlyStopping is only important if you plan to run many epochs. ModelCheckpoint is more useful here, as you can resume training after a kernel restart: just run
> model.load_weights('mnist.h5')

In [38]:
# Both callbacks watch val_loss: training stops after 2 epochs without
# improvement, and mnist.h5 is overwritten only when val_loss improves.
callbacks = [
    EarlyStopping(monitor='val_loss', mode='auto', min_delta=0, patience=2,
                  verbose=2),
    ModelCheckpoint('mnist.h5', monitor='val_loss', verbose=0,
                    save_best_only=True),
]

Model training. Note that the training loss is initially much larger than the validation loss, because of Dropout.

In [None]:
# Train on augmented batches and validate on the untouched hold-out set.
# steps_per_epoch must be a whole number of batches: true division would give
# a float, so round up to an int rather than passing a fractional step count.
hist = model.fit_generator(datagen.flow(x_train, y_train, batch_size = 128),
                           steps_per_epoch = int(np.ceil(n_train / 100)), #Take away 100 when not on Kaggle kernel
                           epochs = 10, #Increase this when not on Kaggle kernel
                           verbose = 2,  #verbose=1 outputs ETA, but doesn't work well in the cloud
                           validation_data = (x_val, y_val),
                           callbacks = callbacks)

Epoch 1/10
222s - loss: 0.0787 - acc: 0.9773 - val_loss: 0.0275 - val_acc: 0.9914
Epoch 2/10
216s - loss: 0.0660 - acc: 0.9809 - val_loss: 0.0290 - val_acc: 0.9905
Epoch 3/10
222s - loss: 0.0623 - acc: 0.9821 - val_loss: 0.0218 - val_acc: 0.9935
Epoch 4/10
221s - loss: 0.0592 - acc: 0.9827 - val_loss: 0.0193 - val_acc: 0.9935
Epoch 5/10


In [None]:
# Load the test CSV (header row skipped) and prepare it the same way as the
# training input: float32 scaled by 1/255, reshaped to (samples, 28, 28, 1).
mnist_testset = np.loadtxt(test_file, dtype=int, delimiter=',', skiprows=1)
n_test = mnist_testset.shape[0]
x_test = (mnist_testset.astype("float32") / 255.0).reshape(n_test, 28, 28, 1)

In [None]:
# Run the model over the test images in batches of 128; the final layer is a
# softmax, so each output row holds per-class probabilities.
y_test = model.predict(x_test, batch_size=128)

y_test consists of class probabilities; I now select the class with the highest probability.

In [None]:
# For each row of probabilities, take the column index of the largest entry.
y_index = y_test.argmax(axis=1)

In [None]:
# Write the submission file: a header line, then one "ImageId,Label" row per
# test image, with ImageIds counting from 1.
with open(output_file, 'w') as out:
    out.write('ImageId,Label\n')
    out.writelines(
        "%d,%s\n" % (row + 1, y_index[row]) for row in range(n_test)
    )