# https://habrahabr.ru/company/wunderfund/blog/315476/

In [54]:
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, Dropout, Flatten, merge
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping

In [55]:
# --- Training schedule ---
batch_size = 128       # samples per gradient update
num_epochs = 50        # upper bound on epochs; early stopping may end training sooner

# --- Convolutional feature extractor ---
kernel_size = 3        # side length of the square convolution kernels
pool_size = 2          # side length of the square max-pooling window
conv_depth = 32        # number of filters in each convolution layer

# --- Classifier head and regularisation ---
drop_prob_1 = 0.25     # dropout rate applied after pooling
drop_prob_2 = 0.5      # dropout rate applied after the hidden dense layer
hidden_size = 128      # units in the hidden dense layer
l2_lambda = 0.0001     # L2 weight-decay coefficient

# --- Ensemble ---
ens_models = 3         # number of parallel branches whose outputs are averaged

In [56]:
# Image geometry and label count used by the reshapes below and by the model.
height = 28
width = 28
depth = 1
num_classes = 10

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape to (samples, depth, height, width) and scale the pixel values by 1/255.
X_train = X_train.reshape(len(X_train), depth, height, width).astype('float32') / 255.0
X_test = X_test.reshape(len(X_test), depth, height, width).astype('float32') / 255.0

# One-hot encode the integer class labels.
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Hold out everything from index 54000 onwards as the validation set.
# The right-hand sides are evaluated before X_train / y_train are rebound.
X_train, X_valid = X_train[:54000], X_train[54000:]
y_train, y_valid = y_train[:54000], y_train[54000:]

In [57]:
# Augmentation: random horizontal and vertical shifts (range 0.1) applied to
# channels-first ('th') image batches.
datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, dim_ordering='th')
# datagen.fit(X_train) is intentionally not called: fit() is only required for
# featurewise_center, featurewise_std_normalization and zca_whitening, none of
# which is enabled here, so it would copy the training set without storing any
# statistics. Re-add the call if one of those options is switched on.

In [58]:
# Shared input in channels-first layout (depth, height, width), batch-normalised
# along the channel axis before it is fed to every ensemble branch.
inp = Input(shape=(depth, height, width))
inp_norm = BatchNormalization(axis=1)(inp)

# Build ens_models structurally identical branches on top of the shared input;
# each branch ends in its own softmax over num_classes.
outs = []
for i in range(ens_models):
    # dim_ordering='th' is passed explicitly so the convolutions agree with the
    # input shape, the axis=1 batch normalisation, the pooling layer and the
    # data generator even when the backend default ordering is 'tf'.
    conv_1 = Convolution2D(conv_depth, kernel_size, kernel_size, border_mode='same', init='he_uniform',
                           W_regularizer=l2(l2_lambda), activation='relu', dim_ordering='th')(inp_norm)
    conv_1 = BatchNormalization(axis=1)(conv_1)
    conv_2 = Convolution2D(conv_depth, kernel_size, kernel_size, border_mode='same', init='he_uniform',
                           W_regularizer=l2(l2_lambda), activation='relu', dim_ordering='th')(conv_1)
    conv_2 = BatchNormalization(axis=1)(conv_2)
    pool_1 = MaxPooling2D(pool_size=(pool_size, pool_size), dim_ordering='th')(conv_2)
    drop_1 = Dropout(drop_prob_1)(pool_1)
    flat = Flatten()(drop_1)
    # NOTE(review): unlike the other weighted layers in this loop, this Dense sets
    # neither init nor W_regularizer -- confirm that is intended.
    hidden = Dense(hidden_size, activation='relu')(flat)
    hidden = BatchNormalization(axis=1)(hidden)
    drop_2 = Dropout(drop_prob_2)(hidden)
    out = Dense(num_classes, init='glorot_uniform', W_regularizer=l2(l2_lambda), activation='softmax')(drop_2)
    outs.append(out)

# Average the branch outputs. merge() needs at least two tensors, so a
# single-branch "ensemble" uses its only output directly.
if len(outs) == 1:
    out = outs[0]
else:
    out = merge(outs, mode='ave')

In [59]:
# Wrap the graph built above into a trainable model.
model = Model(input=inp, output=out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Augmented mini-batches drawn from the training split.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

# Stop once val_loss has not improved for 5 consecutive epochs.
stop_on_plateau = EarlyStopping(monitor='val_loss', patience=5)

# One epoch corresponds to one pass over the training split; validation uses
# the held-out (X_valid, y_valid) arrays.
model.fit_generator(train_batches,
                    samples_per_epoch=len(X_train),
                    nb_epoch=num_epochs,
                    validation_data=(X_valid, y_valid),
                    callbacks=[stop_on_plateau],
                    verbose=2)

Epoch 1/50
23s - loss: 0.7663 - acc: 0.7719 - val_loss: 0.2139 - val_acc: 0.9578
Epoch 2/50
17s - loss: 0.2705 - acc: 0.9315 - val_loss: 0.0920 - val_acc: 0.9767
Epoch 3/50
18s - loss: 0.1952 - acc: 0.9518 - val_loss: 0.0785 - val_acc: 0.9802
Epoch 4/50
36s - loss: 0.1665 - acc: 0.9595 - val_loss: 0.0696 - val_acc: 0.9820
Epoch 5/50
38s - loss: 0.1476 - acc: 0.9649 - val_loss: 0.0706 - val_acc: 0.9842
Epoch 6/50
37s - loss: 0.1378 - acc: 0.9676 - val_loss: 0.0654 - val_acc: 0.9847
Epoch 7/50
37s - loss: 0.1305 - acc: 0.9692 - val_loss: 0.0663 - val_acc: 0.9857
Epoch 8/50
37s - loss: 0.1209 - acc: 0.9716 - val_loss: 0.0634 - val_acc: 0.9867
Epoch 9/50
37s - loss: 0.1188 - acc: 0.9734 - val_loss: 0.0649 - val_acc: 0.9863
Epoch 10/50
37s - loss: 0.1129 - acc: 0.9743 - val_loss: 0.0642 - val_acc: 0.9857
Epoch 11/50
37s - loss: 0.1098 - acc: 0.9749 - val_loss: 0.0620 - val_acc: 0.9872
Epoch 12/50
37s - loss: 0.1089 - acc: 0.9764 - val_loss: 0.0617 - val_acc: 0.9877
Epoch 13/50
37s - loss: 0

<keras.callbacks.History at 0x7ff360ec1ac8>

In [60]:
# Score the trained model on the test split; the result lists the loss followed
# by the compiled metrics (accuracy).
model.evaluate(x=X_test, y=y_test, verbose=1)



[0.047949830448627474, 0.9909]