Welcome to this example of a (sort of deep) convolutional neural network built with Keras. It runs on the TensorFlow backend and makes use of the following:

 - Dropout
 - Max-norm weight constraints (as suggested in the dropout paper)
 - Convolutional layers
 - Callbacks that change training parameters

In [None]:
#%env KERAS_BACKEND=theano
from subprocess import check_output
# Show the available input files, then the machine's memory usage.
for command in (["ls", "../input"], ["free", "-mh"]):
    print(check_output(command).decode("utf8"))

import numpy as np
import pandas as pd
 
import keras
import keras.backend as K
from keras.layers import Input, Convolution2D, Activation, MaxPooling2D, \
     Dense, BatchNormalization, Dropout
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.models import Model
from keras.utils import np_utils
from keras.constraints import maxnorm
from keras.regularizers import l2
from keras.callbacks import LearningRateScheduler

# Log the installed Keras version. The keyword spellings used in this file
# (border_mode, W_constraint, nb_epoch, Model(input=..., output=...)) appear to
# be the Keras 1.x forms, so the version matters when re-running it.
print(keras.__version__)

In [None]:
# Parse the training CSV into a float array, skipping its header row.
train = np.genfromtxt('../input/train.csv', delimiter=',', skip_header=1)

# Column 0 is used as the class label and is one-hot encoded.
training_targets = np_utils.to_categorical(train[:, 0])

# Every other column is an input feature, rescaled with x / 127.5 - 1
# (maps 0..255 onto [-1, 1], assuming 8-bit pixel intensities).
training_inputs = train[:, 1:] / 127.5 - 1

# The test CSV is read the same way; all of its columns are rescaled as inputs.
test = np.genfromtxt('../input/test.csv', delimiter=',', skip_header=1)
test_inputs = test / 127.5 - 1

In [None]:
# Keras knows two image layouts: "tf" ordering is (rows, cols, channels) and
# "th" ordering is (channels, rows, cols). See https://keras.io/backend/
print('We are using image ordering type', K.image_dim_ordering())

# Turn every flat row into a 28x28 image with a single trailing channel axis.
image_shape = (28, 28, 1)
training_inputs = training_inputs.reshape(
    (training_inputs.shape[0],) + image_shape)
test_inputs = test_inputs.reshape((test_inputs.shape[0],) + image_shape)

for images in (training_inputs, test_inputs):
    print(images.shape)

In [None]:
# Network input: one 28x28 single-channel image per sample.
inputs = Input(shape=(28, 28, 1))
print(inputs._keras_shape)

# Light (10%) dropout applied directly to the input.
inputs_w_dropout = Dropout(.10)(inputs)
# Inspect the dropout output itself; the earlier version re-printed `inputs`
# here, so this tensor was never actually checked.
print(inputs_w_dropout._keras_shape)

# First convolution stage: 30 filters of 5x5, no padding ('valid'), with a
# max-norm constraint of 2.5 on the kernel weights.
conv1 = Convolution2D(30, 5, 5,
                      border_mode='valid', bias=True,
                      W_constraint=maxnorm(2.5))(inputs_w_dropout)
print(conv1._keras_shape)

# 2x2 max pooling, then dropout -> batch normalisation -> ReLU.
max1 = MaxPooling2D(pool_size=(2, 2))(conv1)
max1 = Dropout(.05)(max1)
max1 = BatchNormalization()(max1)
act1 = Activation('relu')(max1)
print(act1._keras_shape)

In [None]:
# Second convolution stage: 45 filters of 4x4, unpadded, kernel max-norm 2.5.
conv2 = Convolution2D(
    45, 4, 4,
    border_mode='valid',
    bias=True,
    W_constraint=maxnorm(2.5),
)(act1)
print('Conv2 shape: ', conv2._keras_shape)

# 2x2 max pooling, then dropout -> batch normalisation -> ReLU.
max2 = MaxPooling2D(pool_size=(2, 2))(conv2)
max2 = Dropout(.05)(max2)
max2 = BatchNormalization()(max2)
act2 = Activation('relu')(max2)
print('act2 shape: ', act2._keras_shape)

# Third convolution stage: 160 filters of 2x2, unpadded, and no pooling here.
conv3 = Convolution2D(
    160, 2, 2,
    border_mode='valid',
    bias=True,
    W_constraint=maxnorm(2.5),
)(act2)
print(conv3._keras_shape)

# Dropout -> batch normalisation -> ReLU, as in the previous stage.
conv3 = Dropout(.05)(conv3)
conv3 = BatchNormalization()(conv3)
act3 = Activation('relu')(conv3)
print(act3._keras_shape)

In [None]:
# Collapse the convolutional feature maps into one vector per sample.
flat1 = Flatten()(act3)
print(flat1._keras_shape)

# Hidden fully-connected layer: 800 units with a max-norm constraint of 2,
# followed by heavy (50%) dropout and batch normalisation.
dense1 = Dense(800, W_constraint=maxnorm(2))(flat1)
dense1 = Dropout(.5)(dense1)
dense1 = BatchNormalization()(dense1)
# Use ReLU like every other hidden stage. The previous 'softmax' here forced
# the 800 hidden activations to sum to 1, which squashes the signal passed on
# to the classifier; softmax belongs on the output layer only.
dense1 = Activation('relu')(dense1)
print(dense1._keras_shape)

# Output layer: 10 logits turned into class probabilities by softmax.
dense2 = Dense(10)(dense1)
out = Activation('softmax')(dense2)
print(dense2._keras_shape)

In [None]:
# Nesterov-momentum SGD with a large initial step size; the optimizer's own
# decay is switched off (decay=0).
sgd = SGD(lr=0.5, momentum=.9, decay=0, nesterov=True)

# Wire the input tensor to the softmax output and compile for one-hot targets.
model = Model(input=inputs, output=out)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# I can't seem to change the model fit parameters!
# NOTE(review): presumably because `params` is only a record of the arguments
# fit() was started with; fit() batches the data using its own batch_size
# argument, so writing to this dict is not expected to alter training.
class MyCallback(keras.callbacks.Callback):
    """Experimental callback that tries to change the batch size per epoch."""

    def on_epoch_begin(self, epoch, logs=None):
        """Announce the epoch start and overwrite ``params['batch_size']``.

        Args:
            epoch: Index of the epoch that is about to start (unused).
            logs: Metrics dict supplied by Keras (unused). Defaults to None
                rather than a shared mutable ``{}``.
        """
        print("Callback initiated: on_epoch_begin")
        self.params['batch_size'] = 1000


my_callback = MyCallback()

In [None]:
# Creating a callback function to alter the learning rate and momentum in custom fashion
# TODO: Investigate ReduceLROnPlateau at https://keras.io/callbacks/
def scheduler(epoch):
    """Apply the hand-tuned learning-rate / momentum schedule for ``epoch``.

    The optimizer's existing backend variables are updated in place with
    ``K.set_value``. Rebinding ``sgd.lr`` / ``sgd.momentum`` to brand-new
    ``K.variable`` objects (as was done before) leaves the already-compiled
    training function pointing at the old variables, so the schedule would
    never reach the actual weight updates.

    Args:
        epoch: Zero-based epoch index passed in by ``LearningRateScheduler``.

    Returns:
        The optimizer's current learning rate as a Python float, which
        ``LearningRateScheduler`` then writes back to the optimizer.
    """
    # epoch -> (learning rate, momentum). Entries at or beyond the number of
    # epochs given to fit() are simply never reached.
    schedule = {
        1: (.01, .94),
        4: (.001, .96),
        10: (.0001, .8),
        18: (.0001, 0.),
    }

    print("I am epoch " + str(epoch))
    if epoch in schedule:
        new_lr, new_momentum = schedule[epoch]
        K.set_value(sgd.lr, new_lr)
        K.set_value(sgd.momentum, new_momentum)

    # Read both values back once from the optimizer that is actually training.
    optimizer_config = model.optimizer.get_config()
    current_lr = optimizer_config['lr']
    current_momentum = optimizer_config['momentum']
    print("My lr is " + str(current_lr))
    print("My momentum is " + str(current_momentum))
    return float(current_lr)


change_lr = LearningRateScheduler(scheduler)

In [None]:
# Train for 10 epochs in shuffled mini-batches of 800, holding out 15% of the
# training data for validation; both callbacks are passed to the fit loop.
fit = model.fit(
    training_inputs,
    training_targets,
    batch_size=800,
    nb_epoch=10,
    validation_split=0.15,
    shuffle=True,
    callbacks=[change_lr, my_callback],
)

In [None]:
# Predict class probabilities for the test set and keep the most likely class.
preds = model.predict(test_inputs)
pred_classes = np.argmax(preds, axis=-1)

# Submission table: 1-based image ids alongside the predicted labels.
content = {"ImageId": range(1, len(preds) + 1), "Label": pred_classes}
submission = pd.DataFrame(content)
submission.to_csv("kernel_pred.csv", index=False)

# List the working directory to confirm the file was written.
print(check_output(["ls", "."]).decode("utf8"))