In [1]:
import numpy as np 
import pandas as pd 

from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, BatchNormalization, Activation, Dropout, DepthwiseConv2D
from keras.layers.pooling import GlobalAveragePooling2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from keras.datasets import mnist
from keras.utils import np_utils


import os
# Show which files are available in the ../input directory before loading them.
print(os.listdir("../input"))

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


['train.csv', 'sample_submission.csv', 'test.csv']


Loading the data

In [2]:
# Fetch MNIST through the Keras loader and pool its two splits into a single
# training set (images and labels are stacked along the sample axis).
# NOTE(review): X_test is reassigned in a later cell to the images read from
# test.csv, so the MNIST split bound here is only a temporary.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.concatenate((X_train, X_test), axis = 0)
y_train = np.concatenate((y_train, y_test), axis = 0)

# Append a channel axis so every sample is 28x28x1, as the network expects.
X_train = X_train.reshape(-1, 28, 28, 1)
print(X_train.shape, y_train.shape)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(70000, 28, 28, 1) (70000,)


In [3]:
# train.csv is used as the validation set: column 0 is taken as the label and
# every remaining column as flattened pixel data.
train = pd.read_csv('../input/train.csv').values
y_val = train[:, 0].astype('int32')
X_val = train[:, 1:].astype('float32').reshape(-1, 28, 28, 1)
print(X_val.shape, y_val.shape)

(42000, 28, 28, 1) (42000,)


Load the test images and reshape them to 28×28×1

In [4]:
# Read test.csv, treat every column as pixel data and give the images the
# same 28x28x1 layout as the training and validation sets.
X_test = (
    pd.read_csv('../input/test.csv')
    .values
    .astype('float32')
    .reshape(-1, 28, 28, 1)
)

Now we normalize the training, validation and test images by dividing the pixel values by 255

In [5]:
# Cast each image set to float32 and divide by 255.
# NOTE: the arrays are rebound to the same names, so running this cell a
# second time without reloading the data would rescale them again.
X_train, X_val, X_test = (
    images.astype('float32') / 255
    for images in (X_train, X_val, X_test)
)

Now we one-hot encode the labels

In [6]:
# Turn the integer digit labels into 10-way one-hot rows for the softmax /
# categorical cross-entropy head.
# NOTE: like the scaling cell, this rebinds its inputs and should only be run
# once per data load.
y_train, y_val = (
    to_categorical(labels, num_classes = 10)
    for labels in (y_train, y_val)
)

Set the random seed

In [7]:
# Seed value for the run. It was previously defined but never applied, so it
# had no effect; seed NumPy's global RNG with it here.
# NOTE(review): the TensorFlow backend keeps its own random state that is not
# seeded here, so runs may still differ slightly - confirm whether full
# determinism is required.
random_seed = 2
np.random.seed(random_seed)

Now we create the model

In [8]:
def create_model(input_shape = (28, 28, 1), num_classes = 10):
    """Build and compile the convolutional digit classifier.

    The network is three convolutional stages, each followed by 2x2 max
    pooling, then a 256-unit dense layer and a softmax output. All
    convolutions use 'same' padding and ReLU activations.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (28, 28, 1), the value that used to be hard-coded.
        num_classes: Number of output classes (width of the softmax layer).
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A compiled ``Sequential`` model using the 'adam' optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    # Settings shared by every convolution in the network.
    conv_args = dict(padding = 'same', activation = 'relu')

    model = Sequential([
        # Stage 1: three 3x3 convolutions at full resolution.
        Conv2D(filters = 64, kernel_size = 3, input_shape = input_shape, **conv_args),
        Conv2D(filters = 64, kernel_size = 3, **conv_args),
        Conv2D(filters = 128, kernel_size = 3, **conv_args),
        MaxPooling2D(pool_size = 2),

        # Stage 2: two 3x3 convolutions at half resolution.
        Conv2D(filters = 128, kernel_size = 3, **conv_args),
        Conv2D(filters = 192, kernel_size = 3, **conv_args),
        MaxPooling2D(pool_size = 2),

        # Stage 3: one 5x5 convolution; the pooling is padded so an odd-sized
        # feature map is not truncated.
        Conv2D(filters = 192, kernel_size = 5, **conv_args),
        MaxPooling2D(pool_size = 2, padding = 'same'),

        # Classifier head.
        Flatten(),
        Dense(256, activation = 'relu'),
        Dense(num_classes, activation = 'softmax'),
    ])

    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

    return model

In [9]:
# Build a freshly initialised, compiled network and print its
# layer-by-layer summary (output shapes and parameter counts).
model = create_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 192)       221376    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 192)         0         
__________

Now we define the callbacks we'll use while training

In [10]:
# Multiply the learning rate by 0.3 whenever validation accuracy has not
# improved for 3 epochs, never going below 1e-5.
reduce_learning_rate = ReduceLROnPlateau(
    monitor = 'val_acc',
    factor = 0.3,
    patience = 3,
    min_lr = 1e-5,
    verbose = 1,
)

# Keep only the best model seen so far (highest validation accuracy) on disk.
checkpoint = ModelCheckpoint(
    'mnist_weights.h5',
    monitor = 'val_acc',
    mode = 'max',
    save_best_only = True,
    verbose = 1,
)

# Stop once validation loss has gone 10 epochs without improving.
# NOTE(review): this watches val_loss while the two callbacks above watch
# val_acc, so "best" can refer to different epochs - confirm this is intended.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    min_delta = 1e-10,
    patience = 10,
    restore_best_weights = True,
    verbose = 1,
)

callbacks = [reduce_learning_rate, checkpoint, early_stopping]

Finally we train the model

In [11]:
# Train for up to 50 epochs in mini-batches of 100, reshuffling the training
# samples each epoch and scoring on the validation set after every epoch.
# NOTE(review): training uses the pooled MNIST images while validation uses
# train.csv; if those CSV images are themselves drawn from MNIST, validation
# accuracy will overstate how well the model generalises - confirm.
history = model.fit(
    x = X_train,
    y = y_train,
    validation_data = (X_val, y_val),
    epochs = 50,
    batch_size = 100,
    shuffle = True,
    callbacks = callbacks,
    verbose = 1,
)

Train on 70000 samples, validate on 42000 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.98695, saving model to mnist_weights.h5
Epoch 2/50

Epoch 00002: val_acc improved from 0.98695 to 0.99286, saving model to mnist_weights.h5
Epoch 3/50

Epoch 00003: val_acc improved from 0.99286 to 0.99540, saving model to mnist_weights.h5
Epoch 4/50

Epoch 00004: val_acc improved from 0.99540 to 0.99745, saving model to mnist_weights.h5
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.99745
Epoch 6/50

Epoch 00006: val_acc improved from 0.99745 to 0.99755, saving model to mnist_weights.h5
Epoch 7/50

Epoch 00007: val_acc improved from 0.99755 to 0.99840, saving model to mnist_weights.h5
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.99840
Epoch 9/50

Epoch 00009: val_acc did not improve from 0.99840
Epoch 10/50

Epoch 00010: val_acc improved from 0.99840 to 0.99888, saving model to mnist_weights.h5
Epoch 11/50

Epoch 00011: val_acc did not improve from 0.99888
Epo

Now we predict the results 

In [12]:
# Predict class probabilities for the test images and take the most likely
# digit for each one. This is what Sequential.predict_classes computed for a
# softmax output; that helper is deprecated and missing from newer Keras /
# TensorFlow releases, so the argmax is done explicitly here.
# NOTE(review): predictions come from the weights currently held in `model`;
# the 'mnist_weights.h5' checkpoint written during training is never reloaded
# - confirm that is intended.
test_labels = model.predict(X_test, verbose = 2).argmax(axis = -1)

In [13]:
# Start from the sample submission so the file keeps the expected columns,
# replace its 'Label' column with our predictions and write it out without
# the DataFrame index.
sub = pd.read_csv('../input/sample_submission.csv').assign(Label = test_labels)
sub.to_csv('submission.csv', index = False)