## 1. Importing dataset and data preprocessing

### 1.1. Import of essential libraries

In [3]:
import gc
import itertools
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import tensorflowjs as tfjs

import warnings

# Silence every warning category for the rest of the session
warnings.simplefilter('ignore')

# Seed NumPy's global random generator so NumPy-based randomness is repeatable
np.random.seed(2)

### 1.2. Load data

In [6]:
# Read the whole training table from disk; `train` itself is left unmodified
train = pd.read_csv("./input/train.csv")

# Features: every column except "label"
X_train = train.drop(columns=["label"])
# Labels: the "label" column on its own
Y_train = train["label"]

### 1.3. Data preprocessing

In [7]:
# Everything below is derived from the untouched `train` frame instead of
# overwriting X_train / Y_train with themselves. Re-running this cell therefore
# always yields the same arrays (the in-place version scaled the pixels a second
# time and then failed on `.values`, because X_train had become an ndarray).

# Reshape each row to a 28x28 single-channel image, then normalize to [0, 1]
# by dividing the raw values by 255
pixels_all = train.drop(columns=["label"]).values.reshape(-1, 28, 28, 1) / 255.0

# Label encoding: one-hot vectors over the 10 classes
labels_all = to_categorical(train["label"], num_classes=10)

# Split into training and validation sets (10% held out, fixed random_state)
X_train, X_val, Y_train, Y_val = train_test_split(
    pixels_all,
    labels_all,
    test_size=0.1,
    random_state=2
)

# The full-size intermediates are no longer needed once the split exists
del pixels_all, labels_all

## 2. CNN

### 2.1. Define Keras model architecture

In [8]:
# Build the network with the Keras Sequential API by passing the ordered
# list of layers straight to the constructor
model = Sequential([

    # CONVOLUTIONAL/MAXPOOL LAYERS
    # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

    # Convolutional 2D layer #1
    # 32 features, 5x5 filter, ReLU activation; as the first layer it also
    # declares the 28x28x1 input shape
    Conv2D(32, (5, 5), padding='Same', activation='relu', input_shape=(28, 28, 1)),

    # Convolutional 2D layer #2
    # 32 features, 5x5 filter, ReLU activation
    Conv2D(32, (5, 5), padding='Same', activation='relu'),

    # Pooling layer #1
    # Max pooling with a 2x2 filter
    MaxPool2D(pool_size=(2, 2)),

    # Dropout operation; 0.75 probability that an element will be kept
    Dropout(0.25),

    # Convolutional 2D layer #3
    # 64 features, 3x3 filter, ReLU activation
    Conv2D(64, (3, 3), padding='Same', activation='relu'),

    # Convolutional 2D layer #4
    # 64 features, 3x3 filter, ReLU activation
    Conv2D(64, (3, 3), padding='Same', activation='relu'),

    # Pooling layer #2
    # Max pooling with a 2x2 filter and a stride of 2
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

    # Dropout operation; 0.75 probability that an element will be kept
    Dropout(0.25),

    # Flatten layer
    # Collapses the feature maps into a single 1D vector per sample
    Flatten(),

    # FULLY CONNECTED LAYERS
    # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

    # Dense layer #1
    # Densely connected layer with 256 units and ReLU activation
    Dense(256, activation="relu"),

    # Dropout operation; 0.5 probability that an element will be kept
    Dropout(0.5),

    # Dense layer #2
    # Densely connected output layer with 10 units and softmax activation
    Dense(10, activation="softmax"),
])

### 2.2. Settings

In [11]:
# Training hyper-parameters used by the data generator and the fit call
batch_size = 128  # samples per batch produced by the generator
epochs = 30       # number of training epochs

### 2.3. Set the optimizer and annealer

In [9]:
# RMSprop optimizer that iteratively updates the model parameters;
# 0.001 is the starting learning rate and no built-in decay is applied
optimizer = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)

In [10]:
# Attach the optimizer, the loss for one-hot encoded labels and the accuracy metric
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [13]:
# Learning rate annealer: halve the learning rate whenever the monitored
# validation accuracy has not improved for 3 epochs, never going below 1e-5.
# The monitored name must match the key printed in the training log (`val_acc`).
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

### 2.4. Data augmentation

In [14]:
# Random augmentation applied to the training images on the fly:
# small rotations, zooms and shifts; no flips and no normalization statistics
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=10,
    zoom_range = 0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False
)

# No datagen.fit(X_train) here: fit() only computes the statistics used by
# featurewise_center, featurewise_std_normalization and zca_whitening, which are
# all disabled above, so the call would just copy the whole training array.
# Add it back if any of those three options is switched on.

In [14]:
# Stream augmented mini-batches from the training arrays
train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)

# Train on the augmented stream; validation uses the un-augmented hold-out set
# and the learning-rate annealer runs as a callback
history = model.fit_generator(
    train_batches,
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(X_val, Y_val),
    callbacks=[learning_rate_reduction],
    verbose=2,
)

Epoch 1/30
 - 127s - loss: 0.5052 - acc: 0.8365 - val_loss: 0.0849 - val_acc: 0.9729
Epoch 2/30
 - 127s - loss: 0.1512 - acc: 0.9549 - val_loss: 0.0473 - val_acc: 0.9864
Epoch 3/30
 - 127s - loss: 0.1129 - acc: 0.9660 - val_loss: 0.0409 - val_acc: 0.9864
Epoch 4/30
 - 127s - loss: 0.0913 - acc: 0.9722 - val_loss: 0.0401 - val_acc: 0.9898
Epoch 5/30
 - 127s - loss: 0.0807 - acc: 0.9759 - val_loss: 0.0282 - val_acc: 0.9907
Epoch 6/30
 - 127s - loss: 0.0740 - acc: 0.9783 - val_loss: 0.0370 - val_acc: 0.9895
Epoch 7/30
 - 127s - loss: 0.0728 - acc: 0.9794 - val_loss: 0.0337 - val_acc: 0.9890
Epoch 8/30
 - 127s - loss: 0.0671 - acc: 0.9804 - val_loss: 0.0554 - val_acc: 0.9826
Epoch 9/30
 - 127s - loss: 0.0651 - acc: 0.9816 - val_loss: 0.0419 - val_acc: 0.9917
Epoch 10/30
 - 127s - loss: 0.0647 - acc: 0.9815 - val_loss: 0.0250 - val_acc: 0.9929
Epoch 11/30
 - 127s - loss: 0.0714 - acc: 0.9800 - val_loss: 0.0227 - val_acc: 0.9921
Epoch 12/30
 - 127s - loss: 0.0680 - acc: 0.9821 - val_loss: 0.

### 2.5. Saving model

In [15]:
# Save Python model.
# The target folder is created first because writing the .h5 file fails
# when its parent directory does not exist (e.g. on a fresh checkout).
os.makedirs('ModelPY', exist_ok=True)
model.save('ModelPY/model.h5')
# Save JS model
tfjs.converters.save_keras_model(model, './ModelJS')
print("Saved model to disk")

Saved model to disk


## 3. More useful information

[Introduction to CNN Keras - 0.997 (top 6%) by Yassine Ghouzam][1]

[1]: https://www.kaggle.com/yassineghouzam/introduction-to-cnn-keras-0-997-top-6/code