### Imports

In [24]:
#%matplotlib inline
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#import seaborn as sns
import warnings
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Activation, Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.layers import Flatten, MaxPooling2D, GlobalAveragePooling2D, LeakyReLU
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
warnings.filterwarnings(action='ignore')

# Widen the notebook container to 90% so wide outputs are not clipped.
# `display` and `HTML` come from the public `IPython.display` module;
# importing them from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [9]:
# Read both CSV files from the current working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# One-hot encode the integer class labels (class vector -> binary class matrix).
train_label = to_categorical(train_df['label'])

# Pixel values are cast to float32, divided by 255 and reshaped into
# 28 x 28 single-channel images.
# NOTE(review): slicing from column 1 assumes 'label' is the first column of
# train.csv and that test.csv holds only pixel columns -- confirm.
train_data = (train_df.iloc[:, 1:].values.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
test_data = (test_df.values.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

### Visualize distribution of labels

In [11]:
# Number of training samples per label, most frequent first.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
count = train_df['label'].value_counts()
count.plot.bar(figsize = (10,6))

### Build CNN

In [27]:
# Ensemble size, plus one placeholder slot per network; the slots are
# overwritten with real models when the networks are built.
nets  = 15
model = [0 for _ in range(nets)]

# RMSprop configuration used when compiling the networks.
optim = RMSprop(
    learning_rate=0.05,
    rho=0.9,
    momentum=0.1,
    epsilon=1e-07,
    centered=True,
    name='RMSprop',
)

In [28]:
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
def _conv_bn_lrelu(filters, **conv_kwargs):
    """Return the layers of one 3x3 'same' Conv2D -> BatchNormalization -> LeakyReLU(0.1) unit.

    Extra keyword arguments (e.g. ``input_shape``) are forwarded to ``Conv2D``.
    """
    return [
        tf.keras.layers.Conv2D(filters, (3,3), padding='same', **conv_kwargs),
        tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
        tf.keras.layers.LeakyReLU(alpha=0.1),
    ]


def _pool_dropout():
    """Return the 2x2 max-pooling + Dropout(0.25) pair that closes each conv stage."""
    return [
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.25),
    ]


def _build_cnn():
    """Build one (uncompiled) ensemble member.

    Three convolutional stages (3x64, 3x128, 2x256 filters), each closed by
    max-pooling and dropout, followed by Dense(256) -> LeakyReLU ->
    BatchNormalization -> 10-way softmax. Input shape is (28, 28, 1).
    """
    layers = []

    # Stage 1: three 64-filter units; the first one declares the input shape.
    layers += _conv_bn_lrelu(64, input_shape=(28, 28, 1))
    layers += _conv_bn_lrelu(64)
    layers += _conv_bn_lrelu(64)
    layers += _pool_dropout()

    # Stage 2: three 128-filter units.
    for _ in range(3):
        layers += _conv_bn_lrelu(128)
    layers += _pool_dropout()

    # Stage 3: two 256-filter units.
    for _ in range(2):
        layers += _conv_bn_lrelu(256)
    layers += _pool_dropout()

    # Classifier head.
    layers += [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256),
        tf.keras.layers.LeakyReLU(alpha=0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.models.Sequential(layers)


for i in range(nets):
    model[i] = _build_cnn()
    # Each network gets its own optimizer, cloned from `optim`'s configuration.
    # Passing the single `optim` instance to every model would share optimizer
    # state (step counter, slot variables) across the ensemble, and recent
    # Keras optimizers refuse variables from a second model.
    model[i].compile(loss = 'categorical_crossentropy',
                     optimizer = type(optim).from_config(optim.get_config()),
                     metrics = ['accuracy'])

In [None]:
# Random augmentation applied to every training batch: small rotations,
# zooms and horizontal/vertical shifts.
datagen = ImageDataGenerator(
        rotation_range=10,
        zoom_range = 0.15,
        width_shift_range=0.1,
        height_shift_range=0.1)
history = [0] * nets
epochs = 10
batch_size = 64  # used both for the generator and for steps_per_epoch

for i in range(nets):
    # Every network gets its own stratified 90/10 train/validation split.
    X_train2, X_val2, Y_train2, Y_val2 = train_test_split(train_data, train_label, test_size = 0.1, stratify = train_label)

    # Model.fit accepts generators directly; Model.fit_generator is deprecated
    # and no longer available in newer Keras releases.
    history[i] = model[i].fit(datagen.flow(X_train2, Y_train2, batch_size=batch_size),
        epochs = epochs, steps_per_epoch = X_train2.shape[0] // batch_size,
        validation_data = (X_val2,Y_val2), verbose=0)
    # Report the best (maximum) accuracy reached over the epochs.
    print('CNN {0:d}:  Epochs = {1:d}, Train acc = {2:.5f}, Validation acc = {3:.5f}'.format(
        i+1,epochs,max(history[i].history['accuracy']),max(history[i].history['val_accuracy']) ))

### Visualization

### Save predictions to csv

In [None]:
# Ensemble by summing each network's predicted class probabilities and
# taking the arg-max class per test image.
n_test = test_data.shape[0]
ensemble_probs = np.zeros((n_test, 10))
for j in range(nets):
    ensemble_probs = ensemble_probs + model[j].predict(test_data)
results = pd.Series(np.argmax(ensemble_probs, axis = 1), name = 'Label')

# ImageId runs 1..n_test. It is derived from the data instead of a hard-coded
# 28000, so a test set of any other size does not produce NaN-padded rows.
image_ids = pd.Series(range(1, n_test + 1), name = 'ImageId')
submission = pd.concat([image_ids, results], axis = 1)
submission.to_csv('submission_cnn.csv', index = False)