In [35]:
import tensorflow as tf
import keras
from keras.layers import Dense, Input, Conv2D, BatchNormalization, MaxPool2D, Flatten, Dropout
from keras.models import Model, Sequential
from keras.utils import to_categorical, multi_gpu_model
from keras.callbacks import LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
# Read the training table from disk; the preview below shows a `label`
# column followed by the pixel0..pixel783 columns.
train_csv_path = 'train.csv'
train = pd.read_csv(train_csv_path)
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# One-hot encode the labels into 10 classes. The row count is inferred (-1)
# instead of being hard-coded, so the cell keeps working if the CSV has a
# different number of rows or is subsampled.
Y_train = to_categorical(train.label.to_numpy().reshape((-1, 1)), 10)
# Every remaining column is treated as a pixel and each row is reshaped into a
# 28x28 single-channel image, matching the model's input_shape of (28, 28, 1).
# NOTE(review): pixel values are passed through unscaled -- confirm whether a
# /255 normalisation is intended here and at prediction time.
X_train = train.drop('label', axis=1).to_numpy().reshape((-1, 28, 28, 1))

In [33]:
def _build_single_cnn():
    """Return one uncompiled CNN taking (28, 28, 1) inputs and emitting 10 softmax scores."""
    net = Sequential()

    # Stage 1: two 3x3 convolutions, then a strided 5x5 convolution that
    # downsamples; batch normalisation follows every convolution.
    net.add(Conv2D(32, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28, 1)))
    net.add(BatchNormalization())
    net.add(Conv2D(32, kernel_size=3, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.4))

    # Stage 2: same layout; only the strided convolution widens to 64 filters.
    net.add(Conv2D(32, kernel_size=3, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(32, kernel_size=3, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.4))

    # Head: a 4x4 convolution (no padding argument), then two L2-regularised
    # dense layers with dropout, then the 10-way softmax classifier.
    net.add(Conv2D(128, kernel_size=4, activation='relu'))
    net.add(BatchNormalization())
    net.add(Flatten())
    net.add(Dropout(0.4))
    net.add(Dense(128, activation='relu', kernel_regularizer='l2'))
    net.add(Dropout(0.4))
    net.add(Dense(128, activation='relu', kernel_regularizer='l2'))
    net.add(Dropout(0.4))
    net.add(Dense(10, activation='softmax'))
    return net


def buildModel(num_models, gpus=2):
    """Build and compile `num_models` identical, independently initialised CNNs.

    Parameters
    ----------
    num_models : int
        Number of networks to create. Zero yields an empty list.
    gpus : int or list of int or None, optional
        Forwarded to `multi_gpu_model` to replicate each network across GPUs.
        Defaults to 2, which reproduces the previous hard-coded behaviour.
        `None` or an integer below 2 skips the multi-GPU wrapper, so the
        function can also be used on CPU-only or single-GPU machines.

    Returns
    -------
    list
        The compiled models, in creation order. Each is compiled with the
        Adam optimizer, categorical cross-entropy loss and an accuracy metric.
    """
    # A list of GPU ids is passed straight through; an integer only makes
    # sense for multi_gpu_model when it is at least 2.
    use_multi_gpu = isinstance(gpus, (list, tuple)) or (gpus is not None and gpus >= 2)

    models = []
    for _ in range(num_models):
        net = _build_single_cnn()
        if use_multi_gpu:
            # CREATE MULTI-GPU MODEL
            net = multi_gpu_model(net, gpus=gpus)
        # COMPILE WITH ADAM OPTIMIZER AND CROSS ENTROPY COST
        net.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        models.append(net)
    return models
        

In [34]:
# Build the ensemble of CNNs and print the layer summary of its first member.
ensemble_size = 15
model = buildModel(ensemble_size)
model[0].summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv2d_533_input (InputLayer)   (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
lambda_91 (Lambda)              (None, 28, 28, 1)    0           conv2d_533_input[0][0]           
__________________________________________________________________________________________________
lambda_92 (Lambda)              (None, 28, 28, 1)    0           conv2d_533_input[0][0]           
__________________________________________________________________________________________________
sequential_61 (Sequential)      (None, 10)           517642      lambda_91[0][0]                  
                                                                 lambda_92[0][0]                  
__________

In [36]:
# Generate image variations: random rotations, zooms and
# horizontal/vertical shifts of the training images.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.10,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# DECREASE LEARNING RATE EACH EPOCH
# Starts at 1e-3 and is multiplied by 0.95 for every completed epoch.
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)
# TRAIN NETWORKS
# Derive the ensemble size from the list that was actually built, so this cell
# cannot drift out of sync with the buildModel(...) call.
num_networks = len(model)
history = [None] * num_networks
epochs = 45
batch_size = 128

for i in range(num_networks):
    # Each network gets its own random 90/10 train/validation split.
    X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X_train, Y_train, test_size=0.1)
    history[i] = model[i].fit_generator(
        datagen.flow(X_train2, Y_train2, batch_size=batch_size),
        epochs=epochs,
        steps_per_epoch=X_train2.shape[0] // batch_size,
        validation_data=(X_val2, Y_val2),
        callbacks=[annealer],
        verbose=0,
    )
    # Older Keras releases log the metric as 'acc'; newer ones use 'accuracy'.
    # Accept either so the report does not raise KeyError after an upgrade.
    logs = history[i].history
    acc_key = 'acc' if 'acc' in logs else 'accuracy'
    val_acc_key = 'val_' + acc_key
    print("CNN {0:d}: Epochs = {1:d}, Train accuracy = {2:.5f}, Validation accuracy = {3:.5f}".format(i+1, epochs, max(logs[acc_key]), max(logs[val_acc_key])))

CNN 1: Epochs = 45, Train accuracy = 0.99743, Validation accuracy = 0.99786
CNN 2: Epochs = 45, Train accuracy = 0.99451, Validation accuracy = 0.99738
CNN 3: Epochs = 45, Train accuracy = 0.99488, Validation accuracy = 0.99595
CNN 4: Epochs = 45, Train accuracy = 0.99480, Validation accuracy = 0.99595
CNN 5: Epochs = 45, Train accuracy = 0.99506, Validation accuracy = 0.99643
CNN 6: Epochs = 45, Train accuracy = 0.99514, Validation accuracy = 0.99548
