In [1]:
import tensorflow as tf
import keras
from keras.layers import Dense, Input, Conv2D, BatchNormalization, MaxPool2D, Flatten, Dropout, SeparableConv2D
from keras.models import Model, Sequential
from keras.utils import to_categorical, multi_gpu_model
from keras.callbacks import LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
import time

Using TensorFlow backend.


In [2]:
# Read the training CSV into a DataFrame, then preview it as the cell output.
train = pd.read_csv(filepath_or_buffer='train.csv')
# n=5 is the pandas default for head(), spelled out here for the reader.
train.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# One-hot encode the `label` column into 10 classes. to_categorical accepts a
# flat integer vector, so no explicit (n, 1) reshape is required.
Y_train = to_categorical(train['label'].to_numpy(), 10)
# Every remaining column is treated as pixel data: reshape each row into a
# 28x28 single-channel image and divide by 255. The leading -1 lets NumPy
# infer the number of rows instead of hard-coding the size of one particular
# CSV, so the cell keeps working on a subsample or an extended training set.
X_train = train.drop('label', axis=1).to_numpy().reshape((-1, 28, 28, 1)) / 255

In [4]:
def _sep_conv_bn(tensor, filters, kernel_size, **conv_kwargs):
    """Apply a ReLU-activated SeparableConv2D followed by BatchNormalization.

    Extra keyword arguments (e.g. ``padding``, ``strides``) are forwarded to
    SeparableConv2D unchanged. Returns the batch-normalised output tensor.
    """
    tensor = SeparableConv2D(filters, kernel_size=kernel_size, activation='relu', **conv_kwargs)(tensor)
    return BatchNormalization()(tensor)


def buildModel(num_models, gpus=2):
    """Build and compile an ensemble of identical separable-convolution CNNs.

    Each network takes a (28, 28, 1) input and ends in a 10-way softmax.
    It is compiled with the Adam optimizer, categorical cross-entropy loss
    and an accuracy metric.

    Args:
        num_models: Number of independent networks to create.
        gpus: Number of GPUs to replicate each network on via
            ``multi_gpu_model``. Defaults to 2, the previously hard-coded
            value. ``multi_gpu_model`` rejects counts below 2, so an integer
            below 2 skips the wrapper and returns the plain model, which lets
            the notebook run on a single-GPU or CPU-only machine.

    Returns:
        A list of ``num_models`` compiled Keras models.
    """
    models = []

    for i in range(num_models):
        # Progress indicator: building many replicated models takes a while.
        print(i)
        inputs = Input(shape=(28, 28, 1))

        # Stage 1: 32 filters; the stride-2 5x5 layer downsamples in place of pooling.
        X = _sep_conv_bn(inputs, 32, 3, padding='same')
        X = _sep_conv_bn(X, 32, 3, padding='same')
        X = _sep_conv_bn(X, 32, 5, strides=2, padding='same')
        X = Dropout(0.4)(X)

        # Stage 2: 64 filters, same layout as stage 1.
        X = _sep_conv_bn(X, 64, 3, padding='same')
        X = _sep_conv_bn(X, 64, 3, padding='same')
        X = _sep_conv_bn(X, 64, 5, strides=2, padding='same')
        X = Dropout(0.4)(X)

        # Stage 3: convolutions with the default padding, then two 1x1
        # convolutions with 256 filters.
        for filters, kernel_size in ((128, 3), (128, 3), (128, 3), (256, 1), (256, 1)):
            X = _sep_conv_bn(X, filters, kernel_size)
        X = Flatten()(X)
        X = Dropout(0.4)(X)

        outputs = Dense(10, activation='softmax')(X)

        net = Model(inputs=inputs, outputs=outputs)
        # Only wrap when there are at least two GPUs to replicate across;
        # non-integer values (e.g. a list of GPU ids) are passed through.
        if not (isinstance(gpus, int) and gpus < 2):
            net = multi_gpu_model(net, gpus=gpus)
        net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        models.append(net)

    return models
        

In [5]:
# Build the 15-network ensemble and time how long construction takes.
start_time = time.time()
model = buildModel(15)
stop_time = time.time()
build_seconds = stop_time - start_time
print("Building model took {0:.5f} seconds".format(build_seconds))
# All ensemble members share one architecture, so summarising the first suffices.
model[0].summary()

0


W0221 19:36:39.180123 140128527906624 deprecation.py:506] From /home/jason/anaconda3/envs/tensorflow_env/lib/python3.7/site-packages/tensorflow/python/training/moving_averages.py:210: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0221 19:36:39.364416 140128527906624 deprecation.py:506] From /home/jason/anaconda3/envs/tensorflow_env/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
Building model took 56.97129 seconds
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 28, 28, 1)    0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 28, 28, 1)    0           input_1[0][0]                    
__________________________________________________________________________________________________
model_1 (Model)                 (None, 10)           166899      lambda_1[0][0]                   
                                       

In [6]:
# Data augmentation: produce randomly perturbed variants of the training
# images (rotation, zoom, horizontal and vertical shifts).
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.10,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

In [7]:
# Decrease the learning rate each epoch: start at 1e-3 and multiply by 0.95
# for every completed epoch.
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)

# Train the networks.
epochs = 45
batch_size = 128  # used for both the generator and the steps-per-epoch count
history = []

start_time_train = time.time()
# Iterate over the models that were actually built rather than a hard-coded
# count, so this cell stays in sync with the ensemble size.
for i, net in enumerate(model):
    # Each network gets its own random 90/10 train/validation split.
    X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X_train, Y_train, test_size=0.1)
    history.append(
        net.fit_generator(
            datagen.flow(X_train2, Y_train2, batch_size=batch_size),
            epochs=epochs,
            steps_per_epoch=X_train2.shape[0] // batch_size,
            validation_data=(X_val2, Y_val2),
            callbacks=[annealer],
            verbose=0,
        )
    )
    # The reported accuracies are the best value over all epochs, not the
    # value at the final epoch.
    print("CNN {0:d}: Epochs = {1:d}, Train accuracy = {2:.5f}, Validation accuracy = {3:.5f}".format(i+1, epochs, max(history[i].history['acc']), max(history[i].history['val_acc'])))
stop_time_train = time.time()
print("Total training time: {0:.5f}".format(stop_time_train-start_time_train))

CNN 1: Epochs = 45, Train accuracy = 0.99198, Validation accuracy = 0.99452
CNN 2: Epochs = 45, Train accuracy = 0.99177, Validation accuracy = 0.99690
CNN 3: Epochs = 45, Train accuracy = 0.99241, Validation accuracy = 0.99548
CNN 4: Epochs = 45, Train accuracy = 0.99180, Validation accuracy = 0.99476
CNN 5: Epochs = 45, Train accuracy = 0.99164, Validation accuracy = 0.99429
CNN 6: Epochs = 45, Train accuracy = 0.99143, Validation accuracy = 0.99500
CNN 7: Epochs = 45, Train accuracy = 0.99249, Validation accuracy = 0.99714
CNN 8: Epochs = 45, Train accuracy = 0.99220, Validation accuracy = 0.99405
CNN 9: Epochs = 45, Train accuracy = 0.99151, Validation accuracy = 0.99548
CNN 10: Epochs = 45, Train accuracy = 0.99220, Validation accuracy = 0.99667
CNN 11: Epochs = 45, Train accuracy = 0.99230, Validation accuracy = 0.99476
CNN 12: Epochs = 45, Train accuracy = 0.99193, Validation accuracy = 0.99452
CNN 13: Epochs = 45, Train accuracy = 0.99161, Validation accuracy = 0.99548
CNN 14: 

In [8]:
# Load the test CSV, reshape each row into a 28x28 single-channel image and
# divide by 255, matching the preprocessing applied to the training data.
# The leading -1 lets NumPy infer the row count instead of hard-coding it.
X_test = pd.read_csv('test.csv').to_numpy().reshape((-1, 28, 28, 1)) / 255

In [9]:
# Soft voting: accumulate the 10 class scores predicted by every ensemble
# member, then pick the class with the highest total for each test image.
results = np.zeros((X_test.shape[0], 10))
start_time_prediction = time.time()
# Iterate over the models themselves so the loop follows the ensemble size.
for net in model:
    results = results + net.predict(X_test, verbose=1, batch_size=128)
stop_time_prediction = time.time()
print("Total prediction time: {0:.5f}".format(stop_time_prediction-start_time_prediction))
results = np.argmax(results, axis=1)
results = pd.Series(results, name="Label")
# ImageId is 1-based and derived from the number of predictions, so it always
# lines up with the Label column whatever the test-set size.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("MNIST-CNN-ENSEMBLE-SEPCONV.csv", index=False)

Total prediction time: 64.59167


In [17]:
# Persist the weights of every ensemble member, one HDF5 file per network,
# numbered from 1. Iterating over `model` keeps this in sync with however many
# networks were built.
# NOTE(review): if these models are multi_gpu_model wrappers, the Keras docs
# recommend saving from the template model rather than the wrapper; weights
# saved from the wrapper presumably need the same wrapper to load. Confirm
# before relying on these files elsewhere.
for i, net in enumerate(model):
    net.save_weights('model_ensemble' + str(i+1) + '-convfc.h5')