In [0]:
# Data Visualization
import numpy as np
import matplotlib.pyplot as plt

# Import Deep Learning librairies
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Sampling
from sklearn.model_selection import train_test_split

In [0]:
# Data importation
train = pd.read_csv('train.csv').drop('label',axis =1)
label = pd.read_csv('train.csv')['label']
test = pd.read_csv('test.csv')

# Normalization
train = train/255
test = test/255

# Reshape to create images
train = train.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)

# Split the train and the validation set for the fitting
X_train, X_val, Y_train, Y_val = train_test_split(train, label, test_size = 0.2, random_state=2)

After multiple executions, I noticed that best results are for epochs of 8, 15 and 30. For practical reasons, I choose to work with 8 epochs.

In [0]:
epochs = 8 
batch_size = 100

Since I have implemented (apart) many times CNNs with no augmentation, it appears to be less efficient. 
We have an accuracy lower by ~0,02% in general and on Kaggle. So, the optimization of hyperparameters is realized with Data Augmentation.

In [0]:
# Manual data augmentation
datagen = ImageDataGenerator(rotation_range=10,
                            zoom_range=0.2,
                            width_shift_range=0.2,
                              height_shift_range=0.2)
datagen.fit(X_train)

Let's go for a grid search on number of filters and number of nodes of the hidden layer for the CNN architecture I have chosen to work on.

The method for the Grid Search is clear:

* I take n filters for the 
two first Conv2D, m filters for the two following Conv2D and p nodes for the hidden layer post-Flatten. 
* We look for the best tradeoff between n/m/p for (n,m) in (32,64) and p in (128,256) as I know that p = 64 is not very efficient and (n,m) taking the value 128 doesn't improve automatically the cross validation accuracy and take too much time.

In [0]:
#SearchGrid

first_part = [32,64]
second_part = [32,64]
third_part = [128,256]
RESULT = []

#Define CNN:
#[Conv2D->relu]*2 with n filters -> MaxPool2D (2,2) -> Dropout -> 
#[Conv2D->relu]*2 with m filters -> MaxPool2D (2,2) -> Dropout ->
#Flatten -> Dense with p nodes -> Dropout -> Out

def CNN(n,m,p):

  model = Sequential()

  model.add(Conv2D(filters = n, kernel_size = (5,5),padding = 'Same', 
                  activation ='relu', input_shape = (28,28,1)))
  model.add(Conv2D(filters = n, kernel_size = (5,5),padding = 'Same', 
                  activation ='relu'))
  model.add(MaxPool2D(pool_size=(2,2)))
  model.add(Dropout(0.25)) #to avoid overfitting


  model.add(Conv2D(filters = m, kernel_size = (3,3),padding = 'Same', 
                  activation ='relu'))
  model.add(Conv2D(filters = m, kernel_size = (3,3),padding = 'Same', 
                  activation ='relu'))
  model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
  model.add(Dropout(0.5)) #to avoid overfitting


  model.add(Flatten())
  model.add(Dense(p, activation = "relu"))
  model.add(Dropout(0.7)) #to avoid overfitting

  model.add(Dense(10, activation = "softmax")) #10 possible outputs because it is a 10-class problem

  # Compile the model
  model.compile(optimizer = 'Adam' , loss = "sparse_categorical_crossentropy", metrics=["accuracy"])

  validation_acc = []

  for e in range(epochs):

      print('Epoch', e)
      batches = 0

      for x_batch, y_batch in datagen.flow(X_train, Y_train, batch_size=batch_size):

          x_train, x_val, y_train, y_val = train_test_split(x_batch, y_batch, test_size = 0.2, random_state=random_seed)
          history_CNN_adam_da = model.fit(X_train, Y_train, validation_data = (X_val, Y_val))
          validation_acc.append(history_CNN_adam_da.history['val_acc'])
          batches += 1

          if batches >= 1:
              # we need to break the loop by hand because
              # the generator loops indefinitely
              break
  return validation_acc[-1]

for f in first_part:
  for s in second_part:
    for t in third_part:
      result = CNN(f,s,t)
      print( "For the hyparameters: " + str(f) + '/' + str(s) + '/' + str(t) + ', we have: ' + str( result ) )
      RESULT.append("For the hyparameters: " + str(f) + '/' + str(s) + '/' + str(t) + ', we have: ' + str( result ))





Epoch 0
Train on 33600 samples, validate on 8400 samples
Epoch 1
Train on 33600 samples, validate on 8400 samples
Epoch 2
Train on 33600 samples, validate on 8400 samples
Epoch 3
Train on 33600 samples, validate on 8400 samples
Epoch 4
Train on 33600 samples, validate on 8400 samples
Epoch 5
Train on 33600 samples, validate on 8400 samples
Epoch 6
Train on 33600 samples, validate on 8400 samples
Epoch 7
Train on 33600 samples, validate on 8400 samples
For the hyparameters: 32/32/128, we have: [0.98833334]




Epoch 0
Train on 33600 samples, validate on 8400 samples
Epoch 1
Train on 33600 samples, validate on 8400 samples
Epoch 2
Train on 33600 samples, validate on 8400 samples
Epoch 3
Train on 33600 samples, validate on 8400 samples
Epoch 4
Train on 33600 samples, validate on 8400 samples
Epoch 5
Train on 33600 samples, validate on 8400 samples
Epoch 6
Train on 33600 samples, validate on 8400 samples
Epoch 7
Train on 33600 samples, validate on 8400 samples
For the hyparameters: 32/32/256, we have: [0.98964286]




Epoch 0
Train on 33600 samples, validate on 8400 samples
Epoch 1
Train on 33600 samples, validate on 8400 samples
Epoch 2
Train on 33600 samples, validate on 8400 samples
Epoch 3
Train on 33600 samples, validate on 8400 samples
Epoch 4
Train on 33600 samples, validate on 8400 samples
Epoch 5
Train on 33600 samples, validate on 8400 samples
Epoch 6
Train on 33600 samples, validate on 8400 samples
Epoch 7
Train on 33600 samples, validate on 8400 samples
For the hyparameters: 32/64/128, we have: [0.98892856]




Epoch 0
Train on 33600 samples, validate on 8400 samples
Epoch 1
Train on 33600 samples, validate on 8400 samples
Epoch 2
Train on 33600 samples, validate on 8400 samples
Epoch 3
Train on 33600 samples, validate on 8400 samples
Epoch 4
Train on 33600 samples, validate on 8400 samples
Epoch 5
Train on 33600 samples, validate on 8400 samples
Epoch 6
Train on 33600 samples, validate on 8400 samples
Epoch 7
Train on 33600 samples, validate on 8400 samples
For the hyparameters: 32/64/256, we have: [0.9897619]
Epoch 0
Train on 33600 samples, validate on 8400 samples
Epoch 1
Train on 33600 samples, validate on 8400 samples
Epoch 2
Train on 33600 samples, validate on 8400 samples
Epoch 3
Train on 33600 samples, validate on 8400 samples
Epoch 4
Train on 33600 samples, validate on 8400 samples
Epoch 5
Train on 33600 samples, validate on 8400 samples
Epoch 6
Train on 33600 samples, validate on 8400 samples
Epoch 7
Train on 33600 samples, validate on 8400 samples
For the hyparameters: 64/32/128, w

In [0]:
for RES in RESULT:
  print(RES)

For the hyparameters: 32/32/128, we have: [0.98833334]
For the hyparameters: 32/32/256, we have: [0.98964286]
For the hyparameters: 32/64/128, we have: [0.98892856]
For the hyparameters: 32/64/256, we have: [0.9897619]
For the hyparameters: 64/32/128, we have: [0.9888095]
For the hyparameters: 64/32/256, we have: [0.98904765]
For the hyparameters: 64/64/128, we have: [0.9909524]
For the hyparameters: 64/64/256, we have: [0.9907143]
