In [None]:
%matplotlib inline

import pandas as pd
import numpy as np

from collections import Counter
from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.models import model_from_json

import warnings
# Silence every Python warning so that library notices do not clutter the cell output.
warnings.filterwarnings("ignore")

# Seed shared by the notebook. It is applied to NumPy's global generator right away,
# so code that draws from that generator is repeatable after a kernel restart.
# NOTE(review): the deep-learning backend may keep its own generator that this does
# not cover - confirm for the backend in use.

seed = 42
np.random.seed(seed)

In [None]:
# Read the training CSV (path is relative to the notebook's working directory).
df_train = pd.read_csv(filepath_or_buffer = '../input/train.csv')

### Splitting the dataset

In [None]:
# Separate the target column ('label') from the remaining feature columns.
y_train = df_train['label']
X_train = df_train.drop(labels = ['label'], axis = 'columns')

# The combined frame is not needed any more; release the name.
del df_train

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)

### Normalizing the pixel values of the training set

In [None]:
# Scale every feature value by 1/255.
X_train = X_train.div(255)

### Reshape the images in 3 dimensions to use with Keras

In [None]:
# Turn each flat row into an image tensor: (samples, height = 28px, width = 28px, channels = 1)
X_train = X_train.values.reshape((-1, 28, 28, 1))
print('Shape of X_train:', X_train.shape)

### Converting y values (labels) to categorical values

In [None]:
# One-hot encode the labels into 10 classes, then display the encoded array.

y_train = to_categorical(y = y_train, num_classes = 10)
y_train

### Function to create the neural networks to be evaluated

In [None]:
def baseline_model(layers = 1,
                   filter_l1 = 32,
                   filter_l2 = 64,
                   filter_l3 = 128,
                   kernel_size_l1 = 5,
                   kernel_size_l2 = 3,
                   kernel_size_l3 = 3,
                   pool_size_l1 = 2,
                   pool_size_l2 = 2,
                   activation_l1 = 'relu',
                   activation_l2 = 'relu',
                   activation_l3 = 'relu',
                   optimizer = 'Adamax',
                   dense = 256,
                   dropout_l1 = 0.25,
                   dropout_l2 = 0.25,
                   dropout_l3 = 0.4,
                   batchNormalization = True):
    """Build and compile a three-stage convolutional classifier for 28x28x1 inputs.

    Stages 1 and 2 each stack convolutions followed by max-pooling and dropout.
    Stage 3 is a single convolution feeding a flatten / dense / dropout head that
    ends in a 10-way softmax.

    Args:
        layers: number of convolutions in each of stages 1 and 2. Values below 2
            give a single convolution per stage.
        filter_l1, filter_l2, filter_l3: filter count of the convolutions in
            stages 1, 2 and 3.
        kernel_size_l1, kernel_size_l2, kernel_size_l3: side length of the square
            kernel used in stages 1, 2 and 3.
        pool_size_l1, pool_size_l2: side length of the square max-pooling window
            closing stages 1 and 2 (stage 2 also uses it as the stride).
        activation_l1, activation_l2, activation_l3: activation of the
            convolutions in stages 1, 2 and 3.
        optimizer: optimizer handed to ``compile``.
        dense: unit count of the hidden dense layer (always 'relu').
        dropout_l1, dropout_l2, dropout_l3: dropout rate after stage 1, after
            stage 2 and after the hidden dense layer.
        batchNormalization: when truthy, a BatchNormalization layer follows every
            convolution.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, accuracy metric).
    """

    # Convolutions stacked on top of the first one in stages 1 and 2.
    extra_convs = layers - 1 if layers >= 2 else 0

    net = Sequential()

    def add_conv(n_filters, side, act, **conv_kwargs):
        # One square-kernel convolution, optionally followed by batch normalisation.
        net.add(Conv2D(filters = n_filters,
                       kernel_size = (side, side),
                       padding = 'Same',
                       activation = act,
                       **conv_kwargs))
        if batchNormalization:
            net.add(BatchNormalization())

    # Stage 1 - only the very first layer declares the input shape.
    add_conv(filter_l1, kernel_size_l1, activation_l1, input_shape = (28, 28, 1))
    for _ in range(extra_convs):
        add_conv(filter_l1, kernel_size_l1, activation_l1)
    net.add(MaxPool2D(pool_size = (pool_size_l1, pool_size_l1)))
    net.add(Dropout(dropout_l1))

    # Stage 2 - same layout; the pooling layer sets its stride explicitly.
    add_conv(filter_l2, kernel_size_l2, activation_l2)
    for _ in range(extra_convs):
        add_conv(filter_l2, kernel_size_l2, activation_l2)
    net.add(MaxPool2D(pool_size = (pool_size_l2, pool_size_l2),
                      strides = (pool_size_l2, pool_size_l2)))
    net.add(Dropout(dropout_l2))

    # Stage 3 - a single convolution, no pooling.
    add_conv(filter_l3, kernel_size_l3, activation_l3)

    # Classification head.
    net.add(Flatten())
    net.add(Dense(dense, activation = "relu"))
    net.add(Dropout(dropout_l3))
    net.add(Dense(10, activation = "softmax"))

    # Compile with the requested optimizer; performance is tracked by accuracy.
    net.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics = ["accuracy"])

    return net

### Testing the models

In [None]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Training settings handed to every model fitted during the searches.
epochs, batch_size = 1, 90

In [None]:
# Hold out 10% of the samples. random_state pins the shuffle to the notebook seed so
# the same split is produced every time this cell runs.
# NOTE(review): the grid-search cells visible in this notebook fit on the full
# X_train / y_train and never read these *_aux variables - confirm whether the
# hold-out set is meant to be used.
X_train_aux, X_test_aux, y_train_aux, y_test_aux = train_test_split(
    X_train, y_train, test_size = 0.1, random_state = seed)

In [None]:
################################################################################
# Grid search, round 1: network depth and filter counts
################################################################################

# scikit-learn compatible wrapper around baseline_model.
model = KerasClassifier(build_fn = baseline_model,
                        epochs = epochs,
                        batch_size = batch_size,
                        verbose = 1)

# Hyper-parameters explored in this round

layers = [1, 2, 3]
filter_l1 = [16, 32, 64]
filter_l2 = [32, 64, 128]
filter_l3 = [64, 128, 256]

# Everything else is held at the baseline_model defaults (single-value lists)

kernel_size_l1, kernel_size_l2, kernel_size_l3 = [5], [3], [3]
activation_l1, activation_l2, activation_l3 = ['relu'], ['relu'], ['relu']
pool_size_l1, pool_size_l2 = [2], [2]
optimizer = ['Adamax']
dense = [256]
dropout_l1, dropout_l2, dropout_l3 = [0.25], [0.25], [0.4]
batchNormalization = [True]

# Assemble the grid for the first search

param_grid = {
    'layers': layers,
    'filter_l1': filter_l1,
    'filter_l2': filter_l2,
    'filter_l3': filter_l3,
    'kernel_size_l1': kernel_size_l1,
    'kernel_size_l2': kernel_size_l2,
    'kernel_size_l3': kernel_size_l3,
    'pool_size_l1': pool_size_l1,
    'pool_size_l2': pool_size_l2,
    'activation_l1': activation_l1,
    'activation_l2': activation_l2,
    'activation_l3': activation_l3,
    'optimizer': optimizer,
    'dense': dense,
    'dropout_l1': dropout_l1,
    'dropout_l2': dropout_l2,
    'dropout_l3': dropout_l3,
    'batchNormalization': batchNormalization,
}

print('Parameters: ', param_grid)

In [None]:
# Exhaustive search over param_grid in a single process (n_jobs = 1).
grid = GridSearchCV(model, param_grid, n_jobs = 1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best score of the search and the parameter combination that reached it

print('-----------------------------------------------------------------------',
      "Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_),
      '-----------------------------------------------------------------------',
      sep = '\n')

In [None]:
# Pin the winners of the first search as single-value lists for the following rounds

layers, filter_l1, filter_l2, filter_l3 = (
    [grid_result.best_params_[key]]
    for key in ('layers', 'filter_l1', 'filter_l2', 'filter_l3')
)

In [None]:
# Grid search, round 2: kernel sizes and activations

# Hyper-parameters explored in this round

kernel_size_l1, kernel_size_l2, kernel_size_l3 = [3, 4, 5], [3, 4, 5], [3, 4, 5]
activation_l1 = ['relu', 'sigmoid']
activation_l2 = ['relu', 'sigmoid']
activation_l3 = ['relu', 'sigmoid']

# Held at the baseline_model defaults (single-value lists)

pool_size_l1, pool_size_l2 = [2], [2]
optimizer = ['Adamax']
dense = [256]
dropout_l1, dropout_l2, dropout_l3 = [0.25], [0.25], [0.4]
batchNormalization = [True]

# Assemble the grid for the second search; layers and filter_l* keep whatever
# values they currently hold.

param_grid = {
    'layers': layers,
    'filter_l1': filter_l1,
    'filter_l2': filter_l2,
    'filter_l3': filter_l3,
    'kernel_size_l1': kernel_size_l1,
    'kernel_size_l2': kernel_size_l2,
    'kernel_size_l3': kernel_size_l3,
    'pool_size_l1': pool_size_l1,
    'pool_size_l2': pool_size_l2,
    'activation_l1': activation_l1,
    'activation_l2': activation_l2,
    'activation_l3': activation_l3,
    'optimizer': optimizer,
    'dense': dense,
    'dropout_l1': dropout_l1,
    'dropout_l2': dropout_l2,
    'dropout_l3': dropout_l3,
    'batchNormalization': batchNormalization,
}

print('Parameters: ', param_grid)

In [None]:
# Exhaustive search over the second param_grid in a single process (n_jobs = 1).
grid = GridSearchCV(model, param_grid, n_jobs = 1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best score of the second search and the parameters that reached it

print('-----------------------------------------------------------------------',
      "Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_),
      '-----------------------------------------------------------------------',
      sep = '\n')

In [None]:
# Pin the winners of the second search (kernel sizes and activations) as
# single-value lists, so later searches no longer vary them.
# Each variable must be paired with the best_params_ key of the same name:
# kernel_size_l2 in particular has to be pinned here, otherwise it keeps the
# full [3, 4, 5] candidate list and is searched again in the later rounds.

kernel_size_l1 = [grid_result.best_params_['kernel_size_l1']]
kernel_size_l2 = [grid_result.best_params_['kernel_size_l2']]
kernel_size_l3 = [grid_result.best_params_['kernel_size_l3']]
activation_l1 = [grid_result.best_params_['activation_l1']]
activation_l2 = [grid_result.best_params_['activation_l2']]
activation_l3 = [grid_result.best_params_['activation_l3']]

In [None]:
# Grid search, round 3: pooling window sizes and dense layer width

# Hyper-parameters explored in this round

pool_size_l1, pool_size_l2 = [2, 3, 4], [2, 3, 4]
dense = [128, 256]

# Held at the baseline_model defaults (single-value lists)

optimizer = ['Adamax']
dropout_l1, dropout_l2, dropout_l3 = [0.25], [0.25], [0.4]
batchNormalization = [True]

# Assemble the grid for the third search; every name not assigned above keeps
# whatever value it currently holds.

param_grid = {
    'layers': layers,
    'filter_l1': filter_l1,
    'filter_l2': filter_l2,
    'filter_l3': filter_l3,
    'kernel_size_l1': kernel_size_l1,
    'kernel_size_l2': kernel_size_l2,
    'kernel_size_l3': kernel_size_l3,
    'pool_size_l1': pool_size_l1,
    'pool_size_l2': pool_size_l2,
    'activation_l1': activation_l1,
    'activation_l2': activation_l2,
    'activation_l3': activation_l3,
    'optimizer': optimizer,
    'dense': dense,
    'dropout_l1': dropout_l1,
    'dropout_l2': dropout_l2,
    'dropout_l3': dropout_l3,
    'batchNormalization': batchNormalization,
}

print('Parameters: ', param_grid)

In [None]:
# Exhaustive search over the third param_grid in a single process (n_jobs = 1).
grid = GridSearchCV(model, param_grid, n_jobs = 1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best score of the third search and the parameters that reached it

print('-----------------------------------------------------------------------',
      "Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_),
      '-----------------------------------------------------------------------',
      sep = '\n')

In [None]:
# Pin the winners of the third search as single-value lists for the final round

pool_size_l1, pool_size_l2, dense = (
    [grid_result.best_params_[key]]
    for key in ('pool_size_l1', 'pool_size_l2', 'dense')
)

In [None]:
# Grid search, round 4: optimizer and dropout rates

# Fourth group to be evaluated

optimizer = ['Adamax', 'RMSProp']
dropout_l1, dropout_l2, dropout_l3 = [0.25, 0.4, 0.5], [0.25, 0.4, 0.5], [0.25, 0.4, 0.5]
batchNormalization = [True]

# Assemble the grid for the fourth search; every name not assigned above keeps
# whatever value it currently holds.

param_grid = {
    'layers': layers,
    'filter_l1': filter_l1,
    'filter_l2': filter_l2,
    'filter_l3': filter_l3,
    'kernel_size_l1': kernel_size_l1,
    'kernel_size_l2': kernel_size_l2,
    'kernel_size_l3': kernel_size_l3,
    'pool_size_l1': pool_size_l1,
    'pool_size_l2': pool_size_l2,
    'activation_l1': activation_l1,
    'activation_l2': activation_l2,
    'activation_l3': activation_l3,
    'optimizer': optimizer,
    'dense': dense,
    'dropout_l1': dropout_l1,
    'dropout_l2': dropout_l2,
    'dropout_l3': dropout_l3,
    'batchNormalization': batchNormalization,
}

print('Parameters: ', param_grid)

In [None]:
# Exhaustive search over the fourth param_grid in a single process (n_jobs = 1).
grid = GridSearchCV(model, param_grid, n_jobs = 1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best score of the fourth search and the parameters that reached it

print('-----------------------------------------------------------------------',
      "Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_),
      '-----------------------------------------------------------------------',
      sep = '\n')