In [2]:
!pip install imblearn

Collecting imblearn
  Downloading https://files.pythonhosted.org/packages/81/a7/4179e6ebfd654bd0eac0b9c06125b8b4c96a9d0a8ff9e9507eb2a26d2d7e/imblearn-0.0-py2.py3-none-any.whl
Collecting imbalanced-learn (from imblearn)
[?25l  Downloading https://files.pythonhosted.org/packages/c5/ea/f027ceb21114abe8189a2804640b2d5dd49a7a271c4814695482c5bc94d8/imbalanced_learn-0.4.2-py3-none-any.whl (166kB)
[K    6% |██                              | 10kB 25.1MB/s eta 0:00:01[K    12% |████                            | 20kB 4.3MB/s eta 0:00:01[K    18% |██████                          | 30kB 6.1MB/s eta 0:00:01[K    24% |████████                        | 40kB 4.0MB/s eta 0:00:01[K    30% |█████████▉                      | 51kB 4.9MB/s eta 0:00:01[K    36% |███████████▉                    | 61kB 5.7MB/s eta 0:00:01[K    43% |█████████████▉                  | 71kB 6.5MB/s eta 0:00:01[K    49% |███████████████▉                | 81kB 7.2MB/s eta 0:00:01[K    55% |█████████████████▊       

In [3]:
%matplotlib inline

import pandas as pd
import numpy as np

from collections import Counter
from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

from keras.models import model_from_json

import warnings
warnings.filterwarnings("ignore")

import io
from google.colab import files

# Notebook-wide seed. It is applied to NumPy's global RNG here so that NumPy-driven
# randomness is repeatable after a kernel restart.
# NOTE(review): the Keras backend presumably keeps its own RNG (e.g. for weight
# initialisation); seed it separately if fully deterministic training is required.

seed = 42
np.random.seed(seed)

Using TensorFlow backend.


### Uploading training dataset

In [4]:
# Ask for a file through google.colab's upload helper; the result is indexed by
# file name when the CSV is parsed (uploaded['train.csv']).
uploaded = files.upload()

Saving train.csv to train.csv


In [0]:
# Parse the uploaded file: decode its bytes as UTF-8 and feed the text to pandas.
# When the CSV is already on disk, pd.read_csv('train.csv') does the same job.
df_train = pd.read_csv(
    io.StringIO(
        uploaded['train.csv'].decode('utf-8')
    )
)

### Splitting the dataset

In [6]:
# Separate the target column from the feature columns.
y_train = df_train['label']
X_train = df_train.drop(columns=['label'])

# The combined frame is no longer needed; drop the name to release memory.
del df_train

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)

Shape of X_train: (42000, 784)
Shape of y_train: (42000,)


### Normalizing the values of training and test

In [0]:
# Scale every feature value by 1/255.
# NOTE(review): this rebinds X_train, so running the cell twice divides twice.
X_train = X_train.div(255)

### Reshape the images in 3 dimensions to use with Keras

In [8]:
# Turn each 784-value row into a 28 x 28 x 1 array (height = 28px, width = 28px, channels = 1).
image_shape = (-1, 28, 28, 1)
X_train = X_train.values.reshape(image_shape)

print('Shape of X_train:', X_train.shape)

Shape of X_train: (42000, 28, 28, 1)


### Converting y values (labels) to categorical values

In [9]:
# One-hot encode the labels into 10 columns (one per class); the bare
# `y_train` on the last line displays the encoded array.

y_train = to_categorical(y_train, num_classes = 10)
y_train

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

### Function to create neural networks to be evaluated

In [0]:
def baseline_model(layers = 1, 
                   filter_l1 = 32, 
                   filter_l2 = 64, 
                   filter_l3 = 128,
                   activation_l1 = 'relu',
                   activation_l2 = 'relu',
                   activation_l3 = 'relu',
                   dense = 256, 
                   dropout_l1 = 0.25, 
                   dropout_l2 = 0.25, 
                   dropout_l3 = 0.4, 
                   batchNormalization = True,
                   optimizer = None):
    """Build and compile a three-group CNN for 28x28x1 inputs and 10 output classes.

    Parameters
    ----------
    layers : int
        Number of convolutions in each of the first two groups. One is always
        added; ``layers - 1`` more follow when ``layers >= 2``.
    filter_l1, filter_l2, filter_l3 : int
        Filter counts for the convolutions of groups one, two and three.
    activation_l1, activation_l2, activation_l3 : str
        Activation passed to the convolutions of the matching group.
    dense : int
        Units of the fully connected layer that precedes the softmax output.
    dropout_l1, dropout_l2, dropout_l3 : float
        Dropout rates applied after group one, after group two and after the
        dense layer.
    batchNormalization : bool
        When true, a BatchNormalization layer follows every convolution.
    optimizer : str or optimizer instance, optional
        Passed to ``compile``. ``None`` (the default) creates a fresh
        ``RMSprop(epsilon=1e-08)`` for this model.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    accuracy metric).
    """

    # Create the default optimizer per call. An instance placed in the signature
    # is built once, when the function is defined, and would then be shared by
    # every model created with the default.
    if optimizer is None:
        optimizer = RMSprop(epsilon=1e-08)

    baseline = Sequential()

    def add_conv(filters, kernel_size, activation, **conv_kwargs):
        # One 'Same'-padded convolution, optionally followed by batch normalization.
        baseline.add(Conv2D(filters = filters, kernel_size = kernel_size, padding = 'Same',
                            activation = activation, **conv_kwargs))
        if batchNormalization:
            baseline.add(BatchNormalization())

    # Convolutions stacked on top of the first one in groups one and two.
    extra_convs = layers - 1 if layers >= 2 else 0

    # First group
    #---------------------------------------------------------------------------------------------------

    add_conv(filter_l1, (5,5), activation_l1, input_shape = (28, 28, 1))
    for _ in range(extra_convs):
        add_conv(filter_l1, (5,5), activation_l1)

    baseline.add(MaxPool2D(pool_size=(2,2)))
    baseline.add(Dropout(dropout_l1))

    # Second group
    #---------------------------------------------------------------------------------------------------

    add_conv(filter_l2, (3,3), activation_l2)
    # NOTE(review): the stacked convolutions of this group use a 5x5 kernel while
    # the first one uses 3x3. Kept exactly as it was - confirm this is intended.
    for _ in range(extra_convs):
        add_conv(filter_l2, (5,5), activation_l2)

    baseline.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
    baseline.add(Dropout(dropout_l2))

    # Third group
    #---------------------------------------------------------------------------------------------------

    add_conv(filter_l3, (3,3), activation_l3)

    baseline.add(Flatten())
    baseline.add(Dense(dense, activation = "relu"))
    baseline.add(Dropout(dropout_l3))

    baseline.add(Dense(10, activation = "softmax"))

    # Compile with the chosen optimizer; accuracy is the reported metric.

    baseline.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

    return baseline

### Learning Rate

In [0]:
# Callback that multiplies the learning rate by `factor` (0.5, i.e. halves it) once the
# monitored 'val_acc' value has gone `patience` (3) epochs without improving, and never
# lowers it below `min_lr`.

lr_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=0,
)

### Data Augmentation

In [0]:
# The idea is to alter the training data with small transformations to reproduce the variations 
# occurring when someone is writing a digit. It's a way to minimize the overfitting of the model.

def data_augmentation(range = 10):
    """Return an ImageDataGenerator that applies small random rotations, zooms and shifts.

    Parameters
    ----------
    range : number
        Used directly as ``rotation_range`` (degrees) and, divided by 100, as
        ``zoom_range``, ``width_shift_range`` and ``height_shift_range``.

    Returns
    -------
    The configured ImageDataGenerator. Centering, std-normalization, ZCA
    whitening and flips are all disabled.
    """
    # `range` shadows the builtin inside this function; the parameter name is
    # kept so existing keyword callers (range=...) keep working.
    fraction = range / 100

    generator = ImageDataGenerator(featurewise_center = False,
                                   samplewise_center = False,
                                   featurewise_std_normalization = False,
                                   samplewise_std_normalization = False,
                                   zca_whitening = False,
                                   rotation_range = range, # Rotate image in 'rotation_range' degrees
                                   zoom_range = fraction, # Zoom image ('zoom_range'% zoom)
                                   width_shift_range = fraction, # Shift image horizontally ('width_shift_range'% of width)
                                   height_shift_range = fraction, # Shift image vertically ('height_shift_range'% of height)
                                   horizontal_flip = False,
                                   vertical_flip = False)

    # generator.fit(...) is intentionally not called: per the Keras documentation it is only
    # required when featurewise_center, featurewise_std_normalization or zca_whitening is
    # enabled, and all three are off here. Skipping it also removes the hidden dependency
    # on the notebook-level X_train.
    return generator

### Testing the models

In [0]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [0]:
# Training-length settings handed to the KerasClassifier wrapper:
# passes over the data per fit, and samples per gradient update.
epochs, batch_size = 1, 90

In [0]:
# Hold out 10% of the samples. random_state ties the shuffle to the notebook-level
# `seed`, so the same rows land in each part on every run.
# NOTE(review): the *_aux names are not referenced by the visible cells that follow
# (the grid search fits on the full X_train / y_train) - confirm which data is intended.
X_train_aux, X_test_aux, y_train_aux, y_test_aux = train_test_split(
    X_train, y_train, test_size = 0.1, random_state = seed)

In [14]:
################################################################################
# Test
################################################################################

# Candidate values for each keyword argument of baseline_model.
# NOTE(review): the full Cartesian product of these lists is 124,416 combinations,
# each of which is fitted once per cross-validation fold.

optimizer = ['SGD', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', RMSprop(epsilon=1e-08)]
layers = [1, 2, 3]
filter_l1 = [16, 32]
filter_l2 = [32, 64]
filter_l3 = [64, 128]
activation_l1 = ['relu', 'sigmoid']
activation_l2 = ['relu', 'sigmoid']
activation_l3 = ['relu', 'sigmoid']
dense = [128, 256]
dropout_l1 = [0.25, 0.4, 0.5]
dropout_l2 = [0.25, 0.4, 0.5]
dropout_l3 = [0.25, 0.4, 0.5]
batchNormalization = [True, False]

# Wrap the model factory so scikit-learn's search utilities can drive it.

model = KerasClassifier(build_fn=baseline_model, epochs=epochs, batch_size=batch_size, verbose=1)

# Search space, keyed by baseline_model argument name.

param_grid = {
    'optimizer': optimizer,
    'layers': layers,
    'filter_l1': filter_l1,
    'filter_l2': filter_l2,
    'filter_l3': filter_l3,
    'activation_l1': activation_l1,
    'activation_l2': activation_l2,
    'activation_l3': activation_l3,
    'dense': dense,
    'dropout_l1': dropout_l1,
    'dropout_l2': dropout_l2,
    'dropout_l3': dropout_l3,
    'batchNormalization': batchNormalization,
}

param_grid

{'activation_l1': ['relu', 'sigmoid', 'tanh'],
 'activation_l2': ['relu', 'sigmoid', 'tanh'],
 'activation_l3': ['relu', 'sigmoid', 'tanh'],
 'batchNormalization': [True, False],
 'dense': [128, 256],
 'dropout_l1': [0.25, 0.4, 0.5],
 'dropout_l2': [0.25, 0.4, 0.5],
 'dropout_l3': [0.25, 0.4, 0.5],
 'filter_l1': [16, 32],
 'filter_l2': [32, 64],
 'filter_l3': [64, 128],
 'layers': [1, 2, 3],
 'optimizer': ['SGD',
  'Adagrad',
  'Adadelta',
  'Adam',
  'Adamax',
  'Nadam',
  <keras.optimizers.RMSprop at 0x7faa32900160>]}

In [0]:
# Exhaustive search over param_grid, one job at a time; fit() evaluates every
# combination with cross-validation on the full training arrays.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=1,
)
grid_result = grid.fit(X_train, y_train)

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


In [0]:
# Report the best configuration first, then the mean and standard deviation of the
# cross-validated test score for every parameter combination tried.

rule = '-----------------------------------------------------------------------'

print(rule)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print(rule)

cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column] for column in ('mean_test_score', 'std_test_score', 'params')
)

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))