# Model performance on Dropout 

* Dropout is a technique where randomly selected neurons are ignored during training. They are “dropped out” at random. This means that their contribution to the activation of downstream neurons is temporarily removed on the forward pass, and no weight updates are applied to those neurons on the backward pass.

**Dropout Rate**

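In the original dropout formulation, the dropout hyperparameter is the probability of *retaining* (training) a given node in a layer, where 1.0 means no dropout and 0.0 means no outputs from the layer. Note that the Keras `Dropout(rate=...)` layer used in this notebook takes the opposite convention: `rate` is the fraction of units to *drop*, so `rate=0.0` means no dropout and `rate=0.5` drops half of the units.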

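Expressed as a retention probability, a good value for a hidden layer is between 0.5 and 0.8, and input layers use a larger retention probability, such as 0.8. In terms of the Keras `rate` argument (the fraction of units dropped), this corresponds to roughly 0.2–0.5 for hidden layers and about 0.2 for input layers.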

In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten, BatchNormalization
from keras.layers import Dense, Dropout
from keras import regularizers
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
import keras


def load_train_data(n):
    """Read one pickled training batch from the current working directory.

    Args:
        n: Batch identifier; it is converted with ``str`` and appended to
            ``'data_batch_'`` to form the file name.

    Returns:
        A ``(features, Target)`` tuple taken from the ``'data'`` and
        ``'labels'`` entries of the unpickled object.
    """
    batch_path = 'data_batch_' + str(n)
    # NOTE(review): pickle.load can execute arbitrary code -- only point this
    # at batch files obtained from a trusted source.
    # encoding='latin1' is presumably needed because the batches were pickled
    # under Python 2 -- TODO confirm against the dataset's documentation.
    with open(batch_path, 'rb') as handle:
        contents = pickle.load(handle, encoding='latin1')

    return contents['data'], contents['labels']


# Read the five training batches in one pass; every call yields a
# (features, labels) pair, unpacked into the same names as before.
(
    (batch_1, Target_1),
    (batch_2, Target_2),
    (batch_3, Target_3),
    (batch_4, Target_4),
    (batch_5, Target_5),
) = [load_train_data(batch_no) for batch_no in range(1, 6)]

# The test batch lives in its own file and is read directly.
# NOTE(review): pickle.load can execute arbitrary code -- trusted files only.
with open('test_batch', 'rb') as file:
    batch = pickle.load(file, encoding='latin1')
X_test, y_test = batch['data'], batch['labels']

# Stack the per-batch arrays along the sample axis with a single
# concatenation each (same result as appending them pairwise in order).
X_train = np.concatenate((batch_1, batch_2, batch_3, batch_4, batch_5), axis=0)
y_train = np.concatenate((Target_1, Target_2, Target_3, Target_4, Target_5), axis=0)

# Each flat row is viewed as (3, 32, 32) and then moved to channels-last
# (32, 32, 3), which is the input_shape the models below declare.
# Dividing by 255.0 rescales the float32 values; this assumes the raw values
# are 0-255 intensities -- TODO confirm.
X_train = X_train.reshape((len(X_train), 3, 32, 32)).transpose(0, 2, 3, 1)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.reshape((len(X_test), 3, 32, 32)).transpose(0, 2, 3, 1)
X_test = X_test.astype('float32') / 255.0

# One-hot encode the integer labels over 10 classes.
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

Using TensorFlow backend.


## Model 11
Dropout rate - 0.5

In [3]:
# Model 11: five 3x3, 64-filter ReLU convolutions (he_normal init, L2 penalty
# 0.001, 'same' padding) grouped 2-2-1 with a 2x2 max-pool after each group,
# then Flatten -> Dense(128, relu) -> Dropout(0.5) -> Dense(10, softmax).
# The layers are listed declaratively in the same order they were added before.
model11 = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal',
           kernel_regularizer=regularizers.l2(0.001),
           input_shape=(32, 32, 3)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal',
           kernel_regularizer=regularizers.l2(0.001)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal',
           kernel_regularizer=regularizers.l2(0.001)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal',
           kernel_regularizer=regularizers.l2(0.001)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal',
           kernel_regularizer=regularizers.l2(0.001)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation='relu'),
    # rate=0.5: half of the dense activations are dropped during training.
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
])
model11.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 16, 16, 64)        36928     
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 8, 8, 64)          36928     
__________

In [4]:
# Train model 11 with SGD + momentum for a fixed number of epochs.
epochs = 10
# The decay argument is set to the initial learning rate divided by the epoch count.
sgd = SGD(lr=1e-2, momentum=0.9, decay=1e-2/epochs)
model11.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Keep the History returned by fit() so the per-epoch loss/accuracy remain
# available for later inspection (e.g. to back up overfitting observations),
# rather than discarding it and echoing its repr as the cell output.
history11 = model11.fit(X_train, y_train, epochs=epochs, batch_size=32)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2699772ae48>

In [6]:
# Score model 11 on the test arrays. evaluate() hands back two values here,
# unpacked as the loss and the 'accuracy' metric requested at compile time.
test_loss, test_acc = model11.evaluate(x=X_test, y=y_test)
# Bare last expression so the notebook displays the test accuracy.
test_acc



0.7262

# Observation
In model 11, dropout reduces overfitting.

# Model 12
Adds one more Conv2D + MaxPooling2D block and removes the `kernel_regularizer`.

In [7]:
# Model 12: six 3x3, 64-filter ReLU convolutions (he_normal init, 'same'
# padding, no kernel regularizer) grouped 2-2-1-1 with a 2x2 max-pool after
# each group, then Flatten -> Dense(128, relu) -> Dropout(0.5) -> Dense(10, softmax).
# The layers are listed declaratively in the same order they were added before.
model12 = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal', input_shape=(32, 32, 3)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation='relu'),
    # rate=0.5: half of the dense activations are dropped during training.
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
])
model12.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 8, 8, 64)          36928     
__________

In [8]:
# Train model 12 with SGD + momentum for a fixed number of epochs.
epochs = 10
# The decay argument is set to the initial learning rate divided by the epoch count.
sgd = SGD(lr=1e-2, momentum=0.9, decay=1e-2/epochs)
model12.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Keep the History returned by fit() so the per-epoch loss/accuracy remain
# available for later inspection, rather than discarding it and echoing its
# repr as the cell output.
history12 = model12.fit(X_train, y_train, epochs=epochs, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2699772a748>

In [9]:
# Score model 12 on the test arrays. evaluate() hands back two values here,
# unpacked as the loss and the 'accuracy' metric requested at compile time.
test_loss, test_acc = model12.evaluate(x=X_test, y=y_test)
# Bare last expression so the notebook displays the test accuracy.
test_acc



0.7259

# Observation 

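Adding one more Conv2D + MaxPooling2D block (and removing the L2 kernel regularizer) left the test accuracy essentially unchanged over the same 10 epochs: 0.7259 for model 12 versus 0.7262 for model 11. The 3 % increase originally noted here is not reflected in the outputs recorded in this notebook.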

# Model 13
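Dropout rate 0.6. Note that this model also adds one more Conv2D + MaxPooling2D block than model 12 (seven convolutions and five pooling layers in total), so the dropout rate is not the only change.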

In [11]:
# Model 13: seven 3x3, 64-filter ReLU convolutions (he_normal init, 'same'
# padding, no kernel regularizer) grouped 2-2-1-1-1 with a 2x2 max-pool after
# each group, then Flatten -> Dense(128, relu) -> Dropout(0.6) -> Dense(10, softmax).
# NOTE(review): with five 2x2 pools the 32x32 input is presumably reduced to
# 1x1 before Flatten -- confirm against the full model summary.
model13 = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal', input_shape=(32, 32, 3)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
           kernel_initializer='he_normal'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=128, activation='relu'),
    # rate=0.6: 60 % of the dense activations are dropped during training.
    Dropout(rate=0.6),
    Dense(units=10, activation='softmax'),
])
model13.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_24 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 8, 8, 64)          36928     
__________

In [12]:
# Train model 13 with SGD + momentum for a fixed number of epochs.
epochs = 10
# The decay argument is set to the initial learning rate divided by the epoch count.
sgd = SGD(lr=1e-2, momentum=0.9, decay=1e-2/epochs)
model13.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Keep the History returned by fit() so the per-epoch loss/accuracy remain
# available for later inspection, rather than discarding it and echoing its
# repr as the cell output.
history13 = model13.fit(X_train, y_train, epochs=epochs, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x269d9c4d5f8>

In [13]:
# Score model 13 on the test arrays. evaluate() hands back two values here,
# unpacked as the loss and the 'accuracy' metric requested at compile time.
test_loss, test_acc = model13.evaluate(x=X_test, y=y_test)
# Bare last expression so the notebook displays the test accuracy.
test_acc



0.7357

# Observation
Model 13 gives a higher test accuracy (0.7357) than the other models.
Next, we will tune hyperparameters such as batch size, optimizer, and loss.