# 16 Reduce Overfitting With Dropout Regularization

## 16.2 Dropout Regularization in Keras

In [1]:
# Baseline Model on the Sonar Dataset
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# baseline
def create_baseline():
  # create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
  model.add(Dense(30, init='normal', activation='relu'))
  model.add(Dense(1, init='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

Using TensorFlow backend.


In [3]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, nb_epoch=300,
    batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Baseline: 65.03% (8.60%)



Baseline: 65.03% (8.60%)


## 16.3 Using Dropout on the Visible Layer

In [8]:
# dropout in the input layer with weight constraint
def create_model():
  # create model
  model = Sequential()
  model.add(Dropout(0.2, input_shape=(60,)))
  #V1 model.add(Dense(60, init='normal', activation='relu', W_constraint=maxnorm(3)))
  model.add(Dense(60, init='normal', activation='relu', kernel_constraint=maxnorm(3))) #V2
  #V1 model.add(Dense(30, init='normal', activation='relu', W_constraint=maxnorm(3))
  model.add(Dense(30, init='normal', activation='relu', kernel_constraint=maxnorm(3))) #V2
  model.add(Dense(1, init='normal', activation='sigmoid'))
  # Compile model
  # OLD sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
  sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False) # NEW
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])    
  return model

numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, nb_epoch=300,
    batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Visible: 86.06% (6.60%)

  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  import sys
  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.


Visible: 86.06% (6.60%)


## 16.4 Using Dropout on Hidden Layers

In [10]:
# dropout in hidden layers with weight constraint
def create_model():
  # create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, init='normal', activation='relu',
      W_constraint=maxnorm(3)))
  model.add(Dropout(0.2))
  model.add(Dense(30, init='normal', activation='relu', W_constraint=maxnorm(3)))
  model.add(Dropout(0.2))
  model.add(Dense(1, init='normal', activation='sigmoid'))
  # Compile model
  # OLD sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
  sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False) # NEW
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])    
  return model

numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, nb_epoch=300,
    batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Hidden: 85.61% (4.65%)
# Hidden: 83.11% (6.66%)

  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.
  
  
  # Remove the CWD from sys.path while we load stuff.


Hidden: 83.11% (6.66%)
