# Imports

In [0]:
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


# Fixing Random Seed

In [0]:
# Seed NumPy's global random generator; `seed` is also reused as the
# random_state of the cross-validation splitters in the cells below.
# NOTE(review): only NumPy is seeded here - no backend (TensorFlow) seed is set
# in the visible code, so runs may still differ slightly.
seed = 7
numpy.random.seed(seed)

# Loading Dataset

In [0]:
# Read the headerless CSV and keep its raw value array.
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.values
# Columns 0-59 become the float input matrix X; column 60 is the output Y.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

# Encoding labels

In [0]:
# Turn the class values in Y into integer codes (fit and transform in one call).
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# Dropout Regularization in Keras

---

## Baseline Model

In [0]:
# Baseline network for the Sonar dataset: no dropout, no weight constraint.
def create_baseline():
    """Build and compile the baseline 60-30-1 fully connected classifier.

    Returns:
        A compiled Sequential model with two ReLU hidden layers (60 and 30
        units) and one sigmoid output unit, using SGD (lr=0.01, momentum=0.8),
        binary cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(30, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the baseline: standardize inputs, train for 300 epochs, and report
# mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))


Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Baseline: 81.66% (6.89%)


## Dropout Layer added in the Visible layer with 40% Dropping Probability  
## Learning rate is 1
## No. of epochs are 600

In [0]:
# Dropout on the visible (input) layer, max-norm constraint on the hidden layers.
def create_model():
    """Build and compile the 60-30-1 classifier with 40% input dropout.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the visible-dropout model: standardize inputs, train for 600 epochs,
# and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=600, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible: 50.00% (3.60%)


# Tuned Visible Model 

---
## Dropout Probability has been decreased from 40% to 20%
## Learning Rate has been decreased from 1 to 0.1 
## No. of Epochs have been decreased from 600 to 300


In [0]:
# Tuned variant: 20% dropout on the visible (input) layer, max-norm on hidden layers.
def create_model():
    """Build and compile the 60-30-1 classifier with 20% input dropout.

    Returns:
        A compiled Sequential model: Dropout(0.2) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.2, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the tuned visible-dropout model: standardize inputs, train for 300
# epochs, and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible: 86.99% (6.88%)


# Hidden Dropout Model

---

## Dropout Probability is 20%
## Learning rate is 0.2
## No. of epochs are 600


In [0]:
# Dropout after each hidden layer (none on the input), with max-norm weight constraint.
def create_model():
    """Build and compile the 60-30-1 classifier with 20% hidden-layer dropout.

    Returns:
        A compiled Sequential model: two ReLU hidden layers (60 and 30 units)
        with a max-norm kernel constraint of 3, each followed by Dropout(0.2),
        and a sigmoid output. Compiled with SGD (lr=0.2, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.2, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the hidden-dropout model: standardize inputs, train for 600 epochs,
# and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=600, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Hidden: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Hidden: 51.05% (3.45%)


# Hidden_Tuned 

---

## Dropout Probability is kept unchanged i.e. 20%
## Learning rate has been decreased from 0.2 to 0.1
## No. of epochs have been decreased from 600 to 300

In [0]:
# Tuned hidden-dropout variant: dropout after each hidden layer, max-norm weight constraint.
def create_model():
    """Build and compile the 60-30-1 classifier with 20% hidden-layer dropout.

    Returns:
        A compiled Sequential model: two ReLU hidden layers (60 and 30 units)
        with a max-norm kernel constraint of 3, each followed by Dropout(0.2),
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the tuned hidden-dropout model: standardize inputs, train for 300
# epochs, and report mean/std accuracy over a shuffled stratified 10-fold split.
# (A stray trailing `1` expression that only produced a spurious cell result
# has been removed.)
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Hidden: 83.66% (5.28%)


# Original Paper

## With different Drop out Values

---

### 1st Model with 0.3 or 30% Dropping Probability
### 2nd Model with 0.4 or 40% Dropping Probability  
### 3rd Model with 0.5 or 50% Dropping Probability  

In [0]:
# Dropping probability 30% on the visible (input) layer, max-norm on hidden layers.
def create_model():
    """Build and compile the 60-30-1 classifier with 30% input dropout.

    Returns:
        A compiled Sequential model: Dropout(0.3) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.3, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the 30%-input-dropout model: standardize inputs, train for 300 epochs,
# and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible_Original: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible_Original: 81.68% (6.16%)


In [0]:
# Dropping probability 40% on the visible (input) layer - the best combination
# among the 30/40/50% runs - with max-norm on hidden layers.
def create_model():
    """Build and compile the 60-30-1 classifier with 40% input dropout.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the 40%-input-dropout model: standardize inputs, train for 300 epochs,
# and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible_Original: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible_Original: 88.45% (5.98%)


In [0]:
# Dropping probability 50% on the visible (input) layer, max-norm on hidden layers.
def create_model():
    """Build and compile the 60-30-1 classifier with 50% input dropout.

    Returns:
        A compiled Sequential model: Dropout(0.5) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.9), binary
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.5, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the 50%-input-dropout model: standardize inputs, train for 300 epochs,
# and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible_Original: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible_Original: 82.16% (5.44%)


# With larger Network

In [0]:
# Larger network: dropout after each hidden layer, max-norm weight constraint.
def create_model():
    """Build and compile a wider 512-256-128-1 classifier with hidden dropout.

    Returns:
        A compiled Sequential model: three ReLU hidden layers (512, 256 and 128
        units) with a max-norm kernel constraint of 3, followed by dropout of
        0.3, 0.2 and 0.2 respectively, and a sigmoid output. Compiled with SGD
        (lr=0.1, momentum=0.9), binary cross-entropy loss and the accuracy
        metric.
    """
    network = Sequential([
        Dense(512, input_dim=60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.3),
        Dense(256, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(128, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the larger network: standardize inputs, train for 300 epochs, and
# report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Larger_Network: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Larger_Network: 51.91% (3.05%)


# Both Visible and Hidden Dropout

In [0]:
# Dropout on both the visible (input) layer and the hidden layers, with max-norm constraint.
def create_model():
    """Build and compile the 60-30-1 classifier with input and hidden dropout.

    Returns:
        A compiled Sequential model: Dropout(0.2) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        each followed by Dropout(0.2), and a sigmoid output. Compiled with SGD
        (lr=0.1, momentum=0.9), binary cross-entropy loss and the accuracy
        metric.
    """
    network = Sequential([
        Dropout(0.2, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dropout(.2),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the visible+hidden dropout model: standardize inputs, train for 300
# epochs, and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Both Visible & Hidden: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Both Visible & Hidden: 87.02% (4.79%)


# Large Learning Rate with Decay and Large Momentum 

In [0]:
def create_model():
    """Build and compile the 40%-input-dropout classifier with an aggressive optimizer.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD using lr=10, momentum=0.99 and
        decay=10/300, binary cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    # Large learning rate and momentum, with a learning-rate decay term.
    optimizer = SGD(lr=10, momentum=0.99, decay=10/300, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the large-lr/large-momentum model: standardize inputs, train for 300
# epochs, and report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Large Lr & Momentum: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Large Lr & Momentum: 50.91% (3.48%)


# Constrained the size of the network weights 

In [0]:
def create_model():
    """Build and compile the 40%-input-dropout classifier with max-norm 4.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 4
        (no dropout between hidden layers), and a sigmoid output. Compiled with
        SGD using lr=10, momentum=0.99 and decay=10/300, binary cross-entropy
        loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(4)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(4)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=10, momentum=0.99, decay=10/300, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the max-norm-4 model: standardize inputs, train for 300 epochs, and
# report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Maxnorm 4: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Maxnorm 4: 50.43% (3.58%)


In [0]:
def create_model():
    """Build and compile the 40%-input-dropout classifier with max-norm 5.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 5
        (no dropout between hidden layers), and a sigmoid output. Compiled with
        SGD using lr=10, momentum=0.99 and decay=10/300, binary cross-entropy
        loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(5)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(5)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=10, momentum=0.99, decay=10/300, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the max-norm-5 model: standardize inputs, train for 300 epochs, and
# report mean/std accuracy over a shuffled stratified 10-fold split.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Maxnorm 5: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Maxnorm 5: 50.43% (3.58%)


In [0]:
# Single-hidden-layer network: no dropout and no weight constraint.
def create_model():
    """Build and compile a plain 60-1 classifier with one hidden layer.

    Returns:
        A compiled Sequential model: one ReLU hidden layer of 60 units on the
        60 inputs and a sigmoid output. Compiled with SGD (lr=0.1,
        momentum=0.8, decay=0.002), binary cross-entropy loss and the accuracy
        metric.
    """
    network = Sequential([
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.8, decay=0.002, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the single-hidden-layer model: standardize inputs, train for 50 epochs
# with batch size 28, and report mean/std accuracy over a shuffled stratified
# 33-fold split (note: a different fold count from the 10-fold runs above).
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=50, batch_size=28, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=33, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Model: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Model: 88.17% (10.80%)


# The Best Model

---

### Dropout Probability is 40%
### Learning rate is 0.1
### Momentum is 0.8
### Decay is 0.002

In [0]:
# Dropping probability 40% on the visible (input) layer, max-norm on hidden
# layers, with learning-rate decay - labelled the best combination.
def create_model():
    """Build and compile the 60-30-1 classifier with 40% input dropout and decay.

    Returns:
        A compiled Sequential model: Dropout(0.4) on the 60 inputs, two ReLU
        hidden layers (60 and 30 units) with a max-norm kernel constraint of 3,
        and a sigmoid output. Compiled with SGD (lr=0.1, momentum=0.8,
        decay=0.002), binary cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Dropout(0.4, input_shape=(60,)),
        Dense(60, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(30, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ])
    optimizer = SGD(lr=0.1, momentum=0.8, decay=0.002, nesterov=False)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# Score the "best" model: standardize inputs, train for 50 epochs with batch
# size 28, and report mean/std accuracy over a shuffled stratified 33-fold
# split (note: a different fold count from the 10-fold runs above).
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model, epochs=50, batch_size=28, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=33, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible_Original: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))

Visible_Original: 89.34% (11.03%)
