<a href="https://colab.research.google.com/github/danidavid/Tensorflow-Keras-Repo/blob/master/Dropout_regularizaion_Project_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing dataset

In [0]:
import pandas as pd 
import io
import requests

#This will be used for online dataset import
url='https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'

s=requests.get(url).content

df=pd.read_csv(io.StringIO(s.decode('UTF-8')),header=None)


# Step 3: Dropout Regularization in Keras

## Importing library

In [0]:
import pandas
import numpy
import io
import requests
import pandas as pd
import numpy as np
import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [0]:
# Baseline Model on the Sonar Dataset
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD


In [0]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# To convert R and M into integer values 0 and 1

In [0]:
# split into input (X) and output (Y) variables
dataset=df.values
X=dataset[:,0:60].astype(float)
Y=dataset[:,60]

In [0]:
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# Starting Keras Models

# Step 3: Dropout Regularization in Keras

In [0]:
# baseline
def create_baseline():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(30, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	sgd = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model


In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Baseline: 82.14% (7.30%)


#Step 4: Using Dropout on the Visible Layer

In [0]:
# baseline
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(3)))
	model.add(Dense(30, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
	sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model



In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Visible: 81.16% (6.88%)


# Step 5: Trying to Improve Performance

In [0]:
# baseline
def create_model_improved():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(3)))
	model.add(Dense(30, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
	sgd = SGD(lr=0.07, momentum=0.9, decay=0.0, nesterov=False)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_improved, epochs=500, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Improved: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Improved: 86.00% (6.32%)


# Step 6: Using Dropout on Hidden Layers

In [0]:
# baseline
def create_model_hidden():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(30, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(3)))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
	sgd = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_hidden, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Hidden: 84.06% (5.50%)


# Step 7: Trying to Improve Performance

In [0]:
# baseline
def create_model_hidden_improved():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(30, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(3)))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
	sgd = SGD(lr=0.07, momentum=0.9, decay=0.0, nesterov=False)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_hidden_improved, epochs=500, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Improved: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Improved: 85.56% (7.11%)


# Step 8: Tips For Using Dropout

# Step 8.1: Try Different Dropout values

In [0]:
# baseline
def create_model_dropout():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.3))
  model.add(Dense(30, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.1))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.07, momentum=0.9, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_dropout, epochs=500, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 84.16% (8.01%)


# Step 8.2: Try using a Larger network

In [0]:
# baseline
def create_model_large():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
  #model.add(keras.layers.Dropout(0.3))
  model.add(Dense(30, kernel_initializer='normal', activation='relu'))
  model.add(Dense(15, kernel_initializer='normal', activation='relu'))
  model.add(Dense(5, kernel_initializer='normal', activation='relu'))
  #model.add(keras.layers.Dropout(0.1))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.07, momentum=0.9, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_model_large, epochs=500, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 79.32% (11.09%)


# Step 8.3: Try using Dropout on both visible and hidden units

In [0]:
# baseline
def dropout_visible_hidden():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.0015))
  model.add(Dense(30, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.0030))
  model.add(Dense(15, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.02))
  model.add(Dense(5, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.01))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.07, momentum=0.9, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=dropout_visible_hidden, epochs=500, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 78.30% (13.31%)


# Step 8.4: Try using large learning rate with decay and larger momentum

In [0]:
# baseline
def large_lr_momentum():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.2))
  model.add(Dense(30, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.15)) 
  model.add(Dense(15, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.1))
  model.add(Dense(5, kernel_initializer='normal', activation='relu'))
  model.add(keras.layers.Dropout(0.1))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=7, momentum=0.9, decay=0.2, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=large_lr_momentum, epochs=600, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 53.38% (1.23%)


# Step 8.5: Try constraining the size of the network weights

In [0]:
# baseline
def constraint_weights():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(4)))
  model.add(Dense(30, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(4))) 
  model.add(Dense(15, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(5)))
  model.add(Dense(5, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(5)))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.7, momentum=0.9, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=constraint_weights, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 51.43% (3.30%)


# Step 9: Read More Resources on Dropout, and try implementing them

In [0]:
# baseline
def dropout_implement():
	# create model
  model = Sequential()
  model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(4)))
  model.add(keras.layers.Dropout(0.3))
  model.add(Dense(30, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(4))) 
  model.add(keras.layers.Dropout(0.5))
  model.add(Dense(15, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(5)))
  model.add(keras.layers.Dropout(0.7))
  model.add(Dense(5, kernel_initializer='normal', activation='relu',kernel_constraint=maxnorm(5)))
  model.add(keras.layers.Dropout(0.1))
  model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
  # Compile model
  sgd = SGD(lr=0.7, momentum=0.9, decay=0.0, nesterov=False)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model

In [0]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=dropout_implement, epochs=300, batch_size=16, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Result: 50.00% (3.60%)
