## Considerations for using Dropout:
- Use a dropout rate of around 20%-50%: a rate that is too low has little effect, while one that is too high leads to under-learning
- Use a larger network
- Try using dropout on the input layer as well as on the hidden layers; applying it between each pair of hidden layers has been shown to be beneficial
- Use a larger learning rate (increased by a factor of 10-100) together with a large momentum (0.9-0.99)
- Constrain the size of the weights (e.g. a max-norm of around 4-5) to avoid very large weights

In [12]:
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.constraints import maxnorm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [3]:
# Load the sonar data; the CSV file has no header row.
dataframe = read_csv("sonar.csv", header=None)
dataset = dataframe.values

# The first 60 columns are the input features (cast to float);
# column 60 holds the class label that is encoded later.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

In [4]:
# Map the class labels in Y to integer codes (fit and transform in one step).
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(Y)

In [16]:
def create_baseline():
    """Build and compile the reference network without any dropout.

    Architecture: 60 inputs -> Dense(60, relu) -> Dense(30, relu)
    -> Dense(1, sigmoid). Compiled with SGD (lr=0.01, momentum=0.8),
    binary cross-entropy loss and the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(60, input_dim=60, activation="relu"),
        Dense(30, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(layers)

    optimizer = SGD(lr=0.01, momentum=0.8)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

def create_model():
    """Build and compile the network with 20% dropout on the input layer.

    The body is currently the same as create_model_input_dropout:
    Dropout(0.2) on the 60 inputs -> Dense(60, relu) -> Dense(30, relu)
    -> Dense(1, sigmoid), with both hidden layers under a max-norm
    constraint of 3.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dropout(0.2, input_shape=(60,)),
        # Weights are constrained to a max-norm of 3, as suggested in the
        # dropout paper.
        Dense(60, activation="relu", kernel_constraint=maxnorm(3)),
        Dense(30, activation="relu", kernel_constraint=maxnorm(3)),
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(layers)

    # Learning rate raised by one order of magnitude and momentum increased,
    # as suggested in the dropout paper.
    optimizer = SGD(lr=0.1, momentum=0.9)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

def create_model_input_dropout():
    """Build and compile the network with dropout applied to the inputs only.

    Architecture: Dropout(0.2) on the 60 inputs -> Dense(60, relu)
    -> Dense(30, relu) -> Dense(1, sigmoid). Both hidden layers use a
    max-norm kernel constraint of 3.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dropout(0.2, input_shape=(60,)),
        # Weights are constrained to a max-norm of 3, as suggested in the
        # dropout paper.
        Dense(60, activation="relu", kernel_constraint=maxnorm(3)),
        Dense(30, activation="relu", kernel_constraint=maxnorm(3)),
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(layers)

    # Learning rate raised by one order of magnitude and momentum increased,
    # as suggested in the dropout paper.
    optimizer = SGD(lr=0.1, momentum=0.9)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

def create_model_hidden_dropout():
    """Build and compile the network with dropout after each hidden layer.

    Architecture: 60 inputs -> Dense(60, relu) -> Dropout(0.2)
    -> Dense(30, relu) -> Dropout(0.2) -> Dense(1, sigmoid). Both hidden
    layers use a max-norm kernel constraint of 3; the inputs are not
    dropped.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        # Weights are constrained to a max-norm of 3, as suggested in the
        # dropout paper.
        Dense(60, input_dim=60, activation="relu", kernel_constraint=maxnorm(3)),
        Dropout(0.2),
        Dense(30, activation="relu", kernel_constraint=maxnorm(3)),
        Dropout(0.2),
        Dense(1, activation="sigmoid"),
    ]
    model = Sequential(layers)

    # Learning rate raised by one order of magnitude and momentum increased,
    # as suggested in the dropout paper.
    optimizer = SGD(lr=0.1, momentum=0.9)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

In [17]:
# Evaluate the input-dropout network with stratified 10-fold cross-validation.
# Standardization is a pipeline step, so it is fitted as part of each
# cross-validation fit rather than once on the full data set.
estimators = [
    ("standardize", StandardScaler()),
    (
        "mlp",
        KerasClassifier(
            build_fn=create_model_input_dropout,
            epochs=300,
            batch_size=16,
            verbose=0,
        ),
    ),
]
# Fixed seed: with shuffle=True and no random_state the folds change on every
# run, so scores obtained with different build functions would not be compared
# on the same splits.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
pipeline = Pipeline(estimators)
results = cross_val_score(pipeline, X, encoded_y, cv=kfold)



In [18]:
# Report the mean and standard deviation of the fold scores as percentages.
# NOTE(review): the "Baseline" label is hard-coded, while the pipeline above is
# built with create_model_input_dropout -- confirm the label matches the model.
print("Baseline: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

Baseline: 84.52% (9.56%)
