# Keras - Tuning Neural Network (Advanced)

Tuning: regularization, learning rate, dropout, max pooling, etc.

----------

### 4. Regularization: Dropout

##### 4.1 Baseline model

In [52]:
# Baseline Model on the Sonar Dataset
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the sonar CSV (the file has no header row) and take the raw value array.
dataframe = read_csv("./data/sonar.csv", header=None)
dataset = dataframe.values

# Columns 0..59 become the float inputs X; column 60 is the class label Y.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

# Fit a label encoder on the class values and convert them to integer codes.
encoder = LabelEncoder().fit(Y)
encoded_Y = encoder.transform(Y)



# baseline
def create_baseline():
    """Build and compile the baseline network (no dropout, no weight constraint).

    Layers, in order: Dense(60, relu) taking 60 inputs, Dense(30, relu),
    Dense(1, sigmoid). Every layer uses the 'normal' kernel initializer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD optimizer,
        accuracy metric).
    """
    # Settings shared by both hidden layers.
    hidden_kwargs = dict(kernel_initializer='normal', activation='relu')

    model = Sequential()
    model.add(Dense(60, input_dim=60, **hidden_kwargs))
    model.add(Dense(30, **hidden_kwargs))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # SGD with learning rate 0.01, momentum 0.8, no decay, no Nesterov.
    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


# Re-seed NumPy's global generator right before the evaluation.
numpy.random.seed(seed)

# Pipeline: standardize the inputs, then fit the baseline network through the
# Keras scikit-learn wrapper (10 epochs, batches of 16, silent training).
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_baseline,
                            epochs=10,
                            batch_size=16,
                            verbose=0)),
]
pipeline = Pipeline(estimators)

# Shuffled, stratified 10-fold cross-validation with a fixed random_state.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Mean and standard deviation of the per-fold scores, as percentages.
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 65.03% (8.60%)


##### 4.2 Dropout on the visible (input) layer

In [53]:
# Example of Dropout on the Sonar Dataset: Visible Layer
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the sonar CSV (the file has no header row) and take the raw value array.
dataframe = read_csv("./data/sonar.csv", header=None)
dataset = dataframe.values

# Columns 0..59 become the float inputs X; column 60 is the class label Y.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

# Fit a label encoder on the class values and convert them to integer codes.
encoder = LabelEncoder().fit(Y)
encoded_Y = encoder.transform(Y)



# dropout in the input layer with weight constraint
def create_model():
    """Build and compile the network with 20% dropout applied to the inputs.

    Layers, in order: Dropout(0.2) over the 60 inputs, Dense(60, relu),
    Dense(30, relu), Dense(1, sigmoid). Both hidden Dense layers carry a
    maxnorm(3) kernel constraint; every Dense layer uses the 'normal'
    kernel initializer.

    NOTE(review): the optimizer here uses lr=0.1 / momentum=0.9, whereas
    create_baseline uses lr=0.01 / momentum=0.8, so score differences against
    the baseline are not attributable to dropout alone.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD optimizer,
        accuracy metric).
    """
    model = Sequential()

    # Dropout is the first layer, so it declares the input shape.
    model.add(Dropout(0.2, input_shape=(60,)))
    model.add(Dense(60,
                    kernel_initializer='normal',
                    activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dense(30,
                    kernel_initializer='normal',
                    activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # SGD with learning rate 0.1, momentum 0.9, no decay, no Nesterov.
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model



# Re-seed NumPy's global generator right before the evaluation.
numpy.random.seed(seed)

# Pipeline: standardize the inputs, then fit the input-dropout network through
# the Keras scikit-learn wrapper (10 epochs, batches of 16, silent training).
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model,
                            epochs=10,
                            batch_size=16,
                            verbose=0)),
]
pipeline = Pipeline(estimators)

# Shuffled, stratified 10-fold cross-validation with a fixed random_state.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Mean and standard deviation of the per-fold scores, as percentages.
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Visible: 81.66% (7.78%)


##### 4.3. Using Dropout on Hidden Layers

In [55]:
# Example of Dropout on the Sonar Dataset: Hidden Layer
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the sonar CSV (the file has no header row) and take the raw value array.
dataframe = read_csv("./data/sonar.csv", header=None)
dataset = dataframe.values

# Columns 0..59 become the float inputs X; column 60 is the class label Y.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

# Fit a label encoder on the class values and convert them to integer codes.
encoder = LabelEncoder().fit(Y)
encoded_Y = encoder.transform(Y)



# dropout in hidden layers with weight constraint
def create_model():
    """Build and compile the network with 20% dropout after each hidden layer.

    Layers, in order: Dense(60, relu) taking 60 inputs, Dropout(0.2),
    Dense(30, relu), Dropout(0.2), Dense(1, sigmoid). Both hidden Dense layers
    carry a maxnorm(3) kernel constraint; every Dense layer uses the 'normal'
    kernel initializer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, SGD optimizer,
        accuracy metric).
    """
    model = Sequential()

    # First hidden layer, followed by its dropout.
    model.add(Dense(60,
                    input_dim=60,
                    kernel_initializer='normal',
                    activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))

    # Second hidden layer, followed by its dropout.
    model.add(Dense(30,
                    kernel_initializer='normal',
                    activation='relu',
                    kernel_constraint=maxnorm(3)))
    model.add(Dropout(0.2))

    # Single sigmoid output unit.
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    # SGD with learning rate 0.1, momentum 0.9, no decay, no Nesterov.
    optimizer = SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model



# Re-seed NumPy's global generator right before the evaluation.
numpy.random.seed(seed)

# Pipeline: standardize the inputs, then fit the hidden-dropout network through
# the Keras scikit-learn wrapper (10 epochs, batches of 16, silent training).
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_model,
                            epochs=10,
                            batch_size=16,
                            verbose=0)),
]
pipeline = Pipeline(estimators)

# Shuffled, stratified 10-fold cross-validation with a fixed random_state.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)

# Mean and standard deviation of the per-fold scores, as percentages.
print("Hidden: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Hidden: 82.21% (6.82%)


-------------

### 5. Learning Rate

##### 5.1 Time-Based Learning Rate Schedule

In [56]:
# Time Based Learning Rate Decay
from pandas import read_csv
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.preprocessing import LabelEncoder


# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the ionosphere CSV (the file has no header row) and take the raw value array.
dataframe = read_csv("./data/ionosphere.csv", header=None)
dataset = dataframe.values

# Columns 0..33 become the float inputs X; column 34 is the class label Y.
X = dataset[:, :34].astype(float)
Y = dataset[:, 34]

# Replace the class labels in Y with their integer codes.
encoder = LabelEncoder().fit(Y)
Y = encoder.transform(Y)

# One hidden layer of 34 relu units feeding a single sigmoid output.
model = Sequential()
model.add(Dense(34, input_dim=34, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

# Training hyperparameters. The decay handed to SGD is the initial learning
# rate divided by the number of epochs.
epochs = 10
learning_rate = 0.1
momentum = 0.8
decay_rate = learning_rate / epochs

sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Train with 33% of the samples held out for validation; verbose=2 prints one
# line per epoch.
model.fit(X, Y, validation_split=0.33, epochs=epochs, batch_size=28, verbose=2)

Train on 235 samples, validate on 116 samples
Epoch 1/10
0s - loss: 0.6814 - acc: 0.6468 - val_loss: 0.6388 - val_acc: 0.8707
Epoch 2/10
0s - loss: 0.6391 - acc: 0.7319 - val_loss: 0.5352 - val_acc: 0.8276
Epoch 3/10
0s - loss: 0.5676 - acc: 0.8128 - val_loss: 0.4886 - val_acc: 0.8103
Epoch 4/10
0s - loss: 0.4886 - acc: 0.8298 - val_loss: 0.4386 - val_acc: 0.9397
Epoch 5/10
0s - loss: 0.4141 - acc: 0.8511 - val_loss: 0.3349 - val_acc: 0.9483
Epoch 6/10
0s - loss: 0.3567 - acc: 0.8766 - val_loss: 0.4452 - val_acc: 0.8793
Epoch 7/10
0s - loss: 0.3217 - acc: 0.9021 - val_loss: 0.2296 - val_acc: 0.9655
Epoch 8/10
0s - loss: 0.2932 - acc: 0.8936 - val_loss: 0.2142 - val_acc: 0.9655
Epoch 9/10
0s - loss: 0.2658 - acc: 0.9021 - val_loss: 0.2515 - val_acc: 0.9569
Epoch 10/10
0s - loss: 0.2366 - acc: 0.9234 - val_loss: 0.2278 - val_acc: 0.9569


<keras.callbacks.History at 0x133eb9358>

##### 5.2 Drop-Based Learning Rate Schedule

In [58]:
# Drop-Based Learning Rate Decay
import pandas
from pandas import read_csv
import numpy
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import LearningRateScheduler


# learning rate schedule
def step_decay(epoch):
    """Return the step-decayed learning rate for a given epoch index.

    The rate starts at 0.1 and is multiplied by 0.5 once for every full
    block of 10 in ``epoch + 1``; with a 0-based ``epoch`` the first halving
    therefore lands on epoch 9.

    Args:
        epoch: Epoch index supplied by the caller.

    Returns:
        The learning rate as a float.
    """
    base_rate = 0.1
    factor = 0.5
    period = 10.0
    # Number of completed drop periods, counting epochs from 1.
    completed_drops = math.floor((1 + epoch) / period)
    return base_rate * math.pow(factor, completed_drops)


# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the ionosphere CSV (the file has no header row) and take the raw value array.
dataframe = read_csv("./data/ionosphere.csv", header=None)
dataset = dataframe.values

# Columns 0..33 become the float inputs X; column 34 is the class label Y.
X = dataset[:, :34].astype(float)
Y = dataset[:, 34]

# Replace the class labels in Y with their integer codes.
encoder = LabelEncoder().fit(Y)
Y = encoder.transform(Y)

# One hidden layer of 34 relu units feeding a single sigmoid output.
model = Sequential()
model.add(Dense(34, input_dim=34, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

# The optimizer is created with lr=0.0; the learning rate is meant to come
# from the step_decay schedule attached through the callback below.
sgd = SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Wrap step_decay in a scheduler callback and hand it to fit().
lrate = LearningRateScheduler(step_decay)
callbacks_list = [lrate]

# Train with 33% of the samples held out for validation; verbose=2 prints one
# line per epoch.
model.fit(X, Y,
          validation_split=0.33,
          epochs=10,
          batch_size=28,
          callbacks=callbacks_list,
          verbose=2)

Train on 235 samples, validate on 116 samples
Epoch 1/10
0s - loss: 0.6803 - acc: 0.6468 - val_loss: 0.6199 - val_acc: 0.9138
Epoch 2/10
0s - loss: 0.6195 - acc: 0.7234 - val_loss: 0.4761 - val_acc: 0.8621
Epoch 3/10
0s - loss: 0.4985 - acc: 0.8255 - val_loss: 0.3681 - val_acc: 0.9483
Epoch 4/10
0s - loss: 0.3626 - acc: 0.8596 - val_loss: 0.3826 - val_acc: 0.8707
Epoch 5/10
0s - loss: 0.2804 - acc: 0.8809 - val_loss: 0.1543 - val_acc: 0.9655
Epoch 6/10
0s - loss: 0.2153 - acc: 0.9191 - val_loss: 0.2157 - val_acc: 0.9310
Epoch 7/10
0s - loss: 0.1795 - acc: 0.9362 - val_loss: 0.1501 - val_acc: 0.9655
Epoch 8/10
0s - loss: 0.1584 - acc: 0.9319 - val_loss: 0.0850 - val_acc: 0.9741
Epoch 9/10
0s - loss: 0.1722 - acc: 0.9404 - val_loss: 0.1417 - val_acc: 0.9741
Epoch 10/10
0s - loss: 0.1257 - acc: 0.9617 - val_loss: 0.1020 - val_acc: 0.9914


<keras.callbacks.History at 0x12f115710>

--------