# Lift Performance with Learning Rate Schedules

Reference: *Deep Learning with Python*, chapter 17.

After working through this notebook you will know:
- How learning rate schedules can lift model performance during training
- How to configure and evaluate a time-based learning rate schedule
- How to configure and evaluate a drop-based learning rate schedule

Adapting the learning rate for your stochastic gradient descent optimization procedure can increase performance and reduce training time. Sometimes this is called learning rate annealing or adaptive learning rates.

In [4]:
### Time-Based Learning Rate Schedule
# LearningRate = LearningRate x 1 / (1 + decay x epoch)
# it can be a good idea to use momentum when using an adaptive learning rate.
from pandas import read_csv
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.preprocessing import LabelEncoder
# Seed NumPy's global RNG so repeated runs start from the same state.
seed = 7
numpy.random.seed(seed)

# Read the ionosphere CSV (it has no header row) and take the raw array.
dataframe = read_csv("../datasets/ionosphere.csv", header=None)
dataset = dataframe.values

# The first 34 columns are the numeric inputs; column 34 is the class value.
X = dataset[:, :34].astype(float)
Y = dataset[:, 34]

# Replace the class values with integer codes.
encoder = LabelEncoder()
Y = encoder.fit(Y).transform(Y)

# Two-layer network: a 34-unit ReLU hidden layer and one sigmoid output unit.
model = Sequential([
    Dense(34, input_dim=34, kernel_initializer='normal', activation='relu'),
    Dense(1, kernel_initializer='normal', activation='sigmoid'),
])

# Time-based schedule settings: the decay is the initial rate spread over
# the planned number of epochs. These names are reused by later cells.
epochs = 50
learning_rate = 0.1
momentum = 0.8
decay_rate = learning_rate / epochs

# SGD with momentum; the optimizer's own `decay` argument applies the schedule.
sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

# Train, holding out 33% of the rows for validation.
model.fit(X, Y, validation_split=0.33, epochs=epochs, batch_size=28, verbose=2)

Using TensorFlow backend.


Train on 235 samples, validate on 116 samples
Epoch 1/50
 - 0s - loss: 0.6813 - acc: 0.6468 - val_loss: 0.6380 - val_acc: 0.8621
Epoch 2/50
 - 0s - loss: 0.6370 - acc: 0.7319 - val_loss: 0.5290 - val_acc: 0.8276
Epoch 3/50
 - 0s - loss: 0.5582 - acc: 0.8213 - val_loss: 0.4742 - val_acc: 0.8448
Epoch 4/50
 - 0s - loss: 0.4672 - acc: 0.8383 - val_loss: 0.4437 - val_acc: 0.9310
Epoch 5/50
 - 0s - loss: 0.3836 - acc: 0.8638 - val_loss: 0.2777 - val_acc: 0.9483
Epoch 6/50
 - 0s - loss: 0.3151 - acc: 0.8851 - val_loss: 0.3911 - val_acc: 0.8879
Epoch 7/50
 - 0s - loss: 0.2753 - acc: 0.9106 - val_loss: 0.2237 - val_acc: 0.9569
Epoch 8/50
 - 0s - loss: 0.2411 - acc: 0.9106 - val_loss: 0.1431 - val_acc: 0.9569
Epoch 9/50
 - 0s - loss: 0.2445 - acc: 0.9106 - val_loss: 0.2211 - val_acc: 0.9397
Epoch 10/50
 - 0s - loss: 0.2019 - acc: 0.9234 - val_loss: 0.2536 - val_acc: 0.9224
Epoch 11/50
 - 0s - loss: 0.1918 - acc: 0.9277 - val_loss: 0.1901 - val_acc: 0.9483
Epoch 12/50
 - 0s - loss: 0.1722 - acc:

<keras.callbacks.History at 0x1038ccef0>

In [None]:
### Drop-Based Learning Rate Schedule
# Often this method is implemented by dropping the learning rate by half every fixed number of epochs
# LearningRate = InitialLearningRate x DropRate^(floor((1+Epoch)/(EpochDrop)))
import math

import numpy
from keras.callbacks import LearningRateScheduler
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder

# learning rate schedule
def step_decay(epoch):
    """Return the drop-based learning rate for the given epoch.

    The rate starts at 0.1 and is multiplied by 0.5 once for every full
    block of 10 contained in ``1 + epoch``.

    Args:
        epoch: Epoch index (the ``1 + epoch`` offset suggests it is
            zero-based, as passed in by the scheduler callback).

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    base_rate, factor, period = 0.1, 0.5, 10.0
    # Number of halvings that have taken effect by this epoch.
    completed_drops = math.floor((1 + epoch) / period)
    return base_rate * math.pow(factor, completed_drops)

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset (the CSV has no header row)
dataframe = read_csv("../datasets/ionosphere.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:34].astype(float)
Y = dataset[:,34]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
Y = encoder.transform(Y)
# create model
model = Sequential()
model.add(Dense(34, input_dim=34, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
# Compile model
# Defined here so this cell does not depend on variables from another cell.
epochs = 50
# lr is 0.0 because the LearningRateScheduler callback sets the rate at the
# start of every epoch; decay is 0.0 so the optimizer's built-in time-based
# decay does not stack on top of the step schedule.
sgd = SGD(lr=0.0, momentum=0.9, decay=0.0, nesterov=False)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
# learning rate callback
lrate = LearningRateScheduler(step_decay)
callbacks_list = [lrate]
# Fit the model
# The callback list must be passed to fit(); otherwise step_decay is never
# called and the learning rate stays at 0.0 (no learning at all).
model.fit(X, Y, validation_split=0.33, epochs=epochs, batch_size=28,
          callbacks=callbacks_list, verbose=2)