In [1]:
%matplotlib notebook

import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

In [3]:
X_valid, X_train = X_train[:5000] / 255.0, X_train[5000:] / 255.0
y_valid, y_train = y_train[:5000], y_train[5000:]

In [8]:
X_test = X_test / 255.0

In [4]:
# Create a model
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation=keras.layers.ReLU(negative_slope=0.2)))
model.add(keras.layers.Dense(100, activation=keras.layers.ReLU(negative_slope=0.2)))
model.add(keras.layers.Dense(10, activation="softmax"))

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
_________________________________________________________________


In [7]:
model.compile(loss='sparse_categorical_crossentropy',
             optimizer='sgd',
             metrics=['accuracy'])

## Exponential decay

Reduce the learning rate by a factor of 10 every s steps.

In [6]:
def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1 **(epoch/s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(0.01, 20)

# Create a LearningRateScheduler callback
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)

In [9]:
history = model.fit(X_train, y_train,
                   epochs=30,
                   validation_data=(X_valid, y_valid),
                   callbacks=[lr_scheduler])

Train on 55000 samples, validate on 5000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [10]:
model.evaluate(X_test, y_test)



[0.3850333259105682, 0.862]

In [12]:
history.history

{'loss': [0.6973423051053828,
  0.4905638758616014,
  0.4501436263171109,
  0.42796134003725916,
  0.41079105180393566,
  0.3989254883115942,
  0.38951129138036206,
  0.3810682255701585,
  0.37421596797596324,
  0.3686750339334661,
  0.3637712825601751,
  0.35918447239615703,
  0.35551586575941607,
  0.35203538556965913,
  0.34964657741676675,
  0.34700136972015555,
  0.3445570809841156,
  0.3426229178862138,
  0.3412319676572626,
  0.3396706779371608,
  0.33795510903271764,
  0.336905721376159,
  0.33581839617165654,
  0.3350741016973149,
  0.3341521716681394,
  0.3333802757945928,
  0.33261818555268374,
  0.3320155783328143,
  0.33164242678989064,
  0.3308753045125441],
 'accuracy': [0.771,
  0.8300727,
  0.8420727,
  0.85016364,
  0.8564909,
  0.86087275,
  0.8638,
  0.8670727,
  0.86825454,
  0.8710182,
  0.87234545,
  0.87487274,
  0.8745273,
  0.8757273,
  0.8765454,
  0.87863636,
  0.87874544,
  0.8793455,
  0.8800182,
  0.8809636,
  0.88174546,
  0.88143635,
  0.88183635,
  0.8

### Plot the exponential decay of the learning rate

In [16]:
fig, ax = plt.subplots()
ax.plot(history.history['lr'], marker='o')
plt.show()

<IPython.core.display.Javascript object>