In [1]:
# # This code allows to output more than one variable value without using a print statement.
# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

# # Default
# # InteractiveShell.ast_node_interactivity = "last_expr"

In [2]:
# Version Check
import sys
import tensorflow as tf
import tensorflow_datasets as tfds
import PIL
import pandas as pd
import numpy as np
import scipy
print("python", sys.version)
print("tensorflow", tf.__version__)
print("tensorflow-datasets", tfds.__version__)
print("Pillow", PIL.__version__)
print("pandas", pd.__version__)
print("numpy", np.__version__)
print("scipy", scipy.__version__)
print()
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Built with GPU support:", tf.test.is_built_with_gpu_support())

python 3.8.0 (default, Nov  6 2019, 16:00:02) [MSC v.1916 64 bit (AMD64)]
tensorflow 2.6.2
tensorflow-datasets 4.4.0
Pillow 8.3.2
pandas 1.3.3
numpy 1.19.5
scipy 1.7.1

Num GPUs Available: 1
Built with CUDA: True
Built with GPU support: True


## Model Checkpoint

In [3]:
# Load dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('train set:', x_train.shape, y_train.shape)
print('test set :', x_test.shape, y_test.shape)
print()


# Normalization
x_train = x_train / x_train.max()
x_test = x_test / x_test.max()

# Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, Activation

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(64), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32, activation='softmax'), 
])

print(model.summary())


# Compile
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

train set: (60000, 28, 28) (60000,)
test set : (10000, 28, 28) (10000,)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 256)               200960    
_________________________________________________________________
batch_normalization (BatchNo (None, 256)               1024      
_________________________________________________________________
activation (Activation)      (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________

In [4]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(filepath='tmp_checkpoint.ckpt', 
                             save_weights_only=True, 
                             save_best_only=True, 
                             monitor='val_loss', 
                             verbose=1)

In [5]:
history = model.fit(x_train, y_train, 
                    validation_data=(x_test, y_test), 
                    epochs=10, verbose=1, 
                    callbacks=[checkpoint])

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.10413, saving model to tmp_checkpoint.ckpt
Epoch 2/10

Epoch 00002: val_loss improved from 0.10413 to 0.08875, saving model to tmp_checkpoint.ckpt
Epoch 3/10

Epoch 00003: val_loss improved from 0.08875 to 0.06744, saving model to tmp_checkpoint.ckpt
Epoch 4/10

Epoch 00004: val_loss improved from 0.06744 to 0.06670, saving model to tmp_checkpoint.ckpt
Epoch 5/10

Epoch 00005: val_loss improved from 0.06670 to 0.06349, saving model to tmp_checkpoint.ckpt
Epoch 6/10

Epoch 00006: val_loss improved from 0.06349 to 0.06074, saving model to tmp_checkpoint.ckpt
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.06074
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.06074
Epoch 9/10

Epoch 00009: val_loss improved from 0.06074 to 0.05937, saving model to tmp_checkpoint.ckpt
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.05937


In [6]:
# Before loading model checkpoint
model.evaluate(x_test, y_test)  # loss(sparse_categorical_crossentropy), val_acc



[0.06428355723619461, 0.9811000227928162]

In [7]:
# After loading model checkpoint
model.load_weights('tmp_checkpoint.ckpt')
model.evaluate(x_test, y_test)  # loss(sparse_categorical_crossentropy), val_acc



[0.05937452241778374, 0.9819999933242798]

## Early Stopping

In [8]:
# Load dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('train set:', x_train.shape, y_train.shape)
print('test set :', x_test.shape, y_test.shape)
print()


# Normalization
x_train = x_train / x_train.max()
x_test = x_test / x_test.max()

# Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, Activation

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(64), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32, activation='softmax'), 
])

print(model.summary())


# Compile
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

train set: (60000, 28, 28) (60000,)
test set : (10000, 28, 28) (10000,)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               200960    
_________________________________________________________________
batch_normalization_3 (Batch (None, 256)               1024      
_________________________________________________________________
activation_3 (Activation)    (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                16448     
_________________________________________________________________
batch_normalization_4 (Batch (None, 64)                256       
_______________________________________________

In [9]:
from tensorflow.keras.callbacks import EarlyStopping
earlystopping = EarlyStopping(monitor='val_loss', patience=3)

In [10]:
history = model.fit(x_train, y_train, 
                    validation_data=(x_test, y_test), 
                    epochs=20, verbose=1,
                    callbacks=[earlystopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


In [None]:
# Early-stopped after 9th epoch. (Val-loss did not decrease for 3 epochs since 6th epoch.)

## Learning Rate Scheduler

In [11]:
# Load dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('train set:', x_train.shape, y_train.shape)
print('test set :', x_test.shape, y_test.shape)
print()


# Normalization
x_train = x_train / x_train.max()
x_test = x_test / x_test.max()

# Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, Activation

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(64), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32, activation='softmax'), 
])

print(model.summary())


# Compile
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

train set: (60000, 28, 28) (60000,)
test set : (10000, 28, 28) (10000,)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 256)               200960    
_________________________________________________________________
batch_normalization_6 (Batch (None, 256)               1024      
_________________________________________________________________
activation_6 (Activation)    (None, 256)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 64)                16448     
_________________________________________________________________
batch_normalization_7 (Batch (None, 64)                256       
_______________________________________________

In [12]:
# initial learning rate
model.optimizer.lr.numpy()

0.001

In [13]:
from tensorflow.keras.callbacks import LearningRateScheduler

def scheduler(epoch, lr):
    tf.print(f"learning rate: {lr:0.5f}")
    
    # for the first 5 epochs, maintain learning_rate
    if epoch < 5:
        return lr
    
    # after 5 epochs, decrease learning_rate
    else:
        return lr * tf.math.exp(-0.1)

learning_rate_scheduler = LearningRateScheduler(scheduler)

In [14]:
history = model.fit(x_train, y_train, 
                    validation_data=(x_test, y_test), 
                    epochs=10, verbose=1, 
                    callbacks=[learning_rate_scheduler])

Epoch 1/10
learning rate: 0.00100
Epoch 2/10
learning rate: 0.00100
Epoch 3/10
learning rate: 0.00100
Epoch 4/10
learning rate: 0.00100
Epoch 5/10
learning rate: 0.00100
Epoch 6/10
learning rate: 0.00100
Epoch 7/10
learning rate: 0.00090
Epoch 8/10
learning rate: 0.00082
Epoch 9/10
learning rate: 0.00074
Epoch 10/10
learning rate: 0.00067


In [15]:
# final learning rate
round(model.optimizer.lr.numpy(), 5)

0.00061

## Tensorboard

In [16]:
# Load dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('train set:', x_train.shape, y_train.shape)
print('test set :', x_test.shape, y_test.shape)
print()


# Normalization
x_train = x_train / x_train.max()
x_test = x_test / x_test.max()

# Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, Activation

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(64), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32), 
    BatchNormalization(),   # BatchNorm between Dense and Activation
    Activation('relu'),
    Dense(32, activation='softmax'), 
])

print(model.summary())


# Compile
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

train set: (60000, 28, 28) (60000,)
test set : (10000, 28, 28) (10000,)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               200960    
_________________________________________________________________
batch_normalization_9 (Batch (None, 256)               1024      
_________________________________________________________________
activation_9 (Activation)    (None, 256)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 64)                16448     
_________________________________________________________________
batch_normalization_10 (Batc (None, 64)                256       
_______________________________________________

In [17]:
# tensorboard path
log_dir = 'tensorboard'

# define tensorboard callback 
from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [18]:
history = model.fit(x_train, y_train, 
                    validation_data=(x_test, y_test), 
                    epochs=10, verbose=1, 
                    callbacks=[tensorboard])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# load tensorboard extention
%load_ext tensorboard

# tensorboard print magic commmand
%tensorboard --logdir {log_dir}

Reusing TensorBoard on port 6006 (pid 9176), started 0:07:42 ago. (Use '!kill 9176' to kill it.)