# Basic Structure of Training a Model

In [1]:
import tensorflow as tf
from datetime import datetime

## Hyper-parameters

In [2]:
num_classes = 10

epochs = 50
batch_size = 16

## Data pre-process

In [3]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train / 255.0
x_test = x_test / 255.0

## Set up a model

In [4]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flat = tf.keras.layers.Flatten(input_shape=(28, 28))
        self.d1 = tf.keras.layers.Dense(512, activation='relu')
        self.d2 = tf.keras.layers.Dense(256, activation='relu')
        self.d3 = tf.keras.layers.Dense(128, activation='relu')
        self.d4 = tf.keras.layers.Dense(64, activation='relu')
        self.d5 = tf.keras.layers.Dense(32, activation='relu')
        self.d6 = tf.keras.layers.Dense(num_classes, activation='softmax')
        
    def call(self, inputs):
        x = self.flat(inputs)
        x = self.d1(x)
        x = self.d2(x)
        x = self.d3(x)
        x = self.d4(x)
        x = self.d5(x)
        return self.d6(x)
    
model = MyModel()

## Set up callbacks
### checkpoint

In [5]:
checkpoint_path = 'my_checkpoint.ckpt'
checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                save_best_only=True,
                                                save_weights_only=True,
                                                monitor='val_loss',
                                                verbose=1)

### tensorboard

In [6]:
%load_ext tensorboard
# Clear any logs from previous runs
!rm -rf ./logs/

logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

### reduce learning rate

In [7]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.1,
                                                 patience=3,
                                                 min_lr=0.00001)

## Set up optimizer, loss function, & metrics

In [8]:
optimizer = tf.keras.optimizers.RMSprop(0.001)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric_acc = tf.keras.metrics.SparseCategoricalAccuracy()

## Compile the model

In [9]:
model.compile(optimizer, loss, metrics=[metric_acc])

## Fit the model

In [10]:
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    callbacks=[checkpoint, reduce_lr, tensorboard_callback],
)

Epoch 1/50
Epoch 00001: val_loss improved from inf to 1.53596, saving model to my_checkpoint.ckpt
Epoch 2/50
Epoch 00002: val_loss improved from 1.53596 to 1.52525, saving model to my_checkpoint.ckpt
Epoch 3/50
Epoch 00003: val_loss improved from 1.52525 to 1.51601, saving model to my_checkpoint.ckpt
Epoch 4/50
Epoch 00004: val_loss improved from 1.51601 to 1.50408, saving model to my_checkpoint.ckpt
Epoch 5/50
Epoch 00005: val_loss did not improve from 1.50408
Epoch 6/50
Epoch 00006: val_loss did not improve from 1.50408
Epoch 7/50
Epoch 00007: val_loss did not improve from 1.50408
Epoch 8/50
Epoch 00008: val_loss improved from 1.50408 to 1.49329, saving model to my_checkpoint.ckpt
Epoch 9/50
Epoch 00009: val_loss improved from 1.49329 to 1.49326, saving model to my_checkpoint.ckpt
Epoch 10/50
Epoch 00010: val_loss improved from 1.49326 to 1.49278, saving model to my_checkpoint.ckpt
Epoch 11/50
Epoch 00011: val_loss improved from 1.49278 to 1.49120, saving model to my_checkpoint.ckpt


Epoch 00022: val_loss improved from 1.48767 to 1.48752, saving model to my_checkpoint.ckpt
Epoch 23/50
Epoch 00023: val_loss did not improve from 1.48752
Epoch 24/50
Epoch 00024: val_loss did not improve from 1.48752
Epoch 25/50
Epoch 00025: val_loss did not improve from 1.48752
Epoch 26/50
Epoch 00026: val_loss did not improve from 1.48752
Epoch 27/50
Epoch 00027: val_loss did not improve from 1.48752
Epoch 28/50
Epoch 00028: val_loss improved from 1.48752 to 1.48737, saving model to my_checkpoint.ckpt
Epoch 29/50
Epoch 00029: val_loss did not improve from 1.48737
Epoch 30/50
Epoch 00030: val_loss improved from 1.48737 to 1.48701, saving model to my_checkpoint.ckpt
Epoch 31/50
Epoch 00031: val_loss did not improve from 1.48701
Epoch 32/50
Epoch 00032: val_loss did not improve from 1.48701
Epoch 33/50
Epoch 00033: val_loss did not improve from 1.48701
Epoch 34/50
Epoch 00034: val_loss did not improve from 1.48701
Epoch 35/50
Epoch 00035: val_loss did not improve from 1.48701
Epoch 36/5

Epoch 45/50
Epoch 00045: val_loss did not improve from 1.48701
Epoch 46/50
Epoch 00046: val_loss did not improve from 1.48701
Epoch 47/50
Epoch 00047: val_loss did not improve from 1.48701
Epoch 48/50
Epoch 00048: val_loss did not improve from 1.48701
Epoch 49/50
Epoch 00049: val_loss did not improve from 1.48701
Epoch 50/50
Epoch 00050: val_loss did not improve from 1.48701


<tensorflow.python.keras.callbacks.History at 0x7fac781bc950>

## Check the training process via tensorboard

In [11]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 7257), started 0:19:34 ago. (Use '!kill 7257' to kill it.)

## Load the best weights

In [12]:
model.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fac58c28210>

## Evaluate the model

In [15]:
model.evaluate(x_test, y_test)



[1.4870102405548096, 0.9742000102996826]