# Multi-Layer Perceptrons / Dense Layers

In [10]:
# Setup
import tensorflow as tf
import keras

import time as timer
import datetime

import os

### Constants

In [11]:
EPOCH = 20
BATCH = 64

DATADIR = "/Users/mghifary/Work/Code/AI/data"
MODELDIR = "/Users/mghifary/Work/Code/AI/models"

modelpath = os.path.join(MODELDIR, "mnist-mlp-classifier.h5")

In [12]:
# Load dataset
data_path = os.path.join(DATADIR, "mnist.npz")
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data(data_path)
x_train = x_train.astype("float32") / 255.
x_test = x_test.astype("float32") / 255.

(n_train, dx1, dx2) = x_train.shape
n_test = x_test.shape[0]

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# Shuffle and batch
dataset = dataset.shuffle(buffer_size=1024).batch(BATCH)
dataset_test = dataset_test.batch(BATCH)

In [13]:
# Build model
class MLP(keras.Sequential):
    """Multi-layer Perceptron"""

    def __init__(self, num_classes=10):
        super().__init__()

        self.add(keras.layers.Flatten())
        self.add(keras.layers.Dense(256, activation="relu"))
        self.add(keras.layers.Dense(128, activation="relu"))
        self.add(keras.layers.Dense(num_classes))
    
model = MLP(num_classes=10)

model.build(input_shape=(None, dx1, dx2))
model.summary()

Model: "mlp_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 256)               200960    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 235146 (918.54 KB)
Trainable params: 235146 (918.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Train model

In [14]:
# Loss and optimizer
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.legacy.Adam(learning_rate=1e-3)


In [15]:
# Define metrics
train_loss = keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = keras.metrics.SparseCategoricalAccuracy('train_accuracy')

test_loss = keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = keras.metrics.SparseCategoricalAccuracy('test_accuracy')

In [16]:
# Training and test function
@tf.function
def train_on_batch(model, loss_fn, optimizer, x, y):
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True)
        loss_value = loss_fn(y, y_pred)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    train_loss(loss_value)
    train_accuracy(y, y_pred)

@tf.function
def test_on_batch(model, loss_fn, x, y):
    y_pred = model(x, training=False)
    loss_value = loss_fn(y, y_pred)
    
    test_loss(loss_value)
    test_accuracy(y, y_pred)

In [17]:
# Setup summary writers
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/mlp-mnist/' + current_time + '/train'
test_log_dir = 'logs/mlp-mnist/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [18]:
# Do training
for epoch in range(EPOCH):
    # Mini-batch training
    train_duration = 0.0
    for step, (x, y) in enumerate(dataset):
        start_t = timer.time()
        train_on_batch(model, loss_fn, optimizer, x, y)
        elapsed_t = timer.time() - start_t

        train_duration += elapsed_t
    
    # Store log
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    # Test
    test_duration = 0.0
    for step, (xt, yt) in enumerate(dataset_test):
        start_t = timer.time()
        test_on_batch(model, loss_fn, xt, yt)
        elapsed_t = timer.time() - start_t

        test_duration += elapsed_t

    # Store log
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    print(f"Epoch {epoch+1} - Training [loss: {train_loss.result():.5f}, accuracy: {train_accuracy.result():.3f}] ({train_duration:.3f} secs), Test [loss: {test_loss.result():.5f}. accuracy: {test_accuracy.result():.3f}] ({test_duration:.3f} secs)")

    # Save model
    model.save_weights(modelpath, overwrite=True, save_format=None, options=None)

# end of epoch

Epoch 1 - Training [loss: 0.23135, accuracy: 0.932] (1.307 secs), Test [loss: 0.12660. accuracy: 0.962] (0.150 secs)
Epoch 2 - Training [loss: 0.16052, accuracy: 0.952] (1.167 secs), Test [loss: 0.11741. accuracy: 0.963] (0.099 secs)
Epoch 3 - Training [loss: 0.12660, accuracy: 0.962] (1.219 secs), Test [loss: 0.10509. accuracy: 0.967] (0.088 secs)
Epoch 4 - Training [loss: 0.10539, accuracy: 0.968] (1.172 secs), Test [loss: 0.09923. accuracy: 0.969] (0.088 secs)
Epoch 5 - Training [loss: 0.09087, accuracy: 0.972] (1.148 secs), Test [loss: 0.09553. accuracy: 0.971] (0.089 secs)
Epoch 6 - Training [loss: 0.07973, accuracy: 0.976] (1.209 secs), Test [loss: 0.09332. accuracy: 0.972] (0.089 secs)
Epoch 7 - Training [loss: 0.07135, accuracy: 0.978] (1.155 secs), Test [loss: 0.09064. accuracy: 0.973] (0.088 secs)
Epoch 8 - Training [loss: 0.06496, accuracy: 0.980] (1.185 secs), Test [loss: 0.09178. accuracy: 0.973] (0.090 secs)
Epoch 9 - Training [loss: 0.05945, accuracy: 0.982] (1.198 secs)