In [1]:
# for colab environment
# !pip install tensorflow-gpu==2

In [2]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import datasets

In [14]:
# Hyper-parameters
num_classes = 10
batch_size = 32
epochs = 5

# Data

In [15]:
mnist = tf.keras.datasets.mnist

# load data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# add channel dim
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# normalization
x_train, x_test = x_train / 255.0, x_test / 255.0

## tf.data

In [16]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# need to shuffle on training dataset to prevent bias
train_ds = train_ds.shuffle(buffer_size=1000)
train_ds = train_ds.batch(batch_size=batch_size)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(batch_size=batch_size)

### check data size and type

In [17]:
image, label = next(iter(train_ds))

In [18]:
image.shape, label.shape

(TensorShape([32, 28, 28, 1]), TensorShape([32]))

In [19]:
image.dtype

tf.float64


# Model

## Model structure

In [20]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = layers.Conv2D(filters=32, kernel_size=3, strides=(1, 1), padding='SAME')
        self.conv2 = layers.Conv2D(filters=32, kernel_size=3, strides=(1, 1), padding='SAME')
        self.conv3 = layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1), padding='SAME')
        self.conv4 = layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1), padding='SAME')
        self.relu = layers.Activation('relu')
        self.maxpool = layers.MaxPool2D((2, 2))
        self.dropout = layers.Dropout(0.5)
        
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(512)
        self.dense2 = layers.Dense(num_classes)
        self.sofmax = layers.Activation('softmax')
        
    def call(self, X): 
        X = self.conv1(X) # [batch_size, 28, 28, 1] -> [batch_size, 28, 28, 32]
        X = self.relu(X)
        X = self.conv2(X) # [batch_size, 28, 28, 32] -> [batch_size, 28, 28, 32*32]
        X = self.relu(X)
        X = self.maxpool(X) # [batch_size, 28, 28, 32*32] -> [batch_size, 14, 14, 32*32]
        X = self.dropout(X)
        
        X = self.conv3(X) # [batch_size, 14, 14, 32*32] -> [batch_size, 14, 14, 32*32*64]
        X = self.relu(X)
        X = self.conv4(X) # [batch_size, 14, 14, 32*32*64] -> [batch_size, 14, 14, 32*32*64*64]
        X = self.relu(X)
        X = self.maxpool(X) # [batch_size, 14, 14, 32*32*64*64] -> [batch_size, 7, 7, 32*32*64*64]
        X = self.dropout(X)
        
        X = self.flatten(X) # [batch_size, 7, 7, 32*32*64*64] -> [batch_size, 7*7*(32*32*64*64)]
        X = self.dense1(X) # [batch_size, 7*7*(32*32*64*64)] -> [batch_size, 512]
        X = self.relu(X)
        X = self.dropout(X)
        X = self.dense2(X) # [batch_size, 512] -> [batch_size, num_classes]
        return self.sofmax(X)
        
model = MyModel()

# Training

## By compile()

In [21]:
# model.compile(optimizer=tf.keras.optimizers.Adam(), 
#               loss=tf.keras.losses.SparseCategoricalCrossentropy())
# model.fit(train_ds, epochs=epochs)

## Manually

In [22]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [23]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')

In [24]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

In [25]:
@tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [26]:
for epoch in range(epochs):
    for images, labels in train_ds:
        train_step(images, labels)
        
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)
    
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss {}, Test Accuracy: {}'
    print(template.format(epoch+1, 
                          train_loss.result(),
                          train_accuracy.result() * 100, 
                          test_loss.result(), 
                          test_accuracy.result() * 100))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1, Loss: 0.09977281093597412, Accuracy: 96.95166778564453, Test Loss 0.037901584059000015, Test Accuracy: 98.77999877929688
Epoch 2, Loss: 0.06836521625518799, Accuracy: 97.89582824707031, Test Loss 0.032650694251060486, Test Accuracy: 99.01000213623047
Epoch 3, Loss: 0.05352446809411049, Accuracy: 98.34333038330078, Test Loss 0.034836623817682266, Test Accuracy: 98.93333435058594
Epoch 4, Loss: 0.044734854251146317, Accuracy: 98.61000061035156, Test Loss 0.03382951766252518, Test Accuracy: 98.95249938964844
Epoch 5, Loss: 0.03848061338067055, Accuracy: 98.80599975585938, Test Loss 0.032135333865880966, Test Accuracy: 99.01399993896484
