In [1]:
## Load libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.style.use('dark_background')
%matplotlib inline

In [2]:
np.set_printoptions(precision=2)

In [3]:
import tensorflow as tf

In [4]:
tf.__version__

'2.15.0'

---

Convolution example:

![](https://onedrive.live.com/embed?resid=37720F927B6DDC34%21104168&authkey=%21ADjt5cD8McHWdv8&width=660)

---

In [8]:
# Define input volume
input_volume = np.array([[
    [0, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 2, 1, 0],
    [0, 0, 2, 2, 0, 1, 0],
    [0, 1, 1, 0, 2, 1, 0],
    [0, 0, 2, 1, 1, 0, 0],
    [0, 2, 1, 1, 2, 0, 0],
    [0, 0, 0, 0, 0, 0, 0]
]], dtype=np.float32)

# Add 3rd dimension for volume
input_volume = np.expand_dims(input_volume, axis=-1)

# Define filter (a.k.a. weights)
W = np.array([[
    [ 0, 0, -1],
    [ 0, 1, 0 ],
    [-2, 0, 2 ]
]], dtype=np.float32)

# Define bias
b = np.array([1], dtype=np.float32)

---

Build a model with only one convolutional layer initialized by $\mathbf{W}$ and $b.$

---

In [9]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(
        filters = 1,
        kernel_size = [3, 3],
        kernel_initializer = tf.constant_initializer(W),
        bias_initializer = tf.constant_initializer(b)
    )]
)

---

Feed the input volume into the model and print the output.

---

In [10]:
output = model(input_volume)
print(output.shape)
print(tf.squeeze(output))

(1, 5, 5, 1)
tf.Tensor(
[[ 6.  5. -2.  1.  2.]
 [ 3.  0.  3.  2. -2.]
 [ 4.  2. -1.  0.  0.]
 [ 2.  1.  2. -1. -3.]
 [ 1.  1.  1.  3.  1.]], shape=(5, 5), dtype=float32)


---

Add a max-pooling layer.

---

In [13]:
max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                           strides = (1, 1),
                                           padding='valid')

---

Apply max-pooling to the output of the convolution layer

---

In [14]:
tf.squeeze(max_pool_2d(output))

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[6., 5., 3., 2.],
       [4., 3., 3., 2.],
       [4., 2., 2., 0.],
       [2., 2., 3., 3.]], dtype=float32)>

---

MNISTLoader class to read data from the MNIST dataset.

Note that in TensorFlow, a typical representation of an image data set is a four-dimensional tensor of [number of images, width, height, number of color channels]. In the DataLoader class above, self.train_data and self.test_data were loaded with 60,000 and 10,000 handwritten digit images of size 28x28, respectively. Since we are reading a grayscale image here with only one color channel (a regular RGB color image has 3 color channels), we use the np.expand_dims() function to manually add one dimensional channels at the last dimension for the image data.

---

In [22]:
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):
        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]

---

Define a fully-connected neural network model.

The model accepts a vector (e.g. here a flattened 1×784 handwritten digit image) as input and outputs a 10-dimensional vector representing the probability that this image belongs to 0 to 9 respectively.

---

In [23]:
class FCN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

---

Define hyperparameters of the model used in training process.

---

In [38]:
num_epochs = 5
batch_size = 1000
learning_rate = 0.001

---

Instantiate the model and data reading classes, and instantiate an optimizer in tf.keras.optimizer (the Adam optimizer is used here).

---

In [36]:
model = CNN()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

---

Model training.

1. A random batch of training data is taken from the
DataLoader.
2. Feed the data into the model, and obtain the predicted value from the model.
3. Calculate the loss function (loss) by comparing the model predicted value with the true value. Here we use the sparse categorical cross-entropy function in tf.keras.losses as a loss function.
4. Calculate the gradient of the loss function on the model variables.
5. The gradients are passed into the optimizer, and use the apply_gradients method to update the model variables so that the loss value is minimized.

---

In [39]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
  X, y = data_loader.get_batch(batch_size)
  with tf.GradientTape() as tape:
    y_pred = model(X)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
    loss = tf.reduce_mean(loss)
    print("batch %d: loss %f" % (batch_index, loss.numpy()))
  grads = tape.gradient(loss, model.variables)
  optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 0.290286
batch 1: loss 0.265546
batch 2: loss 0.251465
batch 3: loss 0.278521
batch 4: loss 0.277983
batch 5: loss 0.206814
batch 6: loss 0.231549
batch 7: loss 0.242734
batch 8: loss 0.198607
batch 9: loss 0.230701
batch 10: loss 0.181315
batch 11: loss 0.184486
batch 12: loss 0.175068
batch 13: loss 0.228959
batch 14: loss 0.176043
batch 15: loss 0.201831
batch 16: loss 0.204633
batch 17: loss 0.157745
batch 18: loss 0.156266
batch 19: loss 0.122394
batch 20: loss 0.152576
batch 21: loss 0.129373
batch 22: loss 0.140789
batch 23: loss 0.161130
batch 24: loss 0.149253
batch 25: loss 0.158900
batch 26: loss 0.145901
batch 27: loss 0.108223
batch 28: loss 0.111316
batch 29: loss 0.109740
batch 30: loss 0.116526
batch 31: loss 0.136346
batch 32: loss 0.152749
batch 33: loss 0.120175
batch 34: loss 0.090257
batch 35: loss 0.101567
batch 36: loss 0.155162


KeyboardInterrupt: 

---

Output the accuracy of the trained model on the test set.

---

In [34]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
  start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
  y_pred = model.predict(data_loader.test_data[start_index: end_index])
  sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

test accuracy: 0.965300


---

Define CNN Model

---

In [35]:
class CNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,
            kernel_size=[5, 5],
            padding='same',
            activation=tf.nn.relu
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding='same',
            activation=tf.nn.relu
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.conv1(inputs)                  # [batch_size, 28, 28, 32]
        x = self.pool1(x)                       # [batch_size, 14, 14, 32]
        x = self.conv2(x)                       # [batch_size, 14, 14, 64]
        x = self.pool2(x)                       # [batch_size, 7, 7, 64]
        x = self.flatten(x)                     # [batch_size, 7 * 7 * 64]
        x = self.dense1(x)                      # [batch_size, 1024]
        x = self.dense2(x)                      # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output