In [1]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)


import numpy as np
import matplotlib.pyplot as plt

TensorFlow version: 2.8.2


In [2]:
# Hyperparameters (read by the data pipeline, the LR schedule and the training loop)
learning_rate = 1e-3    # initial learning rate handed to the decay schedule
training_epochs = 10    # number of passes over train_ds in the training loop
batch_size = 100        # examples per batch for both train and test pipelines

In [3]:
# Dataset processing: load MNIST, normalize it and build the tf.data pipelines.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Data normalization: scale pixel values by 1/255.
# Cast to float32 *before* dividing; dividing the integer arrays returned by
# load_data() by a Python float would produce float64 arrays, doubling the
# memory held by the arrays and by the datasets built from them.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Append a trailing channel axis so each image has an explicit single channel,
# matching the (28, 28, 1) input the model is traced with.
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# One-hot encode the labels over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Build dataset pipeline: only the training stream is shuffled; both are batched.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=100000)
    .batch(batch_size)
)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [24]:
#build model

class mn_Model(tf.keras.Model):
  """VGG-style convolutional classifier with a 10-way softmax output.

  Architecture (every convolution is 'same'-padded with ReLU activation, every
  pooling layer is a 2x2, stride-2, 'same'-padded max-pool):

    stack1: conv3x3(28)  -> conv3x3(28)  -> pool
    stack2: conv3x3(32)  -> conv3x3(32)  -> pool
    stack3: conv3x3(64)  -> conv3x3(64)  -> conv1x1(64)  -> pool
    stack4: conv3x3(128) -> conv3x3(128) -> conv1x1(128) -> pool
    head:   Flatten -> Dense(256, relu) -> Dense(256, relu) -> Dense(10, softmax)

  The final layer applies softmax, so the model returns class probabilities,
  not raw logits; losses consuming its output must use from_logits=False.

  A fifth 256-filter stack was previously declared here but was never applied:
  it was disabled in call() by wrapping it in a string literal. The unused
  layers and the dead string statement have been removed; the forward pass is
  unchanged.
  """

  def __init__(self):
    super().__init__()

    def conv(filters, size):
      # All convolutions in this network share padding and activation.
      return tf.keras.layers.Conv2D(
          filters, kernel_size=[size, size], padding='same', activation=tf.nn.relu)

    def pool():
      # Halves each spatial dimension (rounding up, because of 'same' padding).
      return tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')

    # stack1
    self.conv1_1 = conv(28, 3)
    self.conv1_2 = conv(28, 3)
    self.pool1_1 = pool()

    # stack2
    self.conv2_1 = conv(32, 3)
    self.conv2_2 = conv(32, 3)
    self.pool2_1 = pool()

    # stack3
    self.conv3_1 = conv(64, 3)
    self.conv3_2 = conv(64, 3)
    self.conv3_3 = conv(64, 1)
    self.pool3_1 = pool()

    # stack4
    self.conv4_1 = conv(128, 3)
    self.conv4_2 = conv(128, 3)
    self.conv4_3 = conv(128, 1)
    self.pool4_1 = pool()

    # classifier head
    self.den1 = tf.keras.layers.Flatten()
    self.den2 = tf.keras.layers.Dense(256, activation='relu')
    self.den3 = tf.keras.layers.Dense(256, activation='relu')
    self.den4 = tf.keras.layers.Dense(10, activation='softmax')

  def call(self, inputs, training=False):
    """Run the forward pass.

    Args:
      inputs: batch of images; the notebook feeds tensors of shape
        (batch, 28, 28, 1).
      training: accepted for Keras API compatibility. It is not forwarded to
        any layer; the network contains only convolution, pooling, flatten and
        dense layers.

    Returns:
      Tensor of softmax class probabilities with 10 entries per example.
    """
    # Layers are applied strictly in this order.
    pipeline = (
        self.conv1_1, self.conv1_2, self.pool1_1,
        self.conv2_1, self.conv2_2, self.pool2_1,
        self.conv3_1, self.conv3_2, self.conv3_3, self.pool3_1,
        self.conv4_1, self.conv4_2, self.conv4_3, self.pool4_1,
        self.den1, self.den2, self.den3, self.den4,
    )
    net = inputs
    for layer in pipeline:
      net = layer(net)
    return net
  

# Instantiate the network used by the loss, evaluation and training cells.
model = mn_Model()

# Simple model printout: call the model once on a symbolic 28x28x1 input so its
# layers get built, then print the layer table.
temp_inputs = tf.keras.Input(shape=(28, 28, 1))
model(temp_inputs)
model.summary()

Model: "mn__model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_43 (Conv2D)          multiple                  280       
                                                                 
 conv2d_44 (Conv2D)          multiple                  7084      
                                                                 
 max_pooling2d_18 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 conv2d_45 (Conv2D)          multiple                  8096      
                                                                 
 conv2d_46 (Conv2D)          multiple                  9248      
                                                                 
 max_pooling2d_19 (MaxPoolin  multiple                 0         
 g2D)                                                  

In [13]:
# Loss function
def loss_func(model, images, labels):
    """Mean sparse categorical cross-entropy of `model` on one batch.

    Args:
        model: callable invoked as model(images, training=True).
        images: batch of inputs for the model.
        labels: one-hot labels; they are reduced to class indices with argmax
            along axis 1 before the loss is computed.

    Returns:
        Scalar tensor: the cross-entropy averaged over the batch.
    """
    # The model in this notebook ends in a softmax layer, so these are class
    # probabilities; the loss below is used with its default from_logits=False.
    probs = model(images, training=True)
    class_ids = tf.argmax(labels, 1)
    per_example = tf.keras.losses.sparse_categorical_crossentropy(
        y_true=class_ids, y_pred=probs)
    return tf.reduce_mean(per_example)

# Gradient Function
def grad(model, images, labels):
    """Differentiate the batch loss w.r.t. the model's trainable variables.

    Args:
        model: model whose trainable_variables are differentiated.
        images: batch of inputs passed to loss_func.
        labels: one-hot labels passed to loss_func.

    Returns:
        List of gradients, in the same order as model.trainable_variables.
    """
    with tf.GradientTape() as tape:
        # The forward pass must run inside the tape so it is recorded.
        batch_loss = loss_func(model, images, labels)
    # Read the variable list only after the forward pass has run.
    variables = model.trainable_variables
    return tape.gradient(batch_loss, variables)

# Evaluation Function
def evaluate(models, images, labels):
    """Classification accuracy of `models` on one batch.

    Args:
        models: the model to evaluate (a single model, despite the plural
            name, which is kept so existing keyword callers keep working).
            Previously this argument was ignored and the global `model` was
            used instead.
        images: batch of inputs for the model.
        labels: one-hot labels, compared via argmax along axis 1.

    Returns:
        Scalar float32 tensor: fraction of examples whose highest-scoring
        class matches the label.
    """
    predicts = models(images, training=False)
    predicted_ids = tf.argmax(predicts, 1)
    true_ids = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted_ids, true_ids), tf.float32)
    return tf.reduce_mean(hits)

In [15]:
# Learning-rate decay: halve the rate (decay_rate=0.5, staircase) once every
# decay_steps optimizer steps, where decay_steps is 5 * 5 = 25 epochs' worth of
# batches.
# NOTE(review): training_epochs is 10, so a single run of the training loop
# finishes before the first decay boundary is reached - confirm this is intended.
lr_decay = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=learning_rate,
    decay_steps=x_train.shape[0] / batch_size * 5 * 5,
    decay_rate=0.5,
    staircase=True,
)

# Optimizer: Adam driven by the schedule above.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_decay)

In [25]:
# Training loop: one optimization pass over train_ds and one evaluation pass
# over test_ds per epoch, printing the epoch-averaged metrics.
for epoch in range(training_epochs):
    # Running sums over batches; divided by the batch counts below.
    avg_loss = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.
    train_step = 0
    test_step = 0

    for images, labels in train_ds:
        grads = grad(model, images, labels)
        # grad() differentiates w.r.t. model.trainable_variables, so the
        # gradients must be paired with that same list. Zipping against
        # model.variables would misalign gradients and variables as soon as
        # the model contains any non-trainable variable.
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Both training metrics are measured after the weight update, each
        # with its own forward pass on the same batch.
        loss = loss_func(model, images, labels)
        acc = evaluate(model, images, labels)
        avg_loss = avg_loss + loss
        avg_train_acc = avg_train_acc + acc
        train_step += 1
    avg_loss = avg_loss / train_step
    avg_train_acc = avg_train_acc / train_step

    for images, labels in test_ds:
        acc = evaluate(model, images, labels)
        avg_test_acc = avg_test_acc + acc
        test_step += 1
    # Unweighted mean of the per-batch accuracies.
    avg_test_acc = avg_test_acc / test_step

    print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss), 
          'train accuracy = ', '{:.4f}'.format(avg_train_acc), 
          'test accuracy = ', '{:.4f}'.format(avg_test_acc))

Epoch: 1 loss = 0.30995238 train accuracy =  0.8936 test accuracy =  0.9774
Epoch: 2 loss = 0.04561888 train accuracy =  0.9867 test accuracy =  0.9872
Epoch: 3 loss = 0.02932960 train accuracy =  0.9916 test accuracy =  0.9895
Epoch: 4 loss = 0.02060622 train accuracy =  0.9944 test accuracy =  0.9897
Epoch: 5 loss = 0.01628811 train accuracy =  0.9955 test accuracy =  0.9898
Epoch: 6 loss = 0.01320840 train accuracy =  0.9962 test accuracy =  0.9924
Epoch: 7 loss = 0.01073407 train accuracy =  0.9974 test accuracy =  0.9900
Epoch: 8 loss = 0.00900526 train accuracy =  0.9976 test accuracy =  0.9910
Epoch: 9 loss = 0.00835432 train accuracy =  0.9980 test accuracy =  0.9924
Epoch: 10 loss = 0.00655830 train accuracy =  0.9984 test accuracy =  0.9913
