In [1]:
## tf.keras.Model subclassing

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os

In [3]:
## 1. set hyper parameters
lr = 0.001
training_epochs = 1
batch_size = 100

# set checkpoint saving directory
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'mnist_cnn_seq'

checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok = True)

checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

In [4]:
# 2. make a data pipelining

mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
train_images = train_images.astype(np.float32) / 255.
test_images = test_images.astype(np.float32) / 255.
train_images = np.expand_dims(train_images, axis = -1)  # for 4D shape
test_images = np.expand_dims(test_images, axis = -1)  # for 4D shape

#one hot encoding
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

In [6]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(buffer_size = 100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).batch(batch_size)

# shuffle : 고정된 buffer_size만큼 epoch 마다 이미지를 섞어서 오버피팅이 줄도록 도와줌

In [7]:
# ## Model Subclassing
# - build a fully-customizable model by subclassing tf.keras.Model (functional API보다 더 customizing 자유도 높음)
# - Create layers in the __init__ method and set them as attributes of the class instance 
# - define the forward pass in the call method (__init__ method 내에 저장된 layer들의 입력 연결)

In [18]:
# 3. build a neural network model - Model Subclassing / functionalAPI(Sequential API도 사용 가능)
class MNISTModel(tf.keras.Model) :
  def __init__(self) :
    super(MNISTModel, self).__init__()
    self.conv1 = keras.layers.Conv2D(filters = 32, kernel_size = 3, padding = 'SAME', activation = tf.nn.relu)
    self.pool1 = keras.layers.MaxPool2D(padding = 'SAME')
    self.conv2 = keras.layers.Conv2D(filters = 64, kernel_size = 3, padding = 'SAME', activation = tf.nn.relu)
    self.pool2 = keras.layers.MaxPool2D(padding = 'SAME')
    self.conv3 = keras.layers.Conv2D(filters = 128, kernel_size = 3, padding = 'SAME', activation = tf.nn.relu)
    self.pool3 = keras.layers.MaxPool2D(padding = 'SAME')
    self.pool3_flat = keras.layers.Flatten()
    self.dense4 = keras.layers.Dense(units=256, activation=tf.nn.relu)
    self.drop4 = keras.layers.Dropout(rate=0.4)
    self.dense5 = keras.layers.Dense(units = 10)

  def call(self,inputs, training=False) :
    net = self.conv1(inputs)
    net = self.pool1(net)
    net = self.conv2(net)
    net = self.pool2(net)
    net = self.conv3(net)
    net = self.pool3(net)
    net = self.pool3_flat(net)
    net = self.dense4(net)
    net = self.drop4(net)
    net = self.dense5(net)
    return net

model = MNISTModel()

In [19]:
# 4,5 define a loss function & calculate a gradient 
def loss_fn(model, images, labels) :
  logits = model(images, training = True) # Dropout 적용됨
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels))
  return loss


def grad(model, image, labels) :
  with tf.GradientTape() as tape :
    loss = loss_fn(model, images, labels) 
  return tape.gradient(loss, model.variables) #loss 를 이 모델에 있는 모든 parameter에 대해서 미분한 값을 구해주세요

In [20]:
# 6. select an optimizer - Adam.. etc
# 7. define a metric for model's performance - accuracy etc
# 8. (optional) make a checkpoint for saving

optimizer = tf.keras.optimizers.Adam(learning_rate = lr)

def evaluate(model, images, labels) :
  logits = model(images, training=False) # dropout 적용 x
  correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(labels,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  return accuracy

#tf.equal() 함수는 두 값이 동일하면 True, 다르면 False를 반환하는 함수입니다.
#tf.cast() 함수는 True/False 형태의 값을 1과 0으로 바꾸어주는 함수입니다.

checkpoint = tf.train.Checkpoint(cnn=model)


In [21]:
# 9.  train an validate a neural network model

for epoch in range(training_epochs) :
  avg_loss = 0
  avg_train_acc = 0
  avg_test_acc = 0
  train_step = 0
  test_step = 0

  for images, labels in train_dataset :
    grads = grad(model, images, labels)
    optimizer.apply_gradients(zip(grads, model.variables))
    loss = loss_fn(model, images, labels)
    acc = evaluate(model, images, labels)
    avg_loss = avg_loss + loss
    avg_train_acc = avg_train_acc + acc
    train_step += 1
  
  avg_loss = avg_loss / train_step
  avg_train_acc = avg_train_acc / train_step

  for images, labels in test_dataset:        
    acc = evaluate(model, images, labels)
    avg_test_acc = avg_test_acc + acc
    test_step += 1    
  avg_test_acc = avg_test_acc / test_step    

  print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss), 
        'train accuracy = ', '{:.4f}'.format(avg_train_acc), 
        'test accuracy = ', '{:.4f}'.format(avg_test_acc))
    
  checkpoint.save(file_prefix=checkpoint_prefix) # epoch 한번 끝날때마다 모델 저장

print('Learning Finished!')

Epoch: 1 loss = 0.18933764 train accuracy =  0.9536 test accuracy =  0.9839
Learning Finished!


In [22]:
model.summary()

Model: "mnist_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           multiple                  320       
                                                                 
 max_pooling2d_9 (MaxPooling  multiple                 0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          multiple                  18496     
                                                                 
 max_pooling2d_10 (MaxPoolin  multiple                 0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          multiple                  73856     
                                                                 
 max_pooling2d_11 (MaxPoolin  multiple               

In [24]:
## class method 사용 : model.summary가 train 후부터 볼 수 있음..
# CNN 모델을 각각 학습 시킨 후, infenrence 시에 힘을 합치는(?) Ensemble 방법 사용 가능  