In [1]:
## tf.kears functional API

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os

In [4]:
## 1. set hyper parameters
lr = 0.001
training_epochs = 1
batch_size = 100

# set checkpoint saving directory
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'mnist_cnn_seq'

checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok = True)

checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

In [5]:
# 2. make a data pipelining

mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
train_images = train_images.astype(np.float32) / 255.
test_images = test_images.astype(np.float32) / 255.
train_images = np.expand_dims(train_images, axis = -1)  # for 4D shape
test_images = np.expand_dims(test_images, axis = -1)  # for 4D shape

#one hot encoding
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(buffer_size = 100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).batch(batch_size)

# shuffle : 고정된 buffer_size만큼 epoch 마다 이미지를 섞어서 오버피팅이 줄도록 도와줌

In [8]:
# Seqeuntial API 단점
# we can't make
# 1. Multi-input models
# 2. Multi-output models
# 3. Models with shared layers(the same layer called several times)
# 4. Models with non=sequential data flow(ex. residual connections)

In [12]:
# 3. build a neural network model using sequential API
def create_model() :
  inputs = keras.Input(shape=(28,28,1))  # input 선언
  conv1 = keras.layers.Conv2D(filters=32, kernel_size = 3, padding='SAME', activation = tf.nn.relu)(inputs)
  # conv1 : by __init__ method / (inputs) : by call method
  pool1 = keras.layers.MaxPool2D(padding = 'SAME')(conv1)
  conv2 = keras.layers.Conv2D(filters=64, kernel_size = 3, padding='SAME', activation = tf.nn.relu)(pool1)
  pool2 = keras.layers.MaxPool2D(padding = 'SAME')(conv2)
  conv3 = keras.layers.Conv2D(filters=128, kernel_size = 3, padding='SAME', activation = tf.nn.relu)(pool2)
  pool3 = keras.layers.MaxPool2D(padding = 'SAME')(conv3)
  pool3_flat = keras.layers.Flatten()(pool3)
  dense4 = keras.layers.Dense(units=356, activation = tf.nn.relu)(pool3_flat)
  drop4 = keras.layers.Dropout(rate=0.4)(dense4)
  # drop out : 인풋 데이터에 40%의 노드들을 무작위로 0으로 만드는 드롭아웃을 적용.
  logits = keras.layers.Dense(units=10)(drop4)
  return keras.Model(inputs = inputs, outputs = logits)

model = create_model()
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 128)         73856 

In [13]:
# ### Implementation of REsidual Block ###
# inputs = keras.Input(shape=(28,28,256))
# conv1 = keras.layers.Conv2D(filters=64, kernel_size=1. padding='SAME', activation = keras.layers.ReLU())(inputs)
# conv2 = keras.layers.Conv2D(filters=64, kernel_size=3. padding='SAME', activation = keras.layers.ReLU())(conv1)
# conv3 = keras.layers.Conv2D(filters=64, kernel_size=3. padding='SAME')(conv2)  # Relu 뒤에서 적용해야해서 activation funcdtion x
# add3 = keras.layers.add([conv3,inputs]) # conv3과 inputs 더하기
# relu3 = keras.layers.ReLU()(add3)
# model = keras.Model(inputs = inputs, outputs = relu3)

In [14]:
# 4,5 define a loss function & calculate a gradient 
def loss_fn(model, images, labels) :
  logits = model(images, training = True) # Dropout 적용됨
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels))
  return loss


def grad(model, image, labels) :
  with tf.GradientTape() as tape :
    loss = loss_fn(model, images, labels) 
  return tape.gradient(loss, model.variables) #loss 를 이 모델에 있는 모든 parameter에 대해서 미분한 값을 구해주세요

In [15]:
# 6. select an optimizer - Adam.. etc
# 7. define a metric for model's performance - accuracy etc
# 8. (optional) make a checkpoint for saving

optimizer = tf.keras.optimizers.Adam(learning_rate = lr)

def evaluate(model, images, labels) :
  logits = model(images, training=False) # dropout 적용 x
  correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(labels,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  return accuracy

#tf.equal() 함수는 두 값이 동일하면 True, 다르면 False를 반환하는 함수입니다.
#tf.cast() 함수는 True/False 형태의 값을 1과 0으로 바꾸어주는 함수입니다.

checkpoint = tf.train.Checkpoint(cnn=model)


In [17]:
# 9.  train an validate a neural network model

for epoch in range(training_epochs) :
  avg_loss = 0
  avg_train_acc = 0
  avg_test_acc = 0
  train_step = 0
  test_step = 0

  for images, labels in train_dataset :
    grads = grad(model, images, labels)
    optimizer.apply_gradients(zip(grads, model.variables))
    loss = loss_fn(model, images, labels)
    acc = evaluate(model, images, labels)
    avg_loss = avg_loss + loss
    avg_train_acc = avg_train_acc + acc
    train_step += 1
  
  avg_loss = avg_loss / train_step
  avg_train_acc = avg_train_acc / train_step

  for images, labels in test_dataset:        
    acc = evaluate(model, images, labels)
    avg_test_acc = avg_test_acc + acc
    test_step += 1    
  avg_test_acc = avg_test_acc / test_step    

  print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss), 
        'train accuracy = ', '{:.4f}'.format(avg_train_acc), 
        'test accuracy = ', '{:.4f}'.format(avg_test_acc))
    
  checkpoint.save(file_prefix=checkpoint_prefix) # epoch 한번 끝날때마다 모델 저장

print('Learning Finished!')

Epoch: 1 loss = 0.08083422 train accuracy =  0.9805 test accuracy =  0.9889
Learning Finished!
