In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers

import numpy as np
import matplotlib.pyplot as plt
import random

# One shared seed for every random number generator this notebook seeds
# (Python's `random`, NumPy and TensorFlow).
SEED = 0

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
def plot_result(hist, loss_min=None, loss_max=None):
    """Plot train/validation loss and accuracy curves side by side.

    Parameters
    ----------
    hist
        Object exposing ``epoch`` (used as the x values) and a ``history``
        mapping with the keys ``'loss'``, ``'val_loss'``, ``'accuracy'`` and
        ``'val_accuracy'``; in this notebook it is the return value of
        ``model.fit``.
    loss_min, loss_max : float, optional
        Lower / upper limit of the loss y-axis. Either one may be passed on its
        own; a limit left as ``None`` keeps matplotlib's automatic value.
    """
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: loss curves.
    ax_loss.plot(hist.epoch, hist.history['loss'], label="train")
    ax_loss.plot(hist.epoch, hist.history['val_loss'], label='validation')
    ax_loss.set_title('Loss')
    # Compare against None by identity and honour a one-sided limit:
    # set_ylim leaves any bound passed as None unchanged.
    if loss_min is not None or loss_max is not None:
        ax_loss.set_ylim(bottom=loss_min, top=loss_max)
    ax_loss.set_xlabel('에폭수')
    ax_loss.set_ylabel('loss')
    ax_loss.legend()

    # Right panel: accuracy curves.
    ax_acc.plot(hist.epoch, hist.history['accuracy'], label='train')
    ax_acc.plot(hist.epoch, hist.history['val_accuracy'], label='validation')
    ax_acc.set_title('Accuracy')
    ax_acc.set_xlabel('에폭수')
    ax_acc.set_ylabel('accuracy')
    ax_acc.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# Load the MNIST dataset (train and test splits of images and labels).
(train_image, train_label), (test_image, test_label) = keras.datasets.mnist.load_data()

# Preprocessing - X: cast to float32 and scale by 1/255.
X_train = train_image.astype('float32') / 255
X_test = test_image.astype('float32') / 255

# Preprocessing - y: one-hot encode the labels.
y_train = keras.utils.to_categorical(train_label)
y_test = keras.utils.to_categorical(test_label)

# Sanity check: label shapes and image dtypes after preprocessing.
print(y_train.shape, y_test.shape)
print(X_train.dtype, X_test.dtype)

In [None]:
# Hyperparameters shared by the experiments in this notebook.
LEARNING_RATE = 1e-3  # learning rate handed to the Adam optimizers
N_EPOCH = 20          # number of epochs passed to fit()
N_BATCH = 1_000       # batch size used when batching the tf.data datasets

In [None]:
# Build the tf.data input pipelines.
# Training set: shuffle with a 60000-element buffer, then batch by N_BATCH and
# drop a final batch that is smaller than N_BATCH.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(60000)
    .batch(N_BATCH, drop_remainder=True)
)
# Test set: batched only (no shuffling, last partial batch kept).
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(N_BATCH)
)

# Number of batches in each dataset.
print(len(train_dataset), len(test_dataset))

# 모델의 크기 변경 -> 성능 확인

In [None]:
def get_small_model():
    """Build and compile the smallest classifier: flattened 28x28 input -> 10-way softmax.

    Returns
    -------
    keras.Sequential
        Model compiled with Adam (learning rate ``LEARNING_RATE``), categorical
        cross-entropy loss and the ``accuracy`` metric.
    """
    model = keras.Sequential()
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to a layer triggers a deprecation warning in Keras 3.
    model.add(keras.Input(shape=(28, 28)))
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation='softmax', name='output_layer'))

    model.compile(optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Instantiate the small model and print its layer/parameter summary.
small_model = get_small_model()
small_model.summary()

In [None]:
# Train the small model for N_EPOCH epochs, validating on the test dataset;
# `hist` is what plot_result() consumes in the next cell.
hist = small_model.fit(train_dataset, epochs=N_EPOCH, validation_data=test_dataset)

In [None]:
# Visualization
plot_result(hist)  # optionally pass loss limits (e.g. 0.25, 0.5) to zoom the loss axis

In [None]:
def get_big_model():
    """Build and compile a deep fully connected classifier for 28x28 inputs.

    The hidden stack has eight ReLU Dense layers (two each of width 512, 256,
    128 and 64) followed by a 10-way softmax output layer.

    Returns
    -------
    keras.Sequential
        Model compiled with Adam (learning rate ``LEARNING_RATE``), categorical
        cross-entropy loss and the ``accuracy`` metric.
    """
    model = keras.Sequential()
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to a layer triggers a deprecation warning in Keras 3.
    model.add(keras.Input(shape=(28, 28)))
    model.add(layers.Flatten())

    # Hidden layers, added in the same order as the widths are listed.
    for units in (512, 512, 256, 256, 128, 128, 64, 64):
        model.add(layers.Dense(units, activation='relu'))

    model.add(layers.Dense(10, activation='softmax', name='output_layer'))

    model.compile(optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Instantiate the big model and print its layer/parameter summary.
big_model = get_big_model()
big_model.summary()

In [None]:
# Train the big model with the same datasets and epoch count as the small one.
# Note: this rebinds `hist`, replacing the small model's training history.
hist = big_model.fit(train_dataset, epochs=N_EPOCH, validation_data=test_dataset)

In [None]:
# Plot the latest training history, restricting the loss axis to [0.05, 0.15].
plot_result(hist, 0.05, 0.15)

In [None]:
# Evaluate both models on the test dataset and print what evaluate() returns
# for each (small model first, then big model).
print(small_model.evaluate(test_dataset))
print(big_model.evaluate(test_dataset))

# Dropout Layer

In [None]:
# A Dropout layer is defined in front of the layer it should be applied to.
def get_dropout_model():
    """Build and compile the deep classifier with Dropout in front of every Dense layer.

    Layer order: Flatten, then (Dropout(0.3) -> ReLU Dense) for widths
    512, 512, 256, 256, 128, 128, 64, 64, then Dropout(0.3) -> 10-way softmax.

    Returns
    -------
    keras.Sequential
        Model compiled with Adam (learning rate ``LEARNING_RATE``), categorical
        cross-entropy loss and the ``accuracy`` metric.
    """
    model = keras.Sequential()
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to a layer triggers a deprecation warning in Keras 3.
    model.add(keras.Input(shape=(28, 28)))
    model.add(layers.Flatten())

    # Each hidden Dense layer is preceded by its own Dropout layer.
    for units in (512, 512, 256, 256, 128, 128, 64, 64):
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Dense(units, activation='relu'))

    # Dropout in front of the output layer as well.
    model.add(layers.Dropout(rate=0.3))
    model.add(layers.Dense(10, activation='softmax', name='output_layer'))  # output layer

    model.compile(optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Instantiate the dropout model and print its layer/parameter summary.
dropout_model = get_dropout_model()
dropout_model.summary()

In [None]:
# When Dropout layers are used, more epochs are needed than without them.
# NOTE(review): this call still trains for N_EPOCH, the same count as the
# models without dropout - confirm whether a larger value was intended.
hist = dropout_model.fit(train_dataset, epochs=N_EPOCH, validation_data=test_dataset)

In [None]:
# Plot the latest training history with automatic loss-axis limits.
plot_result(hist)

# Batch Normalization 예제

In [None]:
def get_model_bn(lr=0.01):
    """Build and compile a fully connected classifier that uses batch normalization.

    Every hidden block is ``Dense -> BatchNormalization -> ReLU`` (a Dropout
    layer, if used, would go in front of the Dense layer). Hidden widths are
    256, 256, 128, 128, 64, 64; the output is ``Dense(10) -> Softmax``.

    Parameters
    ----------
    lr : float, optional
        Learning rate for the Adam optimizer (default 0.01).

    Returns
    -------
    keras.Sequential
        Model compiled with Adam, categorical cross-entropy loss and the
        ``accuracy`` metric.
    """
    model = keras.Sequential()
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to a layer triggers a deprecation warning in Keras 3.
    model.add(keras.Input(shape=(28, 28)))
    model.add(layers.Flatten())

    # (Dropout Layer ->) Dense Layer -> Batch Normalization -> ReLU(Activation)
    for units in (256, 256, 128, 128, 64, 64):
        model.add(layers.Dense(units))  # no activation here; ReLU follows the normalization
        model.add(layers.BatchNormalization())
        model.add(layers.ReLU())

    # output layer
    model.add(layers.Dense(10))
    model.add(layers.Softmax())

    model.compile(optimizer=optimizers.Adam(learning_rate=lr),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Build the batch-normalization model with LEARNING_RATE (instead of the
# function's default lr) and print its layer/parameter summary.
model_bn = get_model_bn(LEARNING_RATE)
model_bn.summary()

In [None]:
# Train the batch-normalization model; `hist` is rebound to this run's history.
hist = model_bn.fit(train_dataset, epochs=N_EPOCH, validation_data=test_dataset)

In [None]:
# Plot the latest training history with automatic loss-axis limits.
plot_result(hist)

# Learning Rate 변화를 통한 성능개선

## Learning Rate scheduler 를 이용
- 특정 step 마다 learning rate를 변경

In [None]:
# Exponential-decay learning-rate schedule starting at LEARNING_RATE.
# len(train_dataset) is the number of batches per epoch, so decay_steps spans
# 5 epochs; with staircase=True the rate is multiplied by decay_rate (0.5) in
# discrete steps rather than continuously.
lr_scheduler = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=LEARNING_RATE,
    decay_steps=len(train_dataset) * 5,  # the keyword is `decay_steps`; `decay_step` raises TypeError
    decay_rate=0.5,
    staircase=True,
)


In [None]:
def get_model_lr(initial_lr=0.01):
    """Build a 256-128-64 ReLU classifier trained with an exponentially decaying learning rate.

    The schedule reads the module-level ``train_dataset``: ``decay_steps`` is
    ``len(train_dataset) * 5`` (five epochs' worth of batches), and with
    ``staircase=True`` the learning rate is multiplied by 0.5 at each such
    interval.

    Parameters
    ----------
    initial_lr : float, optional
        Starting learning rate of the schedule (default 0.01).

    Returns
    -------
    keras.Sequential
        Model compiled with Adam driven by the schedule, categorical
        cross-entropy loss and the ``accuracy`` metric.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # `input_shape=` to a layer triggers a deprecation warning in Keras 3.
        keras.Input(shape=(28, 28)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax', name='output_layer'),
    ])

    steps_per_decay = len(train_dataset) * 5
    lr_sch = optimizers.schedules.ExponentialDecay(
        initial_learning_rate=initial_lr,
        decay_steps=steps_per_decay,
        decay_rate=0.5,
        staircase=True,
    )

    model.compile(optimizer=optimizers.Adam(learning_rate=lr_sch),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [None]:
# Build the scheduled-learning-rate model starting from LEARNING_RATE and train
# it with the same datasets and epoch count as the earlier models.
model_lr = get_model_lr(LEARNING_RATE)
hist = model_lr.fit(train_dataset, epochs=N_EPOCH, validation_data=test_dataset)

## callback 을 이용해 학습률 변경
- ReduceLROnPlateau callback을 이용

In [None]:
def get_model_lr2(initial_lr=0.01):
    """Build a 256-128-64 ReLU classifier compiled with a plain (constant) Adam learning rate.

    No schedule is attached here, so any learning-rate changes have to come
    from outside the model (e.g. a callback passed to ``fit``).

    Parameters
    ----------
    initial_lr : float, optional
        Learning rate given to the Adam optimizer (default 0.01).

    Returns
    -------
    keras.Sequential
        Model compiled with Adam, categorical cross-entropy loss and the
        ``accuracy`` metric.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # `input_shape=` to a layer triggers a deprecation warning in Keras 3.
        keras.Input(shape=(28, 28)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax', name='output_layer'),
    ])

    model.compile(optimizer=optimizers.Adam(learning_rate=initial_lr),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [None]:
# Build the model whose learning rate will be adjusted by a callback during fit().
model_lr2 = get_model_lr2(LEARNING_RATE)

In [None]:
# Callback that watches val_loss and multiplies the learning rate by `factor`
# (0.5) once it has gone `patience` (3) epochs without improving.
# NOTE(review): the Keras docs list only 0 and 1 for `verbose` - confirm that 2
# is intended.
lr_callback = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
                                                factor=0.5, 
                                                patience=3, 
                                                verbose=2
                                               )

# Train for 10 epochs more than the other experiments, with the callback attached.
hist = model_lr2.fit(train_dataset, epochs=N_EPOCH+10, validation_data=test_dataset, 
                    callbacks=[lr_callback])