In [2]:
import tensorflow as tf

# 케라스의 내장 데이터셋에서 mnist 데이터셋 로드
mnist = tf.keras.datasets.mnist

# load_data()로 데이터셋 로드
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 로드된 데이터셋 확인
print('train set: ', x_train.shape, y_train.shape)
print('test set: ', x_test.shape, y_test.shape)

# 데이터 정규화
x_train = x_train/x_train.max() # max: 255
x_test = x_test/x_test.max() # max: 255

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
train set:  (60000, 28, 28) (60000,)
test set:  (10000, 28, 28) (10000,)


In [3]:
# 초기값 설정
dense = tf.keras.layers.Dense(256, activation='relu')
dense.get_config()['kernel_initializer']

{'class_name': 'GlorotUniform', 'config': {'seed': None}}

In [4]:
# 문자열 초기화
dense = tf.keras.layers.Dense(256, kernel_initializer='he_normal', activation='relu')
print(dense.get_config()['kernel_initializer'])

# 클래스 인스턴스 초기화
he_normal = tf.keras.initializers.HeNormal()
dense = tf.keras.layers.Dense(256, kernel_initializer=he_normal, activation='relu')
print(dense.get_config()['kernel_initializer'])

{'class_name': 'HeNormal', 'config': {'seed': None}}
{'class_name': 'HeNormal', 'config': {'seed': None}}


In [5]:
# 규제
# 기본값
dense = tf.keras.layers.Dense(256, activation='relu')
dense.get_config()

{'activation': 'relu',
 'activity_regularizer': None,
 'bias_constraint': None,
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'bias_regularizer': None,
 'dtype': 'float32',
 'kernel_constraint': None,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'kernel_regularizer': None,
 'name': 'dense_3',
 'trainable': True,
 'units': 256,
 'use_bias': True}

In [6]:
# L1규제 적용
dense = tf.keras.layers.Dense(256, kernel_regularizer='l1', activation='relu')
print(dense.get_config()['kernel_regularizer'])

# 클래스 인스턴스 적용, alpha=0.1 변경
regularizer = tf.keras.regularizers.l1(l1=0.1)
dense = tf.keras.layers.Dense(256, kernel_regularizer=regularizer, activation='relu')
print(dense.get_config()['kernel_regularizer'])

{'class_name': 'L1', 'config': {'l1': 0.009999999776482582}}
{'class_name': 'L1', 'config': {'l1': 0.10000000149011612}}


In [7]:
# dropout 25% 비율 적용 (25%의 노드가 삭제)
tf.keras.layers.Dropout(0.25)

<keras.layers.core.Dropout at 0x7f3db82af350>

In [8]:
# dense에 직접 추가 => A모델 // Batch Normalization 적용 => B모델 성능 비교
# Model A: Dense
model_a = tf.keras.Sequential([
                               tf.keras.layers.Flatten(input_shape=(28, 28)),
                               tf.keras.layers.Dense(64, activation='relu'),
                               tf.keras.layers.Dense(32, activation='relu'),
                               tf.keras.layers.Dense(10, activation='softmax'),
])

model_a.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_8 (Dense)              (None, 10)                330       
Total params: 52,650
Trainable params: 52,650
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Model B: Dense + BatchNorm + ReLu
model_b = tf.keras.Sequential([
                               tf.keras.layers.Flatten(input_shape=(28, 28)),
                               tf.keras.layers.Dense(64),
                               # 배치 정규화 적용
                               tf.keras.layers.BatchNormalization(),
                               # 배치 정규화 후 활성화 함수 적용

                               tf.keras.layers.Activation('relu'),
                               tf.keras.layers.Dense(32),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.Activation('relu'),

                               tf.keras.layers.Dense(10, activation='softmax'),
])

model_b.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 64)                50240     
_________________________________________________________________
batch_normalization (BatchNo (None, 64)                256       
_________________________________________________________________
activation (Activation)      (None, 64)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 32)                2080      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32)                128       
_________________________________________________________________
activation_1 (Activation)    (None, 32)               

In [10]:
# LeakyReLU 기본 설정
tf.keras.layers.LeakyReLU()

# LeakyReLU, alpha=0.2로 변경
tf.keras.layers.LeakyReLU(alpha=0.2)

<keras.layers.advanced_activations.LeakyReLU at 0x7f3db50914d0>

In [11]:
# Model C: Dense + BatchNorm + LeakyReLU(0.2)
model_c = tf.keras.Sequential([
                               tf.keras.layers.Flatten(input_shape=(28, 28)),
                               tf.keras.layers.Dense(64),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.LeakyReLU(alpha=0.2),

                               tf.keras.layers.Dense(32),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.LeakyReLU(alpha=0.2),

                               tf.keras.layers.Dense(10, activation='softmax')
])

model_c.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256       
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 64)                0         
_________________________________________________________________
dense_13 (Dense)             (None, 32)                2080      
_________________________________________________________________
batch_normalization_3 (Batch (None, 32)                128       
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 32)               

In [12]:
model_a.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuacy'])
model_b.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuacy'])
model_c.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuacy'])

history_a = model_a.fit(x_train, y_train,
                        validation_data=(x_test, y_test),
                        epochs=10)
history_b = model_b.fit(x_train, y_train,
                        validation_data=(x_test, y_test),
                        epochs=10)
history_c = model_c.fit(x_train, y_train,
                        validation_data=(x_test, y_test),
                        epochs=10)

Epoch 1/10


ValueError: ignored

In [None]:
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize = (12, 9))
plt.plot(np.arange(1, 11), history_a.history['val_loss'], color='navy', linestyle=':')
plt.plot(np.arange(1, 11), history_b.history['val_loss'], color='tomato', linestyle='-.')
plt.plot(np.arange(1, 11), history_c.history['val_loss'], color='green', linestyle='-')

plt.title('Losses', fontsize=20)
plt.xlabel('epochs')
plt.ylabel('Losses')
plt.legend(['ReLU', 'BatchNorm + ReLU', 'Batchnorm + LeakyReLU'], fontsize=12)
plt.show