# 심층 신경망 - 모델 세부 설정-초기값, 규제, Dropout-MNIST

In [4]:
import tensorflow as tf
from tensorflow import keras

# 데이터셋을 로드
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()


# 로드된 데이터셋 확인
print('train set: ', x_train.shape, y_train.shape)
print('test  set: ', x_test.shape, y_test.shape)

# 데이터 정규화


train set:  (60000, 28, 28) (60000,)
test  set:  (10000, 28, 28) (10000,)


## 초기값 설정

In [5]:
from keras.layers import Dense

dense = Dense(256, activation='relu')
dense.get_config()

{'name': 'dense',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

In [6]:
# he normal 초기화
dense = Dense(256, kernel_initializer='he_normal', activation='relu')
dense.get_config()

{'name': 'dense_1',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'HeNormal', 'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

In [None]:
# 클래스 인스턴스 초기화


In [None]:
print("https://www.tensorflow.org/api_docs/python/tf/keras/initializers")

## 규제

In [7]:
# 기본 값
dense.get_config()

{'name': 'dense_1',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'HeNormal', 'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': None,
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

In [9]:
# l1 규제 적용
dense = Dense(256, kernel_regularizer='l1', activation='relu')
print(dense.get_config())
# 클래스 인스턴스 적용, alpha 값 변경
regularizer = tf.keras.regularizers.l1(l1=0.1)
dense = Dense(256, kernel_regularizer=regularizer, activation='relu')
dense.get_config()

{'name': 'dense_3', 'trainable': True, 'dtype': 'float32', 'units': 256, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'GlorotUniform', 'config': {'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': {'class_name': 'L1', 'config': {'l1': 0.009999999776482582}}, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}


{'name': 'dense_4',
 'trainable': True,
 'dtype': 'float32',
 'units': 256,
 'activation': 'relu',
 'use_bias': True,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'kernel_regularizer': {'class_name': 'L1',
  'config': {'l1': 0.10000000149011612}},
 'bias_regularizer': None,
 'activity_regularizer': None,
 'kernel_constraint': None,
 'bias_constraint': None}

## 드랍아웃

In [18]:
# Dropout 25% 비율 적용 (25%의 노드가 삭제)
from keras.layers import Dropout
# DropOut 예제
import numpy as np
data = np.arange(1, 11).reshape(2, 5).astype(np.float32)
layer = Dropout(0.3, input_shape=(2, )) # 확률이기 때문에 30프로를 드랍아웃 해달라는 게 아니고 30퍼센트로 드랍아웃이 될 수도 있고 안될 수도 있다
output = layer(data, training=True)
print(output)

tf.Tensor(
[[ 1.4285715  0.         4.285714   5.714286   0.       ]
 [ 8.571428  10.         0.        12.857143  14.285715 ]], shape=(2, 5), dtype=float32)


##  배치 정규화

In [19]:
# Model A: Dense + ReLU
model_a = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model_a.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_5 (Dense)             (None, 64)                50240     
                                                                 
 dense_6 (Dense)             (None, 32)                2080      
                                                                 
 dense_7 (Dense)             (None, 10)                330       
                                                                 
Total params: 52,650
Trainable params: 52,650
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Model A: Dense + ReLU
model_b = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(64),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(32),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model_b.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_8 (Dense)             (None, 64)                50240     
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 64)                0         
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 batch_normalization_1 (Batc  (None, 32)               128       
 hNormalization)                                      

In [None]:
# Model B: Dense + BatchNorm + ReLU


## 활성화 함수: relu 이외에 Keras가 지원하는 다른 활성화 함수 사용 가능

In [None]:
# LeakyReLU 기본 설정


# LeakyReLU, alpha=0.2 로 변경


In [None]:
# Model C: Dense + BatchNorm + LeakyReLU(0.2)

# 모델 요약


In [None]:

# Model A: Dense + ReLU

# Model B: Dense + BatchNorm + ReLU

# Model C: Dense + BatchNorm + LeakyReLU(0.2)


In [21]:
# 컴파일
model_a.compile(loss='sparse_categorical_crossentropy', metrics='accuracy', \
    optimizer='adam')
model_b.compile(loss='sparse_categorical_crossentropy', metrics='accuracy', \
    optimizer='adam')
# 학습 
hist_a = model_a.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)
hist_b = model_b.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)
# 시각화


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
import pandas as pd
print(pd.DataFrame(hist_a.history))
print('*'*100)
print(pd.DataFrame(hist_b.history))

       loss  accuracy  val_loss  val_accuracy
0  1.231599  0.773733  0.482620        0.8725
1  0.353221  0.903100  0.288152        0.9217
2  0.254070  0.929833  0.229807        0.9395
3  0.202268  0.944283  0.233044        0.9397
4  0.173622  0.951383  0.191564        0.9527
5  0.145200  0.958967  0.184215        0.9526
6  0.128802  0.963950  0.158323        0.9618
7  0.120013  0.966533  0.160873        0.9607
8  0.111263  0.968867  0.168776        0.9609
9  0.103866  0.971067  0.161019        0.9626
****************************************************************************************************
       loss  accuracy  val_loss  val_accuracy
0  0.322711  0.910633  0.144831        0.9542
1  0.145545  0.956217  0.106282        0.9657
2  0.112406  0.965267  0.092116        0.9703
3  0.094246  0.970633  0.082362        0.9737
4  0.082213  0.973450  0.084436        0.9727
5  0.072316  0.977417  0.078350        0.9754
6  0.065252  0.978517  0.075891        0.9783
7  0.060888  0.980283  0.