## MNIST
<br>이제 higher level API(tf.layers 등)를 적극 활용하면서 필요에 따라 앞서 배운 low level API(tf.nn)를 활용해 세부적인 model tuning이 가능합니다. (https://goo.gl/Rmy8qq)
<br>
<br><span style="color:red;"> - 더욱 편하게 layer 를 구성할 수 있도록 돕는 **tf.layers** 를 적용합니다.
<br>- 모델 Parameter 초기화 방법 중 하나인 **He initialization**을 적용합니다.
<br>- layers.dropout()을 통해 **Dropout**을 layer마다 다른 비율로 적용할 수 있습니다.</span>
<br><br>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import layers

In [2]:
import os

# Quiet the TensorFlow C++ backend via its minimum-log-level environment variable.
# Reference: https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Limit the Python-side TensorFlow logger to ERROR messages.
tf.logging.set_verbosity(tf.logging.ERROR)

In [3]:
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets from ./mnist/data/ with one-hot encoded labels.
mnist = input_data.read_data_sets(train_dir="./mnist/data/", one_hot=True)

Extracting ./mnist/data/train-images-idx3-ubyte.gz
Extracting ./mnist/data/train-labels-idx1-ubyte.gz
Extracting ./mnist/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist/data/t10k-labels-idx1-ubyte.gz


In [4]:
# Declare the placeholders that are fed at run time.

X = tf.placeholder(dtype=tf.float32, shape=[None, 784])  # inputs: any batch size, 784 columns
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # labels: any batch size, 10 columns

# layers.dropout() takes a True/False flag that selects training or testing behaviour.
dropout_sign = tf.placeholder(dtype=tf.bool)

## TensorFlow Function System
1. tf.nn
2. tf.layers
3. tf.contrib

Fully-Connected NN  
Dense Network  
Feed Forward NN  
-> 모두 같은 구조의 신경망을 가리키는 서로 다른 이름입니다.

# 1. No dropout, No initializers

In [5]:
# With tf.layers, the layer architecture that was laborious to build before
# can be written concisely as follows.

# Hidden layer 1
L1 = layers.dense(inputs=X, units=256, activation=tf.nn.relu)
# Hidden layer 2
L2 = layers.dense(inputs=L1, units=256, activation=tf.nn.relu)
# Output layer: 10 == number of label columns; no activation is applied here.
model = layers.dense(inputs=L2, units=10, activation=None)

# 2. Dropout, Initializers, Tw

In [12]:
# Changing the code above as shown below additionally applies He initialization and dropout.

# Hidden layer 1 with He initialization (keras).
L1 = layers.dense(
    inputs=X,
    units=256,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.he_normal(),
)
# "rate" in layers.dropout() is the dropping rate, not the keeping rate; its default is 0.5.
L1 = layers.dropout(inputs=L1, rate=0.2, training=dropout_sign)

# Hidden layer 2 with He initialization (keras).
L2 = layers.dense(
    inputs=L1,
    units=256,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.he_normal(),
)
L2 = layers.dropout(inputs=L2, rate=0.2, training=dropout_sign)

# Output layer: 10 == number of label columns.
model = layers.dense(inputs=L2, units=10, activation=None)

# 3. 

In [23]:
# layers.dense(inputs, units (number of nodes in the layer), activation, kernel_initializer (weight init), use_bias=True (default))
# - The output is computed as [ activation(inputs * kernel + bias) ].
# - Default kernel_initializer (weight init): [ glorot uniform initializer (= Xavier init) ], explained @ https://goo.gl/2Av59i
# - Default bias_initializer (bias init): [ tf.zeros_initializer() (= 0) ]

# Hidden layer 1 -- He initialization (keras; he_uniform() is also available).
L1 = layers.dense(
    inputs=X,
    units=256,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.he_normal(),
)
# Alternative initializers for hidden layer 1:
# L1 = layers.dense(X, 256, activation=tf.nn.relu, kernel_initializer=tf.contrib.layers.variance_scaling_initializer()) # he (contrib)
# L1 = layers.dense(X, 256, activation=tf.nn.relu, kernel_initializer=tf.keras.initializers.glorot_normal()) # xavier (keras, glorot_uniform() is also available)
# L1 = layers.dense(X, 256, activation=tf.nn.relu, kernel_initializer=tf.contrib.layers.xavier_initializer()) # xavier (contrib)
L1 = layers.dropout(inputs=L1, rate=0.2, training=dropout_sign)

# Hidden layer 2 -- He initialization (keras; he_uniform() is also available).
L2 = layers.dense(
    inputs=L1,
    units=256,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.he_normal(),
)
# Alternative initializers for hidden layer 2:
# L2 = layers.dense(L1, 256, activation=tf.nn.relu, kernel_initializer=tf.contrib.layers.variance_scaling_initializer()) # he (contrib)
# L2 = layers.dense(L1, 256, activation=tf.nn.relu, kernel_initializer=tf.contrib.layers.xavier_initializer()) # xavier (contrib)
# L2 = layers.dense(L1, 256, activation=tf.nn.relu, kernel_initializer=tf.keras.initializers.glorot_normal()) # xavier (keras, glorot_uniform() is also available)
L2 = layers.dropout(inputs=L2, rate=0.2, training=dropout_sign)

# Output layer with 10 units and no activation.
model = layers.dense(inputs=L2, units=10, activation=None)


# (extra) 초기화 함수 간 관계는 아래와 같습니다. (동일한 것 중 어느 것이든 택하여 쓰셔도 됩니다) @ https://goo.gl/XZESC6
# tf.contrib.layers.variance_scaling_initializer(uniform=False) == tf.keras.initializers.he_normal()
# tf.contrib.layers.variance_scaling_initializer(uniform=True) == tf.keras.initializers.he_uniform()
# tf.contrib.layers.xavier_initializer(uniform=False) == tf.keras.initializers.glorot_normal()
# tf.contrib.layers.xavier_initializer(uniform=True) == tf.keras.initializers.glorot_uniform()
# * 가능한 가중치 초기화 방식 (keras) @ https://goo.gl/zia5uK 


### Regularization

In [None]:
# (extra) 추가로 원할 경우 layers.dense()에 L1 이나 L2 정규화를 적용할 수 있습니다.
# ,kernel_regularizer=tf.contrib.layers.l1_regularizer(scale)  # scale: 정규화 강도 (필수 인자)
# ,kernel_regularizer=tf.contrib.layers.l2_regularizer(scale)  # scale: 정규화 강도 (필수 인자)

# (extra) tf.layers.dense() 대신 tf.contrib.layers.fully_connected()를 활용할 수도 있습니다.
# -> L1 = tf.contrib.layers.fully_connected(X, 256, activation_fn=tf.nn.relu, weights_initializer=tf.keras.initializers.he_normal())
# * 2가지 방식 모두 근본적으로 동일합니다 (fully_connected()가 사실 dense()를 호출합니다. fully_connected()는 dense()에 몇 가지 추가적인 기능을 더한 함수입니다. fully_connected()는 기본 활성화함수가 relu이며 dense()는 linear입니다.) @ https://goo.gl/ayVudM
# * The contrib module contains volatile or experimental code. (Ops for building neural network layers, regularizers, summaries, etc.)

In [13]:
# tf.losses provides ready-made functions that make cost functions easier to set up.

# Softmax cross-entropy between the labels Y and the raw outputs of `model`.
cost = tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=model)

# Adam with learning rate 1e-3 (== 0.001); `optimizer` is the op that minimizes `cost`.
adam = tf.train.AdamOptimizer(learning_rate=1e-3)
optimizer = adam.minimize(cost)

In [14]:
# Index of the largest entry along axis 1, for the model output and for the labels.
predicted_index = tf.argmax(model, axis=1)
label_index = tf.argmax(Y, axis=1)

# Element-wise match between the two index tensors.
is_correct = tf.equal(predicted_index, label_index)
# Mean of the matches after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [15]:
# Open a session, then build and run the op that initializes all global variables.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [16]:
# Mini-batch size used when drawing training batches.
batch_size = 100
# Number of whole mini-batches per epoch (floor division of the training-set size).
total_batch = mnist.train.num_examples // batch_size # == int(mnist.train.num_examples / batch_size)
# Bare expression: shown as this notebook cell's output.
total_batch

550

In [17]:
# Optional progress bar (kept disabled):
# from tqdm import trange, tqdm_notebook
# for epoch in tqdm_notebook(range(15)):

for epoch in range(15):
    batch_hits = []  # per-batch is_correct results, averaged into the training accuracy
    epoch_cost = 0  # running sum of the batch costs for this epoch

    for _ in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)

        # Training step: run the optimizer op with dropout switched on.
        train_feed = {X: batch_xs, Y: batch_ys, dropout_sign: True}
        _, cost_val = sess.run([optimizer, cost], feed_dict=train_feed)
        epoch_cost += cost_val

        # Evaluate the same batch with dropout_sign False so dropout is
        # removed when measuring training accuracy.
        eval_feed = {X: batch_xs, Y: batch_ys, dropout_sign: False}
        batch_hits.append(sess.run(is_correct, feed_dict=eval_feed))

    # Per-epoch report: average cost over the batches and training accuracy.
    avg_cost = epoch_cost / total_batch
    train_accuracy = np.mean(batch_hits)
    print('Epoch:', '%04d' % (epoch + 1),
          '|| Avg. cost =', '{:.3f}'.format(avg_cost),
          '|| Training accuracy : {:.3f}'.format(train_accuracy))

print('Learning process is completed!')

Epoch: 0001 || Avg. cost = 0.300 || Training accuracy : 0.934
Epoch: 0002 || Avg. cost = 0.129 || Training accuracy : 0.973
Epoch: 0003 || Avg. cost = 0.093 || Training accuracy : 0.982
Epoch: 0004 || Avg. cost = 0.072 || Training accuracy : 0.988
Epoch: 0005 || Avg. cost = 0.062 || Training accuracy : 0.990
Epoch: 0006 || Avg. cost = 0.051 || Training accuracy : 0.992
Epoch: 0007 || Avg. cost = 0.045 || Training accuracy : 0.994
Epoch: 0008 || Avg. cost = 0.039 || Training accuracy : 0.995
Epoch: 0009 || Avg. cost = 0.038 || Training accuracy : 0.996
Epoch: 0010 || Avg. cost = 0.034 || Training accuracy : 0.997
Epoch: 0011 || Avg. cost = 0.031 || Training accuracy : 0.997
Epoch: 0012 || Avg. cost = 0.030 || Training accuracy : 0.997
Epoch: 0013 || Avg. cost = 0.027 || Training accuracy : 0.998
Epoch: 0014 || Avg. cost = 0.024 || Training accuracy : 0.998
Epoch: 0015 || Avg. cost = 0.024 || Training accuracy : 0.999
Learning process is completed!


In [18]:
# Print the test accuracy. dropout_sign must be fed False so that dropout is removed.
test_feed = {
    X: mnist.test.images,
    Y: mnist.test.labels,
    dropout_sign: False,
}
test_accuracy = sess.run(accuracy, feed_dict=test_feed)
print('Test accuracy : {}'.format(test_accuracy))

Test accuracy : 0.9818999767303467
