### MNIST image에서 image가 0에서 9중 어떤 숫자인지 예측하는 Bidirectional LSTM
#### - MNIST : 0에서 9까지의 image data로써, 각 이미지는 28x28의 matrix로 구성되어 있음.
#### - 28x28 image matrix를 순차적으로 읽어 학습해야  학습된 matrix가 어떤 숫자인지 알수 있음. 
#### - image matrix를 28번의 Time step으로 나누어 넣은 후 학습.

##### 1) 라이브러리 로드

In [3]:
# coding: utf-8

import tensorflow as tf
import numpy as np
import time
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist/data/", one_hot=True)

np.random.seed(144)

Extracting ./mnist/data/train-images-idx3-ubyte.gz
Extracting ./mnist/data/train-labels-idx1-ubyte.gz
Extracting ./mnist/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist/data/t10k-labels-idx1-ubyte.gz


In [4]:
print(mnist)

Datasets(train=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x00000190AB1AAF60>, validation=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x00000190AB1C9438>, test=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x00000190AB1C95F8>)


##### 2) 변수 초기화

In [5]:
learning_rate = 0.001
training_epochs = 10 # 전체 데이터 학습 반복 횟수 (정확도 향상)
batch_size = 256     # 한번에 처리할 데이터 개수

# 입력되는 이미지 사이즈 28*28
input_size = 28   # input size(=input dimension)는 셀에 입력되는 리스트 길이
input_steps = 28  # input step(=sequence length)은 입력되는 리스트를 몇개의 time-step에 나누어 담을 것인가?  
n_hidden = 128
n_classes = 10    # classification label 개수

##### 3) placeholder 와 variable 선언

In [6]:
X = tf.placeholder(tf.float32,[None, input_steps, input_size])
Y = tf.placeholder(tf.float32,[None, n_classes])

W = tf.Variable(tf.random_normal([n_hidden * 2, n_classes]))
b = tf.Variable(tf.random_normal([n_classes]))

keep_prob = tf.placeholder(tf.float32)

print(X)
print(Y)

Instructions for updating:
Colocations handled automatically by placer.
Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32)
Tensor("Placeholder_1:0", shape=(?, 10), dtype=float32)


##### 4) 정방향, 역방향 LSTM Cell 2개 생성 (각 Cell에 대한 Dropout으로 Overfitting 방지)

In [5]:
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units = n_hidden, state_is_tuple = True)
lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=keep_prob)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units = n_hidden, state_is_tuple = True)
lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=keep_prob)

outputs,_ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell, X, dtype = tf.float32)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


##### 5) 결과 값 병합

In [6]:
outputs_fw = tf.transpose(outputs[0], [1,0,2])
outputs_bw = tf.transpose(outputs[1], [1,0,2])

outputs_concat = tf.concat([outputs_fw[-1], outputs_bw[-1]], axis=1)

pred = tf.matmul(outputs_concat,W) + b

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred, labels = Y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(pred,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

##### 6) 학습 및 예측

In [8]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

global_step = 0

start_time = time.time()

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples/batch_size)
    
    for i in range(total_batch):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape((batch_size, input_steps, input_size)).astype(np.float32)
        
        c, _ = sess.run([cost, optimizer], feed_dict={X:batch_x, Y:batch_y, keep_prob:0.9})
    
        avg_cost += c/total_batch
        
        global_step += 1
    
    test_data = mnist.test.images.reshape((-1, input_steps, input_size))
    test_label = mnist.test.labels
    
    print('Eopch:{:2d}, cost={:9f}'.format((epoch+1), avg_cost))
    print('Accuracy:', accuracy.eval(session=sess, feed_dict={X:test_data, Y:test_label, keep_prob:1.0}))
    
end_time = time.time()
    
print("execution time :", (end_time - start_time))

Eopch: 1, cost= 0.673620
Accuracy: 0.9346
Eopch: 2, cost= 0.186167
Accuracy: 0.9593
Eopch: 3, cost= 0.124113
Accuracy: 0.9655
Eopch: 4, cost= 0.098185
Accuracy: 0.9764
Eopch: 5, cost= 0.081090
Accuracy: 0.9755
Eopch: 6, cost= 0.065739
Accuracy: 0.9793
Eopch: 7, cost= 0.059047
Accuracy: 0.9817
Eopch: 8, cost= 0.048674
Accuracy: 0.9813
Eopch: 9, cost= 0.045496
Accuracy: 0.9828
Eopch:10, cost= 0.038620
Accuracy: 0.9855
execution time : 417.2646760940552
