# [Tensorflow 1.x]  MNIST

<br>

- MNIST (Modified National Institute of Standards and Technology database)
- 숫자(0~9) 손글씨 이미지 데이터
- 각 이미지는 가로, 세로 28px의 흑백 이미지로 만들어져 있음
- Training data: 55,000장, Validation data: 5,000장, Test data: 10,000장

<br>

![image](image/mnist2.gif)

![image](image/mnist1.png)

In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log output. Set it
# BEFORE importing tensorflow so that messages emitted while the library loads
# are filtered as well; '2' hides INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd

import tensorflow as tf

# Python-side TensorFlow logging: only report ERROR and above.
tf.logging.set_verbosity(tf.logging.ERROR)

<br>

### 1. Prepare the data

In [2]:
# Load the MNIST archives stored under ./mnist/data/.
# one_hot=True requests the labels as one-hot vectors (10 columns, one per digit).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist/data/", one_hot=True)

Extracting ./mnist/data/train-images-idx3-ubyte.gz
Extracting ./mnist/data/train-labels-idx1-ubyte.gz
Extracting ./mnist/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist/data/t10k-labels-idx1-ubyte.gz


![image](image/mnist3.png)

In [3]:
mnist  # Datasets container holding the train / validation / test splits

Datasets(train=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x0000029C3609A4A8>, validation=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x0000029C4E9A3BE0>, test=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x0000029C4E9A3B38>)

In [4]:
mnist.train  # the training split (a DataSet object)

<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet at 0x29c3609a4a8>

In [5]:
dir(mnist.train)  # list the attributes and methods exposed by the training DataSet

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_epochs_completed',
 '_images',
 '_index_in_epoch',
 '_labels',
 '_num_examples',
 'epochs_completed',
 'images',
 'labels',
 'next_batch',
 'num_examples']

In [6]:
type(mnist.train.images)  # check which container type holds the image data

numpy.ndarray

In [7]:
mnist.train.images.shape  # 55,000 images (rows) x 784 pixels (== 28 * 28, columns)

(55000, 784)

![image](image/nparray.PNG)

In [8]:
# The answer for each image is given as a one-hot vector (one column per digit 0-9)
pd.DataFrame(mnist.train.labels).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<br>

### 2. Build the model

In [6]:
# Create the input placeholders; shapes are [rows, columns] and None leaves the row count open
X = tf.placeholder(tf.float32, [None, 784])  # one column per pixel (28px * 28px)
Y = tf.placeholder(tf.float32, [None, 10])  # one column per answer class, digits 0-9

2 hidden layers

In [7]:
# Every trainable parameter (theta) is declared as a tf.Variable

# Hidden layer 1: 784 rows match the columns of X, 256 is the number of hidden units.
# Weights are drawn from a random normal distribution with standard deviation 0.01; no bias term is used.
W1 = tf.Variable(tf.random_normal([784, 256], stddev=0.01))
L1 = tf.nn.relu(tf.matmul(X, W1))

In [8]:
# Hidden layer 2: the 256 rows match the 256 columns of W1; the layer again outputs 256 units
W2 = tf.Variable(tf.random_normal([256, 256], stddev=0.01))
L2 = tf.nn.relu(tf.matmul(L1, W2))

In [9]:
# Output layer: the 256 rows match the columns of W2; 10 columns, one per digit class.
# No activation is applied here, so `model` holds the raw logits.
W3 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L2, W3)

<br>

### 3. Set the criterion

In [17]:
# cost = tf.losses.mean_squared_error(Y, model) # MSE would be the choice for regression

# Cross-entropy loss for classification, computed from the one-hot labels Y and the logits in `model`
cost = tf.losses.softmax_cross_entropy(Y, model)

# Reminder: the last layer must have as many columns as there are outputs (10 here).
# Adam optimizer with learning rate 0.001, minimizing the cost defined above.
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

<br>

### 4. Train the model

In [18]:
# Create a session and run the initializer for all global variables
init = tf.global_variables_initializer()
sess = tf.Session()

sess.run(init)

#### mini batch

In [19]:
mnist.train.images.shape  # (number of training images, pixels per image)

(55000, 784)

In [20]:
batch_size = 100  # 100 rows per batch -> 550 batches per epoch for 55,000 training images
total_batch = int(mnist.train.num_examples / batch_size)  # number of mini-batches in one epoch
print(total_batch)

550


In [21]:
# Mini-batch training: total_batch optimizer steps per epoch, for 15 epochs
# (550 * 15 = 8,250 gradient updates when batch_size is 100).
for epoch in range(15):
    total_cost = 0

    for i in range(total_batch):  # one pass over the training images, batch_size at a time

        # Fetch the next mini-batch from the MNIST loader (per the original note it
        # samples randomly without replacement; this helper is specific to this loader).
        # batch_size is used instead of a literal 100 so the batch always stays
        # consistent with total_batch, which is derived from batch_size.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)

        # One optimizer step per mini-batch; cost_val is the loss on that batch
        _, cost_val = sess.run([optimizer, cost], feed_dict={X: batch_xs, Y: batch_ys})

        total_cost += cost_val

    # Once per epoch: evaluate the current loss on the full test set.
    # Only `cost` is fetched, so no parameter update happens here.
    test_cost = sess.run(cost, feed_dict={X: mnist.test.images, Y: mnist.test.labels})

    print('Epoch: {}'.format(epoch+1),
          '|| Avg. Training cost = {:.3f}'.format(total_cost / total_batch),
          '|| Current Test cost = {:.3f}'.format(test_cost))

print('Learning process is completed!')

Epoch: 1 || Avg. Training cost = 0.411 || Current Test cost = 0.199
Epoch: 2 || Avg. Training cost = 0.150 || Current Test cost = 0.118
Epoch: 3 || Avg. Training cost = 0.095 || Current Test cost = 0.098
Epoch: 4 || Avg. Training cost = 0.069 || Current Test cost = 0.082
Epoch: 5 || Avg. Training cost = 0.053 || Current Test cost = 0.075
Epoch: 6 || Avg. Training cost = 0.039 || Current Test cost = 0.081
Epoch: 7 || Avg. Training cost = 0.032 || Current Test cost = 0.084
Epoch: 8 || Avg. Training cost = 0.023 || Current Test cost = 0.080
Epoch: 9 || Avg. Training cost = 0.020 || Current Test cost = 0.088
Epoch: 10 || Avg. Training cost = 0.020 || Current Test cost = 0.092
Epoch: 11 || Avg. Training cost = 0.017 || Current Test cost = 0.086
Epoch: 12 || Avg. Training cost = 0.013 || Current Test cost = 0.111
Epoch: 13 || Avg. Training cost = 0.010 || Current Test cost = 0.092
Epoch: 14 || Avg. Training cost = 0.012 || Current Test cost = 0.119
Epoch: 15 || Avg. Training cost = 0.015 || 

In [None]:
# Shuffled mini-batch generator
def shuffle_batch(X, y, batch_size):
    """Yield ``(X_batch, y_batch)`` mini-batches in a freshly shuffled order.

    A random permutation of the row indices is split into
    ``len(X) // batch_size`` groups with ``np.array_split``. Every row is
    yielded exactly once per call; when ``len(X)`` is not a multiple of
    ``batch_size`` the remainder is spread over the groups, so a batch may
    hold more than ``batch_size`` rows.

    Args:
        X: Array of samples, indexable by an integer index array.
        y: Array of targets aligned row-by-row with ``X``.
        batch_size: Target number of rows per batch; must be positive.

    Yields:
        Tuples ``(X_batch, y_batch)`` with matching row order.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; np.array_split would reject zero sections.
        return

    rnd_idx = np.random.permutation(n_samples)
    # With fewer rows than batch_size the floor division gives 0 sections, which
    # np.array_split rejects; fall back to a single batch holding every row.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

# This cell replaces the inner loop of the previous training cell:
#   (original) for i in range(total_batch): batch_xs, batch_ys = mnist.train.next_batch(batch_size)
#   (new)      for batch_xs, batch_ys in shuffle_batch(X_train, y_train, batch_size):

# X_train / y_train were not defined anywhere in the notebook; bind them to the
# MNIST training images and their one-hot labels so the cell runs on a fresh kernel.
X_train, y_train = mnist.train.images, mnist.train.labels

for epoch in range(15):
    total_cost = 0
    n_batches = 0  # batches actually consumed this epoch, used for the average below

    # Mini-batch loop: one optimizer step per shuffled batch
    for batch_xs, batch_ys in shuffle_batch(X_train, y_train, batch_size):
        _, cost_val = sess.run([optimizer, cost], feed_dict={X: batch_xs, Y: batch_ys})
        total_cost += cost_val
        n_batches += 1

    # Once per epoch: loss on the full test set (only `cost` is fetched, no update)
    test_cost = sess.run(cost, feed_dict={X: mnist.test.images, Y: mnist.test.labels})

    print('Epoch: {}'.format(epoch+1),
          '|| Avg. Training cost = {:.3f}'.format(total_cost / max(n_batches, 1)),
          '|| Current Test cost = {:.3f}'.format(test_cost))

print('Learning process is completed!')

<br>

### 5. Test the model

데이터 살펴보기

In [22]:
# Shape of the model output (logits) for the whole test set
sess.run(model, feed_dict={X: mnist.test.images, Y: mnist.test.labels}).shape

(10000, 10)

In [23]:
# Raw logits
sess.run(model, feed_dict={X: mnist.test.images, Y: mnist.test.labels})[0]  # first row (first test image)

array([ -6.176651  ,  -4.1752687 ,   0.27102244,  -3.532663  ,
       -13.568699  , -13.777136  , -29.931406  ,  17.736004  ,
        -6.867575  ,  -5.754381  ], dtype=float32)

In [24]:
# Logits passed through softmax, shown for the first test image
sess.run(tf.nn.softmax(model), feed_dict={X: mnist.test.images, Y: mnist.test.labels})[0]

array([4.1197032e-11, 3.0482841e-10, 2.6004894e-08, 5.7960908e-10,
       2.5382847e-14, 2.0607121e-14, 1.9874951e-21, 1.0000000e+00,
       2.0644368e-11, 6.2842780e-11], dtype=float32)

In [25]:
# The softmax outputs of one row sum to 1
np.sum(sess.run(tf.nn.softmax(model), feed_dict={X: mnist.test.images, Y: mnist.test.labels})[0])

1.0

In [26]:
# argmax: index of the largest value
# e.g. tf.argmax([0.1 0 0 0.7 0 0.2 0 0 0 0]) -> 3
sess.run(tf.argmax(model, axis=1), feed_dict={X: mnist.test.images, Y: mnist.test.labels})[0]

7

#### 성능 계산

In [27]:
is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))  # model: predictions, Y: ground-truth labels
# True where the predicted class equals the true class, False otherwise

# Cast the booleans to float32, then average them (reduce_mean) to get the accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [28]:
# Accuracy on the test set (the printed label means "accuracy")
print('정확도 :', sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels}))

정확도 : 0.9786


예측값 확인

In [29]:
# Predicted digit per test image: the index of the largest logit in each row
predicted_labels = sess.run(tf.argmax(model, 1), feed_dict={X: mnist.test.images, Y: mnist.test.labels})
print(list(predicted_labels)[:10])  # first 10 predictions

[7, 2, 1, 0, 4, 1, 4, 9, 5, 9]


실제 정답

In [30]:
# True digits of the first 10 test images, recovered from the one-hot labels
print(np.argmax(mnist.test.labels, 1)[:10])

[7 2 1 0 4 1 4 9 5 9]


---

<br>

## ++) Dropout

일부 뉴런 랜덤으로 삭제(drop)하여 과적합 방지

#### Build the model

In [11]:
# Same placeholders as in the model without dropout.
# NOTE: X, Y, W1-W3, L1, L2 and model are rebound here, replacing the earlier definitions.
X = tf.placeholder(tf.float32, [None, 784])  # one column per pixel (28px * 28px)
Y = tf.placeholder(tf.float32, [None, 10])  # one column per answer class, digits 0-9

# Dropout setting: fed at run time as the fraction of units to KEEP (not the fraction dropped)
keep_prob = tf.placeholder(tf.float32)

# Layers
# Author's guideline: when the network is deep (many layers), apply dropout to the last one or two layers
W1 = tf.Variable(tf.random_normal([784, 256], stddev=0.01))
L1 = tf.nn.relu(tf.matmul(X, W1))
L1 = tf.nn.dropout(L1, keep_prob)  # arguments: the layer to apply dropout to, the fraction to keep

W2 = tf.Variable(tf.random_normal([256, 256], stddev=0.01))
L2 = tf.nn.relu(tf.matmul(L1, W2))
L2 = tf.nn.dropout(L2, keep_prob)  # arguments: the layer to apply dropout to, the fraction to keep

# Output layer: 10 columns, one per digit class; no activation, so `model` holds raw logits
W3 = tf.Variable(tf.random_normal([256, 10], stddev=0.01))
model = tf.matmul(L2, W3)

#### Set the criterion

In [12]:
# Softmax cross-entropy between the one-hot labels and the logits, minimized with Adam (learning rate 0.001)
cost = tf.losses.softmax_cross_entropy(Y, model)
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

#### Train the model

In [15]:
# Start a new session and run the initializer for all global variables
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [16]:
# Mini-batch settings
batch_size = 100  # rows per mini-batch
total_batch = int(mnist.train.num_examples / batch_size)  # number of mini-batches in one epoch
print(total_batch)

550


In [17]:
# Train for 15 epochs, running total_batch optimizer steps per epoch
for epoch in range(15):
    total_cost = 0  # sum of the per-batch losses in this epoch

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)

        _, cost_val = sess.run([optimizer, cost], feed_dict={X: batch_xs,
                                                             Y: batch_ys,
                                                             keep_prob: 0.8})
        # keep_prob sets the fraction to keep: 80% of the nodes stay active and
        # 20% are switched off at every training step

        total_cost += cost_val

    # Report the average training loss over the epoch
    print('Epoch: {}'.format(epoch+1),
          'Avg. cost =', '{:.3f}'.format(total_cost / total_batch))

print('Learning process is completed!')

Epoch: 1 Avg. cost = 0.427
Epoch: 2 Avg. cost = 0.163
Epoch: 3 Avg. cost = 0.115
Epoch: 4 Avg. cost = 0.089
Epoch: 5 Avg. cost = 0.073
Epoch: 6 Avg. cost = 0.060
Epoch: 7 Avg. cost = 0.054
Epoch: 8 Avg. cost = 0.044
Epoch: 9 Avg. cost = 0.041
Epoch: 10 Avg. cost = 0.037
Epoch: 11 Avg. cost = 0.034
Epoch: 12 Avg. cost = 0.030
Epoch: 13 Avg. cost = 0.030
Epoch: 14 Avg. cost = 0.026
Epoch: 15 Avg. cost = 0.024
Learning process is completed!


#### Test the model

train에서는 dropout을 사용하지만, test에서는 keep_prob를 1(100%)로 주어 dropout을 적용하지 않음

In [18]:
# True where the predicted class (argmax of the logits) equals the true class (argmax of the one-hot label)
is_correct = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
# Cast the booleans to float32 and average them to get the accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [19]:
# Test-set accuracy; keep_prob is fed as 1 so every unit is kept during evaluation
print('정확도:', sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}))

정확도: 0.9807


In [20]:
# Inspect the predictions: index of the largest logit per test image, with keep_prob fed as 1
labels = sess.run(tf.argmax(model, 1),
                  feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1})
print(labels)

[7 2 1 ... 4 5 6]
