# 42. MNIST CNN with Dropout

<p style="text-align: right;">
blackdew7@gmail.com<br>
Your name :
</p>

#### 선행지식
1. TensorFlow 다루기 기초
2. 모델링을 한다는 것에 대한 이해.
3. Supervised Learning 중 Classification에 대한 기본 지식.
4. CNN에 대한 기본 구조와 개념

#### 실습목표
1. CNN의 구조를 Graph로 그려낼 수 있다.
2. 그려낸 Graph를 텐서플로우를 이용해 코딩할 수 있다.
3. Dropout을 이해하고 사용할 수 있다.

#### 사용데이터.

01. Multinomial Classification : http://yann.lecun.com/exdb/mnist/

## 00. 라이브러리 불러오기

In [0]:
# MNIST and Convolutional Neural Network
import tensorflow as tf 
import numpy as np
import random

## 01. 데이터 불러오기 & 전처리

In [0]:
# Load MNIST through tf.contrib.learn's dataset loader.
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
# Print the training-image array's shape straight from the array; wrapping it in
# tf.convert_to_tensor would add the whole training set to the graph as a
# constant just to read its shape.
print(mnist.train.images.shape)

## 02. Prepare Variables for a CNN Graph with TF

In [0]:
# Start from an empty default graph so re-running this cell does not pile up ops.
tf.reset_default_graph()
# Fix the graph-level random seed for reproducibility.
tf.set_random_seed(2017)

############################
# Placeholders

# Flattened input images: 784 values per example.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784])
# The same images reshaped to 28x28 single-channel maps for the conv layers.
X_img = tf.reshape(X, shape=[-1, 28, 28, 1])
# Target labels: 10 values per example.
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])


## 03. Make a CNN Graph with TF

In [0]:
# Conv layer 1: 3x3 filters, 1 input channel -> 64 output channels.
w1 = tf.Variable(tf.random_normal([3, 3, 1, 64], stddev=0.01))
b1 = tf.Variable(tf.random_normal([64], stddev=0.01))
# Stride 1 with SAME padding keeps the 28x28 spatial size -> (?, 28, 28, 64).
conv1 = tf.nn.conv2d(X_img, w1, strides=[1, 1, 1, 1], padding='SAME')
conv1 = tf.add(conv1, b1)
conv1 = tf.nn.relu(conv1)
# 2x2 max pooling with stride 2 halves the spatial size -> (?, 14, 14, 64).
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Conv layer 2: 3x3 filters, 64 input channels -> 128 output channels.
w2 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
b2 = tf.Variable(tf.random_normal([128], stddev=0.01))
# -> (?, 14, 14, 128)
conv2 = tf.nn.conv2d(pool1, w2, strides=[1, 1, 1, 1], padding='SAME')
conv2 = tf.add(conv2, b2)
conv2 = tf.nn.relu(conv2)
pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# pool2 => (?, 7, 7, 128)

# Fully connected layer 1: flatten the 7x7x128 feature maps, then 6272 -> 128.
flat = tf.reshape(pool2, [-1, 7 * 7 * 128])
wfc1 = tf.Variable(tf.random_normal([7 * 7 * 128, 128], stddev=0.01))
bfc1 = tf.Variable(tf.random_normal([128], stddev=0.01))
fc1 = tf.add(tf.matmul(flat, wfc1), bfc1)
fc1 = tf.nn.relu(fc1, name="L1_fc_relu")

# Dropout on the FC1 activations; keep_prob is a placeholder so it can be fed
# a different value for training and for evaluation.
keep_prob = tf.placeholder(tf.float32)
fc_drop = tf.nn.dropout(fc1, keep_prob=keep_prob)

# Fully connected layer 2: 128 -> 10 class scores (logits, no activation).
wfc2 = tf.Variable(tf.random_normal([128, 10], stddev=0.01))
bfc2 = tf.Variable(tf.random_normal([10], stddev=0.01))
logits = tf.add(tf.matmul(fc_drop, wfc2), bfc2)

#### Dropout

![image](https://2.bp.blogspot.com/-WXlVLu2mT4g/WGUcrNdmzcI/AAAAAAAALHA/LmUZbEsJHrw4EjpIkGDVgPzZte4rcM8bwCLcB/s1600/dropout.png)

###  Cost & Optimizer

In [0]:
# The learning rate is a placeholder too, so it is supplied through feed_dict.
learning_rate = tf.placeholder(tf.float32)

# Loss: mean softmax cross-entropy between the logits and the labels in Y.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits)
cost = tf.reduce_mean(cross_entropy)

# Training op: one Adam update that minimizes the loss.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

### 실습 - Make a CNN Graph with TF

In [0]:
'''
CNN Graph 코드를 완성해보자

여러분이 직접 스크립트를 짜야 합니다.
복사하여 붙여넣기를 하지 마시고, 직접 타이핑 하는 것을 권장합니다.
'''

# Clear the default graph before rebuilding the model
tf.reset_default_graph()
tf.set_random_seed(2017) # fix the random seed for reproducibility

############################
# Placeholders

X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])

# kp: dropout keep probability, lr: learning rate (both fed at run time)
kp = tf.placeholder(tf.float32)
lr = tf.placeholder(tf.float32)

#########################
# ConvLayer 01 

with tf.name_scope("layer1_conv"):

    # for Conv Layer 01 filter - shape=(3, 3, 1)
    W1 = 

    # Convolution Layer 01 -> (?, 28, 28, 32)
    conv1 = 
    conv1 = 

    # Pooling Layer 01 -> (?, 14, 14, 32)
    pool1 = 


#########################
# ConvLayer 02

with tf.name_scope("layer2_conv"):

    # for Conv Layer 02 filter - shape=(3, 3, 32)
    W2 = 

    # Convolution Layer 02 -> (?, 14, 14, 64)
    conv2 = 
    conv2 = 

    # Pooling Layer 02 -> (?, 7, 7, 64)
    pool2 = 


#########################
# Fully Connected Layer 01

with tf.name_scope("layer1_fc"):

    # Flatten layer
    flat = 

    # for Final FC 7x7x64 inputs -> 1000
    wfc1 = tf.Variable(tf.random_normal([7 * 7 * 64, 1000], stddev=0.01))
    bfc1 = tf.Variable(tf.random_normal([1000], stddev=0.01))

    # for Final FC Layer 1: 7x7x64 inputs -> 1000
    fc1 = tf.add(tf.matmul(flat, wfc1), bfc1)
    fc1 = tf.nn.relu(fc1)

    # Dropout
    fc_drop = tf.nn.dropout(fc1, keep_prob=kp)

#########################
# Fully Connected Layer 02

with tf.name_scope("layer2_fc"):

    # for Final FC 1000 inputs -> 10
    wfc2 = tf.Variable(tf.random_normal([1000, 10], stddev=0.01))

    # bias
    # NOTE(review): no stddev is given here, unlike the other variables (0.01) - confirm this is intentional.
    bfc2 = tf.Variable(tf.random_normal([10]))

    # for Final FC Layer 2: 1000 inputs -> 10
    logits = tf.add(tf.matmul(fc_drop, wfc2), bfc2)


#########################
# Cost & Optimizer

with tf.name_scope("Optimizer"):
    # Cost(loss) function & Optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y), name="cost")
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)


## 04. Training & Evaluation

In [0]:
'''
버그가 있습니다. 버그를 해결해 봅시다.
'''

###########################
# Session setup: create a session and initialize every variable in the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

###########################
# Training

print('Learning started. It takes sometime.')

# 55,000 training examples in batches of 500 (batch_size)
#        => 110 optimizer steps (n_of_batches) per epoch.
epochs = 2
batch_size = 500
n_of_batches = mnist.train.num_examples // batch_size

# The labels are used as integer indices while Y expects 10 columns per example,
# so each batch is one-hot encoded by picking rows of a 10x10 identity matrix.
# Feeding the raw labels (and omitting keep_prob) does not work with this graph.
identity10 = np.eye(10)

for epoch_no in range(1, epochs + 1):
    print("%dth epoch" % epoch_no)

    for step in range(1, n_of_batches + 1):
        X_batch, Y_batch = mnist.train.next_batch(batch_size)
        Y_onehot = identity10[Y_batch]

        # One optimizer step with dropout active (keep probability 0.7).
        sess.run(optimizer, feed_dict={X: X_batch, Y: Y_onehot,
                                       keep_prob: 0.7, learning_rate: 0.01})

        if step % 10 == 0:
            # Every 10 steps, report the cost on the batch just used, with dropout off.
            loss = sess.run(cost, feed_dict={X: X_batch, Y: Y_onehot, keep_prob: 1.0})
            print("%dth records, training cost: %.3f" % (step * batch_size, loss))

###########################
# Evaluation

# A prediction counts as correct when the arg-max of the logits matches the
# arg-max of the one-hot label; accuracy is the mean of those matches.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Score the whole test set with dropout disabled.
test_feed = {X: mnist.test.images, Y: identity10[mnist.test.labels], keep_prob: 1.0}
accuracy = sess.run(acc, feed_dict=test_feed)

print('Accuracy: %.2f' % (accuracy * 100))

In [0]:

# Build the accuracy ops: a prediction is correct when the arg-max of the
# logits equals the arg-max of the one-hot label.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_class, true_class)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Evaluate on the whole test set; labels are one-hot encoded via the identity
# matrix and dropout is disabled (keep_prob = 1.0).
eval_feed = {
    X: mnist.test.images,
    Y: np.eye(10)[mnist.test.labels],
    keep_prob: 1.0,
    learning_rate: 0.01,
}
accuracy = sess.run(acc, feed_dict=eval_feed)

print('Accuracy: %.2f' % (accuracy * 100))

## 05. 실습 해답

### 05.1 Import Library & Prepare MNIST Data

In [0]:
import tensorflow as tf
import numpy as np

# Load MNIST through tf.contrib.learn's dataset loader.
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
# Print the training-image array's shape straight from the array; wrapping it in
# tf.convert_to_tensor would add the whole training set to the graph as a
# constant just to read its shape.
print(mnist.train.images.shape)


### 05.2 Prepare Variables for a CNN Graph with TF

In [0]:
# Start from an empty default graph so re-running this cell does not pile up ops.
tf.reset_default_graph()
# Fix the graph-level random seed for reproducibility.
tf.set_random_seed(2017)

############################
# Placeholders

# Flattened input images: 784 values per example.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784], name="X")
# The same images reshaped to 28x28 single-channel maps for the conv layers.
X_img = tf.reshape(X, shape=[-1, 28, 28, 1], name="X_img")
# Target labels: 10 values per example.
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="Y")

# Dropout keep probability (kp) and learning rate (lr), both fed at run time.
kp = tf.placeholder(dtype=tf.float32)
lr = tf.placeholder(dtype=tf.float32)


### 05.3 Make a CNN Graph & Run Session with TF

In [0]:


#########################
# ConvLayer 01 

with tf.name_scope("layer1_conv"):

    # Conv Layer 01 filters: 3x3, 1 input channel -> 64 output channels
    w1 = tf.Variable(tf.random_normal([3, 3, 1, 64], stddev=0.01))
    b1 = tf.Variable(tf.random_normal([64], stddev=0.01))

    # Convolution Layer 01 -> (?, 28, 28, 64)
    conv1 = tf.nn.conv2d(X_img, w1, strides=[1, 1, 1, 1], padding='SAME')
    conv1 = tf.add(conv1, b1)
    conv1 = tf.nn.relu(conv1)

    # Pooling Layer 01 -> (?, 14, 14, 64)
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


#########################
# ConvLayer 02

with tf.name_scope("layer2_conv"):

    # Conv Layer 02 filters: 3x3, 64 input channels -> 128 output channels
    w2 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
    b2 = tf.Variable(tf.random_normal([128], stddev=0.01))

    # Convolution Layer 02 -> (?, 14, 14, 128)
    conv2 = tf.nn.conv2d(pool1, w2, strides=[1, 1, 1, 1], padding='SAME')
    conv2 = tf.add(conv2, b2)
    conv2 = tf.nn.relu(conv2)

    # Pooling Layer 02 -> (?, 7, 7, 128)
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


#########################
# Fully Connected Layer 01

with tf.name_scope("layer1_fc"):

    ###############################
    # Flatten layer: (?, 7, 7, 128) -> (?, 6272)
    flat = tf.reshape(pool2, [-1, 7 * 7 * 128])

    # Weights and bias for FC Layer 1: 7x7x128 inputs -> 1000
    wfc1 = tf.Variable(tf.random_normal([7 * 7 * 128, 1000], stddev=0.01))
    bfc1 = tf.Variable(tf.random_normal([1000], stddev=0.01))

    # FC Layer 1: 7x7x128 inputs -> 1000, followed by ReLU
    fc1 = tf.add(tf.matmul(flat, wfc1), bfc1)
    fc1 = tf.nn.relu(fc1)

    #########################
    # Dropout on the FC1 activations; kp is fed at run time
    fc_drop = tf.nn.dropout(fc1, keep_prob=kp)
    #########################


#########################
# Fully Connected Layer 02

with tf.name_scope("layer2_fc"):

    # Weights and bias for FC Layer 2: 1000 inputs -> 10
    wfc2 = tf.Variable(tf.random_normal([1000, 10], stddev=0.01))
    # NOTE(review): no stddev is given here, unlike the other variables (0.01) - confirm this is intentional.
    bfc2 = tf.Variable(tf.random_normal([10]))

    # FC Layer 2: 1000 inputs -> 10 class scores (logits, no activation)
    logits = tf.add(tf.matmul(fc_drop, wfc2), bfc2)


#########################
# Cost & Optimizer

with tf.name_scope("Optimizer"):
    # Loss: mean softmax cross-entropy between the logits and the labels in Y,
    # minimized with Adam using the learning rate fed through lr.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y)
    cost = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)


### 05.4 Train & Evaluation

In [0]:
#########################
# Session setup: create a session and initialize every variable in the graph.

sess = tf.Session()
sess.run(tf.global_variables_initializer())

###########################
# Training

print('Learning started. It takes sometime.')

# 55,000 training examples in batches of 512 (batch_size)
#        => 107 full batches (n_of_batches) per epoch.
epochs = 10
batch_size = 512
n_of_batches = mnist.train.num_examples // batch_size

# The labels are used as integer indices while Y expects 10 columns per example,
# so each batch is one-hot encoded by picking rows of a 10x10 identity matrix.
identity10 = np.eye(10)

for epoch_no in range(1, epochs + 1):
    print("%dth epoch" % epoch_no)

    for step in range(1, n_of_batches + 1):
        X_batch, Y_batch = mnist.train.next_batch(batch_size)
        Y_onehot = identity10[Y_batch]

        # One optimizer step with dropout active (keep probability 0.7).
        sess.run(optimizer, feed_dict={X: X_batch, Y: Y_onehot, kp: 0.7, lr: 0.01})

        if step % 5 == 0:
            # Every 5 steps, report the cost on the batch just used, with dropout off.
            loss = sess.run(cost, feed_dict={X: X_batch, Y: Y_onehot, kp: 1.0, lr: 0.01})
            print("%dth records, training cost: %.3f" % (step * batch_size, loss))

print("Training Complete")


###########################
# Evaluation
with tf.name_scope("Prediction"):
    # A prediction counts as correct when the arg-max of the logits matches the
    # arg-max of the one-hot label; accuracy is the mean of those matches.
    predicted_class = tf.argmax(logits, 1)
    true_class = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Score the whole test set with dropout disabled.
    test_feed = {X: mnist.test.images, Y: identity10[mnist.test.labels], kp: 1.0, lr: 0.01}
    accuracy = sess.run(acc, feed_dict=test_feed)

print('Accuracy: %.2f' % (accuracy * 100))

### 성능을 한 번 확인해 보자. 제대로 되고 있는걸까?

In [0]:
# Pick one random test example, compare its label with the model's prediction,
# and display the image.
import random

import matplotlib.pyplot as plt

r = random.randint(0, mnist.test.num_examples - 1)
sample_image = mnist.test.images[r:r + 1]

# The test labels are integer class ids (they are one-hot encoded with
# np.eye(10)[...] before being fed to Y), so the label is printed directly;
# tf.argmax over axis 1 is not valid for this 1-D slice.
print("Label: ", mnist.test.labels[r:r + 1])

# Feed `kp`, the dropout placeholder of the current graph. `keep_prob` belongs
# to an earlier graph that tf.reset_default_graph() discarded, so this session
# cannot be fed with it. A keep probability of 1.0 disables dropout.
print("Prediction: ", sess.run(tf.argmax(logits, 1),
                               feed_dict={X: sample_image, kp: 1.0}))

plt.imshow(sample_image.reshape(28, 28), cmap='Greys', interpolation='nearest')
plt.show()

In [0]:
# Close the session now that training and evaluation are done.
sess.close()