# MNIST

### MNIST 데이터 받기

In [16]:
import os
import sys
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import warnings
warnings.simplefilter("ignore")

# Load the MNIST splits from the local "data/" directory with one-hot labels.
mnist = input_data.read_data_sets("data/", one_hot=True)

# Keep the image and label arrays of each split under short names.
train_data, train_label = mnist.train.images, mnist.train.labels
test_data, test_label = mnist.test.images, mnist.test.labels

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


### MNIST 데이터 확인하기

In [17]:
# Show the array shape of every MNIST split, one per line:
# train images, train labels, test images, test labels.
print(
    train_data.shape,
    train_label.shape,
    test_data.shape,
    test_label.shape,
    sep="\n",
)

(55000, 784)
(55000, 10)
(10000, 784)
(10000, 10)


### Data Pre-processing (데이터 전처리)

# Implementation (1)


## Loss function (손실 함수) : Cross Entropy

# <center> \\( L(y_i, f(x_i; W)) = -\frac{1}{n}\sum_{i=1}^{n}\sum_{k=1}^{K} y_{i,k} \log(f(x_i; W)_k)\\)</center>


#### get_cross_entropy_loss 함수의 내용을 완성하세요.
#### (Hint : (1) tf.reduce_mean(), tf.reduce_sum(), tf.log() (2) Tensor dimension에 유의 (3) log 함수 사용 시 epsilon 사용하세요.)

In [29]:
def get_cross_entropy_loss(y_true, y_hat, epsilon=1e-8):
    """
    Compute the mean cross entropy between labels and predictions.

    Args:
        y_true: true label tensor
        y_hat: predicted label tensor (multiplied element-wise with y_true)
        epsilon: small value added to y_hat before the log to prevent NaN

    Returns:
        scalar tensor: negative mean over examples of the per-example
        sum (along axis 1) of y_true * log(y_hat + epsilon)
    """
    with tf.name_scope('cross_entropy'):
        # Shift predictions away from zero so that log() stays finite.
        log_prediction = tf.log(y_hat + epsilon)
        # Sum over axis 1 to get one value per example.
        per_example = tf.reduce_sum(y_true * log_prediction, axis=1)
        # Average across examples and flip the sign.
        return -tf.reduce_mean(per_example)

def get_accuracy(y_true, y_hat):
    """
    Compute classification accuracy.

    Args:
        y_true: true label tensor
        y_hat: predicted label tensor

    Returns:
        scalar float32 tensor: fraction of rows where the argmax along
        axis 1 of y_hat equals the argmax along axis 1 of y_true
    """
    with tf.name_scope('accuracy'):
        # Index of the highest score per row, for predictions and labels.
        predicted_index = tf.argmax(y_hat, 1)
        true_index = tf.argmax(y_true, 1)
        # A row counts as correct when both indices agree.
        correct_prediction = tf.equal(predicted_index, true_index, name='correct_prediction')
        # Cast the booleans to 0.0 / 1.0 and average them.
        as_float = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(as_float, name='accuracy')
    return accuracy

## Hyper-parameter (하이퍼 파라미터)

In [30]:
# Set hyperparameters
learning_rate = 0.01  # step size given to the gradient descent optimizer
max_iter = 2000  # number of training iterations (one mini-batch each)
batch_size = 100  # number of training examples fetched per iteration

# Implementation (2)
## Linear Classifier (선형 분류기)

## <center> \\( f(x) = W^Tx+b \\)</center>

### linear function을 완성하세요.
### (Hint : (1) weight, bias 선언 (2) tf.get_variable()의 initializer  (3) tf.matmul())

In [31]:
def fc(name, out_dim, inputs):
    """
    Fully connected (affine) layer: inputs * weight + bias.

    Creates two variables inside the variable scope `name`:
    'w' of shape [in_dim, out_dim], initialized from a truncated normal
    with stddev 0.01, and 'b' of shape [out_dim], initialized to zeros.

    Args:
        name: variable scope name for this layer's variables
        out_dim: output dimension
        inputs: input tensor; its static size along axis 1 is used as the
            input dimension, so that size must be known at graph build time

    Returns:
        tf.matmul(inputs, weight) + bias
    """
    with tf.variable_scope(name):
        in_dim = inputs.get_shape().as_list()[1]
        initial = tf.truncated_normal([in_dim, out_dim], stddev=0.01)
        weight = tf.get_variable('w', initializer=initial)
        # The documented contract is W^T x + b, so a bias term is required.
        bias = tf.get_variable('b', initializer=tf.zeros([out_dim]))
        y_hat = tf.matmul(inputs, weight) + bias
        return y_hat
        
        



# Implementation (3)

## Model Setting

### 1. Training data 및 Test data의 각각의 image를 한 vector로 만들어서 train_data, test_data에 각각 저장하세요.
#### Hint) 데이터 차원.
### 2. Dataset로부터 받은 데이터(Image, label)를 담을 변수를 각각 x 및 y_true에 선언하세요.
#### Hint) tf.placeholder
### 3. Implementation (2)에서 구현한 linear classifier 함수값과 softmax 함수를 통한 prediction 값을 y_hat에 저장하세요.
#### Hint) tf.nn.softmax
### 4. 3으로부터 얻은 결과를 통해 Implementation (1)에서 구현한 loss function을 통해 얻은 loss를 cross_entropy에 저장하세요.

In [32]:
# Start from an empty graph so re-running this cell does not collide with
# variables created by a previous run.
tf.reset_default_graph()
# Flatten data
###################################################################
#                    Implementation 3-1                           #
###################################################################
# The arrays loaded above already hold one 784-element vector per image
# (see the printed shapes), so train_data / test_data are used as they are.

###################################################################
#                    Implementation 3-2                           #
###################################################################
# Placeholders for a batch of flattened images and their one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])

###################################################################
#                    Implementation 3-3                          #
###################################################################
# Two stacked fc layers followed by softmax to obtain class probabilities.
h1 = fc('layer1', 512, x)
y_logits = fc('layer2', 10, h1)
y_hat = tf.nn.softmax(y_logits)

###################################################################
#                    Implementation 3-4                           #
###################################################################
cross_entropy = get_cross_entropy_loss(y_true, y_hat)

# Calculate accuracy
accuracy = get_accuracy(y_true, y_hat)
# Make gradient descent op
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

# Make op to initialize declared variable
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Initialize variables
    sess.run(init)

    # Training
    for step in range(max_iter):
        # Get batch data and label
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Train the network and calculate cross entropy on the batch
        _, loss = sess.run([train_step, cross_entropy], feed_dict={x: batch_x, y_true: batch_y})
        # Print loss (cross entropy) and accuracy at every 10th step.
        # Accuracy is evaluated only here: running the whole test set on
        # every step would cost a full forward pass whose result is discarded.
        if (step + 1) % 10 == 0:
            acc = sess.run(accuracy, feed_dict={x: test_data, y_true: test_label})
            print("{}th iteration, loss: {:.4f}, test accuracy: {:.4f}".format(step + 1, loss, acc))

10th iteration, loss: 2.2926, test accuracy: 0.3039
20th iteration, loss: 2.2842, test accuracy: 0.4328
30th iteration, loss: 2.2728, test accuracy: 0.5030
40th iteration, loss: 2.2694, test accuracy: 0.5586
50th iteration, loss: 2.2588, test accuracy: 0.5979
60th iteration, loss: 2.2463, test accuracy: 0.6067
70th iteration, loss: 2.2442, test accuracy: 0.6030
80th iteration, loss: 2.2216, test accuracy: 0.5995
90th iteration, loss: 2.2169, test accuracy: 0.6197
100th iteration, loss: 2.2144, test accuracy: 0.6434
110th iteration, loss: 2.2029, test accuracy: 0.6493
120th iteration, loss: 2.1996, test accuracy: 0.6514
130th iteration, loss: 2.1773, test accuracy: 0.6527
140th iteration, loss: 2.1712, test accuracy: 0.6646
150th iteration, loss: 2.1440, test accuracy: 0.6772
160th iteration, loss: 2.1407, test accuracy: 0.6822
170th iteration, loss: 2.0817, test accuracy: 0.6858
180th iteration, loss: 2.0789, test accuracy: 0.6970
190th iteration, loss: 2.0562, test accuracy: 0.7017
20

1550th iteration, loss: 0.5840, test accuracy: 0.8759
1560th iteration, loss: 0.5370, test accuracy: 0.8761
1570th iteration, loss: 0.5826, test accuracy: 0.8757
1580th iteration, loss: 0.5460, test accuracy: 0.8753
1590th iteration, loss: 0.5187, test accuracy: 0.8769
1600th iteration, loss: 0.3672, test accuracy: 0.8770
1610th iteration, loss: 0.4735, test accuracy: 0.8777
1620th iteration, loss: 0.5797, test accuracy: 0.8778
1630th iteration, loss: 0.4633, test accuracy: 0.8771
1640th iteration, loss: 0.4705, test accuracy: 0.8771
1650th iteration, loss: 0.6281, test accuracy: 0.8774
1660th iteration, loss: 0.4982, test accuracy: 0.8778
1670th iteration, loss: 0.4971, test accuracy: 0.8784
1680th iteration, loss: 0.5071, test accuracy: 0.8791
1690th iteration, loss: 0.3932, test accuracy: 0.8792
1700th iteration, loss: 0.3904, test accuracy: 0.8791
1710th iteration, loss: 0.4569, test accuracy: 0.8802
1720th iteration, loss: 0.3614, test accuracy: 0.8811
1730th iteration, loss: 0.39