# MNIST

### MNIST 데이터 받기

In [1]:
import os
import sys
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("data/", one_hot=True)
train_data = mnist.train.images
train_label = mnist.train.labels
test_data = mnist.test.images
test_label = mnist.test.labels

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


### MNIST 데이터 확인하기

In [2]:
# size of MNIST
print(train_data.shape)
print(train_label.shape)
print(test_data.shape)
print(test_label.shape)

(55000, 784)
(55000, 10)
(10000, 784)
(10000, 10)


### Data Pre-processing (데이터 전처리)

# Implementation (1)


## Loss function (손실 함수) : Cross Entropy

# <center> \\( L(y_i, f(x_i; W)) = -\frac{1}{n}\sum_{i=1}^{n}\sum_{k=1} y_{i,j} log(f(x_i)_k)\\)</center>


#### get_cross_entropy_loss 함수의 내용을 완성하세요.
#### (Hint : (1) tf.reduce_mean(), tf.reduce_sum(), tf.log() (2) Tensor dimension에 유의 (3) log 함수 사용 시 epsilon 사용하세요.)

In [3]:
def get_cross_entropy_loss(y_true, y_hat, epsilon=1e-8):
    """
    compute cross entropy

    Args:
        y_true: true label
        y_hat: predicted label
        epsilon: small value to prevent NaN in log

    Returns:
        cross entropy loss
    """
    with tf.name_scope('get'):
        loss = -tf.reduce_mean(tf.reduce_sum(y_true * tf.log(y_hat), axis = 1))
#         loss = y_ture * tf.log(y_hat)
        return loss
# y_true = tf.placeholder(tf.float32, [None, 10]3

In [4]:
def get_accuracy(y_true, y_hat):
    """
    compute cross entropy

    Args:
        y_true: true label
        y_hat: predicted label
        epsilon: small value to prevent NaN in log

    Returns:
        cross entropy loss
    """
    with tf.name_scope('cross_entropy'):
        # Compare the highest indices between the predicted label and the true label
        correct_prediction = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y_true, 1), name='correct_prediction')
        # Compute accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
        return accuracy

## Hyper-parameter (하이퍼 파라미터)

In [5]:
# Set hypyerparameters
learning_rate = 0.01
max_iter = 2000
batch_size = 100

# Implementation (2)
## Linear Classifier (선형 분류기)

## <center> \\( f(x) = W^Tx+b \\)</center>

### linear function을 완성하세요.
### (Hint : (1) weight, bias 선언 (2) tf.get_variable()의 initializer  (3) tf.matmul())

In [6]:
def fc(name, out_dim, inputs):
    """
    Args :
        Inputs : Input tensor
        out_dim : output dimension
        
    Returns:
        inputs * weight + bias
    """
    with tf.variable_scope(name):
        shp = inputs.get_shape().as_list()[1]
        initial = tf.truncated_normal([shp,out_dim], stddev=.01)
        weights = tf.get_variable('w', initializer=initial)
        y_hat = tf.matmul(inputs, weights) #벡터가 가로로 되어있다고 가정해서 x, w순서로 곱한다.
        return y_hat


In [7]:
a = tf.placeholder(tf.float32, [None, 784])
a.get_shape().as_list()[1]

784

# Implementation (3)

## Model Setting

### 1. Training data 및 Test data의 각각의 image를 한 vector로 만들어서 train_data, test_data에 각각 저장하세요.
#### Hint) 데이터 차원.
### 2. Dataset로부터 받은 데이터(Image, label)를 담을 변수를 각각 x 및 y_true에 선언하세요.
#### Hint) tf.placeholder
### 3. Implementation (2)에서 구현한 linear classifier 함수값과 softmax 함수를 통한 prediction 값을 y_hat에 저장하세요.
#### Hint) tf.nn.softmax
### 4. 3으로부터 얻은 결과를 통해 Implementation (1)에서 구현한 loss function을 통해 얻은 loss를 cross_entropy에 저장하세요.

In [8]:
tf.reset_default_graph()
# Flatten data
###################################################################
#                    Implementation 3-1                           #
###################################################################


###################################################################
#                    Implementation 3-2                           #
###################################################################
x = tf.placeholder(tf.float32, [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])

###################################################################
#                    Implementation 3-3                          #
###################################################################
h1 = fc('layer1', 512, x)
y_logits = fc('layer2', 10, h1)
y_hat = tf.nn.softmax(y_logits)


###################################################################
#                    Implementation 3-4                           #
###################################################################
cross_entropy = get_cross_entropy_loss(y_true, y_hat) #epsilon은 default

# Calculate accuracy
accuracy = get_accuracy(y_true, y_hat)
# Make gradient descent op
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Make op to initialize declared variable
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Initialize variables
    sess.run(init)
    
    # Training
    for step in range(max_iter):
        # Get batch data and label
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # train the network and calculate cross entropy
        _, loss = sess.run([train_step, cross_entropy], feed_dict={x: batch_x, y_true: batch_y})
        # calcualte accuracy
        acc = sess.run(accuracy, feed_dict={x: test_data, y_true: test_label})
        # print loss (cross entropy) and accuracy at every 10th step
        if (step + 1) % 10 == 0:
            print("{}th iteration, loss: {:.4f}, test accuracy: {:.4f}".format(step + 1, loss, acc))

10th iteration, loss: 0.8966, test accuracy: 0.8126
20th iteration, loss: 0.5409, test accuracy: 0.8294
30th iteration, loss: 0.7574, test accuracy: 0.8393
40th iteration, loss: 0.7660, test accuracy: 0.8578
50th iteration, loss: 0.5179, test accuracy: 0.8206
60th iteration, loss: 0.5715, test accuracy: 0.8372
70th iteration, loss: 0.5267, test accuracy: 0.8487
80th iteration, loss: 0.5346, test accuracy: 0.8496
90th iteration, loss: 0.5877, test accuracy: 0.8537
100th iteration, loss: 0.6032, test accuracy: 0.8611
110th iteration, loss: 0.5863, test accuracy: 0.8482
120th iteration, loss: 0.4588, test accuracy: 0.8254
130th iteration, loss: 0.5973, test accuracy: 0.8433
140th iteration, loss: 0.6023, test accuracy: 0.8728
150th iteration, loss: 0.3725, test accuracy: 0.8707
160th iteration, loss: 0.3787, test accuracy: 0.8471
170th iteration, loss: 0.4895, test accuracy: 0.8853
180th iteration, loss: 0.6697, test accuracy: 0.8583
190th iteration, loss: 0.4426, test accuracy: 0.8839
20

1590th iteration, loss: nan, test accuracy: 0.0980
1600th iteration, loss: nan, test accuracy: 0.0980
1610th iteration, loss: nan, test accuracy: 0.0980
1620th iteration, loss: nan, test accuracy: 0.0980
1630th iteration, loss: nan, test accuracy: 0.0980
1640th iteration, loss: nan, test accuracy: 0.0980
1650th iteration, loss: nan, test accuracy: 0.0980
1660th iteration, loss: nan, test accuracy: 0.0980
1670th iteration, loss: nan, test accuracy: 0.0980
1680th iteration, loss: nan, test accuracy: 0.0980
1690th iteration, loss: nan, test accuracy: 0.0980
1700th iteration, loss: nan, test accuracy: 0.0980
1710th iteration, loss: nan, test accuracy: 0.0980
1720th iteration, loss: nan, test accuracy: 0.0980
1730th iteration, loss: nan, test accuracy: 0.0980
1740th iteration, loss: nan, test accuracy: 0.0980
1750th iteration, loss: nan, test accuracy: 0.0980
1760th iteration, loss: nan, test accuracy: 0.0980
1770th iteration, loss: nan, test accuracy: 0.0980
1780th iteration, loss: nan, te