In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np



# mnist를 불러오고 train_data, train_label, test_data, test_label로 나눠주세요.
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()   


# 모델에 맞게 데이터 가공
x_train_norm, x_test_norm = x_train / 255.0, x_test / 255.0
x_train_reshaped = x_train_norm.reshape(-1, x_train_norm.shape[1]*x_train_norm.shape[2])
x_test_reshaped = x_test_norm.reshape(-1, x_test_norm.shape[1]*x_test_norm.shape[2])

# 입력층 데이터의 모양(shape)
print(x_train_reshaped.shape)

# 테스트를 위해 x_train_reshaped의 앞 5개의 데이터를 가져온다.
X = x_train_reshaped[:]

# 초기화된 파라미터를 정의하는 함수를 만들고 초기값을 만드세요.
def init_params(input_size, hidden_size, output_size, weight_init_std=0.01):
    #W1, b1, W2, b2를 모두 정의해주세요.
    
    # 인접 레이어간 관계를 나타내는 파라미터 W를 생성하고 random 초기화
    W1 = weight_init_std * np.random.randn(input_size, hidden_size)  
    # 바이어스 파라미터 b를 생성하고 Zero로 초기화
    b1 = np.zeros(hidden_size)
    
    W2 = weight_init_std * np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)
    
    return W1, b1, W2, b2

W1, b1, W2, b2 = init_params(input_size = 784, hidden_size = 50, output_size = 10)
print(W1.shape,b1.shape,W2.shape,b2.shape)

(60000, 784)
(784, 50) (50,) (50, 10) (10,)


In [2]:
# MLP를 정의하세요.
def affine_layer_forward(X, W, b):
    y = np.dot(X, W) + b
    cache = (X, W, b)
    return y, cache

In [3]:
# relu를 정의하세요 (np.maximum을 활용하세요)

def relu(x):
    return np.maximum(x, 0)

In [4]:
# softmax를 정의하세요
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x))
        return y.T
    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))
        

In [5]:
# one-hot 인코딩을 정의하세요
def _change_one_hot_label(X, num_category):
    T = np.zeros((X.size, num_category))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
        
    return T

In [6]:
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    # 훈련 데이터가 원-핫 벡터라면 정답 레이블의 인덱스로 반환
    if t.size == y.size:
        t = t.argmax(axis=1)
             
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t])) / batch_size

In [7]:
# MLP의 backward pass를 정의하세요
def affine_layer_backward(dy, cache):
    X, W, b = cache
    dX = np.dot(dy, W.T)
    dW = np.dot(X.T, dy)
    db = np.sum(dy, axis=0)
    return dX, dW, db


# relu 함수의 backward pass를 정의하세요. (np.where 함수를 활용하세요)

def relu_grad(x):
        
    return (1.0 - relu(x) ) * relu(x) 


#파라미터를 업데이트하는 함수를 정의하세요.
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    return W1, b1, W2, b2


# train_step을 정의합니다.
def train_step(X, Y, W1, b1, W2, b2, learning_rate=0.1, verbose=False):
    a1, cache1 = affine_layer_forward(X, W1, b1)
    # z1 = relu(a1)
    z1 = relu(a1)
    a2, cache2 = affine_layer_forward(z1, W2, b2)
    y_hat = softmax(a2)
    t = _change_one_hot_label(Y, 10)
    Loss = cross_entropy_error(y_hat, t)

    if verbose:
        print('---------')
        print(y_hat)
        print(t)
        print('Loss: ', Loss)
        
    dy = (y_hat - t) / X.shape[0]
    dz1, dW2, db2 = affine_layer_backward(dy, cache2)
    #da1 = relu_grad(a1) * dz1
    da1 = relu_grad(a1) * dz1
    dX, dW1, db1 = affine_layer_backward(da1, cache1)
    
    W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
    
    return W1, b1, W2, b2, Loss

def predict(W1, b1, W2, b2, X):
    a1 = np.dot(X, W1) + b1
  #  z1 = relu(a1)
    z1 = relu(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)

    return y

def accuracy(W1, b1, W2, b2, x, y):
    y_hat = predict(W1, b1, W2, b2, x)
    y_hat = np.argmax(y_hat, axis=1)

    accuracy = np.sum(y_hat == y) / float(x.shape[0])
    return accuracy

In [8]:
# 하이퍼파라미터
iters_num = 50000  # 반복 횟수를 적절히 설정한다.
train_size = x_train.shape[0]
batch_size = 100   # 미니배치 크기
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# 1에폭당 반복 수
iter_per_epoch = max(train_size / batch_size, 1)

W1, b1, W2, b2 = init_params(784, 50, 10)

for i in range(iters_num):
    # 미니배치 획득
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train_reshaped[batch_mask]
    y_batch = y_train[batch_mask]
  
    W1, b1, W2, b2, Loss = train_step(x_train_reshaped, y_train, W1, b1, W2, b2,learning_rate=0.1, verbose=False)

    # 학습 경과 기록
    train_loss_list.append(Loss)
  
    # 1에폭당 정확도 계산
    # train_accuracy와 test_accuracy를 완성해주세요
    if i % iter_per_epoch == 0:
        print('Loss: ', Loss)
        train_acc = accuracy(W1, b1, W2, b2, x_train_reshaped, y_train)
        test_acc = accuracy(W1, b1, W2, b2, x_test_reshaped, y_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

Loss:  13.304728080105994
train acc, test acc | 0.07191666666666667, 0.068
Loss:  13.344171243250965
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  13.490346331898738
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  13.823689310361036
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  14.275789675512016
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  14.83531393260901
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  15.46548973457011
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  16.13745807768338
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  16.832713414780486
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  17.540695118719484
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  18.255621113352905
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  18.974377112271753
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  19.695268682324915
train acc, test acc | 0.11236666666666667, 0.1135
Loss:  20.417387

KeyboardInterrupt: 