# 학습 알고리즘 구현하기  

신경망(Neural Network)에서 학습이란, 결국 가중치 매개변수를 반복해서 갱신해 나가는 것이다.  
훈련 데이터를 사용해 손실 함수의 기울기를 구하고, 손실 함수 값이 줄어드는 방향으로 가중치를 조금씩 갱신한다.

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from common.functions import *
from common. gradient import numerical_gradient

In [3]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All learnable parameters live in ``self.params`` under the keys
    ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    NOTE(review): ``np``, ``sigmoid``, ``softmax``, ``cross_entropy_error`` and
    the module-level ``numerical_gradient`` are expected to be in scope via the
    file's ``common`` imports -- confirm ``np`` is really exported by
    ``common.functions``.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameter dictionary.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
            output_size: Number of output units (columns of ``W2``).
            weight_init_std: Scale applied to the standard-normal initial weights.
        """
        # Parameter store read by the forward pass.
        self.params = {}
        self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        # Use randn (not rand) so W2 is zero-centred Gaussian like W1;
        # rand would make every initial weight non-negative.
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the output of ``softmax``.

        Args:
            x: Input batch; assumed to be shaped (N, input_size) so that
                ``np.dot(x, W1)`` is valid -- TODO confirm against callers.
        """
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for x and t.

        Args:
            x: Input data.
            t: Target labels.
        """
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows in x whose predicted class matches t.

        Args:
            x: Input data; the prediction must be 2-D (one row per sample).
            t: Targets, either one row of class scores / one-hot values per
                sample (2-D) or a 1-D array of integer class indices.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        target = np.asarray(t)
        if target.ndim != 1:
            # One-hot (or score) rows -> class indices.
            target = np.argmax(target, axis=1)

        return np.sum(predicted == target) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return numerically estimated gradients of the loss for every parameter.

        Args:
            x: Input data.
            t: Target labels.

        Returns:
            Dict with the same keys as ``self.params`` holding the value the
            module-level ``numerical_gradient`` helper returns for each one.
        """
        def loss_of_params(_):
            # The argument is ignored: the loss always reads self.params.
            # Presumably the helper perturbs the passed array in place, which
            # is why the very same array objects are handed to it below --
            # verify against common.gradient.
            return self.loss(x, t)

        # Inside a method the bare name resolves to the module-level helper,
        # not to this method.
        grads = {}
        for key in ("W1", "b1", "W2", "b2"):
            grads[key] = numerical_gradient(loss_of_params, self.params[key])

        return grads

In [4]:
# Build a network with 784 inputs, 100 hidden units and 10 outputs, then
# inspect the shape of every parameter array. Each bare expression below is
# echoed by the notebook because ast_node_interactivity is set to "all".
net = TwoLayerNet(input_size = 784, hidden_size= 100, output_size= 10)
net.params["W1"].shape
net.params["b1"].shape
net.params["W2"].shape
net.params["b2"].shape

(784, 100)

(100,)

(100, 10)

(10,)

In [5]:
# Softmax output when 3 random samples (3 rows of 784 uniform values) are fed in.
x = np.random.rand(3, 784)
y = net.predict(x)
y

array([[0.09902074, 0.09727994, 0.10344222, 0.10010467, 0.10195182,
        0.09750801, 0.10057587, 0.10088278, 0.09732352, 0.10191044],
       [0.09895488, 0.09733354, 0.10342054, 0.10001675, 0.10202331,
        0.09747945, 0.10054624, 0.10093035, 0.0974096 , 0.10188535],
       [0.09885855, 0.09741285, 0.10355837, 0.10006587, 0.10206597,
        0.09752737, 0.10062887, 0.10082487, 0.09732406, 0.10173325]])

In [6]:
# Exercise numerical_gradient on dummy data: 10 random inputs and a 10x10
# array of uniform random values standing in for targets (not real labels).
x = np.random.rand(10, 784)
t = np.random.rand(10, 10)

grads = net.numerical_gradient(x, t)

# Each gradient's shape is displayed so it can be compared with its parameter's shape.
grads["W1"].shape
grads["b1"].shape
grads["W2"].shape
grads["b2"].shape


(784, 100)

(100,)

(100, 10)

(10,)

In [7]:
# Display the gradient arrays computed in the previous cell.
grads["W1"]
grads["b1"]
grads["W2"]
grads["b2"]

array([[-1.05922999e-04, -4.58008542e-05, -5.85168780e-05, ...,
        -1.67300174e-04,  5.93423533e-06,  6.37266173e-05],
       [-1.67490404e-04, -1.38499114e-04,  7.01659641e-05, ...,
         4.48447013e-05,  2.09012807e-06,  9.14778475e-05],
       [-1.49354273e-04, -1.80773778e-04,  8.97543440e-05, ...,
         7.07812320e-05,  1.85961677e-04, -8.15613443e-05],
       ...,
       [-4.76562545e-05, -8.64392624e-05,  1.45723900e-05, ...,
        -6.82556922e-05,  6.11807294e-05, -2.40481213e-05],
       [-1.18449870e-04, -1.68920222e-04,  2.01702115e-04, ...,
         1.82985866e-04,  7.44374540e-05,  1.16527632e-05],
       [-1.78723760e-04, -1.17816827e-04, -1.59947655e-05, ...,
         1.98451700e-06,  2.73664336e-05,  8.86910079e-05]])

array([-1.87217213e-04, -2.57958925e-04,  1.66188638e-04, -1.63625500e-04,
       -1.40984771e-04, -1.02880882e-05, -1.31662505e-04, -5.42530887e-05,
       -2.73813747e-04,  6.66018418e-05,  2.40065474e-04,  1.12644756e-04,
        1.27470523e-05, -4.66494265e-05, -1.07564282e-04, -2.89039161e-04,
        1.69625682e-04, -3.10792325e-05,  1.25441513e-04, -3.84363896e-05,
       -1.55857010e-04,  2.26851355e-04, -2.43062912e-04, -1.48784640e-04,
       -2.20604885e-04, -1.88619971e-04, -7.11918968e-05,  9.43745704e-05,
        1.70139847e-04, -3.30397543e-04,  5.61639979e-05, -9.74792447e-05,
        2.79375456e-05, -1.97401968e-04,  3.48852347e-05, -3.07581729e-04,
       -6.49721787e-05, -1.41396692e-04, -2.68213292e-04,  6.52258536e-05,
        3.08330257e-04, -5.20504024e-04,  1.71297447e-04,  3.26795764e-04,
       -3.57718890e-04,  1.99386179e-04,  9.18407395e-05, -1.49209691e-04,
       -1.06578897e-04, -1.92500460e-05, -1.53931310e-04, -1.17771788e-04,
       -2.83177837e-05,  

array([[ 5.39332951e-02,  5.30635171e-02,  6.02185451e-03,
        -5.22370111e-02, -5.79786353e-02,  5.31333331e-02,
         1.70167558e-03, -8.48123287e-04, -5.65453961e-02,
        -2.44509348e-04],
       [ 4.46857028e-02,  4.39654619e-02,  9.03342547e-04,
        -4.80556544e-02, -4.34233430e-02,  4.40223834e-02,
         1.53917301e-03,  5.37107003e-04, -4.32606534e-02,
        -9.13519684e-04],
       [ 5.36814171e-02,  5.28174752e-02,  1.83146265e-03,
        -5.08521774e-02, -4.85302715e-02,  5.28856304e-02,
        -1.73420970e-03, -1.27651162e-03, -5.86240506e-02,
        -1.98764398e-04],
       [ 4.98437363e-02,  4.90407493e-02,  2.19637315e-03,
        -5.25794384e-02, -4.69140781e-02,  4.91041952e-02,
         1.31905695e-03, -7.41956023e-04, -4.91687877e-02,
        -2.09985062e-03],
       [ 4.57226946e-02,  4.49855121e-02,  2.77544817e-03,
        -4.64991510e-02, -4.77577388e-02,  4.50441128e-02,
         3.00446781e-03,  5.24948347e-04, -4.80018950e-02,
         2.

array([ 0.09900699,  0.09741169,  0.00339207, -0.09995239, -0.09797035,
        0.09753799,  0.00059431,  0.00085916, -0.1026278 ,  0.00174832])

In [None]:
# Implementing mini-batch training
import numpy as np
# NOTE(review): "dataa.mnist" may be a typo for the package that actually
# provides load_mnist -- confirm the module path before running this cell.
from dataa.mnist import load_mnist


(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label = True)

# Loss recorded after every parameter update.
train_loss_list = []

# Hyperparameter settings
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size = 50, output_size = 10)

for i in range(iters_num):
    # Draw a mini-batch: batch_size random indices below train_size
    # (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Compute the gradients
    grad = network.numerical_gradient(x_batch, t_batch)
    
    # Gradient-descent step, applied in place to every parameter array.
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]
        
    # Loss on the same mini-batch, evaluated after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

In [None]:
# Train with mini-batches and evaluate on the test data once per epoch.
# NOTE(review): load_mnist, np and TwoLayerNet are not imported/defined in this
# cell; it relies on earlier cells having been run.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label = True)

network = TwoLayerNet(input_size=784, hidden_size = 50, output_size = 10)

# Hyperparameter settings
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracies.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Iterations per epoch. Integer division keeps this a whole number; a
# fractional value would make the modulo check below fire at the wrong
# iterations whenever train_size is not a multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a mini-batch: batch_size random indices below train_size
    # (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients
    grad = network.numerical_gradient(x_batch, t_batch)

    # Gradient-descent step, applied in place to every parameter array.
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, evaluated after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Compute accuracy once per epoch (also at i == 0)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("train acc, test acc |"
              + str(train_acc) + ", " + str(test_acc))