# 학습 알고리즘 구현하기
학습 = 가중치, 편향 조정
## 1. 미니 배치

- Train Data 중 일부를 무작위로 가져옴
- 이렇게 선별한 데이터 묶음을 미니배치(mini-batch)라고 함
- Loss Function 값을 줄이는 것을 목표로 함

## 2. 기울기 산출

- 미니배치의 Loss Function 값을 줄이기 위해 Parameter 기울기 구함
- 기울기는 Loss Function의 값을 가장 작게 하는 방향을 제시

## 3. Parameter 갱신
- 가중치·편향 Parameter를 기울기가 알려 주는 Loss 감소 방향으로 learning rate만큼 조금씩 수정 (`param -= learning_rate * grad`)

## 4. 반복
- 1 ~ 3 단계 반복

In [13]:
# TwoLayerNet

import numpy as np
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    """Fully connected network with one hidden layer.

    The forward pass is ``softmax(sigmoid(x @ W1 + b1) @ W2 + b2)``.
    ``sigmoid``, ``softmax`` and ``cross_entropy_error`` are not defined in
    this file; they are presumably supplied by the star import from
    ``common.functions``.

    All trainable arrays live in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameter dictionary.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of output scores (columns of ``W2``).
            weight_init_std: Factor applied to the standard-normal samples
                used to initialise the weight matrices.
        """
        self.params = {}
        # Weights start as small Gaussian noise, biases start at zero.
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``.

        Args:
            x: Input array whose last dimension equals ``input_size``.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``."""
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted class matches the label.

        Args:
            x: 2-D batch of inputs, one sample per row.
            t: 2-D array of per-class label scores (e.g. one-hot rows) with
                one row per sample.
        """
        y = self.predict(x)
        predicted = np.argmax(y, axis=1)
        # The labels must be reduced from ``t``; reducing the already 1-D
        # prediction indices a second time fails and ignores the labels.
        expected = np.argmax(t, axis=1)

        return np.sum(predicted == expected) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return a dict of numerically estimated loss gradients per parameter.

        Args:
            x: Batch of inputs passed on to ``loss``.
            t: Matching labels passed on to ``loss``.

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        def loss_w(_unused):
            # The argument is ignored: the loss always re-reads self.params.
            # This presumably relies on the imported helper perturbing the
            # parameter array in place -- TODO confirm in common.gradient.
            return self.loss(x, t)

        # Inside a method body the bare name below resolves to the
        # module-level ``numerical_gradient`` import, not to this method.
        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_w, self.params[key])

        return grads

In [14]:
# Build a network with 784 inputs, 100 hidden units and 10 outputs, then
# inspect the first-layer weight matrix: (input_size, hidden_size).
net = TwoLayerNet(
    input_size=784,
    hidden_size=100,
    output_size=10,
)
net.params["W1"].shape

(784, 100)

In [15]:
# First-layer bias is created with np.zeros(hidden_size), so expect (100,).
net.params['b1'].shape

(100,)

In [16]:
# Second-layer weights are (hidden_size, output_size), so expect (100, 10).
net.params['W2'].shape

(100, 10)

In [17]:
# Second-layer bias is created with np.zeros(output_size), so expect (10,).
net.params['b2'].shape

(10,)

In [18]:
# Forward pass on dummy data: 100 random rows with 784 features each.
x = np.random.rand(100, 784)
y = net.predict(x)

In [19]:
# Dummy batch: 100 random inputs and 100 random 10-wide target rows.
# The targets are random values, used here only to check gradient shapes.
x = np.random.rand(100, 784)
t = np.random.rand(100, 10)

# One gradient array per parameter key.
grads = net.numerical_gradient(x, t)

grads["W1"].shape

(784, 100)

In [20]:
# Gradient for the first-layer bias; the recorded output is (100,), same as b1.
grads['b1'].shape

(100,)

In [21]:
# Gradient for the second-layer weights; the recorded output is (100, 10), same as W2.
grads['W2'].shape

(100, 10)

In [22]:
# Gradient for the second-layer bias; the recorded output is (10,), same as b2.
grads['b2'].shape

(10,)

## 미니 배치 Train 구현

In [None]:
# Train Neural Net

import numpy as np
from DeepLearningFromScratch.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the dataset, requesting normalised inputs and one-hot labels.
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

# Hyperparameters.
iters_num = 10000
batch_size = 100
learning_rate = 0.1
train_size = x_train.shape[0]

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Loss recorded after every parameter update.
train_loss_list = []

for i in range(iters_num):
    # 1. Mini-batch: draw batch_size random row indices from the training set.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # 2. Gradient of the loss with respect to every parameter.
    grad = network.numerical_gradient(x_batch, t_batch)

    # 3. Gradient-descent step, applied in place to each parameter array.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Track the loss on the batch that was just used for the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

## Test Data 평가하기

In [None]:
import numpy as np
# Same import path as the training cell above, so both cells load the
# dataset through one module.
from DeepLearningFromScratch.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Unpack images and labels separately; the labels are needed as t_train below.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Per-iteration loss and per-epoch accuracies.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyperparameters. Everything the loop needs is defined in this cell so it
# does not depend on variables left over from earlier cells.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Number of iterations that make up one pass over the training set.
# Integer division keeps the modulo test in the loop exact.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Mini-batch: draw batch_size random row indices from the training set.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradient of the loss with respect to every parameter.
    grad = network.numerical_gradient(x_batch, t_batch)

    # Gradient-descent step, applied in place to each parameter array.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, measure accuracy on the full training and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        # The backslash is escaped so the printed text is unchanged while the
        # invalid "\ " escape sequence is gone.
        print("train acc, test acc \\ "
              + str(train_acc) + ", " + str(test_acc))