# step 46. Optimizer

### Optimizer class
* 매개변수 갱신을 위한 기반 클래스
* 구체적인 최적화 기법은 Optimizer 클래스를 상속한 곳에서 구현한다

In [None]:
class Optimizer:
    """Base class for parameter-update rules.

    The concrete update rule is supplied by subclasses through
    ``update_one``; this class only collects the parameters, runs the
    registered hooks and dispatches to ``update_one``.
    """

    def __init__(self):
        # Object whose parameters are updated (a Model or a Layer).
        self.target = None
        # Callables run on the parameter list before every update.
        self.hooks = []

    def setup(self, target):
        """Register ``target`` as the update target and return ``self``."""
        self.target = target
        return self

    def update(self):
        """Update every parameter of the target that has a gradient."""
        # Keep only the parameters whose gradient has been computed.
        trainable = []
        for candidate in self.target.params():
            if candidate.grad is not None:
                trainable.append(candidate)

        # Optional preprocessing, e.g. weight decay or gradient clipping.
        for hook in self.hooks:
            hook(trainable)

        # Apply the subclass-specific rule to each parameter in turn.
        for candidate in trainable:
            self.update_one(candidate)

    def update_one(self, params):
        """Update a single parameter; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def add_hook(self, f):
        """Append ``f`` to the hooks; it is called with the parameter list."""
        self.hooks.append(f)

### SGD class

In [None]:
class SGD(Optimizer):
    """Gradient descent with a fixed learning rate.

    Each parameter is updated as ``data <- data - lr * grad``.
    """

    def __init__(self, lr=0.01):
        super().__init__()
        self.lr = lr  # learning rate (step size)

    def update_one(self, param):
        """Move ``param.data`` in place against its gradient."""
        step = self.lr * param.grad.data
        param.data -= step

### MomentumSGD

In [None]:
import numpy as np

class MomentumSGD(Optimizer):
    """Gradient descent with a momentum (velocity) term.

    For every parameter a velocity array ``v`` is kept and updated as
    ``v <- momentum * v - lr * grad``; the parameter then moves by ``v``.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        super().__init__()
        self.lr = lr
        self.momentum = momentum
        # Velocity arrays, keyed by id() of the parameter they belong to.
        self.vs = {}

    def update_one(self, param):
        """Update the velocity of ``param`` and apply it to ``param.data``."""
        slot = id(param)
        velocity = self.vs.get(slot)
        if velocity is None:
            # First time this parameter is seen: start from zero velocity.
            velocity = np.zeros_like(param.data)
            self.vs[slot] = velocity

        # In-place operations so the array stored in self.vs is updated.
        velocity *= self.momentum
        velocity -= self.lr * param.grad.data
        param.data += velocity

In [2]:
import numpy as np
from dezero import Variable
from dezero import optimizers
import dezero.functions as F
from dezero.models import MLP

# Toy dataset: noisy samples of sin(2*pi*x) for x in [0, 1).
np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

# Hyperparameters.
lr, iters, hidden_size = 0.2, 10000, 10

# Model definition; the optimizer is attached to the model via setup().
model = MLP((hidden_size, 1))
optimizer = optimizers.MomentumSGD(lr).setup(model)

# Training loop.
for i in range(iters):
    y_pred = model(x)
    loss = F.mean_squared_error(y, y_pred)

    # Clear the old gradients, backpropagate, then update the parameters.
    model.cleargrads()
    loss.backward()
    optimizer.update()

    # Report the loss only on every 1000th iteration.
    if i % 1000:
        continue
    print(loss)

variable(0.8165178492839196)
variable(0.07743134827996008)
variable(0.07544895146731474)
variable(0.0746326030585864)
variable(0.07420983776361521)
variable(0.07397000396385317)
variable(0.07383179319278566)
variable(0.07375198316276851)
variable(0.07370578149495666)
variable(0.07367887538341852)


# step 47. Softmax & Cross Entropy

In [None]:
# Utility function get_item(): slices a multi-dimensional array.

import numpy as np
from dezero import Variable
import dezero.functions as F

x = Variable(np.array([[1,2,3], [4,5,6]]))
y = F.get_item(x, 1)
print(y)

y.backward()
# grad is an attribute holding the gradient, not a method, so it must not
# be called.
print(x.grad)

# An index array may repeat entries to pick the same row more than once.
indices = np.array([0,0,1])
y = F.get_item(x, indices)
print(y)

# With Variable.__getitem__ = F.get_item in place, the ordinary indexing
# syntax below can be used instead of calling F.get_item directly.

y = x[1]
print(y)

y = x[:,2]
print(y)

### implement softmax function

In [2]:
from dezero.models import MLP
import numpy as np

# Build an MLP from the size tuple (10, 3) and run a batch of three
# 2-element inputs through it; the printed output has 3 values per input.
model = MLP((10, 3))

x = np.array([
    [0.2, -0.4],
    [0.3, 0.5],
    [1.3, -3.2],
])
y = model(x)
print(y)

variable([[-0.54224404  0.09685547  0.52286398]
          [-0.45705156  0.09128976  0.65779778]
          [-0.7642369   0.21514968  0.05824683]])


In [None]:
from dezero import Variable, as_variable
import dezero.functions as F

def softmax1d(x):
    """Return exp(x) divided by the sum of exp(x) over all elements.

    F.sum is called without an axis, so the normalisation runs over the
    whole input; this is meant for a single sample, not a batch.
    """
    x = as_variable(x)
    y = F.exp(x)
    sum_y = F.sum(y)
    return y / sum_y

# Relies on `np` and `model` defined in earlier cells.
x = Variable(np.array([[0.2, -0.4]]))
y = model(x)
p = softmax1d(y)
print(y)
print(p)

In [None]:
# softmax for batch computation
def softmax_simple(x, axis = 1):
    """Softmax along ``axis``, usable on a batch of inputs.

    Args:
        x: input scores; converted with ``as_variable``.
        axis: axis along which the exponentials are normalised.

    Returns:
        ``exp(x) / (sum(exp(x), axis) + eps)``, with the same shape as ``x``.
    """
    # Convert the input itself (the result used to be bound to `y` and
    # then overwritten, so the conversion was lost).
    x = as_variable(x)
    # Use the dezero functions: the builtin sum() does not accept
    # axis/keepdims and no bare exp() is defined in this notebook.
    y = F.exp(x)
    sum_y = F.sum(y, axis=axis, keepdims=True)
    # eps keeps the denominator non-zero; as a side effect the outputs sum
    # to very slightly less than 1.
    eps = 1e-7
    return y / (sum_y + eps)

### implement cross entropy

In [None]:
def softmax_cross_entropy_simple(x, t):
    """Mean cross-entropy between softmax(x) and integer class labels ``t``.

    Args:
        x: scores; the first dimension is the batch size N.
        t: one label index per sample, used to index the class dimension.

    Returns:
        The negated mean of the log-probabilities picked by ``t``.
    """
    x, t = as_variable(x), as_variable(t)
    N = x.shape[0]

    # The dezero functions are reached through F: bare softmax/clip/log are
    # not defined in this notebook and the builtin sum() is not the dezero one.
    p = F.softmax(x)
    p = F.clip(p, 1e-15, 1.0)    # keep p away from 0 so log(p) stays finite
    log_p = F.log(p)
    # For each sample n, pick the log-probability of its label t[n].
    tlog_p = log_p[np.arange(N), t.data]
    y = -1 * F.sum(tlog_p) / N
    return y

In [3]:
import numpy as np
import dezero.functions as F

# Batch of three inputs with one class label each. The batch is bound to
# `x` (it was `X`) so that model(x) uses this data instead of whatever `x`
# an earlier cell left behind. `model` comes from an earlier cell.
x = np.array([[0.2,-0.4], [0.3, 0.5], [1.3,-3.2]])
t = np.array([2, 0, 1])
y = model(x)
loss = F.softmax_cross_entropy(y, t)
print(loss)

variable(1.0828371573902766)
