# 学習に関するテクニック
## パラメータの更新
### Stochastic Gradient Descent(SGD)
- 勾配のみを考慮
- 伸びた形の関数だと非効率な経路で探索することになる

### Momentum
- 勾配のほかに抵抗のような項を追加

### AdaGrad
- 学習係数の減衰

### Adam
- ざっくりMomentumとAdaGradの融合

In [1]:
import numpy as np

class SGD:
    """Plain stochastic gradient descent: step against the gradient only."""

    def __init__(self, learning_rate=0.01):
        """Store the step size applied to every gradient."""
        self.learning_rate = learning_rate

    def update(self, params, grads):
        """Update every entry of ``params`` in place.

        Args:
            params: Mapping from parameter name to its value
                (presumably NumPy arrays; confirm against the caller).
            grads: Mapping with the same keys as ``params`` holding the
                gradient of each parameter.
        """
        for name in params:
            step = self.learning_rate * grads[name]
            params[name] -= step

class Momentum:
    """SGD with momentum: each parameter keeps a velocity term.

    The velocity accumulates past gradients, decayed by ``momentum`` at
    every step, and the parameters move by the velocity.
    """

    def __init__(self, learning_rate=0.01, momentum=0.9):
        """Store the hyperparameters; velocities are allocated lazily."""
        self.learning_rate = learning_rate
        self.momentum = momentum
        # Per-parameter velocity, created on the first call to update().
        self.v = None

    def update(self, params, grads):
        """Advance the velocities and update ``params`` in place.

        Args:
            params: Mapping from parameter name to array-like value.
            grads: Mapping with the same keys as ``params`` holding the
                gradient of each parameter.
        """
        if self.v is None:
            # Start every velocity at zero with the shape of its parameter.
            self.v = {key: np.zeros_like(val) for key, val in params.items()}

        for key in params:
            # v <- momentum * v - lr * grad ; param <- param + v
            self.v[key] = (
                self.momentum * self.v[key] - self.learning_rate * grads[key]
            )
            params[key] += self.v[key]

class AdaGrad:
    """AdaGrad: per-element learning-rate decay from accumulated gradients.

    Each parameter element keeps a running sum of its squared gradients;
    the effective step for that element shrinks as the sum grows.
    """

    def __init__(self, learning_rate=0.01):
        """Store the base learning rate; accumulators are allocated lazily."""
        self.learning_rate = learning_rate
        # Per-parameter sum of squared gradients, created on first update().
        self.h = None

    def update(self, params, grads):
        """Accumulate squared gradients and update ``params`` in place.

        Args:
            params: Mapping from parameter name to array-like value.
            grads: Mapping with the same keys as ``params`` holding the
                gradient of each parameter.
        """
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}

        for key in params:
            self.h[key] += grads[key] * grads[key]
            # The step must be scaled by the gradient itself; the small
            # constant avoids division by zero while h is still 0.
            params[key] -= (
                self.learning_rate * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
            )


## 重みの初期値