In [None]:
import numpy as np

In [1]:
class SGD:
    """Plain stochastic gradient descent.

    Each parameter is moved against its gradient by a fixed step:
    ``param <- param - lr * grad``.

    Args:
        lr: Learning rate (step size) applied to every gradient.
    """

    def __init__(self, lr = 0.01):
        self.lr = lr

    def update(self, params, grads):
        """Apply one SGD step to ``params`` in place.

        Args:
            params: Dict mapping a parameter name to its value; each entry
                is updated with ``-=``.
            grads: Dict with the same keys as ``params`` holding the
                gradient of each parameter.
        """
        for key in params.keys():
            # Step size is the learning rate times the gradient.
            params[key] -= self.lr * grads[key]
            

In [2]:
class Momentum:
    """SGD with a momentum (velocity) term.

    A per-parameter velocity is kept in ``self.v``. On every step the
    velocity becomes ``momentum * v - lr * grad`` and is then added to the
    parameter.

    Args:
        lr: Learning rate multiplying the gradient.
        momentum: Factor applied to the previous velocity.
    """

    def __init__(self, lr=0.01, momentum = 0.9):
        self.lr = lr
        self.momentum = momentum
        # Velocities are created lazily on the first update() call.
        self.v = None

    def update(self, params, grads):
        """Advance every entry of ``params`` by one momentum step, in place.

        Args:
            params: Dict of parameter name -> value.
            grads: Dict of parameter name -> gradient, same keys as ``params``.
        """
        if self.v is None:
            # Start each velocity at zero, shaped like its parameter.
            self.v = {name: np.zeros_like(value) for name, value in params.items()}

        for name in params:
            velocity = self.momentum * self.v[name] - self.lr * grads[name]
            self.v[name] = velocity
            params[name] += velocity

In [3]:
class AdaGrad:
    """AdaGrad optimizer: per-element step sizes from accumulated squared gradients.

    ``self.h`` accumulates the element-wise square of every gradient seen so
    far; each step divides the gradient by ``sqrt(h) + 1e-7``.

    Args:
        lr: Base learning rate.
    """

    def __init__(self, lr=0.01):
        self.lr = lr
        # Squared-gradient accumulators are created on the first update().
        self.h = None

    def update(self, params, grads):
        """Apply one AdaGrad step to ``params`` in place.

        Args:
            params: Dict of parameter name -> value.
            grads: Dict of parameter name -> gradient, same keys as ``params``.
        """
        if self.h is None:
            self.h = {name: np.zeros_like(value) for name, value in params.items()}

        for name in params:
            grad = grads[name]
            self.h[name] += grad * grad
            # The 1e-7 term keeps the denominator non-zero while h is still zero.
            scale = np.sqrt(self.h[name]) + 1e-7
            params[name] -= self.lr * grad / scale

In [12]:
class Adam:
    """Adam optimizer (adaptive moment estimation) with bias correction.

    Keeps two exponential moving averages per parameter:

    * ``self.v`` - first moment (mean of the gradients), decay ``momentum_1``
    * ``self.s`` - second moment (mean of squared gradients), decay ``momentum_2``

    Both averages start at zero, so each is divided by ``1 - beta ** t``
    (``t`` = number of updates so far) before being used in the step.

    Args:
        lr: Learning rate. Defaults to 0.001.
        momentum_1: Decay rate of the first-moment average (beta1).
        momentum_2: Decay rate of the second-moment average (beta2).
    """

    def __init__(self, lr=0.001, momentum_1=0.9, momentum_2=0.999):
        self.lr = lr
        self.momentum_1 = momentum_1
        self.momentum_2 = momentum_2
        self.v = None
        self.s = None
        # Number of update() calls performed; drives the bias correction.
        self.t = 0

    def update(self, params, grads):
        """Apply one Adam step to ``params`` in place.

        Args:
            params: Dict of parameter name -> value.
            grads: Dict of parameter name -> gradient, same keys as ``params``.
        """
        # Lazily create the moment buffers, shaped like the parameters.
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        if self.s is None:
            self.s = {key: np.zeros_like(val) for key, val in params.items()}

        self.t += 1
        # Correction factors that undo the bias toward the zero initialization.
        correction_1 = 1.0 - self.momentum_1 ** self.t
        correction_2 = 1.0 - self.momentum_2 ** self.t

        for key in params.keys():
            grad = grads[key]
            self.v[key] = self.momentum_1 * self.v[key] + (1.0 - self.momentum_1) * grad
            self.s[key] = self.momentum_2 * self.s[key] + (1.0 - self.momentum_2) * grad * grad

            v_hat = self.v[key] / correction_1
            s_hat = self.s[key] / correction_2
            # 1e-8 guards against division by zero when the gradient is zero.
            params[key] -= self.lr * v_hat / (np.sqrt(s_hat) + 1e-8)

In [13]:
def batchnorm_forward(x, gamma, beta, eps):
    """Batch-normalization forward pass, written as explicit graph steps.

    Each column of ``x`` is normalized with the mean and (biased) variance
    taken over axis 0, then scaled by ``gamma`` and shifted by ``beta``.

    Args:
        x: 2-D array of shape (N, D).
        gamma: Scale factor, broadcast against the (N, D) normalized input
            (presumably shape (D,) - confirm against callers).
        beta: Shift term, broadcast the same way as ``gamma``.
        eps: Constant added to the variance before the square root.

    Returns:
        out: ``gamma * xhat + beta``, same shape as ``x``.
        cache: Tuple ``(xhat, gamma, xmu, ivar, sqrtvar, var, eps)`` of the
            intermediates that ``batchnorm_backward`` unpacks.
    """
    N, D = x.shape

    # Per-column mean and centered input.
    mu = 1./N * np.sum(x, axis=0)
    xmu = x - mu

    # Per-column biased variance.
    sq = xmu ** 2
    var = 1./N * np.sum(sq, axis=0)

    sqrtvar = np.sqrt(var + eps)

    # Inverse standard deviation.
    ivar = 1./sqrtvar

    xhat = xmu * ivar

    gammax = gamma * xhat

    out = gammax + beta

    cache = (xhat, gamma, xmu, ivar, sqrtvar, var, eps)

    return out, cache


In [14]:
def batchnorm_backward(dout, cache):
    """Batch-normalization backward pass, walking the forward graph in reverse.

    Args:
        dout: Upstream gradient of shape (N, D).
        cache: Tuple ``(xhat, gamma, xmu, ivar, sqrtvar, var, eps)`` holding
            the intermediates of the matching forward pass.

    Returns:
        dx: Gradient with respect to the forward input, shape (N, D).
        dgamma: Gradient with respect to ``gamma``, summed over axis 0.
        dbeta: Gradient with respect to ``beta``, summed over axis 0.
    """
    xhat, gamma, xmu, ivar, sqrtvar, var, eps = cache

    N, D = dout.shape

    # out = gammax + beta
    dbeta = np.sum(dout, axis=0)
    dgammax = dout

    # gammax = gamma * xhat
    dgamma = np.sum(dgammax*xhat, axis=0)
    # Propagate the full upstream gradient, not the already-reduced dgamma.
    dxhat = dgammax * gamma

    # xhat = xmu * ivar
    divar = np.sum(dxhat * xmu, axis=0)
    dxmu_1 = dxhat * ivar

    # ivar = 1 / sqrtvar
    dsqrtvar = -1. / (sqrtvar**2) * divar

    # sqrtvar = sqrt(var + eps)  ->  derivative is 0.5 / sqrt(var + eps)
    dvar = 0.5 * 1. / np.sqrt(var + eps) * dsqrtvar

    # var = mean(sq, axis=0)
    dsq = 1. / N * np.ones((N, D)) * dvar

    # sq = xmu ** 2
    dxmu_2 = 2 * xmu * dsq

    # xmu = x - mu : one branch goes straight to x, the other through mu.
    dx1 = dxmu_1 + dxmu_2
    dmu = -1 * np.sum(dxmu_1 + dxmu_2, axis=0)

    # mu = mean(x, axis=0)
    dx2 = 1. / N * np.ones((N, D)) * dmu

    dx = dx1 + dx2

    return dx, dgamma, dbeta

In [15]:
class Dropout:
    """Dropout layer (non-inverted form).

    In training mode each element is kept only where a uniform random draw
    exceeds ``dropout_ratio``. In inference mode nothing is dropped; the
    input is scaled by ``1 - dropout_ratio`` instead, so the expected
    magnitude matches training.

    Args:
        dropout_ratio: Threshold for the random draw; elements whose draw is
            not above it are zeroed during training.
    """

    def __init__(self, dropout_ratio = 0.5):
        self.dropout_ratio = dropout_ratio
        # Boolean keep-mask from the most recent training-mode forward().
        self.mask = None

    def forward(self, x, train_flag = True):
        """Apply dropout to ``x``.

        Args:
            x: Input array.
            train_flag: True to sample a new mask and drop elements; False
                to scale the whole input by the keep probability.

        Returns:
            Array with the same shape as ``x``.
        """
        if train_flag:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            # Scale by the keep probability (1 - ratio).
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        """Pass gradients through only where the forward mask kept the input.

        Uses the mask stored by the last training-mode ``forward`` call.
        """
        return dout * self.mask

In [21]:
# Build the integers 1..20, shuffle them in place, then show the first two.
x = np.arange(1, 21)
np.random.shuffle(x)
x[:2]

array([10, 16])

In [22]:
# Sample both hyperparameters log-uniformly: draw the base-10 exponent
# uniformly, then raise 10 to it.
weight_decay = 10 ** np.random.uniform(low=-8, high=-4)  # range 1e-8 .. 1e-4
lr = 10 ** np.random.uniform(low=-6, high=-2)            # range 1e-6 .. 1e-2

In [23]:
# Display the learning rate sampled above.
lr

0.001451801956583762

In [24]:
# Display the weight-decay coefficient sampled above.
weight_decay

3.7783568482033835e-06

In [None]:
# Random search: train once per trial with freshly sampled hyperparameters
# and keep the accuracy histories, keyed by a label describing the sample.
hyperparam_trial = 10
results_val = {}
results_train = {}

for _ in range(hyperparam_trial):
    # Log-uniform samples: the base-10 exponent is drawn uniformly.
    weight_decay = 10 ** np.random.uniform(low=-8, high=-4)
    lr = 10 ** np.random.uniform(low=-6, high=-2)

    val_acc_list, train_acc_list = _train(lr, weight_decay)

    # Report the final validation accuracy of this trial.
    print("".join([
        'validation acc :', str(val_acc_list[-1]),
        "| weight_decay : ", str(weight_decay),
        '| lr : ', str(lr),
    ]))

    key = "".join(["lr:", str(lr), ", weight decay:", str(weight_decay)])
    results_train[key] = train_acc_list
    results_val[key] = val_acc_list