In [1]:
import numpy as np

class AdamOptimizer:
    """Adam optimizer over a list of NumPy parameters.

    For every parameter the optimizer keeps an exponentially decayed running
    average of the gradients (``m``) and of the squared gradients (``v``).
    Each call to :meth:`step` applies one bias-corrected Adam update.

    Args:
        params: Iterable of parameters (typically ``np.ndarray``). It is
            materialised into a list; the array objects themselves are kept
            by reference and are updated in place by :meth:`step`.
        lr: Step size.
        beta1: Decay rate of the first-moment (mean) estimate.
        beta2: Decay rate of the second-moment (squared-gradient) estimate.
        epsilon: Constant added to the denominator of the update.
    """

    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        # Materialise once so a one-shot iterable (e.g. a generator) is not
        # exhausted by the first of the two moment-buffer comprehensions.
        self.params = list(params)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = [np.zeros_like(p) for p in self.params]  # first moments
        self.v = [np.zeros_like(p) for p in self.params]  # second moments
        self.t = 0  # number of successful calls to step()

    def step(self, grads):
        """Apply one Adam update using ``grads`` and return the parameters.

        Args:
            grads: Iterable of gradients, one per parameter, in the same
                order as the parameters given to the constructor.

        Returns:
            The list of updated parameters (also stored in ``self.params``).
            ``np.ndarray`` parameters are modified in place, so the returned
            arrays are the same objects that were passed to the constructor.

        Raises:
            ValueError: If the number of gradients differs from the number of
                parameters. No optimizer state is modified in that case.
        """
        grads = list(grads)
        # zip() would silently stop at the shorter sequence, and the
        # reassignment of self.params below would then drop every parameter
        # that did not receive a gradient. Fail loudly before touching state.
        if len(grads) != len(self.params):
            raise ValueError(
                f"expected {len(self.params)} gradients (one per parameter), "
                f"got {len(grads)}"
            )

        self.t += 1
        # The bias-correction denominators depend only on t, not on the
        # parameter, so compute them once per step.
        bias_correction1 = 1 - self.beta1 ** self.t
        bias_correction2 = 1 - self.beta2 ** self.t

        updated_params = []
        for i, (param, grad) in enumerate(zip(self.params, grads)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad ** 2)

            m_hat = self.m[i] / bias_correction1
            v_hat = self.v[i] / bias_correction2

            param_update = self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)
            # In-place for ndarrays: the caller's array object is updated.
            param -= param_update

            updated_params.append(param)

        self.params = updated_params
        return self.params


In [2]:
# Demo: run a single Adam step on randomly generated parameters and gradients.
np.random.seed(0)  # fixed seed so the printed values are reproducible

# Draw order matters for reproducibility: both parameter arrays are sampled
# first, then both gradient arrays, each in (3, 3) -> (3, 1) order.
params = [np.random.randn(*shape) for shape in ((3, 3), (3, 1))]
grads = [np.random.randn(*shape) for shape in ((3, 3), (3, 1))]

adam = AdamOptimizer(params, lr=0.001)
# Note: the arrays inside `params` are updated in place by step().
updated_params = adam.step(grads)
print(updated_params)


[array([[ 1.76305235,  0.39915721,  0.97773798],
       [ 2.2398932 ,  1.86655799, -0.97627788],
       [ 0.94908842, -0.15035721, -0.10221885]]), array([[0.4095985 ],
       [0.14304357],
       [1.45527351]])]
