Define an AdaGrad class constructed directly from its mathematical formula

In [None]:
import numpy as np

class Adagrad:
    """AdaGrad optimizer with a per-parameter adaptive learning rate.

    On every update the squared gradient is added to a running sum
    ``s_t = s_{t-1} + g_t ** 2`` and the parameters are moved by
    ``theta_t = theta_{t-1} - learning_rate / (sqrt(s_t) + constant) * g_t``.

    Args:
        learning_rate: Base step size that is divided by the accumulated
            gradient magnitude.
        constant: Small value added to the denominator so the first step
            does not divide by zero.
    """

    def __init__(self, learning_rate: float = 0.01, constant: float = 1e-8):
        self.learning_rate = learning_rate
        self.constant = constant

        # s(i,t): running sum of squared gradients; allocated lazily on the
        # first call to calculate_param so it can take the parameters' shape.
        self.accumulated_gradients = None

    def calculate_param(self, params: np.ndarray, gradients: np.ndarray):
        """Apply one AdaGrad step and return the updated parameters.

        Args:
            params: Current parameter values (array or array-like).
            gradients: Gradient of the loss with respect to ``params``
                (array or array-like).

        Returns:
            A new array holding the updated parameters; ``params`` itself is
            not modified.
        """
        params = np.asarray(params)
        gradients = np.asarray(gradients)

        if self.accumulated_gradients is None:
            # An integer/bool accumulator cannot receive float squared
            # gradients in place, so fall back to float64 unless the
            # parameters already carry a floating (or complex) dtype.
            if np.issubdtype(params.dtype, np.inexact):
                acc_dtype = params.dtype
            else:
                acc_dtype = np.float64
            self.accumulated_gradients = np.zeros_like(params, dtype=acc_dtype)

        self.accumulated_gradients += gradients ** 2
        adaptive_lr = self.learning_rate / (
            np.sqrt(self.accumulated_gradients) + self.constant
        )
        return params - adaptive_lr * gradients

    def get_accumulated_gradients(self):
        """Return the running sum of squared gradients, or None before any update."""
        return self.accumulated_gradients

Provide a test routine and visualize its results

In [None]:
if __name__ == "__main__":

    # Demo: minimise f(x, y) = (x - 2)^2 + (y - 3)^2 starting from the origin
    # and print one table row per optimisation step.
    optimizer = Adagrad(learning_rate=0.8)
    params = np.array([0.0, 0.0])

    # Table header
    print("Function: ")
    print("f(x,y) = (x - 2)^2 + (y - 3)^2")
    print("=" * 55)
    print("\nStep | x      | y      | Loss   | Adaptive LR (x,y)")
    print("-" * 55)

    for step in range(10):
        x, y = params

        # Loss and its analytic gradient at the current point
        dx = x - 2
        dy = y - 3
        loss = dx**2 + dy**2
        gradients = np.array([2*dx, 2*dy])

        # Show the per-coordinate learning rate implied by the gradients
        # accumulated so far; nothing has been accumulated before step 0.
        history = optimizer.accumulated_gradients
        if history is None:
            lr_str = "(init,init)"
        else:
            adaptive_lr = optimizer.learning_rate / (np.sqrt(history) + optimizer.constant)
            lr_str = f"({adaptive_lr[0]:.3f},{adaptive_lr[1]:.3f})"

        print(f"{step:4d} | {x:6.3f} | {y:6.3f} | {loss:6.3f} | {lr_str}")

        # The update is applied before the convergence test, so the final
        # report below always reflects the most recent step.
        params = optimizer.calculate_param(params, gradients)

        if loss < 1e-6:
            break

    print(f"\nFinal: x={params[0]:.6f}, y={params[1]:.6f}")
    print(f"Final loss: {((params[0]-2)**2 + (params[1]-3)**2):.10f}")

    print("We could see how learning rate adapt through different gradient level")


Function: 
f(x,y) = (x - 2)^2 + (y - 3)^2

Step | x      | y      | Loss   | Adaptive LR (x,y)
-------------------------------------------------------
   0 |  0.000 |  0.000 | 13.000 | (init,init)
   1 |  0.800 |  0.800 |  6.280 | (0.200,0.133)
   2 |  1.212 |  1.273 |  3.604 | (0.171,0.108)
   3 |  1.468 |  1.610 |  2.216 | (0.162,0.098)
   4 |  1.637 |  1.867 |  1.416 | (0.159,0.092)
   5 |  1.751 |  2.069 |  0.928 | (0.157,0.089)
   6 |  1.829 |  2.232 |  0.619 | (0.156,0.087)
   7 |  1.882 |  2.365 |  0.418 | (0.156,0.086)
   8 |  1.919 |  2.473 |  0.284 | (0.156,0.085)
   9 |  1.944 |  2.563 |  0.194 | (0.156,0.085)

Final: x=1.961620, y=2.636685
Final loss: 0.1334704943
We could see how learning rate adapt through different gradient level


: 