Define the AdaDelta Class base on the mathematic formula

In [51]:
import numpy as np
from EMA_class import EMA 

class AdaDelta:
    def __init__(self, rho=0.95, constant=1e-6):
        self.rho = rho
        self.constant = constant
        self.s = EMA(1 - self.rho, bias_correction=False) 
        self.delta_x = EMA(1 - self.rho, bias_correction=False) 
        
    def update(self, params, gradients):

        delta_x_value = self.delta_x.get_current_value()
        if delta_x_value is None:
            delta_x_value = np.zeros_like(params)
        
    
        s_t = self.s.calculate(gradients**2)
        rescaled_gradient = (np.sqrt(delta_x_value + self.constant) / 
                           np.sqrt(s_t + self.constant)) * gradients
        
        self.delta_x.calculate(rescaled_gradient**2)
        new_params = params - rescaled_gradient
        
        
        return new_params

Provide some function for testing

In [52]:
if __name__ == "__main__":
    print("AdaDelta Optimizer Demo")
    print("\nOptimizing f(x,y) = (x-2)^2 + (y-3)^2")
    print("=" * 55)
    
    # Initial parameters
    params = np.array([0.0, 0.0])
    optimizer = AdaDelta(rho=0.5, constant=1e-6)
    
    print("\nStep | x      | y      | Loss   | RMS[g] (x,y) | RMS[Δx] (x,y)")
    print("-" * 60)
    
    for step in range(10):
        x, y = params[0], params[1]
        
        # Compute loss and gradients for f(x,y) = (x-2)^2 + (y-3)^2
        loss = 4*(x - 2)**2 + 5*(y - 3)**2
        gradients = np.array([8*(x - 2), 10*(y - 3)])

        s_val = optimizer.s.get_current_value()
        deltax_val = optimizer.delta_x.get_current_value()

        if s_val is not None and deltax_val is not None:
            rms_g = np.sqrt(s_val + optimizer.constant)
            rms_dx = np.sqrt(deltax_val + optimizer.constant)
            rms_g_str = f"({rms_g[0]:.3f},{rms_g[1]:.3f})"
            rms_dx_str = f"({rms_dx[0]:.3f},{rms_dx[1]:.3f})"
        else:
            rms_g_str = "(init,init)"
            rms_dx_str = "(init,init)"
        
        print(f"{step:4d} | {x:6.3f} | {y:6.3f} | {loss:6.3f} | {rms_g_str:12} | {rms_dx_str}")
        
        # Update parameters
        params = optimizer.update(params, gradients)
        
        if loss < 1e-8:
            break
    
    print(f"\nFinal: x={params[0]:.6f}, y={params[1]:.6f}")
    print(f"Final loss: {((params[0]-2)**2 + (params[1]-3)**2):.10f}")
    

AdaDelta Optimizer Demo

Optimizing f(x,y) = (x-2)^2 + (y-3)^2

Step | x      | y      | Loss   | RMS[g] (x,y) | RMS[Δx] (x,y)
------------------------------------------------------------
   0 |  0.000 |  0.000 | 61.000 | (init,init)  | (init,init)
   1 |  0.001 |  0.001 | 60.954 | (16.000,30.000) | (0.001,0.001)
   2 |  0.002 |  0.002 | 60.889 | (15.996,29.995) | (0.002,0.002)
   3 |  0.004 |  0.004 | 60.816 | (15.988,29.985) | (0.002,0.002)
   4 |  0.006 |  0.006 | 60.737 | (15.978,29.973) | (0.002,0.002)
   5 |  0.008 |  0.008 | 60.651 | (15.966,29.958) | (0.002,0.002)
   6 |  0.010 |  0.010 | 60.560 | (15.953,29.941) | (0.002,0.002)
   7 |  0.012 |  0.012 | 60.463 | (15.938,29.922) | (0.002,0.002)
   8 |  0.014 |  0.014 | 60.360 | (15.922,29.903) | (0.002,0.002)
   9 |  0.016 |  0.016 | 60.253 | (15.905,29.882) | (0.002,0.002)

Final: x=0.018718, y=0.018729
Final loss: 12.8134556209
