In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.autograd import Variable
import copy

In [11]:
# Problem dimension used by the cells below.
d = 100
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing on the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

def synthetic_example(iters=100_000, lr=1e-3, dim=None, seed=None):
    """Minimise the chained Rosenbrock function with Adam and AMSGrad.

    Both optimisers start from the same random point, drawn uniformly from
    [-2.048, 2.048] in every coordinate, and use a learning rate of
    ``lr / sqrt(t + 1)`` where ``t`` counts completed scheduler steps.
    The initial guess is printed, followed by a progress report every
    10,000 iterations.

    Args:
        iters: Number of optimisation steps taken by each optimiser.
        lr: Base learning rate before the inverse-square-root decay.
        dim: Problem dimension. ``None`` (default) uses the notebook-level
            ``d``.
        seed: Optional seed for the initial guess. ``None`` (default) draws
            from NumPy's global random state, exactly as before.

    Returns:
        Tuple ``(x_Adam, x_AMS)`` of 1-D float64 tensors with
        ``requires_grad=True`` holding the final iterates.

    Raises:
        ValueError: If the problem dimension is smaller than 2 (the
            objective couples consecutive coordinates).
    """
    n = d if dim is None else dim
    if n < 2:
        raise ValueError(f"dimension must be at least 2, got {n}")

    log_every = 10000

    # Objective function: sum_i 100*(x[i+1] - x[i]^2)^2 + (x[i] - 1)^2,
    # evaluated on whole slices instead of one autograd op per coordinate.
    def func(x):
        head, tail = x[:-1], x[1:]
        return torch.sum(100 * (tail - head**2)**2 + (head - 1)**2)

    if seed is None:
        x0 = np.random.uniform(-2.048, 2.048, n)
    else:
        x0 = np.random.default_rng(seed).uniform(-2.048, 2.048, n)

    print("Initial guess: ")
    print(x0)

    # torch.tensor copies x0, so the two iterates are independent leaves.
    x_Adam = torch.tensor(x0, requires_grad=True)
    x_AMS = torch.tensor(x0, requires_grad=True)

    optimizer_Adam = torch.optim.Adam([x_Adam], lr=lr, betas=(0.9, 0.999), eps=1e-08, amsgrad=False)
    optimizer_AMSGrad = torch.optim.Adam([x_AMS], lr=lr, betas=(0.9, 0.999), eps=1e-08, amsgrad=True)

    # Learning rate schedulers for Adam and AMSGrad. The `verbose` argument
    # is deprecated in recent PyTorch releases; omitting it keeps the
    # default (silent) behaviour.
    def inv_sqrt_decay(step):
        return float(1 / np.sqrt(step + 1))

    scheduler_Adam = torch.optim.lr_scheduler.LambdaLR(optimizer_Adam, lr_lambda=inv_sqrt_decay)
    scheduler_AMS = torch.optim.lr_scheduler.LambdaLR(optimizer_AMSGrad, lr_lambda=inv_sqrt_decay)

    for step in range(1, int(iters) + 1):
        loss_Adam = func(x_Adam)
        loss_AMS = func(x_AMS)

        if step % log_every == 0:
            # Report the losses just computed rather than re-evaluating
            # the objective; the iterates have not been updated yet.
            print(f"Iteration: {step}")
            print("---------------------------")
            print(f"f(x_Adam) = {loss_Adam.item()}")
            print(f"f(x_AMS)  = {loss_AMS.item()}")
            print()

        optimizer_Adam.zero_grad()
        loss_Adam.backward()
        optimizer_Adam.step()
        scheduler_Adam.step()

        optimizer_AMSGrad.zero_grad()
        loss_AMS.backward()
        optimizer_AMSGrad.step()
        scheduler_AMS.step()

    return x_Adam, x_AMS

In [12]:
# Reference point for the error norms: the all-ones vector of length d.
x_true = torch.ones(d, dtype=torch.float64)

# Run both optimisers with the settings below.
iters, lr = 50000, 1e-3
x_Adam, x_AMS = synthetic_example(iters=iters, lr=lr)

# Euclidean distance of each final iterate from x_true.
err_Adam = torch.linalg.vector_norm(x_Adam - x_true)
err_AMS = torch.linalg.vector_norm(x_AMS - x_true)
print(f"2-norm between Adam x and true x:    {err_Adam}")
print(f"2-norm between AMSGrad x and true x: {err_AMS}")

Initial guess: 
[-1.21543448 -1.83352503  1.88962754  0.38736266 -0.63890009  1.67301002
  1.43122575  0.9633221  -1.53772435  1.69497534 -0.07942481  1.24619746
 -0.06625525  0.70825886 -1.72496726 -1.05068075  0.71528666 -0.78439539
 -1.13651196  0.95651966  0.49545871  1.47285548  0.82852624 -1.94276572
  0.68108261 -0.93660826  0.18271864  1.1647551  -1.79214006 -1.96330496
 -1.90019148 -1.0489831  -0.19136372 -1.83809797 -1.28275548 -0.12809114
  1.68608313  0.43220145  1.18236565  0.50168503 -0.93107045 -1.61207924
 -0.46878759  0.97444819  1.62343849 -1.46974484  1.67211728  0.58763585
  0.91446441 -1.98483777 -1.35799843 -0.02987472  0.73625436  0.97984658
 -1.07628695 -1.56495946 -0.20759028 -0.01199815 -0.31392419  1.27812989
 -1.19890956  0.36165421  0.48992165 -1.48924547  0.44915513  0.73184001
 -1.11694419  0.72723694  0.19307259 -0.92100698  0.49966358  0.78266792
  1.81341711  1.53661033 -1.03762018  1.14328076  0.15958244 -1.93770351
 -1.59791338 -0.12430738 -0.2478769

In [10]:
# Largest absolute coordinate-wise deviation of each final iterate from x_true.
sup_norm = float('inf')
sup_err_Adam = torch.linalg.vector_norm(x_Adam - x_true, ord=sup_norm)
sup_err_AMS = torch.linalg.vector_norm(x_AMS - x_true, ord=sup_norm)
print(f"Infinity-norm between Adam x and true x:    {sup_err_Adam}")
print(f"Infinity-norm between AMSGrad x and true x: {sup_err_AMS}")
print()

# Show both iterates, separated by a blank line.
print(x_Adam, x_AMS, sep="\n\n")

Infinity-norm between Adam x and true x:    2.9466385363488605
Infinity-norm between AMSGrad x and true x: 2.946638536260939

tensor([-1.6279,  0.9314,  1.3776, -1.8301,  1.5839,  1.9376,  0.5168,  0.5733,
        -1.5466, -0.1653, -1.3776,  0.5230, -1.0613, -1.0587, -0.2595,  1.8116,
        -1.0637, -0.3622,  0.4930,  1.4301,  0.1768,  0.5528, -1.2596,  1.4856,
         0.2897,  0.0982,  0.4462,  1.8783,  1.4280, -1.8431, -1.0537, -1.6630,
         0.1492, -0.9179, -0.0432,  0.7669, -0.8113, -0.0878, -0.1132,  1.7207,
        -0.9251, -1.7720, -1.1809,  0.7866,  0.0638,  1.1108, -1.1978,  1.1405,
        -0.0583, -1.8863,  1.5367, -1.3935,  0.3069, -0.3668, -1.7436, -0.3047,
         1.9601, -1.8675, -0.8760,  1.6220,  0.1330,  0.5641, -0.7270,  0.5134,
        -1.2977,  0.5716, -0.2925, -1.2829, -1.0185, -1.1671, -1.2500, -1.9312,
         0.6983, -1.6910,  1.1969,  1.4270, -0.4979, -1.3388, -1.8272,  1.5727,
         1.9107, -1.9466,  1.7700, -1.9163, -1.4286, -0.0072,  1.4610,  1.