# This notebook is based on the paper: "Global-Convergence-Nonconvex-Optimization". 

The aim of this project is to find the global solution to  
\begin{equation}
  \min_{x \in \mathbb{R}^n} f(x).
\end{equation}

To obtain a global minimizer, the main idea is to minimize the Moreau envelope instead, which "convexifies" the original function. To make the Moreau envelope tractable, we use connections to Hamilton-Jacobi equations via the Cole-Hopf and Hopf-Lax formulas to efficiently compute the gradients of the Moreau envelope.

In [None]:

import matplotlib.pyplot as plt
import numpy as np
import torch
import time

from test_functions import Griewank, AlpineN1, Drop_Wave, Levy, Rastrigin, Ackley
from test_functions import Griewank_numpy, AlpineN1_numpy, Drop_Wave_numpy, Levy_numpy, Rastrigin_numpy, Ackley_numpy

from test_functions import MultiMinimaFunc, MultiMinimaAbsFunc
from test_functions import MultiMinimaFunc_numpy, MultiMinimaAbsFunc_numpy

seed   = 30
# Seed both random number generators imported by this notebook (torch and NumPy)
# so that repeated Restart & Run All executions start from the same RNG state.
torch.manual_seed(seed)
np.random.seed(seed)

from hj_mad_cd import HJ_MAD, HJ_MAD_CoordinateDescent, HJ_MAD_CoordinateDescent_parallel

### Set up hyperparameters for HJ-MAD for different functions

In [None]:
# Baseline hyperparameters; the function-specific configuration further down in this cell
# may override any of them.
avg_trials = 1        # number of repeated runs the timing / iteration averages are taken over
sat_tol    = 1e-10    # forwarded to the coordinate-descent solvers as `saturate_tol`
delta, tol = 5e-3, 5e-2            # previously tried: tol = 7e-4
max_iters  = 1000                  # previously tried: int(1e5)
momentum, rescale0 = 0.64, 0.5

# # def f(x):
# #   return MultiMinimaFunc(x)
# # def f_numpy(x):
# #   return MultiMinimaFunc_numpy(x)
# # ax_bry  = 30
# # f_name  = 'MultiMinimaFunc'
# # dim = 1; int_samples = int(100);
# # x0      = -30*torch.ones(dim, dtype=torch.double)
# # x_true  = -1.51034569*torch.ones(dim, dtype=torch.double)

# # delta         = 0.1
# # max_iters     = int(100)
# # tol           = 1e-3
# # momentum = 0.5

# # theta         = 1.0 # note: larger theta => easier to increase time
# # beta          = 0.0
# # t_min     = 1e-1
# # t_max     = 300
# # t_init    = 220
# # alpha     = 0.1
# # eta_min = 0.99
# # eta_plus = 5.0
# # eta_vec = [eta_min, eta_plus]


# # # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Griewank(x)
# def f_numpy(x):
#   return Griewank_numpy(x)
# ax_bry  = 20
# f_name  = 'Griewank'
# dim = 2; int_samples = int(10000);
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)
# rescale0      = 1
# delta         = 1e-6
# max_iters     = int(1e4)
# tol           = 1e-4
# momentum = 0.0

# theta         = 1.0 # note: larger theta => easier to increase time
# beta          = 0.9
# t_min     = 1e-2/delta
# t_max     = int(2)/delta
# t_init    = 1e-2/delta
# alpha     = 5e-2
# eta_min = 0.99
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]

# # # ----------------------------------------------------------------------------------------------------

# --- Active configuration: Griewank in 20 dimensions -------------------------------------
f_name  = 'Griewank'
ax_bry  = 20                  # half-width of the square window used by the 2D plots
dim     = 20
int_samples = int(1e6)        # same value as int(1000000)


def f(x):
  """Objective handed to the HJ-MAD solvers: delegates to `Griewank` from test_functions."""
  return Griewank(x)


def f_numpy(x):
  """Counterpart of `f` that delegates to `Griewank_numpy` from test_functions."""
  return Griewank_numpy(x)


x_true = torch.zeros(dim, dtype=torch.double)        # reference minimizer passed to the solvers
x0     = 15 * torch.ones(dim, dtype=torch.double)    # initial guess

# Problem-specific overrides of the baseline hyperparameters
rescale0, delta = 2**(-15), 1e-6
max_iters, tol  = int(1e5), 5e-2
momentum        = 0.64

# Time-stepping parameters (t_vec is later assembled as [t_init, t_min, t_max])
theta, beta = 1.0, 0.9        # note: larger theta => easier to increase time
alpha       = 5e-2
t_init = t_min = 1e-1/delta
t_max  = int(2e1)/delta
eta_min, eta_plus = 0.99, 5.0
eta_vec = [eta_min, eta_plus]

# # ----------------------------------------------------------------------------------------------------
# # def f(x):
# #   return Griewank(x)
# # def f_numpy(x):
# #   return Griewank_numpy(x)
# # f_name  = 'Griewank'
# # dim = 200; int_samples = int(100); # this one has higher dimension
# # x0      = 10*torch.ones(dim, dtype=torch.double)
# # x_true  = torch.zeros(dim, dtype=torch.double)
# # rescale0      = 2**(-5)
# # sat_tol = 1e-9 #7e-7 or 7e-10 (not sure) for 100 dims, 7e-8 for less than 100 dims
# # theta     = 1.0 # note: larger theta => easier to increase time
# # beta      = 0.9
# # # momentum  = 0.5
# # # beta      = 0.0
# # momentum  = 0.0
# # t_min     = 2e1
# # t_max     = 1e5
# # t_init    = 2e1
# # alpha     = 1.2
# # eta_min = 0.5
# # eta_plus = 5.0
# # eta_vec = [eta_min, eta_plus]

# # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Griewank(x)
# def f_numpy(x):
#   return Griewank_numpy(x)
# f_name  = 'Griewank'
# dim = 500; int_samples = int(100); 
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)
# rescale0      = 2**(-6)#256
# sat_tol = 1e-9 #7e-7 or 7e-10 (not sure) for 100 dims, 7e-8 for less than 100 dims
# theta     = 1.0 # note: larger theta => easier to increase time
# beta      = 0.0
# momentum  = 0.0
# t_min     = 2e1
# t_max     = 1e5
# t_init    = 2e1
# alpha     = 1.2
# eta_min = 0.5
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]

# # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Drop_Wave(x)
# def f_numpy(x):
#   return Drop_Wave_numpy(x)
# ax_bry  = 20
# max_iters     = 1000
# f_name  = 'Drop_Wave'
# rescale0      = 1
# dim = 2; int_samples = int(10000)
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)

# momentum      = 0.5
# delta         = 1e-4
# theta         = 1.0 # note: larger theta => easier to increase time
# beta          = 0.8
# t_min     = 1e-6
# t_max     = int(2e1)/delta
# t_init    = 1e3
# alpha     = 0.5
# eta_min = 0.5
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]

# # # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return AlpineN1(x)
# def f_numpy(x):
#   return AlpineN1_numpy(x)
# ax_bry  = 20
# f_name  = 'AlpineN1'

# dim = 2; int_samples = int(100000)# int(10000);
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)

# momentum      = 0.45

# theta         = 1.0 # note: larger theta => easier to increase time
# beta          = 0.0
# # t_max     = int(2e1)/delta
# # t_init    = 1e-3
# # t_min     = t_init
# t_max     = int(2e3)/delta
# t_init    = 1e-3
# t_min     = 1e-4
# alpha     = 0.25
# eta_min = 0.6
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]

# # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Levy(x)
# def f_numpy(x):
#   return Levy_numpy(x)
# ax_bry  = 20
# f_name  = 'Levy'

# # Set the number of trials to run
# rescale0 = 2**(-7)
# tol           = 5e-2
# sat_tol = 1e-12
# max_iters     = 1000

# dim = 2; int_samples = int(500000)
# x0      = -15*torch.ones(dim, dtype=torch.double)
# x_true  = torch.ones(dim, dtype=torch.double)

# theta         = 0.9 # note: larger theta => easier to increase time
# beta          = 0.5

# t_max     = int(2e5)/delta
# t_init    = 1e6
# t_min     = 1e2
# alpha     = 0.25
# eta_min = 0.6
# eta_plus = 1.5
# eta_vec = [eta_min, eta_plus]

# # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Rastrigin(x)
# def f_numpy(x):
#   return Rastrigin_numpy(x)
# ax_bry  = 20
# f_name  = 'Rastrigin'
# delta=5e-3
# dim = 2; int_samples = int(10000);
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)
# momentum      = 0.25
# theta         = 1.0 # note: larger theta => easier to increase time
# beta          = 0.5
# t_max     = int(2e1)/delta
# t_init    = 5.0
# t_min     = t_init
# alpha     = 0.5
# eta_min = 0.5
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]
# tol=2e-10

# # # ----------------------------------------------------------------------------------------------------

# def f(x):
#   return Ackley(x)
# def f_numpy(x):
#   return Ackley_numpy(x)
# ax_bry  = 20
# f_name  = 'Ackley'

# dim = 2; int_samples = int(100000);
# x0      = 10*torch.ones(dim, dtype=torch.double)
# x_true  = torch.zeros(dim, dtype=torch.double)
# momentum      = 0.25
# theta         = 1.0 # note: larger theta => easier to increase time
# beta          = 0.9
# t_max     = int(2e1)/delta
# t_init    = 1e-3
# t_min     = t_init
# alpha     = 5e-1
# eta_min = 0.5
# eta_plus = 5.0
# eta_vec = [eta_min, eta_plus]

Set up the HJ-MAD coordinate-descent solvers, then run HJ-MAD and average its results over `avg_trials` trials

In [None]:
# Note: Under this transformation the standard deviation of the Gaussian is 1, hence we have more control over t and delta

def _make_cd_solver(integration_method, **overrides):
    """Build an HJ_MAD_CoordinateDescent solver for the current objective `f`.

    Every solver in this cell shares the same baseline keyword arguments, read from
    the notebook-level configuration at call time; `overrides` replaces individual
    entries for one solver.

    Args:
        integration_method: value passed as `integration_method` ('GHQ', 'MC' or 'NMC'
            are the values used in this cell).
        **overrides: keyword arguments of HJ_MAD_CoordinateDescent that differ from the
            baseline (e.g. delta, int_samples, t_vec, momentum, saturate_tol, max_iters).

    Returns:
        The constructed HJ_MAD_CoordinateDescent instance.
    """
    solver_kwargs = dict(
        delta=delta, int_samples=int_samples, max_iters=max_iters, tol=tol, alpha=alpha,
        beta=beta, eta_vec=eta_vec, theta=theta, fixed_time=False, plot=False, verbose=True,
        rescale0=rescale0, momentum=0.0, saturate_tol=sat_tol,
        integration_method=integration_method,
    )
    if 't_vec' not in overrides:
        # Only touch the global time parameters when the caller did not give explicit ones.
        solver_kwargs['t_vec'] = [t_init, t_min, t_max]
    solver_kwargs.update(overrides)
    return HJ_MAD_CoordinateDescent(f, x_true, **solver_kwargs)


# Create the HJ_MAD_CoordinateDescent instances for the selected test function.
# NOTE: branches that assign `tol` rebind the notebook-level value, so the new tolerance
# also applies to every cell executed after this one.
if f_name == 'Ackley':
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', delta=delta*1e-10, int_samples=1000)

elif f_name == 'Griewank' and dim == 500:
    tol = 5e-2
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', delta=1e-8, int_samples=150, saturate_tol=1e-10)
    hj_mad_cd_MC = _make_cd_solver('MC', delta=1e-2, int_samples=500,
                                   t_vec=[t_init, t_min*1e-2, t_max], momentum=momentum)
    # Disabled third solver, kept for reference (the draft passed integration_method="MC"):
    # hj_mad_cd_NMC = _make_cd_solver('MC', delta=1e-6, int_samples=1000,
    #                                 t_vec=[t_init*1e4, t_min, t_max*1e5], momentum=momentum)

elif f_name == 'Rastrigin':
    tol = 1e-4
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', int_samples=10000)
    hj_mad_cd_MC = _make_cd_solver('MC', int_samples=5000, saturate_tol=sat_tol*1e5)
    hj_mad_cd_NMC = _make_cd_solver('NMC', delta=5e-5, int_samples=5000, t_vec=[20, 1e-5, 1e5],
                                    momentum=0.1, saturate_tol=sat_tol*1e5)

elif f_name == 'Levy': # All have been tuned
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', delta=1e-13, int_samples=80, t_vec=[0.5, 0.5, 1],
                                    momentum=0.6)
    hj_mad_cd_MC = _make_cd_solver('MC', int_samples=100000, max_iters=10, momentum=momentum)
    hj_mad_cd_NMC = _make_cd_solver('NMC', delta=1e-2, int_samples=10, t_vec=[10, 1e-2, 1000],
                                    max_iters=10, momentum=0.5)

elif f_name == 'AlpineN1': # None are tuned
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', delta=5e-15, int_samples=1000, t_vec=[50, 10, 60])
    hj_mad_cd_MC = _make_cd_solver('MC')
    hj_mad_cd_NMC = _make_cd_solver('NMC', int_samples=50)

elif f_name == 'Drop_Wave':
    tol = 5e-2
    hj_mad_cd_GHQ = _make_cd_solver('GHQ', delta=1e-21, int_samples=1000, t_vec=[1e6, 1e4, 1e6],
                                    momentum=0.4)

else:
    # Any other configuration (including the active 20-dimensional Griewank setup)
    # only gets the GHQ solver, built entirely from the baseline settings.
    hj_mad_cd_GHQ = _make_cd_solver('GHQ')


In [None]:
# # Initialize accumulators for averages
# avg_func_evals = 0
# sum_elapsed_time = 0
# total_iterations = 0  # To store total iterations across trials

# # Run the specified number of trials
# for _ in range(avg_trials):
#     start_time = time.time()  # Record the start time

#     # Execute the HJ_MAD_CD algorithm and retrieve results
#     x_opt_cd_GHQ, coordinate_wise_xk_hist_GHQ, xk_hist_cd_GHQ, xk_error_hist_cd_GHQ, fk_hist_cd_GHQ = hj_mad_cd_GHQ.run(x0, num_cycles=20)

#     elapsed_time = time.time() - start_time  # Calculate elapsed time
#     sum_elapsed_time += elapsed_time  # Accumulate elapsed time

#     total_iterations += len(xk_error_hist_cd_GHQ)  # Add iterations used in this trial
#     avg_func_evals += len(xk_error_hist_cd_GHQ) * int_samples  # Update average function evaluations

#     print(f"Elapsed time: {elapsed_time:.4f} seconds")  # Print elapsed time for the current trial


# # Compute averages after all trials
# avg_func_evals /= avg_trials  # Average function evaluations per trial
# average_iterations = total_iterations / avg_trials  # Average number of iterations per trial

# # Output results
# # print('\n\n avg_func_evals = ', avg_func_evals)
# print(f"Average iterations before convergence/stopping: {average_iterations:.2f}")
# print(f"Average elapsed time: {sum_elapsed_time / avg_trials:.4f} seconds")

In [None]:


# # Initialize accumulators for averages
# avg_func_evals = 0
# sum_elapsed_time = 0
# total_iterations = 0  # To store total iterations across trials

# hj_mad_cd_MC.saturate_tol = 1e-4
# hj_mad_cd_MC.delta = 1e-3

# # #hj_mad_cd_MC.theta     = 0.99 # note: larger theta => easier to increase time
# # beta      = 0.0
# # momentum  = 0.0
# # t_min     = 2e1
# # t_max     = 1e5
# # t_init    = 2e1
# # alpha     = 1.2
# # eta_min = 0.5
# # eta_plus = 5.0

# # Run the specified number of trials
# for _ in range(avg_trials):
#     start_time = time.time()  # Record the start time

#     # Execute the HJ_MAD_CD algorithm and retrieve results
#     x_opt_cd_MC, coordinate_wise_xk_hist_MC, xk_hist_cd_MC, xk_error_hist_cd_MC, fk_hist_cd_MC = hj_mad_cd_MC.run(x0, num_cycles=2)

#     elapsed_time = time.time() - start_time  # Calculate elapsed time
#     sum_elapsed_time += elapsed_time  # Accumulate elapsed time

#     total_iterations += len(xk_error_hist_cd_MC)  # Add iterations used in this trial
#     avg_func_evals += len(xk_error_hist_cd_MC) * int_samples  # Update average function evaluations

#     print(f"Elapsed time: {elapsed_time:.4f} seconds")  # Print elapsed time for the current trial


# # Compute averages after all trials
# avg_func_evals /= avg_trials  # Average function evaluations per trial
# average_iterations = total_iterations / avg_trials  # Average number of iterations per trial

# # Output results
# # print('\n\n avg_func_evals = ', avg_func_evals)
# print(f"Average iterations before convergence/stopping: {average_iterations:.2f}")
# print(f"Average elapsed time: {sum_elapsed_time / avg_trials:.4f} seconds")

In [None]:


# # Initialize accumulators for averages
# avg_func_evals = 0
# sum_elapsed_time = 0
# total_iterations = 0  # To store total iterations across trials

# # Run the specified number of trials
# for _ in range(avg_trials):
#     start_time = time.time()  # Record the start time

#     # Execute the HJ_MAD_CD algorithm and retrieve results
#     x_opt_cd_NMC, coordinate_wise_xk_hist_NMC, xk_hist_cd_NMC, xk_error_hist_cd_NMC, fk_hist_cd_NMC = hj_mad_cd_NMC.run(x0, num_cycles=20)

#     elapsed_time = time.time() - start_time  # Calculate elapsed time
#     sum_elapsed_time += elapsed_time  # Accumulate elapsed time

#     total_iterations += len(xk_error_hist_cd_NMC)  # Add iterations used in this trial
#     avg_func_evals += len(xk_error_hist_cd_NMC) * int_samples  # Update average function evaluations

#     print(f"Elapsed time: {elapsed_time:.4f} seconds")  # Print elapsed time for the current trial


# # Compute averages after all trials
# avg_func_evals /= avg_trials  # Average function evaluations per trial
# average_iterations = total_iterations / avg_trials  # Average number of iterations per trial

# # Output results
# # print('\n\n avg_func_evals = ', avg_func_evals)
# print(f"Average iterations before convergence/stopping: {average_iterations:.2f}")
# print(f"Average elapsed time: {sum_elapsed_time / avg_trials:.4f} seconds")

In [None]:
# # Create an instance of HJ_MAD_CoordinateDescent
# hj_mad_cd = HJ_MAD_CoordinateDescent_parallel(f, x_true, delta=delta,
#                     int_samples=int_samples, t_vec=[t_init, t_min, t_max], max_iters=max_iters, tol=tol, alpha=alpha,
#                     beta=beta, eta_vec = eta_vec, theta=theta, fixed_time=False,plot=True, verbose=False,rescale0=rescale0,
#                     momentum=momentum,saturate_tol=sat_tol,integration_method='MC')

# # Initialize accumulators for averages
# avg_func_evals = 0
# sum_elapsed_time = 0
# total_iterations = 0  # To store total iterations across trials

# # Run the specified number of trials
# for _ in range(avg_trials):
#     start_time = time.time()  # Record the start time

#     # Execute the HJ_MAD_CD algorithm and retrieve results
#     x_opt_cd_para, coordinate_wise_xk_hist_para, xk_hist_cd_para, xk_error_hist_cd_para, fk_hist_cd_para = hj_mad_cd.run(x0, num_cycles=20)

#     elapsed_time = time.time() - start_time  # Calculate elapsed time
#     sum_elapsed_time += elapsed_time  # Accumulate elapsed time

#     total_iterations += len(xk_error_hist_cd_para)  # Add iterations used in this trial
#     avg_func_evals += len(xk_error_hist_cd_para) * int_samples  # Update average function evaluations

#     print(f"Elapsed time: {elapsed_time:.4f} seconds")  # Print elapsed time for the current trial


# # Compute averages after all trials
# avg_func_evals /= avg_trials  # Average function evaluations per trial
# average_iterations = total_iterations / avg_trials  # Average number of iterations per trial

# # Output results
# print('\n\n avg_func_evals = ', avg_func_evals)
# print(f"Average iterations before convergence/stopping: {average_iterations:.2f}")
# print(f"Average elapsed time: {sum_elapsed_time / avg_trials:.4f} seconds")

In [None]:
# Sample count for the full-vector HJ-MAD run. It is passed to the solver and reused below
# when tallying function evaluations, so change it only here to keep the two in sync.
int_samples = int(1e6)
# rescale0=1
# delta         = 5e-7
# t_min     = 2e1
# t_max     = 1e6
# t_init    = 2e3#2e1
# delta         = 5e-2
# t_min     = 2e1
# t_max     = 1e5
# t_init    = 2e3#2e1
# theta     = 0.7

# NOTE(review): momentum is hard-coded to 0.5 here instead of using the configured
# `momentum` value -- confirm that this is intentional.
HJ_MAD_alg = HJ_MAD(f, x_true, delta=delta,
                    int_samples=int_samples, t_vec=[t_init, t_min, t_max], max_iters=max_iters, tol=tol, alpha=alpha,
                    beta=beta, eta_vec = eta_vec, theta=theta, fixed_time=False, verbose=True,rescale0=rescale0,momentum=0.5,
                    integration_method='MC')

# Accumulators for the per-trial averages
avg_func_evals = 0
sum_elapsed_time = 0
total_iterations = 0  # total iterations across all trials

# Run the specified number of trials. Each trial overwrites the result variables, so the
# histories available to the plotting cells are those of the last trial.
for _ in range(avg_trials):
    #x0 = 10*torch.ones(dim, dtype=torch.double)
    # perf_counter is monotonic and high-resolution, unlike the wall clock returned by
    # time.time(), which can jump when the system clock is adjusted.
    start_time = time.perf_counter()

    # Execute the HJ_MAD algorithm and retrieve results
    (x_opt_MAD, xk_hist_MAD, tk_hist_MAD, xk_error_hist_MAD,
     rel_grad_uk_norm_hist_MAD, fk_hist_MAD) = HJ_MAD_alg.run(x0)

    elapsed_time = time.perf_counter() - start_time
    sum_elapsed_time += elapsed_time

    # The length of the error history is used as the number of iterations of this trial.
    trial_iterations = len(xk_error_hist_MAD)
    total_iterations += trial_iterations
    avg_func_evals += trial_iterations * int_samples

    print(f"Elapsed time: {elapsed_time:.4f} seconds")  # elapsed time of the current trial

# Turn the accumulated totals into per-trial averages
avg_func_evals /= avg_trials
average_iterations = total_iterations / avg_trials

# Output results
print('\n\n avg_func_evals = ', avg_func_evals)
print(f"Average iterations before convergence: {average_iterations:.2f}")
print(f"Average elapsed time: {sum_elapsed_time / avg_trials:.4f} seconds")

### Generate Convergence Histories and Optimization Path Plots

In [None]:

title_fontsize = 22
fontsize       = 18

# Matplotlib 3.6 renamed 'seaborn-whitegrid' to 'seaborn-v0_8-whitegrid' and later releases
# dropped the old name, so use whichever one this installation provides.
grid_style = next((style_name for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
                   if style_name in plt.style.available), 'default')
plt.style.use(grid_style)

fig1 = plt.figure()
ax = plt.axes()

# (name of the variable holding the error history, line colour, legend label)
error_curves = [
    ('xk_error_hist_MAD',    'purple', 'HJ-MAD(MC)'),
    # ('xk_error_hist_cd_NMC', 'red',    'HJ-MAD-NMC'),
    ('xk_error_hist_cd_GHQ', 'blue',   'HJ-MAD-CD-GHQ'),
    ('xk_error_hist_cd_MC',  'green',  'HJ-MAD-CD-MC'),
]
for history_name, curve_color, curve_label in error_curves:
    # A history only exists if the cell that runs the matching solver was executed;
    # skip missing ones instead of aborting the whole figure with a NameError.
    history = globals().get(history_name)
    if history is None:
        print(f"Skipping {curve_label}: '{history_name}' is not defined (run the matching solver cell first).")
        continue
    ax.semilogy(history, color=curve_color, linewidth=3, label=curve_label)
# ax.semilogy(xk_error_hist_EGD[0:len(xk_error_hist_GD)], 'm-', linewidth=3)
#ax.semilogy(xk_error_hist_GD[0:len(xk_error_hist_GD)], 'g-', linewidth=3)

ax.set_title(f'Dims={dim},Func={f_name},\n Adaptive Rescale Factor', fontsize=title_fontsize)
ax.set_xlabel("Iterations", fontsize=title_fontsize)
ax.set_ylabel("Errors", fontsize=title_fontsize)
ax.legend(fontsize=fontsize)
# title_str = 'Relative Errors'
# ax.set_title(title_str, fontsize=title_fontsize)
ax.tick_params(labelsize=fontsize, which='both', direction='in')

# save_str = 'griewank_error_hist.png'
# fig1.savefig(save_str, dpi=300 , bbox_inches="tight", pad_inches=0.0)

In [None]:
# Matplotlib 3.6 renamed 'seaborn-whitegrid' to 'seaborn-v0_8-whitegrid' and later releases
# dropped the old name, so use whichever one this installation provides.
grid_style = next((style_name for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
                   if style_name in plt.style.available), 'default')
plt.style.use(grid_style)

fig1 = plt.figure()
ax = plt.axes()

# (name of the variable holding the objective history, line colour, legend label).
# Each curve gets its own colour; HJ-MAD and HJ-MAD-NMC were both drawn in red before.
objective_curves = [
    ('fk_hist_MAD',    'purple', 'HJ-MAD'),
    ('fk_hist_cd_NMC', 'red',    'HJ-MAD-NMC'),
    ('fk_hist_cd_GHQ', 'blue',   'HJ-MAD-CD-GHQ'),
    ('fk_hist_cd_MC',  'green',  'HJ-MAD-CD-MC'),
]
for history_name, curve_color, curve_label in objective_curves:
    # A history only exists if the cell that runs the matching solver was executed;
    # skip missing ones instead of aborting the whole figure with a NameError.
    history = globals().get(history_name)
    if history is None:
        print(f"Skipping {curve_label}: '{history_name}' is not defined (run the matching solver cell first).")
        continue
    ax.semilogy(history, color=curve_color, linewidth=3, label=curve_label)

ax.set_xlabel("Iterations", fontsize=title_fontsize)
ax.set_ylabel("fk", fontsize=title_fontsize)
ax.legend(fontsize=fontsize)
title_str = 'Objective Function Values'
ax.set_title(title_str, fontsize=title_fontsize)
ax.tick_params(labelsize=fontsize, which='both', direction='in')

# save_str = 'griewank_func_hist.png'
# fig1.savefig(save_str, dpi=300 , bbox_inches="tight", pad_inches=0.0)

## 2D Plots


In [None]:
if dim == 2:

  # Start point and reference minimizer used for the markers, as 1-D tensors.
  if f_name == 'Levy':
    x0      = -15*torch.ones(dim, dtype=torch.double)
    x_true  = torch.ones(dim, dtype=torch.double)
  else:
    x0      = 10*torch.ones(dim, dtype=torch.double)
    x_true  = torch.zeros(dim, dtype=torch.double)

  # Regular grid over [-ax_bry, ax_bry]^2 on which the objective is sampled.
  surface_plot_resolution = 50
  x = np.linspace(-ax_bry, ax_bry, surface_plot_resolution)
  y = np.linspace(-ax_bry, ax_bry, surface_plot_resolution)

  X, Y = np.meshgrid(x, y)
  n_features = 2

  t_final = t_max

  Z                 = np.zeros(X.shape)
  Z_MAD             = np.zeros(X.shape)

  # Evaluate f one grid point at a time; each point is passed as a (1, 2) float tensor.
  for i in range(X.shape[0]):
    for j in range(X.shape[1]):
      Z[i,j] = f(torch.FloatTensor([X[i,j],Y[i,j]]).view(1,n_features))

  fig, ax = plt.subplots(1, 1)
  im = ax.contourf(X, Y, Z, 20, cmap=plt.get_cmap('gray'))
  plt.style.use('default')

  title_fontsize = 22
  fontsize       = 15

  # (name of the variable holding the iterate history, line colour, legend label).
  # Every path gets its own colour; GHQ and NMC were both drawn in magenta before.
  path_specs = [
      ('xk_hist_cd_MC',  'blue',    'HJ-MAD-CD-MC'),
      ('xk_hist_cd_GHQ', 'magenta', 'HJ-MAD-CD-GHQ'),
      ('xk_hist_cd_NMC', 'orange',  'HJ-MAD-CD-NMC'),
  ]
  for history_name, path_color, path_label in path_specs:
    # A history only exists if the cell that runs the matching solver was executed;
    # skip missing ones instead of aborting the whole figure with a NameError.
    iterates = globals().get(history_name)
    if iterates is None:
      print(f"Skipping {path_label}: '{history_name}' is not defined (run the matching solver cell first).")
      continue
    path = np.vstack(iterates)  # stack once and reuse for both coordinates
    ax.plot(path[:,0], path[:,1], '-o', color=path_color, label=path_label)

  ax.plot(x_true[0], x_true[1], 'rx', markeredgewidth=3, markersize=12,label='global min')
  ax.plot(x0[0], x0[1], 'kx', markeredgewidth=3, markersize=12,label='initial guess')

  ax.legend(fontsize=12, facecolor='white', markerfirst=False, loc='lower right')

  ax.set_xlim(-ax_bry,ax_bry)
  cb = plt.colorbar(im)

  # save_loc = 'optimization_paths.png'
  # plt.savefig(save_loc,bbox_inches='tight')
  plt.show()

## Interactive 3D Surface Plot (2D problems only)


In [None]:
if dim == 2:
    from mpl_toolkits.mplot3d import Axes3D
    import matplotlib.pyplot as plt
    import numpy as np

    # Grid for the 3D surface plots.
    ax_bry_3D_plot = 20
    surface_plot_resolution = 50
    x = np.linspace(-ax_bry_3D_plot, ax_bry_3D_plot, surface_plot_resolution)
    y = np.linspace(-ax_bry_3D_plot, ax_bry_3D_plot, surface_plot_resolution)

    X, Y = np.meshgrid(x, y)
    # Convert PyTorch tensors to NumPy
    # xk_hist_MAD_np = xk_hist_MAD.numpy()
    # coordinate_wise_xk_hist_np = np.vstack(xk_hist_cd)

    # # Ensure z_values are scalars
    # HJ_MAD_f_values = np.array([
    #     f(torch.FloatTensor([[xk_hist_MAD_np[i, 0], xk_hist_MAD_np[i, 1]]])).item()
    #     for i in range(len(xk_hist_MAD_np))
    # ])

    # Objective value at every iterate of the HJ-MAD-CD (MC) path. The history is stacked
    # once up front instead of being rebuilt for every iterate inside the comprehension.
    cd_mc_path = np.vstack(xk_hist_cd_MC)
    HJ_MAD_CD_f_values = np.array([
        f(torch.FloatTensor([[first_coord, second_coord]])).item()
        for first_coord, second_coord in cd_mc_path[:, :2]
    ])

    # NOTE: the two blocks below rebind the notebook-level x_true / x0 to shape (1, 2);
    # the following plotting cells index them as x_true[0, 0] and x0[0, 0].

    # Global minimum: add a leading batch dimension before evaluating f
    if x_true.dim() == 1:
        x_true = x_true.unsqueeze(0)
    global_min_f = f(x_true).item()

    # Initial guess point: same treatment
    if x0.dim() == 1:
        x0 = x0.unsqueeze(0)
    f_initial = f(x0).item()


In [None]:
if dim == 2:
    import plotly.graph_objects as go

    def _single_point_trace(point, height, marker_color, trace_name):
        """Return a Scatter3d trace drawing one 'x' marker for a (1, 2)-shaped point at `height`."""
        return go.Scatter3d(
            x=[point[0, 0].item()],
            y=[point[0, 1].item()],
            z=[height],
            mode='markers',
            marker=dict(size=8, color=marker_color, symbol='x'),
            name=trace_name,
        )

    # Objective surface sampled on the (X, Y) grid
    surface_trace = go.Surface(z=Z, x=X, y=Y, colorscale='Viridis', showscale=True, name='Surface')

    # Disabled HJ-MAD path, kept for reference:
    # HJ_MAD_trace = go.Scatter3d(
    #     x=xk_hist_MAD_np[:, 0], y=xk_hist_MAD_np[:, 1], z=HJ_MAD_f_values,
    #     mode='lines+markers', marker=dict(size=5, color='red'),
    #     line=dict(color='red', width=3), name='HJ-MAD'
    # )

    # HJ-MAD-CD (MC) optimization path lifted onto the surface
    cd_mc_iterates = np.vstack(xk_hist_cd_MC)
    HJ_MAD_CD_trace = go.Scatter3d(
        x=cd_mc_iterates[:, 0],
        y=cd_mc_iterates[:, 1],
        z=HJ_MAD_CD_f_values,
        mode='lines+markers',
        marker=dict(size=5, color='blue'),
        line=dict(color='blue', width=3),
        name='HJ-MAD-CD',
    )

    # Markers for the global minimum and for the initial guess
    global_min_trace = _single_point_trace(x_true, global_min_f, 'black', 'Global min')
    initial_guess_trace = _single_point_trace(x0, f_initial, 'green', 'Initial guess')

    fig = go.Figure(data=[surface_trace, HJ_MAD_CD_trace, global_min_trace, initial_guess_trace])

    # Axis titles, tight margins and a semi-transparent legend in the top-left corner
    axis_titles = dict(xaxis_title="X-axis", yaxis_title="Y-axis", zaxis_title="f-axis")
    legend_box = dict(x=0.02, y=0.98, bgcolor='rgba(255, 255, 255, 0.5)')
    fig.update_layout(
        title="Interactive 3D Optimization Path",
        scene=axis_titles,
        margin=dict(l=0, r=0, t=40, b=0),
        legend=legend_box,
    )

    # Show interactive plot
    fig.show(renderer="notebook")

In [None]:
if dim == 2:

    # Static 3D view: objective surface, the HJ-MAD-CD path and the two reference points
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none', zorder=1)

    # Disabled HJ-MAD path, kept for reference:
    # ax.plot(xk_hist_MAD_np[:, 0], xk_hist_MAD_np[:, 1], HJ_MAD_f_values, '-o', color='red', label="HJ-MAD", zorder=2)

    # HJ-MAD-CD (MC) optimization path
    cd_mc_iterates = np.vstack(xk_hist_cd_MC)
    ax.plot(cd_mc_iterates[:, 0], cd_mc_iterates[:, 1], HJ_MAD_CD_f_values,
            '-o', color='blue', label="HJ-MAD-CD", zorder=2)

    # Single 'x' markers; each coordinate is wrapped in a one-element list for ax.plot
    reference_points = (
        (x_true, global_min_f, 'black', "Global min"),
        (x0, f_initial, 'green', "Initial guess"),
    )
    for ref_point, ref_height, ref_color, ref_label in reference_points:
        ax.plot(
            [ref_point[0, 0].item()],
            [ref_point[0, 1].item()],
            [ref_height],
            'x', color=ref_color, label=ref_label, zorder=3
        )

    # View the surface from 50 degrees elevation and 30 degrees azimuth
    ax.view_init(elev=50, azim=30)

    # Axis labels and legend
    ax.set_xlabel('X-axis')
    ax.set_ylabel('Y-axis')
    ax.set_zlabel('f-axis')
    ax.legend()

    plt.show()