In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from Optimizer_Scripts.optimizers import Adam, Momentum, NesterovMomentum, GradientDescent
import matplotlib.animation as ani
from Optimizer_Scripts.Delayer import Delayer
from Optimizer_Scripts.AdaptiveDelayer import AdaptiveDelayer
import time
import multiprocessing as mp
import dill
import itertools
from Optimizer_Scripts.functions import ackley_gen, rastrigin_gen, ackley_deriv_gen, rast_deriv_gen
from Optimizer_Scripts.functions import adaptive_function, himmelblau, himmelblau_grad

In [3]:
# IPython magic: select the `notebook` matplotlib backend for this kernel session.
%matplotlib notebook

#### Helper Functions

In [4]:
def animate_2d_conv(loss, time_series, x_range, y_range, ints, title):
    """Animate an optimizer trajectory over a contour plot and save it as an MP4.

    The loss is evaluated on an ``ints`` x ``ints`` grid spanning ``x_range``
    by ``y_range`` and drawn as a contour plot (``levels=50``). Frame ``i``
    shows the path through iterates ``0..i`` plus a marker on iterate ``i``.
    The animation is written to ``'<title>.mp4'``.

    Parameters
    ----------
    loss : callable
        Called once as ``loss(np.array([X, Y]))`` with the meshgrid arrays;
        the result must be reshapeable to ``(ints, ints)``.
    time_series : array-like, 2-D
        One row per iterate; column 0 is plotted as x and column 1 as y.
    x_range, y_range : sequence of two numbers
        Lower and upper grid limits along each axis.
    ints : int
        Number of grid points per axis.
    title : str
        Plot title; also used as the output file name (without extension).

    Returns
    -------
    None
        The only outputs are the saved file and the pyplot side effects
        (interactive mode switched off, figure 1 cleared).
    """
    time_series = np.asarray(time_series)
    num_frames = time_series.shape[0]

    # Evaluate the loss on a regular grid for the background contour plot.
    x = np.linspace(x_range[0], x_range[1], ints, endpoint=True)
    y = np.linspace(y_range[0], y_range[1], ints, endpoint=True)
    X, Y = np.meshgrid(x, y)
    Z = np.asarray(loss(np.array([X, Y]))).reshape(ints, ints)

    # Interactive mode is switched off (and left off, as before) while the
    # frames are rendered.
    plt.ioff()
    # Select figure 1 *before* clearing so an unrelated current figure is
    # never wiped by accident.
    fig = plt.figure(1)
    fig.clf()
    try:
        # Static background: contour lines of the loss surface.
        ax = fig.add_subplot(111)
        ax.contour(X, Y, Z, levels=50, cmap='CMRmap')
        ax.set_title(title)

        # Artists that are updated on every frame.
        traj, = ax.plot([], [], color='g')
        particle, = ax.plot([], [], marker='o', markersize=10, color='g', label="Optimizer")
        ax.legend()

        def update(i):
            # Include iterate i itself so the path reaches the marker.
            traj.set_data(time_series[:i + 1, 0], time_series[:i + 1, 1])
            # Line2D.set_data requires sequences; wrap the single point.
            particle.set_data([time_series[i, 0]], [time_series[i, 1]])
            return [traj, particle]

        animation = ani.FuncAnimation(fig, update, frames=range(num_frames), interval=100)
        animation.save('{}.mp4'.format(title))
    finally:
        # Leave figure 1 empty even if rendering or saving failed.
        fig.clf()

### Himmelblau Normal Test

In [5]:
# --- Problem setup: GradientDescent on the two-variable Himmelblau function ---
n = 2
max_iter, tol = 500, 1e-5
loss_function, grad = himmelblau, himmelblau_grad
x_init = np.array([6.2, 5.1])

# Constant learning-rate schedule with max_iter + 1 entries.
params = {'learning_rate': [0.01] * (max_iter + 1)}
optimizer = GradientDescent(params)

# --- Run the optimizer with delays disabled ---
optimization = Delayer(n, optimizer, loss_function, grad, x_init)
optimization.compute_time_series(tol=tol, maxiter=max_iter, use_delays=False)

# --- Collect the results and animate the trajectory ---
time_series = optimization.time_series
final_state = optimization.final_state
animate_2d_conv(loss_function, time_series, [-6, 6], [-6, 6], 200, "Himmelblau Test")

<video width=520 height=520 src="Himmelblau Test.mp4" controls></video>

### Rastrigin Normal Test

In [8]:
# --- Problem setup: GradientDescent on the n = 2 Rastrigin function ---
n = 2
max_iter, tol = 100, 1e-5
loss_function, grad = rastrigin_gen(n), rast_deriv_gen(n)
x_init = np.array([-4.3, 4.3])

# Constant learning-rate schedule with max_iter + 1 entries.
params = {'learning_rate': [0.005] * (max_iter + 1)}
optimizer = GradientDescent(params)

# --- Run the optimizer with delays disabled ---
optimization = Delayer(n, optimizer, loss_function, grad, x_init)
optimization.compute_time_series(tol=tol, maxiter=max_iter, use_delays=False)

# --- Collect the results and animate the trajectory ---
time_series = optimization.time_series
final_state = optimization.final_state
animate_2d_conv(loss_function, time_series, [-5.12, 5.12], [-5.12, 5.12], 200, "RastriginTest")

<video width=520 height=520 src="RastriginTest.mp4" controls></video>

### Rastrigin Adam

In [9]:
# --- Problem setup: Adam on the n = 2 Rastrigin function ---
n = 2
max_iter, tol = 100, 1e-5
loss_function, grad = rastrigin_gen(n), rast_deriv_gen(n)
x_init = np.array([-4.3, 4.3])

# Adam hyperparameters; the learning-rate schedule is constant with
# max_iter + 1 entries.
params = {
    'learning_rate': [0.9] * (max_iter + 1),
    'beta_1': 0.9,
    'beta_2': 0.999,
}
optimizer = Adam(params)

# --- Run the optimizer with delays disabled ---
optimization = Delayer(n, optimizer, loss_function, grad, x_init)
optimization.compute_time_series(tol=tol, maxiter=max_iter, use_delays=False)

# --- Collect the results and animate the trajectory ---
time_series = optimization.time_series
final_state = optimization.final_state
animate_2d_conv(loss_function, time_series, [-5.12, 5.12], [-5.12, 5.12], 200, "RastriginAdamTest")

<video width=520 height=520 src="RastriginAdamTest.mp4" controls></video>

### Rastrigin Original Time Delay Test - decreasing max

In [25]:
# --- Problem setup: Adam on the n = 2 Rastrigin function, with delays ---
n = 2
max_iter, tol = 500, 1e-5
loss_function, grad = rastrigin_gen(n), rast_deriv_gen(n)
x_init = np.array([-4.3, 4.3])

# Adam hyperparameters; the learning-rate schedule is constant with
# max_iter + 1 entries.
params = {
    'learning_rate': [0.9] * (max_iter + 1),
    'beta_1': 0.9,
    'beta_2': 0.999,
}
optimizer = Adam(params)

# --- Run the optimizer with symmetric delays enabled and shrink disabled ---
optimization = Delayer(n, optimizer, loss_function, grad, x_init, num_delays=100)
optimization.compute_time_series(
    tol=tol,
    maxiter=max_iter,
    use_delays=True,
    symmetric_delays=True,
    shrink=False,
)

# --- Collect the results and animate the trajectory ---
time_series = optimization.time_series
final_state = optimization.final_state
animate_2d_conv(loss_function, time_series, [-5.12, 5.12], [-5.12, 5.12], 200, "RastriginAdamDelayedTest")

<video width=520 height=520 src="RastriginAdamDelayedTest.mp4" controls></video>

### Rastrigin Adam Adaptive Delay Test

In [24]:
# --- Problem setup: Adam on the n = 2 Rastrigin function, adaptive delays ---
n = 2
max_L = 6
max_iter, tol = 500, 1e-5
loss_function, grad = rastrigin_gen(n), rast_deriv_gen(n)
adapt_fun = adaptive_function
x_init = np.array([-4.3, 4.3])

# Adam hyperparameters; the learning-rate schedule is constant with
# max_iter + 1 entries.
params = {
    'learning_rate': [0.8] * (max_iter + 1),
    'beta_1': 0.9,
    'beta_2': 0.999,
}
optimizer = Adam(params)

# --- Run the adaptive-delay optimizer with symmetric delays enabled ---
optimization = AdaptiveDelayer(n, optimizer, loss_function, adapt_fun, grad, x_init, max_L=max_L)
optimization.compute_time_series(
    tol=tol,
    maxiter=max_iter,
    use_delays=True,
    symmetric_delays=True,
)

# --- Collect the results and animate the trajectory ---
time_series = optimization.time_series
final_state = optimization.final_state
animate_2d_conv(loss_function, time_series, [-5.12, 5.12], [-5.12, 5.12], 200, "RastriginAdamAdaptTest")

<video width=520 height=520 src="RastriginAdamAdaptTest.mp4" controls></video>