In [7]:
import numpy as np

In [8]:
import matplotlib.pyplot as plt
# Default size (width, height) applied to every figure created after this cell runs.
plt.rcParams["figure.figsize"] = [20,20]


def fix_scaling(ax=None):
    """Widen the shorter axis range so both axes cover the same data span.

    The axis with the smaller ``max - min`` extent is padded symmetrically
    (half of the difference on each side) until its extent equals the other
    one. The longer axis is left untouched.

    Parameters
    ----------
    ax : object with ``get_xlim``/``get_ylim``/``set_xlim``/``set_ylim``, optional
        Axes to adjust. When omitted (``None``), the current pyplot axes
        (``plt.gca()``) are used.

    Returns
    -------
    None
        The limits are changed in place on ``ax``.
    """
    # Compare against None explicitly: an axes-like object that happens to be
    # falsy must still be honoured instead of falling back to pyplot state.
    if ax is None:
        ax = plt.gca()

    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()
    x_span = x_hi - x_lo
    y_span = y_hi - y_lo

    if x_span > y_span:
        pad = (x_span - y_span) / 2
        ax.set_ylim((y_lo - pad, y_hi + pad))
    else:
        # Here x_span - y_span <= 0, so adding it to the lower bound and
        # subtracting it from the upper bound widens the x range.
        shift = (x_span - y_span) / 2
        ax.set_xlim((x_lo + shift, x_hi - shift))

In [10]:
from matplotlib.animation import FuncAnimation
from IPython.display import HTML


def animate_trajectory(trajectories, n, f, x_min):
    """Build an animation that progressively reveals trajectories over level curves of ``f``.

    Parameters
    ----------
    trajectories : sequence
        Each item is indexed as ``item[0]`` to obtain the list of 2-D points
        to draw. Items are paired with the colour list below, so only the
        first two items are drawn.
    n : int
        Number of frames. Frame ``t`` shows the first ``t`` points of every
        trajectory.
    f : callable
        Called with a two-element list ``[x, y]``; its values on a grid over
        ``[-1, 1) x [-1, 1)`` are contoured at levels 3, 4 and 5.
    x_min : sequence
        ``x_min[0]`` and ``x_min[1]`` give the position of the green marker.

    Returns
    -------
    FuncAnimation
        Animation bound to a figure that has already been closed in pyplot,
        so it is not also rendered as a static image.
    """
    fig, ax = plt.subplots()
    colors = ['blue', 'orange']

    # The contour grid does not depend on the frame index, so evaluate f once
    # here instead of on every frame.
    delta = 0.025
    x = np.arange(-1, 1, delta)
    y = np.arange(-1, 1, delta)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros_like(X)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            Z[i, j] = f([X[i, j], Y[i, j]])

    def step(t):
        """Redraw frame ``t`` from scratch on the shared axes."""
        ax.cla()
        ax.plot([x_min[0]], [x_min[1]], 'o', color='green')
        # Level contours
        ax.contour(X, Y, Z, [3.0, 4.0, 5.0], colors=['blue', 'purple', 'red'])

        for traj, color in zip(trajectories, colors):
            shown = traj[0][:t]
            xs = [u[0] for u in shown]
            ys = [u[1] for u in shown]
            # Line first, then markers on top, both in the trajectory colour.
            ax.plot(xs, ys, color=color)
            ax.plot(xs, ys, 'o', color=color)

        fix_scaling(ax)
        ax.axis('off')

    # Close this specific figure rather than "whatever is current".
    plt.close(fig)
    return FuncAnimation(fig, step, frames=range(n), interval=600)

In [11]:
# Iteration count used by the loops in optimal_step and nesterov.
NUMBER_OF_STEPS = 9000
# A trajectory point is stored whenever the iteration index is divisible by this value.
PRINT_EACH = 300

In [12]:
from math import exp


def f(x):
    """Objective: exp(x0 + 3*x1) + exp(x0 - 3*x1) + exp(-x0) for a 2-element ``x``."""
    x0 = x[0]
    x1 = x[1]
    up = exp(x0 + 3 * x1)
    down = exp(x0 - 3 * x1)
    back = exp(-x0)
    return up + down + back


def f_grad(x):
    """Gradient of ``f`` at the 2-element point ``x``, returned as a NumPy array of length 2."""
    x0, x1 = x[0], x[1]
    # The same three exponentials that make up f.
    up = exp(x0 + 3 * x1)
    down = exp(x0 - 3 * x1)
    back = exp(-x0)
    d_x0 = up + down - back
    d_x1 = 3 * up - 3 * down
    return np.array([d_x0, d_x1])


def f_grad_l():
    """Numerically estimate a bound on how fast ``f_grad`` changes over a grid.

    A 1-D grid is built with ``np.arange(-1, 1 + spacing, spacing)`` and used
    for both coordinates. For every pair of neighbouring grid values and every
    grid value of the other coordinate, the norm of the gradient difference
    divided by the spacing is computed, once with the step taken along the
    first coordinate and once along the second. The largest quotient seen
    (never less than 0.1) is multiplied by 3 and returned.
    """
    safety_factor = 3
    spacing = 0.001
    grid = np.arange(-1, 1 + spacing, spacing)

    def quotient(p, q):
        # Finite-difference ratio between two points one grid step apart.
        return np.linalg.norm(f_grad(p) - f_grad(q)) / spacing

    # Lower floor for the estimate.
    largest = 0.1
    for near, far in zip(grid[:-1], grid[1:]):
        for fixed in grid:
            # Step along the first coordinate, second coordinate held fixed.
            largest = max(largest, quotient([near, fixed], [far, fixed]))
            # Step along the second coordinate, first coordinate held fixed.
            largest = max(largest, quotient([fixed, near], [fixed, far]))

    return safety_factor * largest

In [13]:
# Computed once here; optimal_step and nesterov both use 1 / M as their gradient step size.
M = f_grad_l()

In [14]:
def optimal_step(x_start):
    """Run fixed-step gradient descent on ``f`` with step size ``1 / M``.

    Performs ``NUMBER_OF_STEPS`` updates starting from a copy of ``x_start``.
    The point obtained after each iteration whose index is a multiple of
    ``PRINT_EACH`` is stored. The last stored point is printed.

    Returns
    -------
    list
        ``[points, 'Optimal step']`` where ``points`` starts with a copy of
        ``x_start`` followed by the stored iterates.
    """
    step_size = 1.0 / M
    history = [x_start.copy()]
    point = x_start.copy()

    for iteration in range(NUMBER_OF_STEPS):
        point = point - step_size * f_grad(point)
        if iteration % PRINT_EACH != 0:
            continue
        history.append(point.copy())

    print(history[-1])
    return [history, 'Optimal step']

In [15]:
def nesterov(x_start):
    """Run an accelerated (momentum) gradient scheme on ``f`` with gradient step ``1 / M``.

    Each iteration:
      1. takes a gradient step of size ``1 / M`` from the extrapolated point;
      2. updates the coefficient ``a`` to the largest root of
         ``a**2 + a_prev**2 * a - a_prev**2 = 0`` (initial value ``1 / (M + 1)``);
      3. forms ``beta = a_prev * (1 - a_prev) / (a_prev**2 + a)``;
      4. extrapolates: ``new_x + beta * (new_x - old_x)``.

    The iterate obtained at every iteration whose index is a multiple of
    ``PRINT_EACH`` is stored. The last stored point is printed.

    Returns
    -------
    list
        ``[points, 'Nesterov']`` where ``points`` starts with a copy of
        ``x_start`` followed by the stored iterates.
    """
    history = [x_start.copy()]
    x_now = x_start.copy()
    coef = 1.0 / (M + 1.0)
    lookahead = x_start.copy()

    for iteration in range(NUMBER_OF_STEPS):
        x_before = x_now
        x_now = lookahead - 1.0 / M * f_grad(lookahead)

        coef_before = coef
        coef = max(np.roots([1, coef_before ** 2, -coef_before ** 2]))
        beta = coef_before * (1 - coef_before) / (coef_before ** 2 + coef)

        lookahead = x_now + beta * (x_now - x_before)

        if iteration % PRINT_EACH != 0:
            continue
        history.append(x_now.copy())

    print(history[-1])
    return [history, 'Nesterov']

In [16]:
x_start = np.array([0.9, 0.7])
# Run both methods from the same start, in this order; each call prints its last stored point.
trajectories = [solver(x_start) for solver in (optimal_step, nesterov)]

[ -3.46573455e-01   9.18596275e-18]


[ -3.46633675e-01  -1.16026822e-17]


In [17]:
from math import log

# One frame per stored trajectory sample; the marker is drawn at (-ln(2) / 2, 0).
base_animation = animate_trajectory(
    trajectories=trajectories,
    n=NUMBER_OF_STEPS // PRINT_EACH,
    f=f,
    x_min=np.array([-log(2) / 2, 0]),
)
HTML(base_animation.to_html5_video())