In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sopt
from matplotlib.colors import LogNorm

In [None]:
def f(x, a=1, b=100):
    """Evaluate the Rosenbrock function ``(a - x[0])**2 + b*(x[1] - x[0]**2)**2``.

    Parameters
    ----------
    x : indexable
        Point to evaluate; only ``x[0]`` and ``x[1]`` are read.  Because the
        body uses nothing but indexing and arithmetic, the two entries may be
        scalars or equally shaped arrays/tensors (for example the rows of a
        ``(2, N)`` array when evaluating on a grid).
    a, b : scalar, optional
        Rosenbrock parameters.  The defaults ``a=1``, ``b=100`` are the values
        that used to be hard-coded, so existing ``f(x)`` calls are unchanged.

    Returns
    -------
    The function value, with whatever type/shape the arithmetic on ``x[0]``
    and ``x[1]`` produces.
    """
    return (a - x[0])**2 + b*(x[1] - x[0]**2)**2

def df(x, a=1, b=100):
    """Gradient of the Rosenbrock function ``(a - x[0])**2 + b*(x[1] - x[0]**2)**2``.

    Parameters
    ----------
    x : indexable
        Point at which to differentiate; only ``x[0]`` and ``x[1]`` are read.
    a, b : scalar, optional
        Rosenbrock parameters.  The defaults ``a=1``, ``b=100`` are the values
        that used to be hard-coded, so existing ``df(x)`` calls are unchanged.

    Returns
    -------
    numpy.ndarray
        ``[d/dx0, d/dx1]`` evaluated at ``x``.
    """
    # Shared inner term of the second summand: x1 - x0**2.
    r = x[1] - x[0]**2
    # d/dx0: derivative of (a - x0)**2 plus the chain rule through r (dr/dx0 = -2*x0).
    df1 = -2*(a - x[0]) - 4*b*r*x[0]
    # d/dx1: only the second summand depends on x1 (dr/dx1 = 1).
    df2 = 2*b*r
    return np.array([df1, df2])

# Reference minimiser used by the error plots: f is a sum of two squares that
# both vanish at (1, 1), so f(xstar) == 0.
xstar = np.array([1,1])

# Seeds NumPy's global random number generator.
# NOTE(review): nothing in the visible cells draws random numbers, so this
# looks like a leftover (perhaps from a randomised start point) -- confirm.
np.random.seed(1028)
# Starting point passed to sd() and bfgs(); the torch cells repeat the same
# coordinates literally.
x0 = np.array([-1.4, 1.1])

# Steepest Descent

In [None]:
def sd(x0, xhist=None, maxiter=1000, alpha=0.001):
    """Minimise ``f`` by steepest descent, starting from ``x0``.

    Each iteration moves along the negative gradient ``-df(x)``.

    Parameters
    ----------
    x0 : numpy.ndarray
        Starting point.  It is copied, so the caller's array is not modified.
    xhist : list, optional
        If given, every new iterate is appended to it (the starting point is
        not appended here; seed the list with it if you want it included).
        If omitted, no history is recorded.
    maxiter : int, optional
        Number of iterations to perform; there is no early stopping.
    alpha : float or None, optional
        Step length.  The default ``0.001`` is the fixed step that used to be
        hard-coded.  Pass ``None`` to choose the step in every iteration with
        a golden-section line search (``scipy.optimize.golden``) along the
        descent direction.

    Returns
    -------
    numpy.ndarray
        The last iterate.
    """
    x = x0.copy()

    for k in range(maxiter):
        # Steepest-descent direction: the negative gradient.
        s = -df(x)

        if alpha is None:
            # Line search: minimise the 1-D restriction of f along s.
            step = sopt.golden(lambda t: f(x + t*s))
        else:
            # Fixed step; the (expensive) line search is skipped entirely
            # instead of being computed and then thrown away.
            step = alpha

        x = x + step * s

        if xhist is not None:
            xhist.append(x)
    return x

In [None]:
# Run steepest descent from x0.  The history list is seeded with the start
# point so the convergence curve begins at the initial error.
xhist_sd = [x0]
x = sd(x0, xhist=xhist_sd)

# Euclidean distance of every recorded iterate to the known minimiser, log axis.
plt.semilogy([np.linalg.norm(xk - xstar) for xk in xhist_sd])

In [None]:
plt.figure(figsize=(10,10))

# 30 x 30 evaluation grid on [-1, 1] x [-1, 1].
X, Y = np.meshgrid(np.linspace(-1, 1, 30), np.linspace(-1, 1, 30))
# Flatten the grid into a (2, N) array so f can index rows 0 and 1 directly,
# then fold the values back into the grid shape for contouring.
XY = np.stack([X.ravel(), Y.ravel()])
Z = np.reshape(f(XY), X.shape)
plt.contourf(X, Y, Z, levels=30, cmap='plasma')

# Mark the known minimiser on top of everything else.
plt.plot(*xstar, 'm*', markersize=10, zorder=10)

# Steepest-descent path: row 0 holds the x-coordinates, row 1 the y-coordinates.
xhist = np.transpose(np.array(xhist_sd))
plt.plot(*xhist, 'wo-', markersize=5)

# BFGS

In [None]:
def bfgs(x0, errors=None, xhistory=None, maxiter=100, alpha=0.01, tol=1e-12):
    """Minimise ``f`` with a BFGS quasi-Newton iteration, starting from ``x0``.

    The search direction is ``-C @ df(x)``, where ``C`` is the running inverse
    Hessian approximation.  The direct approximation ``B`` is updated alongside
    it but never used to compute a step; it is kept for reference only.

    Parameters
    ----------
    x0 : numpy.ndarray
        Starting point.  It is copied, so the caller's array is not modified.
    errors : list, optional
        If given, ``norm(x - xstar)`` is appended after every iteration.
    xhistory : list, optional
        If given, every new iterate is appended (the starting point is not
        appended here).
    maxiter : int, optional
        Maximum number of iterations (default 100, the former hard-coded value).
    alpha : float or None, optional
        Step length along the search direction.  The default ``0.01`` is the
        fixed step that used to be hard-coded.  Pass ``None`` to choose it by
        a golden-section line search (``scipy.optimize.golden``).
    tol : float, optional
        Stop as soon as ``norm(x - xstar)`` drops below this value (default
        ``1e-12``, the former hard-coded value).  Note that the test uses the
        known minimiser ``xstar`` from the enclosing namespace.

    Returns
    -------
    numpy.ndarray
        The last iterate.
    """
    x = x0.copy()
    n = len(x)
    B = np.eye(n)  # Hessian approximation (reference only, see docstring)
    C = np.eye(n)  # inverse Hessian approximation, drives the search direction

    for k in range(maxiter):
        g = df(x)
        s = -C @ g

        if alpha is None:
            # Line search: minimise the 1-D restriction of f along s.
            step = sopt.golden(lambda t: f(x + t*s))
        else:
            # Fixed step; the line search is skipped instead of being computed
            # and then discarded.
            step = alpha
        xnew = x + step * s

        # Gradient change across the step.
        y = df(xnew) - g

        # NOTE(review): textbook BFGS builds both updates from the actual step
        # xnew - x (= step * s).  Here the unscaled direction s is used, so B
        # and C presumably differ from the usual approximations by a factor
        # tied to the step length.  Left as is because changing it alters every
        # iterate -- confirm that this is intended.
        sy = np.dot(s, y)
        Bs = B @ s

        # Rank-two update of the Hessian approximation.
        Bnew = B + (1/sy)*np.outer(y, y) - (1/np.dot(Bs, s))*np.outer(Bs, Bs)

        # Matching rank-two update of the inverse approximation.
        u = s - C @ y
        Cnew = C + (1/sy)*np.outer(u, s) + (1/sy)*np.outer(s, u) - (np.dot(y, u)/sy**2)*np.outer(s, s)

        B = Bnew
        x = xnew
        C = Cnew

        err = np.linalg.norm(x - xstar)
        if errors is not None:
            errors.append(err)
        if xhistory is not None:
            xhistory.append(x)
        if err < tol:
            return x
    return x

In [None]:
# Containers filled in by bfgs(): per-iteration distance to the minimiser and
# the iterates themselves (seeded with the start point for the path plot).
errors_bfgs, xhist_bfgs = [], [x0]
x = bfgs(x0, errors=errors_bfgs, xhistory=xhist_bfgs)

# Convergence history on a logarithmic error axis.
plt.semilogy(errors_bfgs)

In [None]:
plt.figure(figsize=(10,10))

# 30 x 30 evaluation grid on [-4, 4] x [-4, 4].
X, Y = np.meshgrid(np.linspace(-4, 4, 30), np.linspace(-4, 4, 30))
# Flatten the grid into a (2, N) array so f can index rows 0 and 1 directly,
# then fold the values back into the grid shape for contouring.
XY = np.stack([X.ravel(), Y.ravel()])
Z = np.reshape(f(XY), X.shape)
plt.contourf(X, Y, Z, levels=30, cmap='plasma')

# Mark the known minimiser on top of everything else.
plt.plot(*xstar, 'm*', markersize=10, zorder=10)

# BFGS path: row 0 holds the x-coordinates, row 1 the y-coordinates.
xhist = np.transpose(np.array(xhist_bfgs))
plt.plot(*xhist, 'wo-', markersize=15)

In [None]:
import torch

In [None]:
# Gradient descent on f with torch.optim.SGD, driven through the closure API:
# https://docs.pytorch.org/docs/stable/optim.html#optimizer-step-closure
x = torch.tensor([-1.4, 1.1], requires_grad=True)
# The learning rate is spelled out instead of relying on the library default,
# so the step size is visible here and does not depend on the installed
# PyTorch version.
optimizer = torch.optim.SGD([x], lr=1e-3)

# The closure logs the point it is evaluated at, i.e. the iterate *before*
# that step's update.  The list therefore starts empty (the first closure call
# records the starting point) and the final iterate is appended after the loop;
# seeding the list with the start point would record it twice.
xhist_torch = []

def closure():
    """Clear old gradients, evaluate f at the current x, backpropagate, log x."""
    optimizer.zero_grad()
    loss = f(x)
    loss.backward()
    xhist_torch.append(x.detach().tolist())
    return loss

for i in range(1000):
    optimizer.step(closure)

# The last update happens after the last closure call, so record its result.
xhist_torch.append(x.detach().tolist())

print("Optimized x:", x)
print("Minimum value of the function:", f(x))

In [None]:
# Distance of every logged torch iterate (from the run above) to the known
# minimiser, drawn on a logarithmic axis.
plt.semilogy([np.linalg.norm(pt - xstar) for pt in xhist_torch])

In [None]:
# L-BFGS on f with torch.optim.LBFGS.  A single step() call runs the whole
# optimisation (up to max_iter iterations), calling the closure repeatedly:
# https://docs.pytorch.org/docs/stable/optim.html#optimizer-step-closure
x = torch.tensor([-1.4, 1.1], requires_grad=True)
optimizer = torch.optim.LBFGS([x], max_iter=1000, lr=0.01, history_size=1)

# The closure logs every point it is evaluated at, and its first evaluation
# is at the starting point.  The list therefore starts empty; seeding it with
# the start point would record that point twice.
xhist_torch = []

def closure():
    """Clear old gradients, evaluate f at the current x, backpropagate, log x."""
    optimizer.zero_grad()
    loss = f(x)
    loss.backward()
    xhist_torch.append(x.detach().tolist())
    return loss

optimizer.step(closure)

# If the optimiser updated x after its last closure evaluation, the final
# iterate has not been logged yet; add it without creating a duplicate.
if xhist_torch[-1:] != [x.detach().tolist()]:
    xhist_torch.append(x.detach().tolist())

print("Optimized x:", x)
print("Minimum value of the function:", f(x))

In [None]:
# Distance of every logged torch iterate (from the run above) to the known
# minimiser, drawn on a logarithmic axis.
plt.semilogy([np.linalg.norm(pt - xstar) for pt in xhist_torch])