## *Complete the code wherever "..." appears, and add Markdown cells that answer the questions based on your outputs where necessary*

In [2]:
# Helper function
import functools
import time

def timeit(f):
    """
    Decorator that prints how long each call to ``f`` takes.

    Parameters
    ----------
    f: Callable
        Function to be timed

    Returns
    -------
    timed: Callable
        Wrapper that forwards all positional and keyword arguments to ``f``,
        prints the elapsed time in seconds, and returns the result of ``f``
        unchanged
    """

    # functools.wraps copies __name__, __doc__, etc. from f onto the wrapper,
    # so decorated functions keep their identity in help() and tracebacks.
    @functools.wraps(f)
    def timed(*args, **kw):

        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can jump if the system
        # clock is adjusted.
        ts = time.perf_counter()
        result = f(*args, **kw)
        te = time.perf_counter()

        print(f'func:{f.__name__} took: {te-ts:.4f} sec')
        return result

    return timed

# Question 1

In [None]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

def func(X):
    """
    Function studied in Question 1 (template stub: the return value is still
    the ``...`` placeholder and must be replaced with the actual expression).

    Parameters
    ----------
    X: sequence of two items
        Unpacked as ``x, y = X``

    Returns
    -------
    Currently the ``...`` placeholder; intended to be the function value at (x, y)
    """
    x, y = X
    return ...

def first_derivative(X):
    """
    Template stub for the first derivative of the Question 1 function.

    The body is only the ``...`` placeholder, so the function currently
    returns None. TODO: implement and return the derivative evaluated at X.
    """
    ...

def second_derivative(X):
    """
    Template stub for the second derivative of the Question 1 function.

    The body is only the ``...`` placeholder, so the function currently
    returns None. TODO: implement and return the second derivative evaluated at X.
    """
    ...

## (a)

*For debugging*: the new point should be $(0.15, 0.9)$

In [None]:
# Placeholder: the point before the step (to be filled in).
original_point = ...
# Placeholder: the point after the step (to be filled in).
new_point = ... 

# TODO: replace the ... below with the actual test for a good step.
# NOTE: a bare ... (Ellipsis) is truthy, so until the condition is filled in
# this cell always prints "This is a good step".
if ...:
    print("This is a good step")
else:
    print("This is a bad step")


## (b)

*For debugging*: it should take 41 steps to converge, reaching the point [-0.99999852,  0.99999607] with value -2.999999999985186; the run took 0.0020 sec

In [None]:
def steepest_descent(func, first_derivative, starting_point, step_size, tol):
    """
    Steepest Descent

    Template: the lines containing ``...`` are placeholders that still have
    to be implemented.

    Parameters
    ----------
    func: Callable
        Function to be minimized
    first_derivative: Callable
        First derivative of the function to be minimized
    starting_point: np.ndarray
        Starting point of minimization
    step_size: float
        Size of each gradient descent step
    tol: float
        If the norm of the gradient is smaller than tol, the minimization will terminate

    Returns
    -------
    res: dict
        Optimization result with the keys
        "x" (the value of ``starting_point`` when the loop ends),
        "evaluation" (``func`` evaluated at that point) and
        "path" (``visited`` converted to an np.ndarray)

    Notes
    -----
    The loop also stops after 1e6 iterations, even if the gradient norm is
    still larger than ``tol``.
    """
    deriv = first_derivative(starting_point)
    count = 0
    # Nothing in the visible template appends to this list; the placeholder
    # branches below are presumably where accepted points get recorded.
    visited = []
    # Iterate until the gradient norm drops to tol or the iteration cap is hit.
    while np.linalg.norm(deriv) > tol and count < 1e6:
        # calculate new position
        new_point = ...
        if func(new_point) < func(starting_point):
            # the step makes function evaluation smaller - it is a good step. what do you do?
            ...
        else:
            # the step makes function evaluation larger - it is a bad step. what do you do?
            ...
        count += 1
    return {
        "x": starting_point,
        "evaluation": func(starting_point),
        "path": np.array(visited)
    }

Function for drawing the path:

In [None]:
def draw_path(func, path, x_min=-2, x_max=2, y_min=-2, y_max=2):
    """
    Draw a contour plot of ``func`` and overlay an optimization path in red.

    Parameters
    ----------
    func: Callable
        Function to plot; it is called once as ``func((x, y))`` where ``x``
        and ``y`` are the 100x100 mesh-grid arrays of the plot window
    path: np.ndarray
        Visited points, one per row; column 0 is plotted on the horizontal
        axis and column 1 on the vertical axis. An empty path is accepted
        and results in a contour plot without a path line.
    x_min, x_max: float
        Horizontal limits of the plot window
    y_min, y_max: float
        Vertical limits of the plot window
    """
    a = np.linspace(x_min, x_max, 100)
    b = np.linspace(y_min, y_max, 100)
    x, y = np.meshgrid(a, b)
    z = func((x, y))

    # np.array([]) (an optimizer that never recorded a point) has shape (0,),
    # which would make the column indexing below raise IndexError. Give an
    # empty path an explicit (0, 2) shape so it simply draws nothing.
    path = np.asarray(path)
    if path.size == 0:
        path = path.reshape(0, 2)

    fig, ax = plt.subplots()
    contour = ax.contour(x, y, z, 50)
    # Attach the colorbar to this figure/axes explicitly instead of relying
    # on pyplot's notion of the "current" figure.
    fig.colorbar(contour, ax=ax)
    ax.plot(path[:, 0], path[:, 1], color='red')
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    # TODO: replace the ... placeholder with the length of the path.
    print("Length of the path:", ...)

In [None]:
# optimization & draw path
# TODO: placeholder cell - presumably run steepest_descent here and pass the
# "path" entry of its result to draw_path.
...

## (c)

In [None]:
# CG/BFGS optimization with scipy
# TODO: placeholder cell - scipy is not imported anywhere in the visible
# notebook, so the required import has to be added as well.
...

# Question 2

In [None]:
def Rosenbrock(X):
    """
    Template stub for the Question 2 objective function.

    Presumably the Rosenbrock function evaluated at X; the exact form is not
    defined in this notebook. TODO: replace the ``...`` placeholder, which is
    what the function currently returns.
    """
    return ...

def Rosenbrock_grad(X):
    """
    Template stub for the gradient of the Question 2 objective function.

    TODO: replace the ``...`` placeholder, which is what the function
    currently returns, with the gradient evaluated at X.
    """
    return ...

## (a)

In [None]:
# Placeholder: starting point for this part (to be filled in).
# NOTE: this rebinds original_point, a name that other cells also assign.
original_point = ...
# SD minimization & draw path
# TODO: placeholder - presumably run steepest_descent and draw_path here.
...

## (b)

In [None]:
def sgd(func, first_derivative, starting_point, step_size, tol, stochastic_injection=0):
    """
    Stochastic Gradient Descent

    Template: the lines containing ``...`` are placeholders that still have
    to be implemented.

    Parameters
    ----------
    func: Callable
        Function to be minimized
    first_derivative: Callable
        First derivative of the function to be minimized
    starting_point: np.ndarray
        Starting point of minimization
    step_size: float
        Size of each gradient descent step
    tol: float
        If the norm of the gradient is smaller than tol, the minimization will terminate
    stochastic_injection: int
        Enable stochastic gradient (set to 1) or not (set to 0).
        The value is also used as the multiplier of the random vector when
        the search direction is formed.

    Returns
    -------
    res: dict
        Optimization result with the keys
        "x" (the value of ``starting_point`` when the loop ends),
        "evaluation" (``func`` evaluated at that point) and
        "path" (``visited`` converted to an np.ndarray)

    Notes
    -----
    The loop also stops after 1e6 iterations, even if the gradient norm is
    still larger than ``tol``.
    """
    # evaluate the gradient of the starting point at first
    deriv = ...
    count = 0
    # Nothing in the visible template appends to this list; the "good step"
    # placeholder below is presumably where accepted points get recorded.
    visited = []
    while np.linalg.norm(deriv) > tol and count < 1e6:
        if stochastic_injection > 0:
            # formulate a stochastic_deriv (random vector)
            # that is the same norm as your gradient
            stochastic_deriv = ...
        else:
            # no noise requested: use a zero vector so the direction below
            # reduces to the plain negative gradient
            stochastic_deriv = np.zeros(len(starting_point))

        # search direction: negative of (gradient + weighted random vector)
        direction = -(deriv + stochastic_injection * stochastic_deriv)
        # new position
        new_point = ...

        if func(new_point) < func(starting_point):
            # good step
            ...
            step_size = ...
        else:
            # bad step
            step_size = ...
        count += 1
    return {
        "x": starting_point,
        "evaluation": func(starting_point),
        "path": np.array(visited)
    }

*For debugging*: This is a stochastic method so your outputs may vary. For SGD, it takes ~1700 steps to converge and it takes ~0.1 sec

In [None]:
# Starting point for the SGD run.
# NOTE: this rebinds original_point, a name that other cells also assign.
original_point = np.array([-0.5, 1.5])
# SGD optimization and draw path
# TODO: placeholder - presumably run sgd and draw_path here.
...

## (c)

In [None]:
# CG/BFGS with scipy
# TODO: placeholder cell - scipy is not imported anywhere in the visible
# notebook, so the required import has to be added as well.
...

## (d)

## (e)

In [None]:
def statistics_test(method, args, times, global_minimum=None):
    """
    Run a minimizer repeatedly and print summary statistics of the runs.

    Template: the ``...`` entries are placeholders that still have to be
    implemented.

    Parameters
    ----------
    method: Callable
        Minimizer to run; it is called as ``method(*args)``
    args: sequence
        Positional arguments forwarded to ``method`` on every run
    times: int
        Number of times ``method`` is run
    global_minimum: np.ndarray, optional
        Known global minimum. When given, a run is skipped if the placeholder
        distance test classifies its result as not being the global minimum.

    Returns
    -------
    None
        The summary message is printed, not returned
    """
    result = []
    # The loop index is not needed; only the number of repetitions matters.
    for _ in range(times):
        res = method(*args)

        # If the norm of the difference vector between the global minimum
        # and the resulting point is larger than 1e-3, the point will not
        # be considered as a global minimum
        if (global_minimum is not None) and (...):
            continue
        result.append(...)

    msg = f"Running {times} times: reach global minimum {len(result)} times"
    if len(result) > 0:
        avg = ... # calculate the average
        std = ... # calculate the standard deviation
        # The second number is a standard deviation, so label it as such
        # (the label previously called it a variance).
        msg += f", average {int(avg)} steps with standard deviation {std:.2f}"
    print(msg)

# Starting points to compare. The first entry reuses original_point, which
# must already have been assigned by an earlier cell - run the notebook top
# to bottom so it holds the intended value.
starting_points = [
    original_point,
    np.array([0.0, 1.0]),
    np.array([-1.0, 1.0]),
    np.array([1.5, 0.5])
]

In [None]:
# test different minimization algorithms for different starting points
# TODO: placeholder cell - presumably call statistics_test for each
# algorithm and each entry of starting_points.
...

# Question 3

In [None]:
def Camel(X):
    """
    Template stub for the Question 3 objective function.

    Presumably a "camel" test function evaluated at X; which variant is meant
    is not defined in this notebook. TODO: replace the ``...`` placeholder,
    which is what the function currently returns.
    """
    return ...

def Camel_grad(X):
    """
    Template stub for the gradient of the Question 3 objective function.

    TODO: replace the ``...`` placeholder, which is what the function
    currently returns, with the gradient evaluated at X.
    """
    return ...

## (a)

In [None]:
# Starting point for Question 3.
# NOTE: this rebinds original_point, a name that other cells also assign.
original_point = np.array([-1.5, -1.5])

# Starting points to compare; this replaces any starting_points list
# created by an earlier cell.
starting_points = [
    original_point,
    np.array([0.0, 1.0]),
    np.array([-1.0, 1.0]),
    np.array([1.5, 0.5])
]

In [None]:
# test different minimization algorithms for different starting points
# TODO: placeholder cell - presumably call statistics_test for each
# algorithm and each entry of starting_points.
...

## (b)

*For debugging*: This is a stochastic method so your outputs may vary. For SGDM, it takes ~200 steps to converge and it takes ~0.02 sec

In [None]:
def sgdm(func, first_derivative, starting_point, step_size, tol, stochastic_injection=0, momentum=0):
    """
    Stochastic Gradient Descent with Momentum

    Template: the lines containing ``...`` are placeholders that still have
    to be implemented.

    Parameters
    ----------
    func: Callable
        Function to be minimized
    first_derivative: Callable
        First derivative of the function to be minimized
    starting_point: np.ndarray
        Starting point of minimization
    step_size: float
        Size of each gradient descent step
    tol: float
        If the norm of the gradient is smaller than tol, the minimization will terminate
    stochastic_injection: int
        Enable stochastic gradient (set to 1) or not (set to 0).
    momentum: float
        Momentum (eta) value in SGDM algorithm

    Returns
    -------
    res: dict
        Optimization result with the keys
        "x" (the value of ``starting_point`` when the loop ends),
        "evaluation" (``func`` evaluated at that point) and
        "path" (``visited`` converted to an np.ndarray)

    Notes
    -----
    The loop also stops after 1e6 iterations, even if the gradient norm is
    still larger than ``tol``.
    """

    deriv = first_derivative(starting_point)
    count = 0
    # Nothing in the visible template appends to this list; the "good step"
    # placeholder below is presumably where accepted points get recorded.
    visited = []
    # Direction of the previous step; starts as the zero vector.
    previous_direction = np.zeros(len(starting_point))
    while np.linalg.norm(deriv) > tol and count < 1e6:
        if stochastic_injection > 0:
            # formulate a stochastic_deriv (random vector)
            # that is the same norm as your gradient
            stochastic_deriv = ...
        else:
            # no noise requested: use a zero vector
            stochastic_deriv = np.zeros(len(starting_point))

        # new direction
        direction = ...
        # calculate new point, don't forget momentum!
        new_point = ...

        if func(new_point) < func(starting_point):
            # good step
            ...
        else:
            # bad step
            if step_size < 1e-5:
                # step size too small, zero out the previous direction
                # since we know it is a bad direction
                # (step_size itself is left unchanged in this branch)
                previous_direction = np.zeros(len(starting_point))
            else:
                step_size = ...
        count += 1

    return {
        "x": starting_point,
        "evaluation": func(starting_point),
        "path": np.array(visited)
    }

In [None]:
# test SGDM
# TODO: placeholder cell - presumably run sgdm here.
...