# IEMS 351 Lab 6, Fall 2024
# Finite-Difference Derivative Approximations 
References: Nocedal, Jorge, and Stephen J. Wright. Numerical Optimization (Second Edition). New York, NY: Springer, 2006.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

##  Forward-difference 
Recall that the gradient of $f:\mathbb{R}^n \to \mathbb{R}$ is 
$$
\nabla f(x) = \begin{bmatrix}
\frac{\partial f}{\partial x_1} \\
\vdots \\
\frac{\partial f}{\partial x_n}
\end{bmatrix}.
$$
With $e_i$ denoting the $i$-th unit vector and $\epsilon > 0$ a small step size, the forward-difference approximation of $\frac{\partial f}{\partial x_i} (x)$ is 
$$
\frac{\partial f}{\partial x_i} (x) \approx \frac{f(x + \epsilon e_i) - f(x)}{\epsilon}, \quad i = 1,2,\ldots,n.
$$

In [None]:
def forward_difference(f, x, e, epsilon):
    """Approximate a directional derivative of ``f`` at ``x`` by a forward difference.

    Args:
        f: Callable evaluated at ``x`` and at the shifted point ``x + e * epsilon``.
        x: Point at which the derivative is approximated.
        e: Direction of the step (``1`` for a scalar ``x``, or a unit vector).
        epsilon: Step size; also the denominator of the difference quotient.

    Returns:
        The quotient ``(f(x + e * epsilon) - f(x)) / epsilon``.
    """
    shifted_value = f(x + e * epsilon)
    base_value = f(x)
    return (shifted_value - base_value) / epsilon

In [None]:
# Define example function and its true derivative
def f(x):
    """Example function for the derivative checks: the sine of ``x``."""
    sine_value = np.sin(x)
    return sine_value


def df(x):
    """Exact derivative of the sine example: the cosine of ``x``."""
    cosine_value = np.cos(x)
    return cosine_value

In [None]:
# Compare the forward-difference estimate with the exact derivative at x = 1,
# using f and df as defined in the cell above.
x = 1
e = 1
epsilon = 1e-6
df_correct = df(x)
df_estimate = forward_difference(f, x, e, epsilon)

print("Derivative of f at point x=%e with stepsize epsilon=%e:" % (x, epsilon))
print("Estimate...: %15.8e" % df_estimate)
print("Correct ...: %15.8e" % df_correct)
print("Error   ...: %15.8e" % abs(df_correct - df_estimate))

In [None]:
def plot_df_error(f, df, x, smalltest_exponent=8):
    """Plot the forward-difference error at ``x`` against the step size.

    The step sizes ``10**0, 10**-1, ..., 10**-smalltest_exponent`` are passed
    to ``forward_difference`` as one array, so ``f`` must accept array input.
    The absolute error with respect to ``df(x)`` is drawn as red dots on the
    current axes, with logarithmic scaling on both axes.

    Args:
        f: Function to differentiate.
        df: Exact derivative of ``f``, used as the reference value.
        x: Point at which the derivative is approximated.
        smalltest_exponent: The smallest step size used is
            ``10**-smalltest_exponent``.
    """
    step_sizes = np.power(10.0, -np.arange(smalltest_exponent + 1))
    exact = df(x)
    approx = forward_difference(f, x, 1, step_sizes)
    abs_error = np.abs(exact - approx)

    axes = plt.gca()
    axes.set_yscale("log")
    axes.set_xscale("log")
    axes.plot(step_sizes, abs_error, "ro")

In [None]:
# Log-log plot of the forward-difference error at x = 1
x = 1
plot_df_error(f=f, df=df, x=x)

In [None]:
# Same error plot for exp, which is its own derivative
x = 1
plot_df_error(np.exp, np.exp, x)

## Central Difference 
The central-difference approximation of $\frac{\partial f}{\partial x_i} (x)$ is 
$$
\frac{\partial f}{\partial x_i} (x) \approx \frac{f(x + \epsilon e_i) - f(x - \epsilon e_i)}{2\epsilon}, \quad i = 1,2,\ldots,n.
$$

In [None]:
def central_difference(f, x, e, epsilon):
    """Approximate a directional derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable evaluated at ``x + e * epsilon`` and ``x - e * epsilon``.
        x: Point at which the derivative is approximated.
        e: Direction of the step (``1`` for a scalar ``x``, or a unit vector).
        epsilon: Step size taken on each side of ``x``.

    Returns:
        The quotient ``(f(x + e * epsilon) - f(x - e * epsilon)) / (2 * epsilon)``.
    """
    step = e * epsilon
    value_ahead = f(x + step)
    value_behind = f(x - step)
    return (value_ahead - value_behind) / (2 * epsilon)

In [None]:
# Compare the central-difference estimate with the exact derivative at x = 0
x = 0
epsilon = 1e-6
df_correct = df(x)
df_estimate = central_difference(f, x, 1, epsilon)

print("Derivative of f at point x=%e with stepsize epsilon=%e:" % (x, epsilon))
print("Estimate...: %15.8e" % df_estimate)
print("Correct ...: %15.8e" % df_correct)
print("Error   ...: %15.8e" % abs(df_correct - df_estimate))

In [None]:
def plot_df_error_central(f, df, x, smalltest_exponent=5):
    """Plot the central-difference error at ``x`` against the step size.

    The step sizes ``10**0, 10**-1, ..., 10**-smalltest_exponent`` are passed
    to ``central_difference`` as one array, so ``f`` must accept array input.
    The absolute error with respect to ``df(x)`` is drawn as red dots on the
    current axes, with logarithmic scaling on both axes.

    Args:
        f: Function to differentiate.
        df: Exact derivative of ``f``, used as the reference value.
        x: Point at which the derivative is approximated.
        smalltest_exponent: The smallest step size used is
            ``10**-smalltest_exponent``.
    """
    step_sizes = np.power(10.0, -np.arange(smalltest_exponent + 1))
    exact = df(x)
    approx = central_difference(f, x, 1, step_sizes)
    abs_error = np.abs(exact - approx)

    axes = plt.gca()
    axes.set_yscale("log")
    axes.set_xscale("log")
    axes.plot(step_sizes, abs_error, "ro")

In [None]:
# Log-log plot of the central-difference error at x = 1
x = 1
plot_df_error_central(f, df, x)

In [None]:
# Same error plot for exp, which is its own derivative
x = 1
plot_df_error_central(np.exp, np.exp, x)

## Exercises 
Consider the function 
$$
f(x) = 2 (x_2 - x_1^2)^2 + (x_1 - 1)^2
$$
The gradient of $f(x)$ is 
$$
\nabla f(x) = \begin{bmatrix}
8(x_1^3 - x_1 x_2) + 2 (x_1 - 1) \\
4(x_2 - x_1^2)
\end{bmatrix}
$$
The Hessian of $f(x)$ is 
$$
\nabla^2 f(x) = \begin{bmatrix}
8(3 x_1^2 - x_2) + 2 & -8 x_1 \\
-8 x_1 & 4
\end{bmatrix}
$$

## Exercise 1
Compute the gradients of $f(x)$ at $(0,0)$ and $(1,2)$ using the forward-difference formula and then compare them with the analytical counterparts.

In [None]:
# Define example function and its true derivative
def f(x):
    """Evaluate ``2 * (x2 - x1**2)**2 + (x1 - 1)**2`` for ``x = (x1, x2)``.

    Args:
        x: Indexable with at least two entries; ``x[0]`` is x1 and ``x[1]`` is x2.

    Returns:
        The function value at ``x``.
    """
    x1, x2 = x[0], x[1]
    valley_term = 2 * (x2 - x1 ** 2) ** 2
    offset_term = (x1 - 1) ** 2
    return valley_term + offset_term


# Analytical gradient
def df(x):
    """Return the analytical gradient of ``2 * (x2 - x1**2)**2 + (x1 - 1)**2``.

    Args:
        x: Indexable with at least two entries; ``x[0]`` is x1 and ``x[1]`` is x2.

    Returns:
        NumPy array ``[df/dx1, df/dx2]``.
    """
    x1, x2 = x[0], x[1]
    return np.array(
        [
            8 * (x1 ** 3 - x1 * x2) + 2 * (x1 - 1),
            4 * (x2 - x1 ** 2),
        ]
    )


# Analytical Hessian
def df2(x):
    """Return the analytical Hessian of ``2 * (x2 - x1**2)**2 + (x1 - 1)**2``.

    Args:
        x: Indexable with at least two entries; ``x[0]`` is x1 and ``x[1]`` is x2.

    Returns:
        2x2 NumPy array of second partial derivatives.
    """
    # Differentiating df/dx1 = 8 * (x1**3 - x1 * x2) + 2 * (x1 - 1) with
    # respect to x1 gives 24 * x1**2 - 8 * x2 + 2; the coefficient of x2 is
    # 8, not 24.
    hessian_00 = 24 * x[0] ** 2 - 8 * x[1] + 2
    # Mixed partial derivative; the Hessian is symmetric.
    hessian_01 = -8 * x[0]
    hessian_10 = hessian_01
    hessian_11 = 4
    return np.array([[hessian_00, hessian_01], [hessian_10, hessian_11]])

Using the forward-difference formula from earlier, we can approximate the gradient and compare it with the analytical one:

In [None]:
def forward_difference_gradient(f, x, epsilon):
    """Approximate the gradient of ``f`` at ``x`` with forward differences.

    Component ``i`` is ``(f(x + epsilon * e_i) - f(x)) / epsilon``, where
    ``e_i`` is the i-th unit vector.

    Args:
        f: Scalar-valued function of a length-n array.
        x: Point at which the gradient is approximated (array, list or tuple).
        epsilon: Step size used for every coordinate.

    Returns:
        NumPy array of length n with the approximate partial derivatives.
    """
    x = np.asarray(x)
    n = len(x)
    grad_approx = np.zeros(n)
    if n == 0:
        # Nothing to differentiate; do not evaluate f on an empty point.
        return grad_approx

    # f(x) does not depend on the coordinate, so evaluate it once: this
    # takes n + 1 function evaluations instead of 2n.
    f_base = f(x)
    for i, unit_vector in enumerate(np.eye(n)):
        grad_approx[i] = (f(x + epsilon * unit_vector) - f_base) / epsilon
    return grad_approx

In [None]:
# Forward-difference gradient versus analytical gradient at (0, 0)
x = np.array([0, 0])
epsilon = 0.000001
df_estimate = forward_difference_gradient(f, x, epsilon)
df_correct = df(x)

print(
    "Derivative of f at point x=(%s, %s) with stepsize epsilon=%e:" % (x[0], x[1], epsilon)
)
print("Estimate...: %15.8e %15.8e" % (df_estimate[0], df_estimate[1]))
# The analytical gradient is the reference value, so it is labelled "Correct".
print("Correct ...: %15.8e %15.8e" % (df_correct[0], df_correct[1]))
print("Error L2-Norm...: %15.8e" % np.linalg.norm(df_correct - df_estimate))

In [None]:
# Forward-difference gradient versus analytical gradient at (1, 2)
x = np.array([1, 2])
epsilon = 0.000001
df_estimate = forward_difference_gradient(f, x, epsilon)
df_correct = df(x)

print(
    "Derivative of f at point x=(%s, %s) with stepsize epsilon=%e:" % (x[0], x[1], epsilon)
)
print("Estimate...: %15.8e %15.8e" % (df_estimate[0], df_estimate[1]))
# The analytical gradient is the reference value, so it is labelled "Correct".
print("Correct ...: %15.8e %15.8e" % (df_correct[0], df_correct[1]))
print("Error L2-Norm...: %15.8e" % np.linalg.norm(df_correct - df_estimate))

In [None]:
def forward_difference_hessian(f, x, epsilon):
    """Approximate the Hessian of ``f`` at ``x`` with forward differences.

    Entry ``(i, j)`` is

        (f(x + eps*(e_i + e_j)) - f(x + eps*e_i) - f(x + eps*e_j) + f(x)) / eps**2

    where ``e_i`` is the i-th unit vector. The formula is symmetric in ``i``
    and ``j``, so only the upper triangle is evaluated and then mirrored; the
    result is therefore exactly symmetric.

    Args:
        f: Scalar-valued function of a length-n array.
        x: Point at which the Hessian is approximated (array, list or tuple).
        epsilon: Step size used for every coordinate.

    Returns:
        ``(n, n)`` NumPy array with the approximate second partial derivatives.
    """
    x = np.asarray(x)
    n = len(x)
    hessian_approx = np.zeros((n, n))
    if n == 0:
        # Nothing to differentiate; do not evaluate f on an empty point.
        return hessian_approx

    unit_vectors = np.eye(n)
    # f(x) and the single-step values are shared by many entries, so compute
    # them once: n*(n+1)/2 + n + 1 evaluations in total instead of 4*n**2.
    f_base = f(x)
    f_single = [f(x + epsilon * e_i) for e_i in unit_vectors]

    for i in range(n):
        for j in range(i, n):
            f_double = f(x + epsilon * (unit_vectors[i] + unit_vectors[j]))
            entry = (f_double - f_single[i] - f_single[j] + f_base) / epsilon**2
            hessian_approx[i, j] = entry
            hessian_approx[j, i] = entry
    return hessian_approx

In [None]:
# Forward-difference Hessian versus analytical Hessian at (0, 0)
x = np.array([0, 0])
# The second difference divides by epsilon**2, so round-off in the function
# values is amplified more strongly than in the gradient approximation.
epsilon = 0.000001
df2_estimate = forward_difference_hessian(f, x, epsilon)
df2_correct = df2(x)

print(
    "Hessian of f at point x=(%s, %s) with stepsize epsilon=%e:" % (x[0], x[1], epsilon)
)
# Print the matrices row by row: "%e" formats scalars, not whole array rows.
for row in df2_estimate:
    print("Estimate...: %15.8e %15.8e" % (row[0], row[1]))
for row in df2_correct:
    print("Correct ...: %15.8e %15.8e" % (row[0], row[1]))
# Compare the Hessians themselves; for a 2-D array np.linalg.norm returns the
# Frobenius norm by default.
print("Error Frobenius-Norm...: %15.8e" % np.linalg.norm(df2_correct - df2_estimate))

## Exercise 2 
Compute the gradients of $f(x)$ at $(0,0)$ and $(1,2)$ using the central-difference formula and then compare them with the analytical counterparts.