
# Gradient Descent — Quadratic and Nonlinear Examples

This notebook reproduces and cleans up your scripts into a **teachable** workflow:
- Quadratic objective $f(x)=\tfrac12 x^\top A x - b^\top x$ with $A\succ 0$ (symmetric positive definite)
  - constant step and **optimal step** (closed-form) variants
  - 3D surface, gradient field, **contour + trajectory**
  - convergence diagnostics
- Nonlinear objective $f(x,y)=(x^2-y)^2+(x-1)^2$ (Rosenbrock-like, minimized at $(1,1)$)
  - constant step and a quadratic-model step (the exact step length for a local quadratic model, with the curvature estimated by finite differences)
  - 3D surface and trajectory on contours

> One plot per figure. Only Matplotlib is used.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import norm

%matplotlib inline

def quadratic_value(A, b, x):
    """Evaluate the quadratic objective f(x) = 1/2 x^T A x - b^T x at the point x."""
    half_x = 0.5 * x
    quadratic_term = half_x @ (A @ x)
    linear_term = b @ x
    return quadratic_term - linear_term

def quadratic_grad(A, b, x):
    """Return A x - b, the gradient of f(x) = 1/2 x^T A x - b^T x for symmetric A."""
    Ax = A @ x
    return Ax - b

def contour_and_quiver_for_quadratic(A, b, xlim=(-10,10), ylim=(-10,10), ngrid=100, nvec=12):
    """Plot the contours and the gradient field of a 2-D quadratic.

    The objective is f(x) = 1/2 x^T A x - b^T x. Two separate figures are
    shown: the level curves of f on an ``ngrid x ngrid`` grid, and arrows of
    the field A x - b on a coarser ``nvec x nvec`` grid. A x - b is the
    gradient of f when A is symmetric, and is the same expression that
    ``quadratic_grad`` evaluates.

    Parameters
    ----------
    A : array-like, shape (2, 2)
        Matrix of the quadratic form.
    b : array-like, shape (2,)
        Linear term.
    xlim, ylim : (float, float)
        Plot window along x and y.
    ngrid : int
        Number of grid points per axis for the contour plot.
    nvec : int
        Number of arrows per axis for the gradient field.

    Returns
    -------
    None
        The function only draws and shows the two figures.
    """
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)

    def _value_and_gradient(Xm, Ym):
        # Stack the grid into (..., 2) points so one matrix product handles
        # every grid point at once instead of a Python double loop.
        pts = np.stack([Xm, Ym], axis=-1)
        A_pts = pts @ A.T                      # A @ p for every point p
        values = 0.5 * np.sum(pts * A_pts, axis=-1) - pts @ b
        return values, A_pts - b

    # contour
    xs = np.linspace(xlim[0], xlim[1], ngrid)
    ys = np.linspace(ylim[0], ylim[1], ngrid)
    X, Y = np.meshgrid(xs, ys)
    Z, _ = _value_and_gradient(X, Y)

    plt.figure()
    plt.contour(X, Y, Z, levels=20)
    plt.xlabel('x'); plt.ylabel('y'); plt.title('Quadratic: contours')
    plt.grid(True)
    plt.show()

    # gradient field (sparser grid for arrows), computed from the A and b
    # that were passed in rather than from one hard-coded example
    xg = np.linspace(xlim[0], xlim[1], nvec)
    yg = np.linspace(ylim[0], ylim[1], nvec)
    Xg, Yg = np.meshgrid(xg, yg)
    _, G = _value_and_gradient(Xg, Yg)
    U, V = G[..., 0], G[..., 1]

    plt.figure()
    plt.quiver(Xg, Yg, U, V, color='g')
    plt.xlim(xlim); plt.ylim(ylim)
    plt.xlabel('x'); plt.ylabel('y'); plt.title('Quadratic: gradient field')
    plt.grid(True)
    plt.show()



## Gradient Descent Variants

- **Constant step**: $x_{k+1}=x_k - s\,\nabla f(x_k)$.
- **Optimal step** (quadratic only): along the steepest-descent direction $d_k=-\nabla f(x_k)$,
  the minimizer of $s \mapsto f(x_k + s\,d_k)$ has a **closed form**:
  $$s_k^{*} = \frac{\|d_k\|^2}{d_k^\top A\, d_k}.$$


In [None]:
def gd_constant_step(grad, x0, step, tol=1e-6, itmax=10_000, callback=None):
    """Gradient descent with a fixed step length: x_{k+1} = x_k - step * grad(x_k).

    Parameters
    ----------
    grad : callable
        Maps a point (1-D float array) to the gradient at that point.
    x0 : array-like
        Starting point. Any sequence or array is accepted; it is copied and
        converted to float, so the caller's object is never modified.
    step : float
        Constant step length.
    tol : float
        Iteration stops once the Euclidean norm of the gradient is <= tol.
    itmax : int
        Maximum number of iterations.
    callback : callable or None
        If given, called as ``callback(k, x, g)`` after every update, with the
        1-based iteration index, the new iterate and the gradient at it.

    Returns
    -------
    numpy.ndarray
        All iterates including the start, one per row.
    """
    # np.array (not x0.astype) so that lists/tuples work too; always a fresh copy.
    x = np.array(x0, dtype=float)
    traj = [x.copy()]
    k = 0
    g = grad(x)
    while norm(g) > tol and k < itmax:
        x = x - step * g          # step along the steepest-descent direction -g
        traj.append(x.copy())
        g = grad(x)
        k += 1
        if callback is not None:
            callback(k, x, g)
    return np.array(traj)

def gd_optimal_step_quadratic(A, b, x0, tol=1e-6, itmax=10_000, callback=None):
    """Steepest descent with exact line search on f(x) = 1/2 x^T A x - b^T x.

    At each iterate the direction is d = -(A x - b) and the step length is the
    exact minimizer of s -> f(x + s d), namely s = (d^T d) / (d^T A d).

    Parameters
    ----------
    A : array-like, shape (n, n)
        Matrix of the quadratic; must be positive definite for the step
        formula to be a minimizer.
    b : array-like, shape (n,)
        Linear term.
    x0 : array-like, shape (n,)
        Starting point; copied and converted to float.
    tol : float
        Iteration stops once the Euclidean norm of the gradient is <= tol.
    itmax : int
        Maximum number of iterations.
    callback : callable or None
        If given, called as ``callback(k, x, g, s)`` after every update, with
        the 1-based iteration index, the new iterate, the gradient at it and
        the step length that was just used.

    Returns
    -------
    numpy.ndarray
        All iterates including the start, one per row.

    Raises
    ------
    ValueError
        If the curvature d^T A d along a descent direction is not strictly
        positive (A is not positive definite), in which case the closed-form
        step is not a minimizer.
    """
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)
    x = np.array(x0, dtype=float)   # fresh float copy; lists/tuples accepted
    traj = [x.copy()]
    k = 0
    g = quadratic_grad(A, b, x)
    while norm(g) > tol and k < itmax:
        d = -g
        curvature = d @ (A @ d)
        # "not > 0" also catches NaN, which a plain "<= 0" test would let through.
        if not curvature > 0:
            raise ValueError(
                "gd_optimal_step_quadratic: non-positive curvature d^T A d "
                "along the descent direction; A must be positive definite"
            )
        s = (d @ d) / curvature     # exact line-minimizer for a quadratic
        x = x + s*d
        traj.append(x.copy())
        g = quadratic_grad(A, b, x)
        k += 1
        if callback is not None:
            callback(k, x, g, s)
    return np.array(traj)



## Demo 1 — Quadratic objective

We use your matrix $A=\begin{bmatrix}2&-1\\-1&2\end{bmatrix}$ and $b=(1,1)$.
- The minimizer is the solution of $Ax=b$: $x^{*}=A^{-1}b = (1, 1)$.
- We show trajectories for constant step and optimal step.


In [None]:
A = np.array([[2., -1.],
              [-1., 2.]])
b = np.array([1., 1.])
x_star = np.linalg.solve(A, b)
print("True minimizer x* =", x_star)

# Visuals
contour_and_quiver_for_quadratic(A, b, xlim=(-7,7), ylim=(-7,7), ngrid=120, nvec=16)

# Trajectories from the same start
x0 = np.array([5., 7.])

traj_cst = gd_constant_step(lambda x: quadratic_grad(A,b,x), x0, step=1e-2, tol=1e-8)
traj_opt = gd_optimal_step_quadratic(A, b, x0, tol=1e-8)

# Plot contours + both trajectories
xs = np.linspace(-10,10,200); ys = np.linspace(-10,10,200)
X, Y = np.meshgrid(xs, ys)
# f = 1/2 x^T A x - b^T x written out on the grid, using the entries of A and b
# so the contours stay in sync with the problem solved above.
Z = 0.5*(A[0,0]*X**2 + (A[0,1] + A[1,0])*X*Y + A[1,1]*Y**2) - (b[0]*X + b[1]*Y)
plt.figure()
cs = plt.contour(X, Y, Z, levels=25)
plt.plot(traj_cst[:,0], traj_cst[:,1], 'b.-', label='GD const step (s=1e-2)')
plt.plot(traj_opt[:,0], traj_opt[:,1], 'r.-', label='GD optimal step')
plt.plot([x_star[0]], [x_star[1]], 'ko', label='x*')
plt.xlabel('x'); plt.ylabel('y'); plt.title('Quadratic: trajectories on contours')
plt.legend(); plt.show()

# Convergence (distance to optimum)
def dist_to_opt(traj, xopt):
    """Return the Euclidean distance from each iterate in ``traj`` to ``xopt`` as a 1-D array."""
    distances = []
    for point in traj:
        distances.append(norm(point - xopt))
    return np.array(distances)

# Distance to the minimizer per iteration, log-scaled y axis, one curve per method
fig, ax = plt.subplots()
ax.semilogy(dist_to_opt(traj_cst, x_star), label='const step')
ax.semilogy(dist_to_opt(traj_opt, x_star), label='optimal step')
ax.set_xlabel('iteration')
ax.set_ylabel('||x_k - x*||')
ax.set_title('Convergence (distance to optimum)')
ax.legend()
plt.show()



### Quadratic 3D surface


In [None]:
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

xs = np.linspace(-5,5,100); ys = np.linspace(-5,5,100)
X, Y = np.meshgrid(xs, ys)
# f = 1/2 x^T A x - b^T x on the grid, built from the A and b defined in the
# Demo 1 cell so the surface always matches the problem being solved.
Z = 0.5*(A[0,0]*X**2 + (A[0,1] + A[1,0])*X*Y + A[1,1]*Y**2) - (b[0]*X + b[1]*Y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z)
ax.set_xlabel('x'); ax.set_ylabel('y'); ax.set_zlabel('f(x,y)')
ax.set_title('Quadratic surface')
plt.show()



## Demo 2 — Nonlinear objective

$f(x,y) = (x^2 - y)^2 + (x-1)^2$.  
Gradient:
$$\nabla f = \begin{bmatrix}2(x-1) + 4x(x^2 - y)\\ -2(x^2 - y)\end{bmatrix}.$$

We will try a constant step and a **quadratic-model optimal step**:
$$ s_k = \frac{\langle g_k, g_k\rangle}{\langle g_k, H_k g_k\rangle} $$
where $g_k=\nabla f(x_k)$ and $H_k$ is the Hessian of $f$ at $x_k$. The Hessian is never formed: the curvature term $\langle g_k, H_k g_k\rangle$ is estimated by a central second difference of $f$ along the direction of $g_k$.
This mimics the exact step on a local quadratic model (secant-like).


In [None]:
def f_nl(X):
    """Nonlinear test objective f(x, y) = (x^2 - y)^2 + (x - 1)^2, with X = (x, y)."""
    x = X[0]
    y = X[1]
    parabola_gap = x**2 - y
    return parabola_gap**2 + (x - 1)**2

def grad_nl(X):
    """Gradient of f(x, y) = (x^2 - y)^2 + (x - 1)^2 at X = (x, y), as a length-2 array."""
    x = X[0]
    y = X[1]
    parabola_gap = x**2 - y
    df_dx = 2*(x-1) + 4*x*parabola_gap
    df_dy = -2*parabola_gap
    return np.array([df_dx, df_dy])

def directional_curvature(f, x, d, h=1e-4):
    """Approximate the curvature of f at x along the UNIT direction u = d / ||d||.

    Uses the central second difference (f(x + h u) - 2 f(x) + f(x - h u)) / h^2,
    which approximates u^T H u, where H is the Hessian of f at x. Because d is
    normalized first, the result does not depend on the length of d; the
    un-normalized quantity is d^T H d = ||d||^2 * (returned value).

    Parameters
    ----------
    f : callable
        Scalar objective taking a 1-D array.
    x : numpy.ndarray
        Point at which the curvature is estimated.
    d : numpy.ndarray
        Direction (any length; a zero vector yields a zero direction).
    h : float
        Finite-difference step taken along the unit direction.

    Returns
    -------
    float
        Estimate of u^T H u.
    """
    u = d / (norm(d) + 1e-15)   # small offset avoids 0/0 for a zero direction
    return (f(x + h*u) - 2*f(x) + f(x - h*u)) / (h**2)

def gd_optimal_step_quadratic_model(f, grad, x0, tol=1e-8, itmax=50):
    """Steepest descent whose step is exact for a local quadratic model of f.

    For the direction d = -grad(x), the minimizer of the quadratic model along
    d is s = (d^T d) / (d^T H d). ``directional_curvature`` returns the
    curvature c = u^T H u for the unit direction u = d / ||d||, and
    d^T H d = ||d||^2 * c, so the step simplifies to s = 1 / c.

    Parameters
    ----------
    f : callable
        Scalar objective taking a 1-D array.
    grad : callable
        Gradient of f.
    x0 : array-like
        Starting point; copied and converted to float.
    tol : float
        Iteration stops once the Euclidean norm of the gradient is <= tol.
    itmax : int
        Maximum number of iterations.

    Returns
    -------
    numpy.ndarray
        All iterates including the start, one per row.
    """
    x = np.array(x0, dtype=float)   # fresh float copy; lists/tuples accepted
    traj = [x.copy()]
    k = 0
    g = grad(x)
    while norm(g) > tol and k < itmax:
        d = -g
        curv = directional_curvature(f, x, d, h=1e-4)
        # "not >" also routes NaN curvature to the fallback.
        if not curv > 1e-12:
            s = 1e-3            # flat or negative curvature: fall back to a safe small step
        else:
            # curv is the unit-direction curvature, so the model-optimal step
            # is 1/curv; dividing ||d||^2 by it would overshoot by ||d||^2.
            s = np.clip(1.0 / curv, 1e-4, 1.0)  # guardrails
        x = x + s*d
        traj.append(x.copy())
        g = grad(x)
        k += 1
    return np.array(traj)


In [None]:
# Surface of the nonlinear objective on the square [-1.5, 1.5] x [-1.5, 1.5]
xs = np.linspace(-1.5, 1.5, 120)
ys = np.linspace(-1.5, 1.5, 120)
X, Y = np.meshgrid(xs, ys)
Z = (X**2 - Y)**2 + (X - 1)**2
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
ax.plot_surface(X, Y, Z)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f')
ax.set_title('Nonlinear surface')
plt.show()

# Contours + trajectories
# Both methods start from the same point, inside the contour window, so the
# trajectories and the objective-decrease curves are directly comparable.
x0 = np.array([-1.5, -1.5])
traj_cst = gd_constant_step(grad_nl, x0, step=1e-3, tol=1e-10, itmax=10_000)
traj_opt = gd_optimal_step_quadratic_model(f_nl, grad_nl, x0, tol=1e-10, itmax=200)

plt.figure()
plt.contour(X, Y, Z, levels=30)
plt.plot(traj_cst[:,0], traj_cst[:,1], 'b.-', label='GD const step (s=1e-3)')
plt.plot(traj_opt[:,0], traj_opt[:,1], 'r.-', label='GD optimal step (quadratic model)')
plt.xlabel('x'); plt.ylabel('y'); plt.title('Nonlinear: trajectories on contours')
plt.legend(); plt.show()

# Value along iterations
def values_along_traj(f, traj):
    """Evaluate ``f`` at every iterate of ``traj`` and return the values as an array."""
    values = list(map(f, traj))
    return np.array(values)

# Objective value per iteration, log-scaled y axis, one curve per method
fig, ax = plt.subplots()
ax.semilogy(values_along_traj(f_nl, traj_cst), label='const step')
ax.semilogy(values_along_traj(f_nl, traj_opt), label='optimal step (model)')
ax.set_xlabel('iteration')
ax.set_ylabel('f(x_k)')
ax.set_title('Nonlinear: objective decrease')
ax.legend()
plt.show()
