# Descent Methods

In [25]:
using ForwardDiff: gradient
using LinearAlgebra: norm

In [19]:
function backtracking(f, x, Δx; t₀=1, β=0.5, α=0.25)
  """
  Performs backtracking line search to find a suitable step length.

  Args:
    f (Function): the objective function
    x (Vector): the current point
    t₀ (Float64): initial step length
    Δx (Vector): the search direction
    β (Float64): backtracking factor. β ∈ (0,1)
    α (Float64): sufficient decrease condition parameter. α ∈ (0, 0.5)
  """

  t = t₀
  fₓ = f(x)

  while f(x + t * Δx) > fₓ + α * t * dot(gradient(f, x), Δx)
    t *= β
  end

  return t
end;

## Gradient Descent

In [13]:
function gradient_descent(f, x0; tol=1e-8, max_iters=1000)
  """
  Performs the gradient descent algorithm with backtracking line search

  Args:
    f (Function): the objective function
    x (Vector): the initial point
    tol (Float64): tolerance to break the loop
    max_iters (Int64): maximum number of iterations
  """
  x = copy(x0)
  f_val = f(x)

  iteration = 0

  for i = 1:max_iters
    iteration = i
    ∇f = gradient(f, x)

    if norm(∇f) < tol
      break
    end

    t = backtracking(f, x, -∇f)

    x = x - t * ∇f
    f_val = f(x)
  end

  return x, f_val, iteration
end;

In [33]:
f(x) = x[1]^2 - x[1]
x₀ = [1000]

x, f_min, iterations = @time gradient_descent(f, x₀, max_iters=1000)
println("x_min = $x at f(x_min)=$f_val ($iter iterations)")

  0.071095 seconds (22.19 k allocations: 1.438 MiB, 99.68% compilation time: 93% of which was recompilation)
x_min = [0.5] at f(x_min)=2.5592666966582254 (13 iterations)


In [34]:
# 2-dimensional function
g(x) = x[1]^2 + x[2]^2 - 1
x₀ = [1.0, 1.0]

x, g_val, iterations = @time gradient_descent(g, x₀, max_iters=1000)
println("x_min = $x at f(x_min)=$g_val ($iter iterations)")

  0.149230 seconds (19.39 k allocations: 1.248 MiB, 99.91% compilation time: 97% of which was recompilation)
x_min = [0.0, 0.0] at f(x_min)=-1.0 (13 iterations)


## Steepest Descent

In [21]:
function steepest_descent(f, x₀, Δxsd, norm; tol=1e-12, max_iters=1000)
  x = copy(x₀)
  f_val = f(x)

  iter = 0
  for i = 1:max_iters
    iter = i
    ∇f = gradient(f, x)

    if norm(∇f) < tol
      break
    end

    Δx = Δxsd(f, x)

    t = backtracking(f, x, Δx, α=0.1, β=0.7)
    x = x + t * Δx
    f_val = f(x)
  end

  return x, f_val, iter

end;

In [28]:
f(x) = exp(x[1] + 3 * x[2] - 0.1) + exp(x[1] - 3 * x[2] - 0.1) + exp(-x[1] - 0.1)
P1 = [2 0; 0 8]
# P2 = [8 0; 0 2]
norm_fn(z) = z' * P1 * z
Δxsd(f, x) = -inv(P1) * gradient(f, x)

x, f_val, iter = steepest_descent(f, [0, 0], Δxsd, norm_fn)

println("x_min = $x at f(x_min)=$f_val ($iter iterations)")

x_min = [-0.346573502949323, 0.0] at f(x_min)=2.5592666966582254 (13 iterations)
