# Homework 3.2
- Jaan Tollander de Balsch
- 452056

## (a)

In [1]:
using ForwardDiff
using LinearAlgebra

## Automatic-differentiation helpers shared by the solvers in this notebook.

"Return the gradient of `f` at the point `x`, as computed by `ForwardDiff.gradient`."
function ∇(f, x)
    return ForwardDiff.gradient(f, x)
end

"Return the Hessian of `f` at the point `x`, as computed by `ForwardDiff.hessian`."
function H(f, x)
    return ForwardDiff.hessian(f, x)
end

H (generic function with 1 method)

## Line search : Golden Section method

### Input parameters: 

$\begin{align*}
&\theta: \text{ line search function}\\
&a: \text{ initial lower bound}\\ 
&b: \text{ initial upper bound}
\end{align*}$

In [2]:
"""
    golden_ls(θ, a, b; l = 1e-10)

Golden Section line search: shrink the interval `[a, b]` around a minimizer of the
one-dimensional function `θ` and return the midpoint of the final interval.

# Arguments
- `θ`: line search function of a single scalar argument
- `a`: initial lower bound
- `b`: initial upper bound

# Keywords
- `l`: tolerance (final length of uncertainty); the search stops once `b - a <= l`

# Returns
The center point `(a + b)/2` of the final interval.
"""
function golden_ls(θ, a, b; l = 1e-10)
    α  = 1/Base.MathConstants.φ   # φ = golden ratio. Here α ≈ 0.618

    λ  = a + (1 - α)*(b - a)      # Lower interior point (variables are reused, not indexed by iteration)
    μ  = a + α*(b - a)            # Upper interior point

    θμ = θ(μ)                     # Function value at the upper interior point
    θλ = θ(λ)                     # Function value at the lower interior point

    while b - a > l
        width = b - a             # Remember the interval length to detect a stalled search

        if θλ > θμ
            # Minimizer lies in [λ, b]: drop the left part and reuse μ as the new λ
            a  = λ
            λ  = μ
            μ  = a + α*(b - a)
            θλ = θμ
            θμ = θ(μ)
        else
            # Minimizer lies in [a, μ]: drop the right part and reuse λ as the new μ
            b  = μ
            μ  = λ
            λ  = a + (1 - α)*(b - a)
            θμ = θλ
            θλ = θ(λ)
        end

        # If the interval did not shrink (the bounds are already as close as floating
        # point allows, which happens when `l` is below the spacing of numbers near
        # a and b), no further progress is possible: stop instead of looping forever.
        b - a < width || break
    end

    return (a + b)/2              # Center point of the final interval
end

golden_ls (generic function with 1 method)

## Gradient Descent

### Parameters

$\begin{align*}
f:& \text{ function to minimize}\\
x:& \text{ empty solution vector with starting point } x[1,:] = [x_1^0, x_2^0]\\ 
N:& \text{ maximum number of iterations}
\end{align*}$

### Keyword arguments

$\begin{align*}
\epsilon:& \text{ stopping criterion tolerance}\\
a:& \text{ initial lower bound in Golden Section line search}\\
b:& \text{ initial upper bound in Golden Section line search}
\end{align*}$

### Output:

$\begin{align*}
x:& \text{ solution vector containing points } x^k \text{ at each iteration } 1,\dots,k\\
f(x):& \text{ objective function values at each iteration}\\
k:& \text{ total number of iterations}
\end{align*}$

In [3]:
"""
    Gradient(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

Gradient Descent with a Golden Section line search along the normalized
negative gradient.

`x` is a matrix whose row 1 holds the starting point; rows `2, 3, …` are overwritten
with the successive iterates. At most `N - 1` steps are taken. `ϵ` is the tolerance on
the gradient norm, and `a`, `b` are the initial bounds handed to `golden_ls`.

Returns `(points, values, count)`. When the gradient norm drops below `ϵ` at row `k`,
this is `(x[1:k,:], f at rows 1:k, k - 1)`; otherwise it is `(x, f at rows 1:N, N)`.
"""
function Gradient(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

    # Objective values at the iterates stored in the given rows of x
    objective_at(rows) = [f(x[i, :]) for i in rows]

    for iter in 1:(N - 1)
        point     = x[iter, :]
        grad      = ∇(f, point)
        grad_norm = norm(grad)

        # Stopping condition: norm of the gradient < tolerance
        if grad_norm < ϵ
            return (x[1:iter, :], objective_at(1:iter), iter - 1)
        end

        # Unit-length steepest-descent direction
        direction = -grad / grad_norm

        # Step size from the Golden Section search along that direction
        step = golden_ls(t -> f(point + t * direction), a, b)

        x[iter + 1, :] = point + step * direction
    end

    # Iteration budget exhausted without meeting the stopping condition
    return (x, objective_at(1:N), N)
end

Gradient (generic function with 1 method)

## Newton's method

### Parameters

$\begin{align*}
f:& \text{ function to minimize}\\
x:& \text{ empty solution vector with starting point } x[1,:] = [x_1^0, x_2^0]\\ 
N:& \text{ maximum number of iterations}
\end{align*}$
 
### Keyword arguments

$\begin{align*}
\epsilon:& \text{ stopping criterion tolerance}\\
a:& \text{ initial lower bound in Golden Section line search}\\
b:& \text{ initial upper bound in Golden Section line search}
\end{align*}$

### Output:

$\begin{align*}
x:& \text{ solution vector containing points } x^k \text{ at each iteration } 1,\dots,k\\
f(x):& \text{ objective function values at each iteration}\\
k:& \text{ total number of iterations}
\end{align*}$

In [4]:
"""
    Newton(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

Newton's method with a Golden Section line search along the Newton direction.

`x` is a matrix whose row 1 holds the starting point; rows `2, 3, …` are overwritten
with the successive iterates. At most `N - 1` steps are taken. `ϵ` is the tolerance on
the gradient norm, and `a`, `b` are the initial bounds handed to `golden_ls`.

Returns `(points, values, count)`. When the gradient norm drops below `ϵ` at row `k`,
this is `(x[1:k,:], f at rows 1:k, k - 1)`; otherwise it is `(x, f at rows 1:N, N)`.
"""
function Newton(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

    # Objective values at the first `last` iterates stored in x
    costs(last) = map(i -> f(x[i, :]), 1:last)

    for iter in 1:(N - 1)
        current = x[iter, :]
        grad    = ∇(f, current)

        # Stopping condition: norm of the gradient < tolerance
        if norm(grad) < ϵ
            return (x[1:iter, :], costs(iter), iter - 1)
        end

        # Newton direction: solve the linear system rather than forming the inverse Hessian
        hess      = H(f, current)
        direction = -hess \ grad

        # Step size from the Golden Section search along the Newton direction
        step = golden_ls(t -> f(current + t * direction), a, b)

        x[iter + 1, :] = current + step * direction
    end

    # Iteration budget exhausted without meeting the stopping condition
    return (x, costs(N), N)
end

Newton (generic function with 1 method)

## Conjugate Gradient

### Parameters

$\begin{align*}
f:& \text{ function to minimize}\\
x:& \text{ empty solution vector with starting point } x[1,:] = [x_1^0, x_2^0]\\ 
N:& \text{ maximum number of iterations}
\end{align*}$

### Keyword arguments

$\begin{align*}
\epsilon:& \text{ stopping criterion tolerance}\\
a:& \text{ initial lower bound in Golden Section line search}\\
b:& \text{ initial upper bound in Golden Section line search}
\end{align*}$

### Output:

$\begin{align*}
x:& \text{ solution vector containing points } x^k \text{ at each iteration } 1,\dots,k\\
f(x):& \text{ objective function values at each iteration}\\
k:& \text{ total number of iterations}
\end{align*}$

In [5]:
"""
    Conjugate_Gradient(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

Conjugate Gradient method with Fletcher-Reeves updates, a Golden Section line search,
and a restart to the steepest-descent direction after every `size(x, 2)` steps.

# Arguments
- `f`: function to minimize
- `x`: matrix whose row 1 holds the starting point; rows `2, 3, …` are overwritten with
  the successive iterates. It must have at least `N` rows.
- `N`: maximum number of rows of `x` to fill (at most `N - 1` steps are taken)

# Keywords
- `ϵ`: stopping criterion tolerance on the gradient norm
- `a`, `b`: initial lower/upper bound passed to `golden_ls`

# Returns
`(points, values, count)`. When the gradient norm is below `ϵ` at row `k`, this is
`(x[1:k,:], f at rows 1:k, k - 1)`; otherwise it is `(x, f at rows 1:N, N)`.
"""
function Conjugate_Gradient(f, x, N; ϵ = 1e-6, a = a₀, b = b₀)

    k  = 1                 # Row of x holding the current iterate
    n  = size(x, 2)        # Dimension of x = number of steps between restarts
    ∇f = ∇(f, x[1,:])      # Gradient at the current iterate (kept and reused, not recomputed)

    # A starting point that already satisfies the stopping condition would otherwise make
    # the Fletcher-Reeves ratio 0/0 and fill the iterates with NaN.
    if norm(∇f) < ϵ
        return (x[1:1,:], f.(x[i,:] for i = 1:1), 0)
    end

    d = -∇f                # Initial direction vector: steepest descent

    while k <= N-1         # Go through max iterations N and return if at optimum

        for j = 1:n        # One cycle of n conjugate directions (x rows are used in place of y variables)

            # Never write past row N, even when N-1 is not a multiple of n
            k <= N-1 || break

            xk = x[k,:]
            λ  = golden_ls(t -> f(xk + t*d), a, b)   # Optimal step size along d

            x[k+1,:] = xk + λ*d

            ∇f_next   = ∇(f, x[k+1,:])
            converged = norm(∇f_next) < ϵ

            if !converged
                # Fletcher-Reeves update; the denominator is nonzero because the
                # previous gradient did not satisfy the stopping condition
                α = norm(∇f_next)^2 / norm(∇f)^2
                d = -∇f_next + α*d
            end

            ∇f = ∇f_next
            k  = k + 1

            # Leave the cycle early so a vanishing gradient is never used as the next denominator
            converged && break
        end

        d = -∇f            # Restart from the steepest-descent direction

        if norm(d) < ϵ     # Stopping condition: norm of the gradient < tolerance
            return (x[1:k,:], f.(x[i,:] for i = 1:k), k-1)
        end

    end

    # Iteration budget exhausted without meeting the stopping condition
    return (x, f.(x[i,:] for i = 1:N), N)

end

Conjugate_Gradient (generic function with 1 method)

### Functions to minimize

In [6]:
"""
    f(x)

Test function 1: a quadratic in the first two components of `x`.
"""
function f(x)
    u, v = x[1], x[2]
    return 0.26*(u^2 + v^2) - 0.48*u*v
end

"""
    g(x)

Test function 2: a sum of three exponentials of affine functions of the first two
components of `x`.
"""
function g(x)
    u, v = x[1], x[2]
    return exp(u + 3*v - 0.1) + exp(u - 3*v - 0.1) + exp(-u - 0.1)
end

g (generic function with 1 method)

## (b)

### Test Function 1

In [7]:
## Test Function 1 with Gradient Descent, started from (7, 3).
## NOTE: The optimal solution is (0,0) and the optimal cost is 0.
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [7.0, 3.0]       # Starting point
xg, fg, kg = Gradient(f, x, N)

([7.0 3.0; 4.77889 4.81727; … ; 9.29564e-6 3.98399e-6; 6.3462e-6 6.39716e-6], [5.0, 0.921232, 0.169733, 0.0312727, 0.00576185, 0.0010616, 0.000195594, 3.60372e-5, 6.63968e-6, 1.22333e-6, 2.25393e-7, 4.15278e-8, 7.65128e-9, 1.40971e-9, 2.59722e-10, 4.78508e-11, 8.81689e-12, 1.62458e-12], 17)

In [8]:
## Test Function 1 with Newton's method, started from (7, 3).
## NOTE: The optimal solution is (0,0) and the optimal cost is 0.
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [7.0, 3.0]       # Starting point
xn, fn, kn = Newton(f, x, N)

([7.0 3.0; 1.79593e-10 7.69704e-11], [5.0, 3.29111e-21], 1)

In [9]:
## Test Function 1 with the Conjugate Gradient method, started from (7, 3).
## NOTE: The optimal solution is (0,0) and the optimal cost is 0.
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [7.0, 3.0]       # Starting point
xc, fc, kc = Conjugate_Gradient(f, x, N)

([7.0 3.0; 4.77889 4.81727; -6.2293e-8 5.09639e-8], [5.0, 0.921232, 3.20806e-15], 2)

## (c)

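Because Newton's method uses the second-order (Hessian) approximation of the objective, it needs only one iteration to minimize a quadratic function: the second-order approximation of a quadratic is the function itself, so a single Newton step with an exact line search lands on the minimizer.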

## (d)

### Test Function 2

In [10]:
## Test Function 2 with Gradient Descent, started from (-4, -2).
## NOTE: The optimal solution is approximately (-0.346574, 0.0) with cost 2.55927
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [-4.0, -2.0]     # Starting point
xg, fg, kg = Gradient(g, x, N)

([-4.0 -2.0; -0.0158711 -0.12925; … ; -0.346573 -2.03498e-7; -0.346573 2.69534e-8], [56.0884, 2.83608, 2.64966, 2.59035, 2.56854, 2.5621, 2.56009, 2.55951, 2.55934, 2.55929  …  2.55927, 2.55927, 2.55927, 2.55927, 2.55927, 2.55927, 2.55927, 2.55927, 2.55927, 2.55927], 24)

In [11]:
## Test Function 2 with Newton's method, started from (-4, -2).
## NOTE: The optimal solution is approximately (-0.346574, 0.0) with cost 2.55927
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [-4.0, -2.0]     # Starting point
xn, fn, kn = Newton(g, x, N)

([-4.0 -2.0; -0.742904 0.171378; … ; -0.346958 0.000193064; -0.346574 -1.02522e-8], [56.0884, 2.87923, 2.56265, 2.55927, 2.55927], 4)

In [12]:
## Test Function 2 with the Conjugate Gradient method, started from (-4, -2).
## NOTE: The optimal solution is approximately (-0.346574, 0.0) with cost 2.55927
N = 10000                  # Maximum number of iterations (= rows reserved in x)
a₀, b₀ = -25.0, 25.0       # Line search bounds read by the solver's keyword defaults
x = zeros(N, 2)            # One row per iterate
x[1, :] = [-4.0, -2.0]     # Starting point
xc, fc, kc = Conjugate_Gradient(g, x, N)

([-4.0 -2.0; -0.0158711 -0.12925; … ; -0.346572 -3.72666e-8; -0.346574 -1.94385e-8], [56.0884, 2.83608, 2.65576, 2.5884, 2.55938, 2.5593, 2.55927, 2.55927, 2.55927], 8)