In [5]:
using Plots
import LinearAlgebra
LA = LinearAlgebra

LinearAlgebra

# Exercise 2.2

Find the inverse of the matrix X defined below.

In [9]:
# 3×3 integer matrix for this exercise; rows are separated by line breaks.
X = [2 0  2
     2 0 -2
     0 1  1]

# ======================================================
# Xinv = 
# ======================================================

3×3 Matrix{Int64}:
 2  0   2
 2  0  -2
 0  1   1

# Exercise 2.4
Implement the backtracking line search algorithm.

In [None]:
"""
    linesearch(f, df, x, d; p=0.8, beta=1.0*10^-4)

Backtracking line search: return a step length factor `alpha` such that the step
`x + alpha*d` satisfies the Armijo (sufficient decrease) condition

    f(x + alpha*d) <= f(x) + beta * alpha * (df(x) ⋅ d)

The search starts at `alpha = 1.0` and multiplies `alpha` by `p` until the condition holds.

# Arguments
- `f`: objective function
- `df`: gradient of the objective function
- `x`: current point (scalar or vector)
- `d`: step direction (same shape as `x`)

# Keywords
- `p`: step size reduction factor, `p ∈ (0, 1)`
- `beta`: fraction of the slope along `d` that the step must achieve as decrease

# Returns
The accepted step length factor `alpha`.

# Throws
- `ArgumentError` if `p` is not strictly between 0 and 1 (the loop could never shrink
  the step otherwise).
"""
function linesearch(f, df, x, d; p=0.8, beta = 1.0*10^-4)
    0 < p < 1 || throw(ArgumentError("p must lie in (0, 1), got $p"))

    alpha = 1.0 # initialize step length factor

    # Both quantities are independent of alpha, so evaluate them once.
    fx = f(x)
    slope = sum(df(x) .* d) # directional derivative along d; valid for scalars and vectors

    # Shrink the step until the Armijo condition is satisfied.
    while f(x + alpha * d) > fx + beta * alpha * slope
        alpha *= p
    end

    return alpha
end;

# Exercise 2.5
Implement the gradient descent algorithm

In [None]:
"""
    gradient_descent(f, df, x0; eps=0.0001, maxiters=1000)

Minimize `f` by gradient descent starting from `x0`.

Every iteration steps along the negative gradient `-df(x)`, with the step length
chosen by `linesearch`. Iteration stops as soon as the gradient norm is at most `eps`
or after `maxiters` steps.

# Arguments
- `f`: objective function
- `df`: gradient of the objective function
- `x0`: initial point (scalar or vector); it is copied and not modified

# Keywords
- `eps`: stopping criterion on the norm of the gradient
- `maxiters`: max number of gradient descent steps

# Returns
A tuple `(x, trace)`. `x` is the final point. `trace` stores one column per visited
point, starting with `x0`; each column holds the point followed by the function value
there. If no step is taken, `trace` is just the single initial column (a vector).
"""
function gradient_descent(f, df, x0; eps=0.0001, maxiters = 1000)
    x = copy(x0) # Make a copy of initial point to prevent changing x0 by manipulating x
    trace = [x; f(x)] # Store initial point and evaluated function at initial point

    for _ in 1:maxiters
        # 1. The gradient determines the descent direction at the current point.
        g = df(x)

        # 2. Stop once the gradient is (numerically) zero.
        if LA.norm(g) <= eps
            break
        end

        # 3. Refine the step length along the steepest descent direction.
        d = -g
        alpha = linesearch(f, df, x, d)

        # 4. Take the step. Rebinding (instead of mutating) works for scalars and arrays.
        x = x + alpha * d

        trace = hcat(trace, [x; f(x)]) # Append the new location to the trace
        @assert trace[end, end] <= trace[end, end-1] # Assert that the function value did not increase
    end
    return x, trace
end;

# Exercise 2.6

Test your implementation on the _marathon training example_ treated in lectures 1 and 2.

Use gradient descent in combination with backtracking line search to fit a linear model to the data from a training for a marathon.

<img src="figures/laufbahn.jpeg" width="350">

We train for a marathon and measure the covered distance every 10 minutes. We train for only 2 hours to save our strength for the race. How long would a real marathon take us?

In this example we assume that we run at a constant average velocity $v$, i.e., the covered distance $d$ scales linearly with time $t$:
\begin{equation}
    d(t) = vt
\end{equation}

In [None]:
# Training measurements: one sample every 10 minutes
T = collect(10:10:120) # Time durations in minutes (materialized as a Vector)
D = [
    1.88, 4.47, 5.63, 8.13, 8.54, 11.23,
    12.27, 14.23, 15.50, 16.93, 18.69, 21.31,
]; # Distances in kilometers

In [None]:
# Linear distance model: distance covered after time t when moving at constant speed v.
# Passing a vector of times as the first argument yields a vector of predictions.
function m(t,v)
    # t is measured in minutes and v in km/h,
    # so the speed is first expressed per minute (divide by 60).
    speed_per_minute = v/60
    return t*speed_per_minute
end

In [None]:
# Try out different velocities here and compare the prediction with the data.
# ===============================================
v = 15
# ===============================================

# Measured data as a scatter plot
scatter(T, D; label="Measurement", xlabel="t", ylabel="d")
# Overlay the model prediction on the same axes
plot!(T, m(T, v); label="Linear Prediction")

The loss function quantifying the deviation between the data $D = \{(t_{1}, d_{1}), (t_{2}, d_{2}), \dots, (t_{n}, d_{n})\}$ and the model $m(t_i,v)$ can be defined as:
\begin{equation}
l(v) = \frac{1}{n}\sum_{i = 1}^n \big( d_{i} - m(t_{i},v) \big)^2 
\end{equation}
For a given dataset the loss function only depends on the model parameters, in our case the velocity $v$.

In [None]:
# Define the loss function
# Mean squared deviation between the measured distances D and the predictions m(T, v).
# ===============================================
loss(v) = sum(abs2, D .- m(T, v)) / length(D)
# ===============================================

# Define the derivative of the loss function with respect to the velocity v
# With d/dv m(t, v) = t/60 the chain rule gives
#     dl/dv = -(2/n) * sum_i (d_i - m(t_i, v)) * t_i / 60
# ===============================================
dloss(v) = -2 * sum((D .- m(T, v)) .* T) / (60 * length(D))
# ===============================================

In [None]:
# Evaluate and plot the loss and its derivative for velocities between 8 and 15

N = 20 # Number of grid points
Vtest  = range(8, 15, length = N)
Ltest  = map(loss, Vtest)   # loss at every grid velocity
dLtest = map(dloss, Vtest)  # derivative at every grid velocity

plot(Vtest, Ltest; label="l", xlabel="v")
plot!(Vtest, dLtest; label="dl")

In [None]:
# Minimize the loss to find the best-fitting velocity

v0 = 2.0 # Starting point
result,trace = gradient_descent(loss, dloss, v0)
duration_rounded = round(42/result, digits = 2)

println("Optimal velocity [km/h]: ", result)
println("Optimal loss: ", trace[end,end])
# The first trace column is the starting point, so the step count is one less
# than the number of columns.
println("Number of Gradient Steps: ", size(trace,2) - 1)
println("A full marathon of 42 km would take ", duration_rounded, " hours")