# Exercise 10.2: Space Mission Planning

In [None]:
include("mplstyle.jl")
include("optimization_library.jl")
include("ad.jl")
include("nbody_simulation.jl")

using Distributions
import Dates
import DelimitedFiles
DF = DelimitedFiles;

| Planet | Mass | Distance from Sun |
| --- | --- | --- |
| Sun | 1.989e30 kg | 0 AU |
| Earth | 5.972e24 kg | 1 AU |
| Mercury | 3.30e23 kg | 0.38 AU |
| Mars | 6.4219e23 kg | 1.52 AU |
| Venus | 4.867e24 kg | 0.72 AU |

In [None]:
# Names of the simulated bodies; the sixth entry names the rocket that is appended to the
# body list in a later cell. All tables below use this order (Sun, Mercury, Venus, Earth, Mars).
labels = ["Sun", "Mercury", "Venus", "Earth", "Mars","Rocket"]

# Masses of bodies in kg
m = [1.989e30, 3.30e23, 4.867e24, 5.972e24, 6.4219e23]

# Distance to the sun in m (given as multiples of AU; AU is not defined in this notebook
# section — presumably it comes from one of the included files)
d = [0.00, 0.38, 0.72, 1.00, 1.52] * AU

# Speed of bodies in m/s (the listed values are multiplied by 1000)
s = [0.00, 47.9, 35.0, 29.8, 24.1] * 1000

# Start positions: direction vectors that get scaled by the distances `d`
p = [[0,0],[-1,0],[0,1],[1,0],[0,-1]]

# Velocity directions: direction vectors that get scaled by the speeds `s`
v = [[0,0],[0,-1],[-1,0],[0,1],[1,0]]

In [None]:
# One Body per entry of `m`: scaled start position, scaled velocity, mass and a zero
# 2-vector as fourth constructor argument.
B = map(i -> Body(d[i] * p[i], s[i] * v[i], m[i], [0.0, 0.0]), eachindex(m))

# Simulation settings: tmax is used as the number of steps, Δt as the step size
# (36000.0, i.e. ten hours if the unit is seconds — TODO confirm against nbody_simulation.jl).
tmax = 2000
Δt = 10 * 3600.0

simulate_n_bodies(B, Δt, tmax, "solar_system")

In [None]:
# Repeat the simulation with a rocket appended as sixth body
B = map(i -> Body(d[i] * p[i], s[i] * v[i], m[i], [0.0, 0.0]), eachindex(m))

# Velocity offset of the rocket relative to earth
theta = [10000.0, 1000.0]

# Launch state: earth's position shifted by 0.01 AU in both coordinates and earth's speed
# plus theta. The `let` keeps the helper names out of the notebook's global scope.
let earth = B[4]
    launch_pos = earth.pos + ones(2) * 0.01*AU
    launch_speed = earth.speed + theta
    push!(B, Body(launch_pos, launch_speed, 5.0 * 10^4, [0.0, 0.0]))
end
simulate_n_bodies(B, Δt, tmax, "solar_system_with_rocket")

## Optimization Problem

The loss function is given by:

\begin{align*}
l(\theta) = \| \vec r_{\mathrm{Mars}}^{(k)} - \vec r_{\mathrm{Rocket}}^{(k)} \|^2 + \| \vec r_{\mathrm{Earth}}^{(l)} - \vec r_{\mathrm{Rocket}}^{(l)} \|^2,\qquad \mathrm{with}\; l>k
\end{align*}
Here $\theta$ is the initial velocity vector of the rocket relative to Earth, i.e. it is added to Earth's velocity vector, and $k$ and $l$ denote simulation time steps.

In [None]:
"""
    trajectory_loss(theta::Vector{T}) where T

Simulate the sun, the four planets and a rocket for `tmax` steps of size `Δt` and return a
scalar loss for the rocket's initial velocity offset `theta`.

The rocket starts at Earth's (body 4) position shifted by `0.01*AU` in both coordinates,
with Earth's speed plus `theta`. After every step, `norm2` of the position difference
between the rocket and Mars (body 5) and between the rocket and Earth is recorded.
The result is the smallest rocket–Mars value plus the smallest rocket–Earth value
recorded at or after the step of that closest Mars approach.

Reads the globals `d`, `p`, `s`, `v`, `m`, `AU`, `tmax` and `Δt`.
"""
function trajectory_loss(theta::Vector{T}) where T
    bodies = [Body{T}(d[i]*p[i], s[i]*v[i], m[i], [0.0,0.0]) for i in eachindex(m)]

    # The rocket starts at earth's position and speed plus some offset
    earth = bodies[4]
    rocket = Body{T}(earth.pos + ones(2) * 0.01*AU, earth.speed + theta, 5.0 * 10^4, [0.0,0.0])
    push!(bodies, rocket)

    to_mars = zeros(T, tmax)
    to_earth = zeros(T, tmax)
    for step in 1:tmax
        simulate_step!(bodies, Δt, step)
        # Always re-read the bodies from the list after a step
        rocket_pos = bodies[6].pos
        to_mars[step] = norm2(bodies[5].pos - rocket_pos)
        to_earth[step] = norm2(bodies[4].pos - rocket_pos)
    end

    # Closest approach to Mars, then the closest return to Earth from that step onwards
    closest_mars, arrival = findmin(to_mars)
    closest_earth = minimum(view(to_earth, arrival:lastindex(to_earth)))
    return closest_mars + closest_earth
end

## Loss function topology

To get a feeling for the problem, we will take a look at the loss over the parameter space spanned by $\theta$.

In [1]:
# Number of grid points per axis. The more points, the higher the resolution of the graph;
# you may want to reduce the number of points to reduce the runtime (one loss evaluation
# per grid point).
n_points = 200
# Grid values for the first and second component of θ
θ1_plot = range(-500000,500000,length = n_points)
θ2_plot = range(-500000,500000,length = n_points)

-500000.0:5025.125628140703:500000.0

As each loss evaluation requires a full simulation, computing the data for the loss topology plot might take a while; therefore, let's save the data just in case.

In [None]:
using DelimitedFiles
# Evaluate the loss on the full grid: losses_plot[i, j] is the loss at [θ1_plot[i], θ2_plot[j]]
losses_plot = [trajectory_loss([θ1,θ2]) for θ1 = θ1_plot, θ2 = θ2_plot]
# Store the grid as comma-separated values so it can be reloaded without recomputing
writedlm( "loss_plot_lecture_problem.csv",  losses_plot, ',')

If your notebook kernel dies or you computed the values in the past, just load in the results from last time.

In [None]:
using DelimitedFiles
# Reload the loss grid from the comma-separated file that the compute cell writes
# ("loss_plot_lecture_problem.csv"); the file name must match the one passed to writedlm.
losses_plot = readdlm("loss_plot_lecture_problem.csv", ',')

In [None]:
# Plot the loss as a 3D surface over the (θ1, θ2) grid
# Create grid points: every row of xgrid is θ1_plot, every column of ygrid is θ2_plot
xgrid = repeat(θ1_plot',n_points,1)
ygrid = repeat(θ2_plot,1,n_points);

fontsize = 13

# Enable 3D plotting and switch PyPlot to its GUI mode (presumably opens a separate window)
using3D()
pygui(true);
fig = figure("pyplot_surfaceplot",figsize=(15,10))
ax = fig.add_subplot(1,1,1,projection="3d")
# The grids are passed transposed so that entry (i, j) holds (θ1_plot[i], θ2_plot[j]);
# this assumes losses_plot[i, j] belongs to that same parameter pair.
plot_surface(xgrid', ygrid', losses_plot, rstride=2,
             edgecolors="k", cstride=2, cmap=ColorMap("jet"), alpha=0.3, linewidth=0.25)
# Logarithmic scale on the loss axis
PyPlot.zscale("log")
xlabel(L"\theta_1", fontsize=fontsize)
ylabel(L"\theta_2", fontsize=fontsize)
zlabel(L"loss", fontsize=fontsize)
#show()

In [None]:
# ========================================================================================
# Define the gradient function of trajectory_loss() using the provided methods in ad.jl
# ========================================================================================

In [None]:
# ===========================================================================
# Minimize the loss function using the gradient_descent function from optimization_library.jl
# Hint: Since the loss function is not convex, there is no guarantee that you found the global minimum. However, 
# sometimes a "more optimal" solution than the starting point is already sufficient.
# Run the simulation using your optimal solution. Do you reach Mars and come back to earth within the simulation time?
# We found the minimum in our tests for [-15073.550552647714, -55121.971458839835], if you find a better solution let us know ;D
# ===========================================================================

# Exercise 10.3: Neural Network

In [None]:
using MLDatasets
using Images
using ReverseDiff

In [None]:
# Load the MNIST training and test sets as (images, labels) pairs
# NOTE(review): traindata()/testdata() appear to be deprecated in newer MLDatasets
# releases — confirm against the installed version.
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata();

In [None]:
# Print the label of the k-th training sample and plot its image
# (the image matrix is transposed before it is displayed)
k = 19
println(train_y[k])
Plots.plot(Gray.(train_x[:,:,k]'))

In [None]:
# Sigmoid activation function: maps z to 1 / (1 + exp(-z))
function sigmoid(z)
    denominator = 1.0 + exp(-z)
    return 1.0 / denominator
end

# Softmax function: exponentiate the entries of z and normalize them to sum to one.
# The maximum is subtracted first, so the largest exponent is exp(0).
function softmax(z)
    peak = maximum(z)
    weights = exp.(z .- peak)
    total = sum(weights)
    return weights / total
end

In [None]:
"""
    classify_mnist(theta, im)

Evaluate a two-layer network on the image `im` and return a 10-vector of softmax outputs.

The input is a 28x28 matrix that is flattened with `vec`. The flat parameter vector
`theta` is read in this order:

- `theta[1:100352]`: first-layer weights, reshaped to 128x784
- `theta[100353:100480]`: first-layer bias (128 entries)
- `theta[100481:101760]`: second-layer weights, reshaped to 10x128
- `theta[101761:101770]`: second-layer bias (10 entries)

The first layer applies `sigmoid` element-wise, the second layer applies `softmax`.
"""
function classify_mnist(theta, im)
    n_in, n_hidden, n_out = 28*28, 128, 10

    # End offsets of the four parameter groups inside theta
    w1_end = n_hidden * n_in          # 100352
    b1_end = w1_end + n_hidden        # 100480
    w3_end = b1_end + n_out*n_hidden  # 101760
    b3_end = w3_end + n_out           # 101770

    W1 = reshape(theta[1:w1_end], (n_hidden, n_in))
    b1 = theta[(w1_end + 1):b1_end]
    W3 = reshape(theta[(b1_end + 1):w3_end], (n_out, n_hidden))
    b3 = theta[(w3_end + 1):b3_end]

    hidden = sigmoid.(W1 * vec(im) + b1)
    return softmax(W3 * hidden + b3)
end

In [None]:
# Hyperparameters
# NOTE(review): training_cycles and learning_rate are not referenced by the code visible in
# this notebook (the adam! call passes its own iters and α) — confirm they are still needed.
training_cycles = 10^3; 
learning_rate = 0.1;
# Number of training samples drawn per evaluation of loss_minibatch
batchsize = 128

"""
    loss_minibatch(theta)

Return the summed negative log of the network output at the true label for `batchsize`
randomly drawn training samples.

Every call draws a fresh batch with `rand`, so repeated calls with the same `theta`
generally give different values. The label is shifted by one to index the output vector
(labels presumably start at 0). Reads the globals `batchsize`, `train_x` and `train_y`.
"""
function loss_minibatch(theta)
    total = 0.0
    for _ in 1:batchsize
        sample = rand(1:length(train_y))
        probabilities = classify_mnist(theta, train_x[:,:,sample])
        total = total - log(probabilities[train_y[sample]+1])
    end
    return total
end

In [None]:
# Gradient of loss_minibatch with respect to theta, computed with Reverse Mode AD
function grad_loss_minibatch(theta)
    return ReverseDiff.gradient(loss_minibatch, theta)
end

In [None]:
"""
    adam!(df, θ; iters=1000, α=0.001, β1=0.9, β2=0.999, ϵ=10.0^(-8))

Update `θ` in place with `iters` steps of ADAM, an improved Gradient Descent for
Neural Networks (Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic
optimization).

# Arguments
- `df`: function that returns the gradient for a given `θ`.
- `θ`: parameter array; overwritten with the updated parameters.

# Keywords
- `iters`: number of update steps.
- `α`: step size.
- `β1`, `β2`: decay rates of the first and second moment estimates.
- `ϵ`: constant added to the denominator of the update.

Prints one "." per step and returns `nothing`.
"""
function adam!(df, θ; iters=1000, α=0.001, β1 = 0.9, β2=0.999, ϵ=10.0^(-8))
    moment1 = zeros(size(θ))
    moment2 = zeros(size(θ))
    moment1_hat = zeros(size(θ))
    moment2_hat = zeros(size(θ))
    for step in 1:iters
        print(".")
        grad = df(θ)
        # Exponential moving averages of the gradient and of the squared gradient
        moment1 .= β1 .* moment1 .+ (1 - β1) .* grad
        moment2 .= β2 .* moment2 .+ (1 - β2) .* grad .^ 2
        # Bias correction for the zero-initialized averages
        moment1_hat .= moment1 ./ (1 - β1^step)
        moment2_hat .= moment2 ./ (1 - β2^step)
        θ .= θ .- (α .* moment1_hat ./ (sqrt.(moment2_hat) .+ ϵ))
    end
end;

In [None]:
# Start from random model parameters (uniform in [0, 1)) rather than zeros:
# with an all-zero theta every unit of the first sigmoid layer would start out identical.
# 101770 is the total number of parameters read by classify_mnist.
theta = rand(101770);

# train
adam!(grad_loss_minibatch, theta, iters=500, α=0.05)

In [None]:
# Write the trained parameters into a file (comma-delimited, one value per line)
DF.writedlm("theta.csv", theta, ',');

In [None]:
# Read the parameters from file as a shortcut (instead of training again).
# The file is written comma-delimited, so it is read with the same delimiter; `vec` turns
# the single-column matrix returned by readdlm back into a vector like `theta`.
theta2 = vec(DF.readdlm("theta.csv", ','));

In [None]:
# Classify a randomly chosen test image with the parameters read from file,
# print the result and plot the image
k = rand(1:length(test_y))
res = classify_mnist(theta2, test_x[:,:,k])
println("Selected Sample: ", k)
println("Prediction Distribution: ", res)
# Output index i is reported as category i-1
println("Predicted Category: ", argmax(res)-1)
Plots.plot(Gray.(test_x[:,:,k]'))