# Symbolic Differentiation

In [1]:
"""
    g(x)

Evaluate the polynomial `4x(1 - x)(1 + x)^2` at `x`.
"""
function g(x)
    return 4x * (1 - x) * (1 + x)^2
end

g (generic function with 1 method)

In [2]:
using SymPy
x = Sym("x")

x

In [3]:
dg = diff(g)

                                   2                    2
4⋅x⋅(1 - x)⋅(2⋅x + 2) - 4⋅x⋅(x + 1)  + 4⋅(1 - x)⋅(x + 1) 

In [4]:
dg(x=>10)

-17116

In [5]:
# Simplify the derivative `dg` computed above directly instead of re-typing its printed
# form by hand, so the two expressions cannot drift apart.
dg_simple = simplify(dg)

      3       2          
- 16⋅x  - 12⋅x  + 8⋅x + 4

In [6]:
dg_simple(x=>10)

-17116

# Automatic Differentiation

In [7]:
include("ad.jl")

gradient (generic function with 2 methods)

In [8]:
gradient(g,10)

-17116.0

# Exercise 10.2: Space Mission Planning

In [9]:
include("mplstyle.jl")
include("optimization_library.jl")
include("nbody_simulation.jl")

using Distributions
import Dates
import DelimitedFiles
import ForwardDiff
DF = DelimitedFiles;

| Planet | Mass | Distance from Sun |
| --- | --- | --- |
| Sun | 1.989e30 kg | 0 AU |
| Earth | 5.972e24 kg | 1 AU |
| Mercury | 3.30e23 kg | 0.38 AU |
| Mars | 6.4219e23 kg | 1.52 AU |
| Venus | 4.867e24 kg | 0.72 AU |

In [10]:
# Names of the bodies. The sixth label belongs to the rocket, which has no entry in the
# five-element arrays below.
labels = ["Sun", "Mercury", "Venus", "Earth", "Mars","Rocket"]

# Masses of bodies in kg
m = [1.989e30, 3.30e23, 4.867e24, 5.972e24, 6.4219e23]

# Distance to the sun in m (listed as multiples of AU)
d = [0.00, 0.38, 0.72, 1.00, 1.52] * AU

# Speed of bodies in m/s (listed values are scaled by 1000, i.e. presumably km/s)
s = [0.00, 47.9, 35.0, 29.8, 24.1] * 1000

# Start positions: direction vectors that get multiplied by the distances d
p = [[0,0],[-1,0],[0,1],[1,0],[0,-1]]

# Velocity directions: direction vectors that get multiplied by the speeds s
v = [[0,0],[0,-1],[-1,0],[0,1],[1,0]]

5-element Vector{Vector{Int64}}:
 [0, 0]
 [0, -1]
 [-1, 0]
 [0, 1]
 [1, 0]

In [11]:
B = [Body(d[i]*p[i], s[i]*v[i], m[i], [0.0,0.0]) for i=1:length(m)]

Δt = 3600.0 * 10
tmax = 2000

simulate_n_bodies(B,Δt, tmax, "solar_system")

┌ Info: Saved animation to 
│   fn = C:\Users\ben97726\Git-Reps\Optimization\exercises\10_auto_diff\solar_system.gif
└ @ Plots C:\Users\ben97726\.julia\packages\Plots\PomtQ\src\animation.jl:114


([0.0 -5.6847190866e10 … 1.495978707e11 0.0; 7.124682542265387 -5.682057150020203e10 … 1.4959402685986008e11 8.676000018531387e8; … ; -5.407429326116788e6 -5.6596328776543434e10 … -2.5540169487386642e10 2.2343281215432764e11; -5.406752671863626e6 -5.6734675105805046e10 … -2.659584268091357e10 2.235674399405609e11], [0.0 0.0 … 0.0 -2.27388763464e11; 17.606553594220184 -1.7243999879402893e9 … 1.0728000033062695e9 -2.2738709974939893e11; … ; 5.594203390255793e6 5.361198824542111e9 … 1.4753509580217035e11 -3.65371618708731e10; 5.594621308291411e6 3.642082220947251e9 … 1.4734924678059198e11 -3.567623396743897e10])

In [12]:
# Do a simulation including the rocket
B = [Body(d[i]*p[i], s[i]*v[i], m[i], [0.0,0.0]) for i=1:length(m)]

theta = [10000.0, 1000.0]

# The rocket starts at earth's position and speed plus some offset
push!(B, Body(B[4].pos + ones(2) * 0.01*AU, B[4].speed + theta, 5.0 * 10^4, [0.0,0.0]))
simulate_n_bodies(B,Δt, tmax, "solar_system_with_rocket")

┌ Info: Saved animation to 
│   fn = C:\Users\ben97726\Git-Reps\Optimization\exercises\10_auto_diff\solar_system_with_rocket.gif
└ @ Plots C:\Users\ben97726\.julia\packages\Plots\PomtQ\src\animation.jl:114


([0.0 -5.6847190866e10 … 0.0 1.51093849407e11; 7.124682542265387 -5.682057150020203e10 … 8.676000018531387e8 1.5145004105539694e11; … ; 7.43411084987386e6 -5.6584061944423096e10 … 2.2344726803365997e11 -1.8634530455609122e11; 7.440292599542336e6 -5.672231995269087e10 … 2.2358177230452945e11 -1.864769286143521e11], [0.0 0.0 … -2.27388763464e11 1.495978707e9; 17.606553594220184 -1.7243999879402893e9 … -2.2738709974939893e11 2.6047006036586e9; … ; 5.594186066805642e6 5.358554718103285e9 … -3.650590945914834e10 1.0418948637870439e11; 5.594604033545949e6 3.6394414942198215e9 … -3.564495586790921e10 1.0336715421383789e11])

## Optimization Problem

The loss function is given by:

\begin{align*}
l(\theta) = \| \vec r_{\mathrm{Mars}}^{(k)} - \vec r_{\mathrm{Rocket}}^{(k)} \|^2 + \| \vec r_{\mathrm{Earth}}^{(l)} - \vec r_{\mathrm{Rocket}}^{(l)} \|^2,\qquad \mathrm{with}\; l>k
\end{align*}
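where $\theta$ is the velocity offset that is added to Earth's velocity to obtain the rocket's initial velocity, $k$ is the time step at which the rocket comes closest to Mars, and $l$ is the later time step at which it comes closest to Earth again.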

In [12]:
"""
    trajectory_loss(theta::Vector{T}) where T

Simulate the five bodies plus a rocket for `tmax` steps of size `Δt` and return the sum of

1. the smallest `norm2` separation between Mars (body 5) and the rocket (body 6), and
2. the smallest `norm2` separation between Earth (body 4) and the rocket, taken from the
   step of the Mars minimum onwards.

`theta` is added to Earth's initial speed to obtain the rocket's initial speed. All bodies
are constructed as `Body{T}` (presumably so that AD number types propagate through the
simulation — confirm against `nbody_simulation.jl`).
"""
function trajectory_loss(theta::Vector{T}) where T
    bodies = [Body{T}(d[k]*p[k], s[k]*v[k], m[k], [0.0,0.0]) for k in eachindex(m)]

    # The rocket starts at earth's position and speed plus some offset
    earth = bodies[4]
    rocket = Body{T}(earth.pos + ones(2) * 0.01*AU, earth.speed + theta, 5.0 * 10^4, [0.0,0.0])
    push!(bodies, rocket)

    to_mars = zeros(T, tmax)
    to_earth = zeros(T, tmax)
    for step in 1:tmax
        simulate_step!(bodies, Δt, step)
        to_mars[step] = norm2(bodies[5].pos - bodies[6].pos)
        to_earth[step] = norm2(bodies[4].pos - bodies[6].pos)
    end

    # Closest approach to Mars first, then closest approach to Earth at or after that step
    closest_mars, k_mars = findmin(to_mars)
    closest_earth = minimum(to_earth[k_mars:end])
    return closest_mars + closest_earth
end

trajectory_loss (generic function with 1 method)

In [33]:
n_points = 200
θ1_plot = range(-500000,500000,length = n_points)
θ2_plot = range(-500000,500000,length = n_points)

-500000.0:5025.125628140703:500000.0

In [52]:
using DelimitedFiles



losses_plot = [trajectory_loss([θ1,θ2]) for θ1 = θ1_plot, θ2 = θ2_plot]


writedlm( "loss_plot.csv",  losses_plot, ',')

In [27]:
using DelimitedFiles

losses_plot = readdlm("loss_plot.csv",',')


200×200 Matrix{Float64}:
 1.19036e11  1.18877e11  1.1872e11   …  1.21528e11  1.21702e11  1.21877e11
 1.1873e11   1.18571e11  1.18413e11     1.21233e11  1.21408e11  1.21583e11
 1.18425e11  1.18265e11  1.18106e11     1.20939e11  1.21114e11  1.2129e11
 1.1812e11   1.1796e11   1.178e11       1.20646e11  1.20821e11  1.20998e11
 1.17817e11  1.17655e11  1.17495e11     1.20353e11  1.2053e11   1.20707e11
 1.17514e11  1.17352e11  1.17191e11  …  1.20061e11  1.20238e11  1.20417e11
 1.17212e11  1.17049e11  1.16887e11     1.1977e11   1.19948e11  1.20127e11
 1.1691e11   1.16747e11  1.16584e11     1.1948e11   1.19659e11  1.19838e11
 1.1661e11   1.16445e11  1.16282e11     1.1919e11   1.1937e11   1.1955e11
 1.1631e11   1.16145e11  1.15981e11     1.18902e11  1.19082e11  1.19263e11
 1.16011e11  1.15845e11  1.1568e11   …  1.18614e11  1.18795e11  1.18977e11
 1.15713e11  1.15546e11  1.15381e11     1.18327e11  1.18509e11  1.18691e11
 1.15416e11  1.15248e11  1.15082e11     1.18041e11  1.18224e11  1.18407e11
 ⋮

In [25]:
# plot the loss function

# create grid points
xgrid = repeat(θ1_plot',n_points,1)
ygrid = repeat(θ2_plot,1,n_points);

fontsize = 13

using3D()
PyPlot.pygui(true);
fig = figure("pyplot_surfaceplot",figsize=(15,10))
ax = fig.add_subplot(1,1,1,projection="3d")
plot_surface(xgrid', ygrid', losses_plot, rstride=2,
             #edgecolors="k",
            cstride=2, cmap=ColorMap("jet"), alpha=0.3, linewidth=0.25)
PyPlot.zscale("log")
xlabel(L"\theta_1", fontsize=fontsize)
ylabel(L"\theta_2", fontsize=fontsize)
zlabel(L"loss", fontsize=fontsize)

show()

Traceback (most recent call last):
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\matplotlib\cbook\__init__.py", line 270, in process
    func(*args, **kwargs)
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py", line 1223, in _button_release
    toolbar = getattr(self.figure.canvas, "toolbar")
AttributeError: 'NoneType' object has no attribute 'canvas'
Traceback (most recent call last):
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\matplotlib\cbook\__init__.py", line 270, in process
    func(*args, **kwargs)
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py", line 1223, in _button_release
    toolbar = getattr(self.figure.canvas, "toolbar")
AttributeError: 'NoneType' object has no attribute 'canvas'


In [13]:
# ========================================================================================
# Gradient of trajectory_loss(), built with the gradient() method provided in ad.jl
function ∇trajectory_loss(θ)
    return gradient(trajectory_loss, θ)
end

# Alternative: forward-mode AD with a Julia package -> faster than our implementation,
# but gives the same result
#import ForwardDiff
#∇trajectory_loss = θ -> ForwardDiff.gradient(trajectory_loss, θ)
#H_trajectory_loss = θ -> ForwardDiff.hessian(trajectory_loss, θ)
# ========================================================================================

∇trajectory_loss (generic function with 1 method)

In [14]:
n_tests = 10

10

In [56]:
# ===========================================================================
# Minimize the loss function using the gradient_descent function from optimization_library.jl
# Hint: Since the loss function is not convex, there is no guarantee that the global minimum
# is found. However, sometimes a "more optimal" solution than the starting point is already
# sufficient.

# Rank all grid points by their loss. vec(losses_plot), xgrid' and ygrid' are flattened in
# the same column-major order, so one linear index addresses the same grid point in all three.
i_sorted = sortperm(vec(losses_plot));
x_shaped = reshape(xgrid',n_points*n_points,1)
y_shaped = reshape(ygrid',n_points*n_points,1);

# Since the loss function is not convex there are multiple minima:
# solve the optimization problem for different starting points and take the minimum of all solutions

n_tests = 10
results = zeros(n_tests,2)      # one row (θ1, θ2) per starting point
costs = zeros(n_tests)          # loss at the corresponding row of `results`
traces = Vector(undef,n_tests)  # optimisation trace per starting point

for i in 1:n_tests
    print(".")
    # initialize starting point with the grid point that has the i-th smallest loss
    θ1 = x_shaped[i_sorted[i],1]
    θ2 = y_shaped[i_sorted[i],1]
    x0 = [θ1,θ2]

    if LA.norm(∇trajectory_loss(x0)) < 0.01
        println("negligible gradient at ",x0)
        # The start point is already (numerically) stationary: record it as its own result.
        # Leaving costs[i] at 0.0 would make argmin(costs) pick this entry although no
        # result was ever stored for it.
        results[i,:] = x0
        costs[i] = trajectory_loss(x0)
        # Single-column trace; assumes trace rows are (θ1, θ2, loss), which is how the
        # plotting code reads traces[k][1,:], [2,:] and [3,:].
        traces[i] = reshape([θ1, θ2, costs[i]], 3, 1)
        continue
    end

    # Compute the solution
    x_best,trace = gradient_descent(trajectory_loss, ∇trajectory_loss, x0; ϵ=0.01, maxiters = 20, p = 0.2);
    println(i, " Number of iterations: ", size(trace,2))
    results[i,:] = x_best
    costs[i] = trajectory_loss(x_best)
    traces[i] = trace
end

# ===========================================================================

.1 Number of iterations: 21
.2 Number of iterations: 21
.3 Number of iterations: 21
.4 Number of iterations: 21
.5 Number of iterations: 21
.6 Number of iterations: 21
.7 Number of iterations: 21
.8 Number of iterations: 21
.9 Number of iterations: 21
.10 Number of iterations: 21


In [57]:
# Save results. The timestamp is taken once so that both files always carry the same
# suffix, even if the clock minute changes between the two writes.
time_now = Dates.format(Dates.now(), "yy-mm-dd-HH-MM")

# One row per starting point: cost followed by the two entries of the result
open("Results_$time_now.csv", "w") do io
    DF.writedlm(io, hcat(costs, results))
end;

# All traces, written one after another into a single file
open("Traces_$time_now.csv", "w") do io
    for i = 1:n_tests
        DF.writedlm(io, traces[i])
    end
end;

In [15]:
# Code to read in the files that you stored in the previous code cell.
# Comment out the following lines if you want to use the results from your optimization
# that are still in memory.
# Note: the previous cell writes "Results_<timestamp>.csv" / "Traces_<timestamp>.csv";
# rename those files (or adjust the names below) before running this cell.
costs_results = DF.readdlm("Results.csv")
traces_read = DF.readdlm("Traces.csv")

traces = Vector(undef,n_tests)
costs = zeros(n_tests)
results = zeros(n_tests,2)

for i in 1:n_tests
    # Each trace occupies three consecutive rows of the traces file, so take whole rows.
    # (A bare range would index the matrix linearly and return just three numbers.)
    traces[i] = traces_read[(i*3)-2:(i*3), :]
    costs[i] = costs_results[i,1]
    results[i,:] = costs_results[i,2:3]
end

In [16]:
# compare solutions and find the best one
i_minimizer = argmin(costs)
minimizer = results[i_minimizer,:]
println("Minimizer: ", minimizer)

Minimizer: [-15073.550552647714, -55121.971458839835]


In [17]:
tmax = 2000
Δt = 3600.0 * 10

B = [Body(d[i]*p[i], s[i]*v[i], m[i], [0.0,0.0]) for i=1:length(m)]

# The rocket starts at earth's position and speed plus some offset
push!(B, Body(B[4].pos + ones(2) * 0.01*AU, B[4].speed + minimizer, 5.0 * 10^4, [0.0,0.0]))
simulate_n_bodies(B,Δt, tmax, "optimized_rocket")

┌ Info: Saved animation to 
│   fn = C:\Users\ben97726\Git-Reps\Optimization\exercises\10_auto_diff\optimized_rocket.gif
└ @ Plots C:\Users\ben97726\.julia\packages\Plots\PomtQ\src\animation.jl:114


([0.0 -5.6847190866e10 … 0.0 1.51093849407e11; 7.124682542265387 -5.682057150020203e10 … 8.676000018531387e8 1.5054739323550165e11; … ; -5.407429326116788e6 -5.6596328776543434e10 … 2.2343281215432764e11 -8.554936778481238e10; -5.406752671863626e6 -5.6734675105805046e10 … 2.235674399405609e11 -8.519765943096837e10], [0.0 0.0 … -2.27388763464e11 1.495978707e9; 17.606553594220184 -1.7243999879402893e9 … -2.2738709974939893e11 5.843096311403657e8; … ; 5.594203390255793e6 5.361198824542111e9 … -3.65371618708731e10 8.856536688362143e10; 5.594621308291411e6 3.642082220947251e9 … -3.567623396743897e10 8.979400893461845e10])

In [34]:
# Most optimal point I found
p_opt = [-15073.550552647714, -55121.971458839835]

# Loss surface over the (θ1, θ2) grid
using3D()
PyPlot.pygui(true);
fig = figure("pyplot_surfaceplot",figsize=(15,10))
ax = fig.add_subplot(1,1,1,projection="3d")
plot_surface(xgrid', ygrid', losses_plot, rstride=2,
             edgecolors="k", cstride=2, cmap=ColorMap("jet"), alpha=0.3, linewidth=0.25)

# Overlay every optimisation trace. The colour list is cycled so that all n_tests traces
# are drawn even though there are fewer colours than traces.
colors= ["green","orange","blue","purple","magenta","yellow","brown","cyan","black"]
for k in 1:n_tests
    trace_color = colors[mod1(k, length(colors))]
    ax.plot(traces[k][1,:], traces[k][2,:], traces[k][3,:], color = trace_color, zorder = 3)
    ax.scatter(traces[k][1,:], traces[k][2,:], traces[k][3,:], color = trace_color, zorder = 3)
end

# Mark the best point found
ax.scatter(p_opt[1], p_opt[2], trajectory_loss(p_opt), color="red", zorder = 4)
PyPlot.zscale("log")
xlabel(L"\theta_1", fontsize=fontsize)
ylabel(L"\theta_2", fontsize=fontsize)
zlabel(L"loss", fontsize=fontsize)

PyPlot.show()

LoadError: PyError ($(Expr(:escape, :(ccall(#= C:\Users\ben97726\.julia\packages\PyCall\3fwVL\src\pyfncall.jl:43 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'ValueError'>
ValueError('shape mismatch: objects cannot be broadcast to a single shape')
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\matplotlib\_api\deprecation.py", line 431, in wrapper
    return func(*inner_args, **inner_kwargs)
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py", line 1665, in plot_surface
    X, Y, Z = np.broadcast_arrays(X, Y, Z)
  File "<__array_function__ internals>", line 5, in broadcast_arrays
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\numpy\lib\stride_tricks.py", line 538, in broadcast_arrays
    shape = _broadcast_shape(*args)
  File "C:\Users\ben97726\.julia\conda\3\lib\site-packages\numpy\lib\stride_tricks.py", line 420, in _broadcast_shape
    b = np.broadcast(*args[:32])


# Neural Network

In [None]:
using MLDatasets
using Images
using ReverseDiff

In [None]:
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata();

In [None]:
# Plot the k-th image
k = 23
println(train_y[k])
Plots.plot(Gray.(train_x[:,:,k]'))

In [None]:
"""
    sigmoid(z)

Sigmoid activation function `1 / (1 + exp(-z))`.
"""
function sigmoid(z)
    denominator = 1.0 + exp(-z)
    return 1.0 / denominator
end

"""
    softmax(z)

Softmax of `z`: the exponentials of the entries, normalised to sum to one. The maximum of
`z` is subtracted before exponentiating, so the largest exponent is `exp(0)`.
"""
function softmax(z)
    shifted = z .- maximum(z)
    weights = exp.(shifted)
    return weights / sum(weights)
end

In [None]:
"""
    classify_mnist(theta, im)

Run a two-layer network on the image `im` and return its softmax output (a 10-vector).

`theta` is the flat parameter vector, unpacked as
- `theta[1:100352]`: 128×784 weight matrix of the sigmoid layer,
- `theta[100353:100480]`: the 128 biases of the sigmoid layer,
- `theta[100481:101760]`: 10×128 weight matrix of the softmax layer,
- `theta[101761:101770]`: the 10 biases of the softmax layer.

`im` is flattened with `vec`, so it needs 28*28 entries (e.g. a 28×28 matrix).
"""
function classify_mnist(theta, im)
    n_in, n_hidden, n_out = 28*28, 128, 10

    # End offsets of the four parameter groups inside theta
    w1_end = n_hidden * n_in              # 100352
    b1_end = w1_end + n_hidden            # 100480
    w2_end = b1_end + n_out * n_hidden    # 101760
    b2_end = w2_end + n_out               # 101770

    W1 = reshape(theta[1:w1_end], (n_hidden, n_in))
    b1 = theta[w1_end+1:b1_end]
    W2 = reshape(theta[b1_end+1:w2_end], (n_out, n_hidden))
    b2 = theta[w2_end+1:b2_end]

    hidden = sigmoid.(W1 * vec(im) + b1)
    return softmax(W2 * hidden + b2)
end

In [None]:
training_cycles = 10^3; 
learning_rate = 0.1;
batchsize = 128

"""
    loss_minibatch(theta)

Draw `batchsize` random training samples (with replacement) and return the sum of
`-log` of the probability that `classify_mnist` assigns to each sample's true label.
"""
function loss_minibatch(theta)
    total = 0.0
    for _ in 1:batchsize
        idx = rand(1:length(train_y))
        probs = classify_mnist(theta, train_x[:,:,idx])
        # labels are 0-9, Julia indices are 1-10
        total += -log(probs[train_y[idx]+1])
    end
    return total
end

In [None]:
# Gradient of loss_minibatch, computed with reverse-mode AD (ReverseDiff)
function grad_loss_minibatch(theta)
    return ReverseDiff.gradient(loss_minibatch, theta)
end

In [None]:
# ADAM is an improved Gradient Descent for Neural Networks
# Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization.
"""
    adam!(df, θ; iters=1000, α=0.001, β1=0.9, β2=0.999, ϵ=10.0^(-8))

Update the parameter array `θ` in place with `iters` ADAM steps and return `nothing`.

# Arguments
- `df`: function returning the gradient at `θ`; its result must have as many entries as `θ`.
- `θ`: parameter array, overwritten with the updated parameters.

# Keywords
- `iters`: number of update steps.
- `α`: step size.
- `β1`, `β2`: decay rates of the first- and second-moment averages.
- `ϵ`: constant added to the denominator to avoid division by zero.

One `.` is printed per iteration as a progress indicator.
"""
function adam!(df, θ; iters=1000, α=0.001, β1 = 0.9, β2=0.999, ϵ=10.0^(-8))
    m = zeros(size(θ))   # running average of the gradient
    v = zeros(size(θ))   # running average of the squared gradient
    for t in 1:iters
        print(".")
        # Bring the gradient into the shape of θ so that all updates broadcast elementwise
        g = reshape(df(θ), size(θ))
        # Fused in-place broadcasts: no temporary arrays are allocated per step
        @. m = β1 * m + (1 - β1) * g
        @. v = β2 * v + (1 - β2) * g^2
        # Bias-correction factors for the zero-initialised averages
        c1 = 1 - β1^t
        c2 = 1 - β2^t
        @. θ = θ - α * (m / c1) / (sqrt(v / c2) + ϵ)
    end
    return nothing
end;

In [None]:
# start with random values (not zeros) for the model parameters,
# otherwise the first sigmoid layer will be "all zeros"
theta = rand(101770);

#train
adam!(grad_loss_minibatch, theta, iters=500, α=0.05)

In [None]:
# save results
open("theta.csv", "w") do io
           DF.writedlm(io, theta)
end;

In [None]:
# read from file as a shortcut
theta2 = DF.readdlm("theta.csv");

In [None]:
# Classify a randomly chosen test image, print the prediction and plot the image
k = rand(1:length(test_y))
res = classify_mnist(theta2, test_x[:,:,k])
println("Selected Sample: ", k)
println("Prediction Distribution: ", res)
# argmax is 1-based while the digit classes are 0-9
println("Predicted Category: ", argmax(res)-1)
Plots.plot(Gray.(test_x[:,:,k]'))