In [1]:
include("../src/JuTrack.jl")



Main.JuTrack

In [2]:
using .JuTrack
using Enzyme
using Test
using BenchmarkTools
# Enzyme.API.runtimeActivity!(true)

In [3]:
# Show how many threads this Julia session has available; the `*_mthread`
# benchmarks below are compared against their single-threaded counterparts.
Threads.nthreads()

64

In [8]:
"""
    create_sbend(BendingAngle)

Return a one-element vector holding an `SBEND` named "BD" with `len=0.72`, `rad=1`,
`e2=0.0`, and both `angle` and `e1` set to half of `BendingAngle`.
"""
function create_sbend(BendingAngle)
    half_angle = BendingAngle/2
    return [SBEND(name="BD", len=0.72, angle=half_angle, e1=half_angle, e2=0.0, rad=1)]
end

"""
    sbend_track_mthread(BendingAngle)

Push a fixed test bunch through the line built by `create_sbend(BendingAngle)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sbend_track_mthread(BendingAngle)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!(create_sbend(BendingAngle), bunch)
    return bunch.r
end

"""
    sbend_track(BendingAngle)

Push a fixed test bunch through the line built by `create_sbend(BendingAngle)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sbend_track(BendingAngle)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!(create_sbend(BendingAngle), bunch)
    return bunch.r
end

"""
    create_rbend(BendingAngle)

Return a one-element vector holding an `RBEND` named "BD" with `len=0.72`, `rad=1`,
and `angle` set to half of `BendingAngle`.
"""
function create_rbend(BendingAngle)
    half_angle = BendingAngle/2
    return [RBEND(name="BD", len=0.72, angle=half_angle, rad=1)]
end

"""
    rbend_track_mthread(BendingAngle)

Push a fixed test bunch through the line built by `create_rbend(BendingAngle)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function rbend_track_mthread(BendingAngle)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!(create_rbend(BendingAngle), bunch)
    return bunch.r
end

"""
    rbend_track(BendingAngle)

Push a fixed test bunch through the line built by `create_rbend(BendingAngle)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function rbend_track(BendingAngle)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!(create_rbend(BendingAngle), bunch)
    return bunch.r
end

"""
    hcorrector_track(hkick)

Push a fixed test bunch through a single `HKICKER` (`len=1.5`, `xkick=hkick`) using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function hcorrector_track(hkick)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    kicker = HKICKER(name="HKICK", len=1.5, xkick=hkick)
    linepass!([kicker], bunch)
    return bunch.r
end

"""
    hcorrector_track_mthread(hkick)

Push a fixed test bunch through a single `HKICKER` (`len=1.5`, `xkick=hkick`) using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function hcorrector_track_mthread(hkick)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    kicker = HKICKER(name="HKICK", len=1.5, xkick=hkick)
    plinepass!([kicker], bunch)
    return bunch.r
end

"""
    vcorrector_track(vkick)

Push a fixed test bunch through a single `VKICKER` (`len=1.5`, `ykick=vkick`) using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function vcorrector_track(vkick)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    kicker = VKICKER(name="VKICK", len=1.5, ykick=vkick)
    linepass!([kicker], bunch)
    return bunch.r
end

"""
    vcorrector_track_mthread(vkick)

Push a fixed test bunch through a single `VKICKER` (`len=1.5`, `ykick=vkick`) using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function vcorrector_track_mthread(vkick)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    kicker = VKICKER(name="VKICK", len=1.5, ykick=vkick)
    plinepass!([kicker], bunch)
    return bunch.r
end

"""
    create_drift(l)

Return a `DRIFT` element constructed with `len=l`.
"""
create_drift(l) = DRIFT(len=l)

"""
    drift_track(l)

Push a fixed test bunch through the single element from `create_drift(l)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function drift_track(l)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_drift(l)], bunch)
    return bunch.r
end

"""
    drift_track_mthread(l)

Push a fixed test bunch through the single element from `create_drift(l)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function drift_track_mthread(l)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_drift(l)], bunch)
    return bunch.r
end

"""
    create_quad(k)

Return a `KQUAD` element constructed with `len=0.5` and `k1=k`.
"""
create_quad(k) = KQUAD(len=0.5, k1=k)

"""
    quad_track(k)

Push a fixed test bunch through the single element from `create_quad(k)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function quad_track(k)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_quad(k)], bunch)
    return bunch.r
end

"""
    quad_track_mthread(k)

Push a fixed test bunch through the single element from `create_quad(k)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function quad_track_mthread(k)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_quad(k)], bunch)
    return bunch.r
end

"""
    create_sext(k)

Return a `KSEXT` element constructed with `len=0.5` and `k2=k`.
"""
create_sext(k) = KSEXT(len=0.5, k2=k)

"""
    sext_track(k)

Push a fixed test bunch through the single element from `create_sext(k)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sext_track(k)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_sext(k)], bunch)
    return bunch.r
end

"""
    sext_track_mthread(k)

Push a fixed test bunch through the single element from `create_sext(k)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sext_track_mthread(k)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_sext(k)], bunch)
    return bunch.r
end

"""
    create_oct(k)

Return a `KOCT` element constructed with `len=0.5` and `k3=k`.
"""
create_oct(k) = KOCT(len=0.5, k3=k)

"""
    oct_track(k)

Push a fixed test bunch through the single element from `create_oct(k)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function oct_track(k)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_oct(k)], bunch)
    return bunch.r
end

"""
    oct_track_mthread(k)

Push a fixed test bunch through the single element from `create_oct(k)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function oct_track_mthread(k)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_oct(k)], bunch)
    return bunch.r
end

"""
    create_RFCA(f)

Return an `RFCA` element constructed with `len=1.034`, `volt=2.2`, `freq=f`
and `energy=30e4`.
"""
create_RFCA(f) = RFCA(len=1.034, volt=2.2, freq=f, energy=30e4)

"""
    RFCA_track(f)

Push a fixed test bunch through the single element from `create_RFCA(f)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function RFCA_track(f)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_RFCA(f)], bunch)
    return bunch.r
end

"""
    RFCA_track_mthread(f)

Push a fixed test bunch through the single element from `create_RFCA(f)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function RFCA_track_mthread(f)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_RFCA(f)], bunch)
    return bunch.r
end


"""
    create_sol(k)

Return a `SOLENOID` element constructed with `len=0.5` and `ks=k`.
"""
create_sol(k) = SOLENOID(len=0.5, ks=k)

"""
    sol_track(k)

Push a fixed test bunch through the single element from `create_sol(k)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sol_track(k)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_sol(k)], bunch)
    return bunch.r
end

"""
    sol_track_mthread(k)

Push a fixed test bunch through the single element from `create_sol(k)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function sol_track_mthread(k)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_sol(k)], bunch)
    return bunch.r
end

"""
    create_thinMulti(k)

Return a `thinMULTIPOLE` element constructed with `len=0.5` and
`PolynomB = [0.0, k, 1.43, -1.32]` (only the second coefficient depends on `k`).
"""
create_thinMulti(k) = thinMULTIPOLE(len=0.5, PolynomB=[0.0, k, 1.43, -1.32])

"""
    thinMulti_track(k)

Push a fixed test bunch through the single element from `create_thinMulti(k)` using
`linepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function thinMulti_track(k)
    coords = zeros(Float64, 10000, 6)
    fill!(view(coords, :, 1), 0.001)
    fill!(view(coords, :, 2), 0.0001)
    bunch = Beam(coords)
    linepass!([create_thinMulti(k)], bunch)
    return bunch.r
end

"""
    thinMulti_track_mthread(k)

Push a fixed test bunch through the single element from `create_thinMulti(k)` using
`plinepass!` and return the beam's `r` field afterwards.

The bunch is a 10000×6 zero array with column 1 set to 0.001 and column 2 to 0.0001.
"""
function thinMulti_track_mthread(k)
    coords = zeros(Float64, 10000, 6)
    for row in axes(coords, 1)
        coords[row, 1] = 0.001
        coords[row, 2] = 0.0001
    end
    bunch = Beam(coords)
    plinepass!([create_thinMulti(k)], bunch)
    return bunch.r
end

thinMulti_track_mthread (generic function with 1 method)

In [20]:
# Benchmark `plinepass!` directly on an SBEND line, without wrapping it in a function.
particles = zeros(Float64, 10000, 6)
particles[:,1] .= .001
particles[:,2] .= .0001
beam = Beam(particles)
BendingAngle=pi/3
line = create_sbend(BendingAngle)
# `plinepass!` mutates the beam it is given, so timing `plinepass!(line, beam)` directly
# would re-track an already-tracked beam on every evaluation after the first.
# `setup` builds a fresh beam from a copy of the initial coordinates for each sample and
# `evals=1` ensures that beam is used exactly once; `$` splices the non-const globals in
# so global-variable lookup is not part of the measurement.
# NOTE(review): the setup cost (copy + `Beam` construction) is excluded from the timing.
@btime plinepass!($line, fresh_beam) setup=(fresh_beam = Beam(copy($particles))) evals=1

  367.770 μs (367 allocations: 986.97 KiB)


In [4]:
# Value of the bending angle at which tracking and its derivative are evaluated.
bend_angle = pi/2
# `bend_angle` is a non-const global: `$` splices its value into the benchmark so that
# untyped-global access is not part of what is timed.
@btime sbend_track($bend_angle)
@btime sbend_track_mthread($bend_angle)
# Forward-mode derivative of the tracked coordinates w.r.t. `bend_angle` (tangent seed 1.0).
grad1 = autodiff(Forward, sbend_track, DuplicatedNoNeed, Duplicated(bend_angle, 1.0))
grad2 = autodiff(Forward, sbend_track_mthread, DuplicatedNoNeed, Duplicated(bend_angle, 1.0))
@btime autodiff(Forward, sbend_track, DuplicatedNoNeed, Duplicated($bend_angle, 1.0))
@btime autodiff(Forward, sbend_track_mthread, DuplicatedNoNeed, Duplicated($bend_angle, 1.0))
# Exact equality is intended: both trackers should produce identical derivatives.
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)


# Results after modifying the function so that the particles are passed into it:
# 56.041 ms (3397483 allocations: 61.76 MiB)
# 4.534 ms (3397857 allocations: 61.81 MiB)
# 921.773 ms (10557535 allocations: 211.38 MiB)
# 36.662 ms (10557925 allocations: 211.44 MiB)
# Single Thread AutoDiff = Multithread Autodiff?  true


# For AD to work, the Beam() must stay unchanged, but passing the particles into the function makes it change. Enabling Enzyme.API.runtimeActivity!(true) makes it work (see the results above), but it then runs even slower and allocates more.
# If we did not need AD, we would not have to wrap the tracking in a function, and it would run much faster (see the cell above).

  51.749 ms (3397485 allocations: 62.22 MiB)
  5.940 ms (3397862 allocations: 62.27 MiB)
  666.793 ms (9017518 allocations: 188.79 MiB)
  28.964 ms (9017905 allocations: 188.86 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [21]:
# Collect the full benchmark trial (all samples, GC times, memory) for the multithreaded
# SBEND derivative and print its raw fields. `$` splices the global `bend_angle` in so
# that global-variable access is not part of the measurement.
bb = @benchmark autodiff(Forward, sbend_track_mthread, DuplicatedNoNeed, Duplicated($bend_angle, 1.0))
dump(bb)


BenchmarkTools.Trial
  params: BenchmarkTools.Parameters
    seconds: Float64 5.0
    samples: Int64 10000
    evals: Int64 1
    evals_set: Bool false
    overhead: Float64 0.0
    gctrial: Bool true
    gcsample: Bool false
    time_tolerance: Float64 0.05
    memory_tolerance: Float64 0.01
  times: Array{Float64}((78,)) [3.7546879e7, 3.4677947e7, 3.346356e7, 3.5149201e7, 3.6006279e7, 3.3498905e7, 2.50037838e8, 3.4701411e7, 3.2619145e7, 3.5001193e7  …  3.1514413e7, 2.9466802e7, 2.8726343e7, 2.9873566e7, 3.0434587e7, 2.71694732e8, 3.2233723e7, 3.035081e7, 3.0759868e7, 3.0779564e7]
  gctimes: Array{Float64}((78,)) [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.18237068e8, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 2.42532039e8, 0.0, 0.0, 0.0, 0.0]
  memory: Int64 198034416
  allocs: Int64 9017909


In [9]:
# `bend_angle` is already assigned as an ordinary global in the SBEND cell; declaring it
# `const` here raises "cannot declare bend_angle constant; it already has a value" on
# Julia versions before 1.12 when the cells run in file order. Keep it a plain global and
# use `$` interpolation in the benchmarks instead.
bend_angle = pi/2
@btime rbend_track($bend_angle)
@btime rbend_track_mthread($bend_angle)
# Forward-mode derivative of the tracked coordinates w.r.t. `bend_angle` (tangent seed 1.0).
grad1 = autodiff(Forward, rbend_track, DuplicatedNoNeed, Duplicated(bend_angle, 1.0))
grad2 = autodiff(Forward, rbend_track_mthread, DuplicatedNoNeed, Duplicated(bend_angle, 1.0))
@btime autodiff(Forward, rbend_track, DuplicatedNoNeed, Duplicated($bend_angle, 1.0))
@btime autodiff(Forward, rbend_track_mthread, DuplicatedNoNeed, Duplicated($bend_angle, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

# 52.541 ms (3397485 allocations: 62.22 MiB)
# 4.838 ms (3397862 allocations: 62.27 MiB)
# 646.843 ms (9017518 allocations: 188.79 MiB)
# 28.035 ms (9017908 allocations: 188.86 MiB)
# Single Thread AutoDiff = Multithread Autodiff?  true

  71.453 ms (3397485 allocations: 62.22 MiB)
  5.075 ms (3397808 allocations: 62.27 MiB)
  686.359 ms (9057518 allocations: 197.95 MiB)
  21.113 ms (9057843 allocations: 198.01 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [12]:
# Horizontal kick value at which tracking and its derivative are evaluated.
hkick = 0.02
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime hcorrector_track($hkick)
@btime hcorrector_track_mthread($hkick)
# Forward-mode derivative of the tracked coordinates w.r.t. `hkick` (tangent seed 1.0).
grad1 = autodiff(Forward, hcorrector_track, DuplicatedNoNeed, Duplicated(hkick, 1.0))
grad2 = autodiff(Forward, hcorrector_track_mthread, DuplicatedNoNeed, Duplicated(hkick, 1.0))
@btime autodiff(Forward, hcorrector_track, DuplicatedNoNeed, Duplicated($hkick, 1.0))
@btime autodiff(Forward, hcorrector_track_mthread, DuplicatedNoNeed, Duplicated($hkick, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

# 2.611 ms (100028 allocations: 11.90 MiB)
# 852.449 μs (100408 allocations: 11.94 MiB)
# 15.044 ms (220054 allocations: 16.41 MiB)
# 1.462 ms (220408 allocations: 16.45 MiB)
# Single Thread AutoDiff = Multithread Autodiff?  true

  2.611 ms (100028 allocations: 11.90 MiB)
  852.449 μs (100408 allocations: 11.94 MiB)
  15.044 ms (220054 allocations: 16.41 MiB)
  1.462 ms (220408 allocations: 16.45 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [13]:
# Vertical kick value at which tracking and its derivative are evaluated.
vkick = 0.02
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime vcorrector_track($vkick)
@btime vcorrector_track_mthread($vkick)
# Forward-mode derivative of the tracked coordinates w.r.t. `vkick` (tangent seed 1.0).
grad1 = autodiff(Forward, vcorrector_track, DuplicatedNoNeed, Duplicated(vkick, 1.0))
grad2 = autodiff(Forward, vcorrector_track_mthread, DuplicatedNoNeed, Duplicated(vkick, 1.0))
@btime autodiff(Forward, vcorrector_track, DuplicatedNoNeed, Duplicated($vkick, 1.0))
@btime autodiff(Forward, vcorrector_track_mthread, DuplicatedNoNeed, Duplicated($vkick, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)


  2.614 ms (100028 allocations: 11.90 MiB)
  816.672 μs (100381 allocations: 11.94 MiB)
  14.945 ms (220054 allocations: 16.41 MiB)
  1.502 ms (220398 allocations: 16.45 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [14]:
# Drift length at which tracking and its derivative are evaluated.
l = 1.23
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime drift_track($l)
@btime drift_track_mthread($l)
# Forward-mode derivative of the tracked coordinates w.r.t. `l` (tangent seed 1.0).
grad1 = autodiff(Forward, drift_track, DuplicatedNoNeed, Duplicated(l, 1.0))
grad2 = autodiff(Forward, drift_track_mthread, DuplicatedNoNeed, Duplicated(l, 1.0))
@btime autodiff(Forward, drift_track, DuplicatedNoNeed, Duplicated($l, 1.0))
@btime autodiff(Forward, drift_track_mthread, DuplicatedNoNeed, Duplicated($l, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

  2.734 ms (100030 allocations: 11.90 MiB)
  943.861 μs (100413 allocations: 11.94 MiB)
  17.868 ms (230058 allocations: 16.71 MiB)
  1.636 ms (230399 allocations: 16.76 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [15]:
# Quadrupole `k1` value at which tracking and its derivative are evaluated.
k1 = 1.0627727
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime quad_track($k1)
@btime quad_track_mthread($k1)
# Forward-mode derivative of the tracked coordinates w.r.t. `k1` (tangent seed 1.0).
grad1 = autodiff(Forward, quad_track, DuplicatedNoNeed, Duplicated(k1, 1.0))
grad2 = autodiff(Forward, quad_track_mthread, DuplicatedNoNeed, Duplicated(k1, 1.0))
@btime autodiff(Forward, quad_track, DuplicatedNoNeed, Duplicated($k1, 1.0))
@btime autodiff(Forward, quad_track_mthread, DuplicatedNoNeed, Duplicated($k1, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

  12.122 ms (100036 allocations: 11.90 MiB)
  1.435 ms (100389 allocations: 11.95 MiB)
  105.090 ms (650070 allocations: 35.94 MiB)
  5.594 ms (630457 allocations: 28.98 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [16]:
# Sextupole `k2` value at which tracking and its derivative are evaluated.
k2 = 1.0627727
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime sext_track($k2)
@btime sext_track_mthread($k2)
# Forward-mode derivative of the tracked coordinates w.r.t. `k2` (tangent seed 1.0).
grad1 = autodiff(Forward, sext_track, DuplicatedNoNeed, Duplicated(k2, 1.0))
grad2 = autodiff(Forward, sext_track_mthread, DuplicatedNoNeed, Duplicated(k2, 1.0))
@btime autodiff(Forward, sext_track, DuplicatedNoNeed, Duplicated($k2, 1.0))
@btime autodiff(Forward, sext_track_mthread, DuplicatedNoNeed, Duplicated($k2, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

  12.365 ms (100036 allocations: 11.90 MiB)
  1.717 ms (100419 allocations: 11.95 MiB)
  105.338 ms (650070 allocations: 35.94 MiB)
  5.765 ms (630430 allocations: 28.98 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [17]:
# Octupole `k3` value at which tracking and its derivative are evaluated.
k3 = 1.0627727
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime oct_track($k3)
@btime oct_track_mthread($k3)
# Forward-mode derivative of the tracked coordinates w.r.t. `k3` (tangent seed 1.0).
grad1 = autodiff(Forward, oct_track, DuplicatedNoNeed, Duplicated(k3, 1.0))
grad2 = autodiff(Forward, oct_track_mthread, DuplicatedNoNeed, Duplicated(k3, 1.0))
@btime autodiff(Forward, oct_track, DuplicatedNoNeed, Duplicated($k3, 1.0))
@btime autodiff(Forward, oct_track_mthread, DuplicatedNoNeed, Duplicated($k3, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

  12.421 ms (100036 allocations: 11.90 MiB)
  1.603 ms (100416 allocations: 11.95 MiB)
  105.099 ms (650070 allocations: 35.94 MiB)
  5.857 ms (630430 allocations: 28.98 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [5]:
# RF cavity `freq` value at which tracking and its derivative are evaluated.
f = 60.
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime RFCA_track($f)
@btime RFCA_track_mthread($f)
# Forward-mode derivative of the tracked coordinates w.r.t. `f` (tangent seed 1.0).
grad1 = autodiff(Forward, RFCA_track, DuplicatedNoNeed, Duplicated(f, 1.0))
grad2 = autodiff(Forward, RFCA_track_mthread, DuplicatedNoNeed, Duplicated(f, 1.0))
@btime autodiff(Forward, RFCA_track, DuplicatedNoNeed, Duplicated($f, 1.0))
@btime autodiff(Forward, RFCA_track_mthread, DuplicatedNoNeed, Duplicated($f, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)



  1.475 ms (60024 allocations: 2.75 MiB)
  626.595 μs (60397 allocations: 2.79 MiB)
  17.447 ms (200046 allocations: 7.86 MiB)
  1.386 ms (200395 allocations: 7.91 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true


In [None]:
# Solenoid `ks` value at which tracking and its derivative are evaluated.
ks = 1.0627727
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime sol_track($ks)
@btime sol_track_mthread($ks)
# Forward-mode derivative of the tracked coordinates w.r.t. `ks` (tangent seed 1.0).
grad1 = autodiff(Forward, sol_track, DuplicatedNoNeed, Duplicated(ks, 1.0))
grad2 = autodiff(Forward, sol_track_mthread, DuplicatedNoNeed, Duplicated(ks, 1.0))
@btime autodiff(Forward, sol_track, DuplicatedNoNeed, Duplicated($ks, 1.0))
@btime autodiff(Forward, sol_track_mthread, DuplicatedNoNeed, Duplicated($ks, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

#Something in this cell consistently causes the cell to run forever

In [18]:
# Value of the second `PolynomB` coefficient of the thin multipole at which tracking and
# its derivative are evaluated.
k1 = 1.0627727
# `$` splices the non-const global into each benchmark so global access is not timed.
@btime thinMulti_track($k1)
@btime thinMulti_track_mthread($k1)
# Forward-mode derivative of the tracked coordinates w.r.t. `k1` (tangent seed 1.0).
grad1 = autodiff(Forward, thinMulti_track, DuplicatedNoNeed, Duplicated(k1, 1.0))
grad2 = autodiff(Forward, thinMulti_track_mthread, DuplicatedNoNeed, Duplicated(k1, 1.0))
@btime autodiff(Forward, thinMulti_track, DuplicatedNoNeed, Duplicated($k1, 1.0))
@btime autodiff(Forward, thinMulti_track_mthread, DuplicatedNoNeed, Duplicated($k1, 1.0))
println("Single Thread AutoDiff = Multithread Autodiff?  ", grad1 == grad2)

  2.823 ms (100035 allocations: 11.90 MiB)
  723.938 μs (100389 allocations: 11.94 MiB)
  15.170 ms (220068 allocations: 16.41 MiB)
  1.521 ms (220431 allocations: 16.46 MiB)
Single Thread AutoDiff = Multithread Autodiff?  true
