In [1]:
using Pkg; Pkg.DEFAULT_IO[] = stdout; Pkg.activate("."); Pkg.instantiate();

[32m[1m  Activating[22m[39m environment at `~/Research/ToyModel.jl/Project.toml`


In [2]:
Pkg.status()

[32m[1m      Status[22m[39m `~/Research/ToyModel.jl/Project.toml`
 [90m [6e4b80f9] [39m[37mBenchmarkTools v1.1.0[39m
 [90m [91a5bcdd] [39m[37mPlots v1.16.7[39m


In [3]:
using BenchmarkTools

In [4]:
g = 9.81

Lx = 100
nx = 25
dx = Lx/nx
x = LinRange(0, Lx, nx+1)
x_trunc = x[1:nx];

In [5]:
function partial_x(f, dx)

    nx, = size(phi_true)
    dfdx = zeros(nx)
    dfdx[1] = 1/(dx) * (f[1] - f[nx])
    for i in 2:nx
        dfdx[i] = 1/(dx) * (f[i]-f[i-1])
    end
    return dfdx
end

function adv_x_Euler(f1, a, dx)
    nx, = size(phi_true)
    f2 = zeros(nx)
    f2 .= a .* partial_x(f1, dx)
    return f2
end;

In [6]:
#Initialising variables

phi_old = sin.(2*π*x/(Lx))
phi_old = phi_old[1:nx]
phi_new = zeros(nx)

a = 1 #Speed of advection

#Time variables
t_start = 0
t_end = 100
dt = 1

a = 1

#True solution
phi_true = sin.(2*π*x/(Lx))[1:nx]

#Needed for making animations
n_steps = Int(t_end/dt)
phi_data = zeros(nx, n_steps + 1)
phi_true_data = zeros(nx, n_steps + 1)

#Advecting the variable for fixed number of time steps


function stepforward_original(phi_old, t_start, t_end, dt)
    count = 1
    for t in t_start:dt:t_end
        phi_new = phi_old - dt*adv_x_Euler(phi_old, a, dx)
        @. phi_true = sin(2*π*(x_trunc - a*(t))/Lx)
        phi_old = phi_new
#         phi_data[:, count] = phi_old
#         phi_true_data[:, count] = phi_true
        count = count+1
    end
end

stepforward_original (generic function with 1 method)

In [7]:
@btime partial_x(phi_old, dx);

  3.831 μs (153 allocations: 3.06 KiB)


In [8]:
@btime adv_x_Euler(phi_old, a, dx);

  4.325 μs (158 allocations: 3.45 KiB)


In [9]:
@btime stepforward_original(phi_old, t_start, t_end, dt);

  689.288 μs (17170 allocations: 468.70 KiB)


Now let's try to optimize.

In [10]:
function partial_x!(dfdx, f, dx, nx)
    dfdx[1] = (f[1] - f[nx]) / dx
    for i in 2:nx
        dfdx[i] = (f[i] - f[i-1]) / dx
    end
end

function adv_x_Euler!(u∇φ, dfdx, f, a, dx, nx)
    partial_x!(dfdx, f, dx, nx)
    @. u∇φ = a * dfdx
end;

In [11]:
dφdx = similar(phi_old)
u∇φ = similar(phi_old)
φtrue = similar(phi_old);

In [12]:
@btime partial_x!(dφdx, phi_old, dx, nx);

  41.029 ns (0 allocations: 0 bytes)


In [13]:
@btime adv_x_Euler!(u∇φ, dφdx, phi_old, a, dx, nx);

  55.812 ns (0 allocations: 0 bytes)


In [14]:
function stepforward_optimized!(u∇φ, dφdx, phi_old, φtrue, t_start, t_end, dt, x_trunc, Lx, a)
    for t in t_start:dt:t_end
        adv_x_Euler!(u∇φ, dφdx, phi_old, a, dx, nx)
        @. phi_old -= dt * u∇φ
        @. φtrue = sin(2π * (x_trunc - a*t)/Lx)
#         phi_data[:, count] = phi_old
#         phi_true_data[:, count] = phi_true
    end
end

stepforward_optimized! (generic function with 1 method)

In [15]:
@btime stepforward_optimized!(u∇φ, dφdx, phi_old, φtrue, t_start, t_end, dt, x_trunc, Lx,  a)

  34.783 μs (0 allocations: 0 bytes)


Can we do even better?

In [26]:
function partial_x_noboundschecking!(dfdx, f, dx, nx)
    @inbounds dfdx[1] = (f[1] - f[nx]) / dx
    for i in 2:nx
        @inbounds dfdx[i] = (f[i] - f[i-1]) / dx
    end
end

function adv_x_Euler_noboundschecking!(u∇φ, dfdx, f, a, dx, nx)
    partial_x_noboundschecking!(dfdx, f, dx, nx)
    @. u∇φ = a * dfdx
end;

In [27]:
@btime partial_x_noboundschecking!(dφdx, phi_old, dx, nx);

  23.573 ns (0 allocations: 0 bytes)


In [28]:
@btime adv_x_Euler_noboundschecking!(u∇φ, dφdx, phi_old, a, dx, nx);

  34.835 ns (0 allocations: 0 bytes)


In [29]:
function stepforward_moreoptimized!(u∇φ, dφdx, phi_old, φtrue, t_start, t_end, dt, x_trunc, Lx, a)
    for t in t_start:dt:t_end
        adv_x_Euler_noboundschecking!(u∇φ, dφdx, phi_old, a, dx, nx)
        @. phi_old -= dt * u∇φ
        @. φtrue = sin(2π * (x_trunc - a*t)/Lx)
#         phi_data[:, count] = phi_old
#         phi_true_data[:, count] = phi_true
    end
end

stepforward_moreoptimized! (generic function with 1 method)

In [30]:
@btime stepforward_moreoptimized!(u∇φ, dφdx, phi_old, φtrue, t_start, t_end, dt, x_trunc, Lx,  a)

  32.924 μs (0 allocations: 0 bytes)
