In [1]:
"""
    f_fastmath(x_prev, v, dt)

Return `x_prev + v*dt`, i.e. advance the value `x_prev` by the rate `v` over a
step of size `dt`.

The arithmetic is wrapped in `@fastmath`, which relaxes strict IEEE semantics,
and the definition is marked `@noinline`, so callers do not inline it unless
they override that at the call site.
"""
@noinline @fastmath function f_fastmath(x_prev, v, dt)
    increment = v * dt
    return x_prev + increment
end

"""
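    solveode(f, n=10^6, x0=0.0, dt=1/n)

Numerically solve x(0) = 0, x'(t) = x(t) + 1, 0 ≤ t ≤ 1.

Starting from `x0`, the update `x = f(x, x + 1, dt)` is applied `n` times and the
final `x` is returned. With the default arguments (`x0 = 0.0`, `dt = 1/n`) and an
update rule `f(x, v, dt) = x + v*dt`, the result approximates x(1).

Exact solution: x(t) = exp(t) - 1, x(1) = exp(1) - 1 = 1.718281828⋯.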
"""
function solveode(f, n=10^6, x0=0.0, dt=1/n)
    # `state` carries the current approximation from one step to the next.
    state = x0
    for k in 1:n
        # The right-hand side x + 1 of the ODE is the slope handed to the update rule.
        slope = state + 1
        state = f(state, slope, dt)
    end
    return state
end

# Same stepping loop as `x = f(x, x+1, dt)` repeated `n` times from `x0`, except
# that the call to `f` carries a call-site `@inline` annotation, which takes
# precedence over a `@noinline` declared on the definition of `f`.
function solveode_inline(f, n=10^6, x0=0.0, dt=1/n)
    state = x0
    for k in 1:n
        # Slope from the ODE right-hand side x + 1.
        slope = state + 1
        state = @inline f(state, slope, dt)
    end
    return state
end

# Print the result of each driver (default n = 10^6 steps) next to the exact
# value exp(1) - 1 for comparison. The trailing `;` suppresses the notebook's
# echo of the last expression's value.
@show solveode(f_fastmath)
@show solveode_inline(f_fastmath)
@show exp(1) - 1;

solveode(f_fastmath) = 1.7182804693193663
solveode_inline(f_fastmath) = 1.7182804693193663
exp(1) - 1 = 1.718281828459045


In [2]:
using BenchmarkTools

# Time both drivers on the same `@noinline` update rule; the only difference
# between them is the call-site `@inline` inside `solveode_inline`.
@btime solveode(f_fastmath)
@btime solveode_inline(f_fastmath);

  1.750 ms (0 allocations: 0 bytes)
  54.400 μs (0 allocations: 0 bytes)


In [3]:
# Dump the native assembly generated for `solveode(f_fastmath)`, without debug
# info comments or module boilerplate.
@code_native debuginfo=:none dump_module=false solveode(f_fastmath)

	[0m.text
	[96m[1mpush[22m[39m	[0mrbp
	[96m[1mmov[22m[39m	[0mrbp[0m, [0mrsp
	[96m[1mpush[22m[39m	[0mrsi
	[96m[1mpush[22m[39m	[0mrdi
	[96m[1msub[22m[39m	[0mrsp[0m, [33m64[39m
	[96m[1mvmovaps[22m[39m	[95mxmmword[39m [95mptr[39m [33m[[39m[0mrbp [0m- [33m32[39m[33m][39m[0m, [0mxmm7
	[96m[1mvmovaps[22m[39m	[95mxmmword[39m [95mptr[39m [33m[[39m[0mrbp [0m- [33m48[39m[33m][39m[0m, [0mxmm6
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mr13 [0m+ [33m16[39m[33m][39m
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrax [0m+ [33m16[39m[33m][39m
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrax[33m][39m
	[96m[1mvxorpd[22m[39m	[0mxmm0[0m, [0mxmm0[0m, [0mxmm0
	[96m[1mmov[22m[39m	[0mesi[0m, [33m1000000[39m
	[96m[1mmovabs[22m[39m	[0mrax[0m, [95moffset[39m [0m.rodata.cst8
	[96m[1mvmovsd[22m[39m	

In [4]:
# Dump the native assembly generated for `solveode_inline(f_fastmath)`, for
# comparison with the non-inlined driver.
@code_native debuginfo=:none dump_module=false solveode_inline(f_fastmath)

	[0m.text
	[96m[1mpush[22m[39m	[0mrbp
	[96m[1mmov[22m[39m	[0mrbp[0m, [0mrsp
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mr13 [0m+ [33m16[39m[33m][39m
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrax [0m+ [33m16[39m[33m][39m
	[96m[1mmov[22m[39m	[0mrax[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrax[33m][39m
	[96m[1mvxorpd[22m[39m	[0mxmm0[0m, [0mxmm0[0m, [0mxmm0
	[96m[1mmov[22m[39m	[0meax[0m, [33m1000000[39m
	[96m[1mmovabs[22m[39m	[0mrcx[0m, [95moffset[39m [0m.rodata.cst8
	[96m[1mvmovsd[22m[39m	[0mxmm1[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrcx[33m][39m           [90m# xmm1 = mem[0],zero[39m
	[96m[1mmovabs[22m[39m	[0mrcx[0m, [33m2272748022136[39m
	[96m[1mvmovsd[22m[39m	[0mxmm2[0m, [95mqword[39m [95mptr[39m [33m[[39m[0mrcx[33m][39m           [90m# xmm2 = mem[0],zero[39m
	[96m[1mnop[22m[39m	[95mword[39m [