In [2]:
using LinearAlgebra
using LoopVectorization

In [3]:
function calc_pi(n::Int)
    s = 0.0
    for i in 1:n
        s += (rand()^2 + rand()^2 < 1)
    end
    return 4s/n
end

calc_pi (generic function with 1 method)

In [4]:
@noinline function calc_pi_fast(n::Int)
    s = 0.0
    @inbounds @fastmath @simd for i in 1:n
         s += (rand()^2 + rand()^2 < 1)
    end
    return 4s/n
end

calc_pi_fast (generic function with 1 method)

In [18]:
rand_pi(i) = Float64(rand()^2 + rand()^2 < 1)

rand_pi (generic function with 1 method)

In [20]:
function calc_pi_turbo(n::Int)
    s = 0.0
    @turbo for i in 1:n
        s += rand_pi(i)
    end
    return 4s/n
end

calc_pi_turbo (generic function with 1 method)

In [1]:
using BenchmarkTools

In [23]:
@time println(calc_pi(100000))

3.13176
  0.000500 seconds (23 allocations: 848 bytes)


In [31]:
@benchmark calc_pi_fast(100000)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m386.875 μs[22m[39m … [35m544.625 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m390.584 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m392.786 μs[22m[39m ± [32m  7.092 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m▁[39m▄[39m▇[39m█[39m█[39m▇[34m▆[39m[39m▆[39m▆[32m▅[39m[39m▄[39m▃[39m▃[39m▂[39m▂[39m▁[39m [39m [39m [39m [39m [39m▁[39m▁[39m▁[39m [39m▁[39m▁[39m [39m [39m▁[39m▁[39m [39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m▅[39m█[39m█[3

In [35]:
@benchmark calc_pi_turbo(100000)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m45.875 μs[22m[39m … [35m120.792 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m46.917 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m47.503 μs[22m[39m ± [32m  3.069 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▂[39m [39m [39m█[34m▇[39m[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m▂[39m▃[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [39m█[39m▇[39m▇[39m█[34m█