In [1]:
using Random

# Module O: a minimal xorshift64 generator whose single 64-bit state word is stored in
# the current task (`current_task().rngState0`) and which plugs into the `Random` API.
module O

export TaskLocalXorshift64, XOS

using Random: Random, AbstractRNG, RandomDevice, SamplerType, SamplerTrivial, CloseOpen01_64

"""
    TaskLocalXorshift64 <: AbstractRNG

Field-less RNG handle. All state lives in the `rngState0` field of whichever task
calls into the generator, so every task advances its own sequence.

NOTE(review): `rngState0` is an internal `Task` field; presumably Julia's own default
task-local RNG also uses it, so the two generators may disturb each other — confirm.
"""
struct TaskLocalXorshift64 <: AbstractRNG end

"Ready-made instance of [`TaskLocalXorshift64`](@ref)."
const XOS = TaskLocalXorshift64()

# State substituted by `seed!` when a seed reduces to zero. Zero is a fixed point of the
# shift/xor recurrence below: once the state is 0, every subsequent output is 0.
const ZERO_SEED_FALLBACK = 0x9e3779b97f4a7c15

"""
    getstate(xos::TaskLocalXorshift64)

Return the current task's raw 64-bit generator state.
"""
@inline getstate(::TaskLocalXorshift64) = current_task().rngState0

"""
    setstate!(xos::TaskLocalXorshift64, seed::Integer)

Overwrite the current task's state with `seed` reduced modulo 2^64 and return `xos`.
The value is stored verbatim (no zero check); use `Random.seed!` for safe seeding.
"""
@inline function setstate!(xos::TaskLocalXorshift64, seed::Integer)
    current_task().rngState0 = mod(seed, UInt64)
    return xos
end

# Reduce `seed` modulo 2^64 and replace the degenerate all-zero state by the fallback.
@inline function nonzerostate(seed::Integer)
    s = mod(seed, UInt64)
    return ifelse(iszero(s), ZERO_SEED_FALLBACK, s)
end

# One xorshift step with shifts (13, 7, 17). The value returned is the state *before*
# the update, so the first draw after `seed!(xos, s)` is `s` itself.
@inline function Random.rand(xos::TaskLocalXorshift64, ::SamplerType{UInt64})
    x = res = getstate(xos)
    x ⊻= x << 13
    x ⊻= x >> 7
    x ⊻= x << 17
    setstate!(xos, x)
    return res
end

# Float64 in [0, 1): keep the low 52 bits of one UInt64 draw and scale by 2^-52.
@inline function Random.rand(xos::TaskLocalXorshift64, ::SamplerTrivial{CloseOpen01_64})
    return (rand(xos, UInt64) & UInt64(2^52 - 1)) / 2^52
end

"""
    Random.seed!(xos::TaskLocalXorshift64)
    Random.seed!(xos::TaskLocalXorshift64, seed::Integer)

Reseed the current task's state, either from `RandomDevice()` or from `seed` reduced
modulo 2^64, and return `xos`. A seed that reduces to zero is replaced by a fixed
nonzero constant, because a zero state would make the generator emit only zeros.
"""
Random.seed!(xos::TaskLocalXorshift64) =
    setstate!(xos, nonzerostate(rand(RandomDevice(), UInt64)))

Random.seed!(xos::TaskLocalXorshift64, seed::Integer) =
    setstate!(xos, nonzerostate(seed))

end

using .O

# Print the generator's raw state for the current task.
@show O.getstate(XOS)
# Collect 20 consecutive draws from the task-local generator, one call per element.
A = map(_ -> rand(XOS), 1:20)

O.getstate(XOS) = 0xe895e3cf616ca5fd


20-element Vector{Float64}:
 0.36811769539269723
 0.9823116951831685
 0.7953780891725268
 0.9865700746125368
 0.6765482724562919
 0.8790339547655168
 0.23527892634400338
 0.16840915731282724
 0.3235462263230444
 0.3762622575525465
 0.7921930665187822
 0.09299340198602368
 0.531018974897693
 0.9067349128235234
 0.9163230551734789
 0.3910409652356919
 0.4764271465942118
 0.3749847575926668
 0.17898187010988353
 0.3039288828602087

In [2]:
# Reseed with a value drawn from `RandomDevice()` and print the resulting state.
Random.seed!(XOS)
@show O.getstate(XOS)
# Reseed with an explicit integer; the printed state is that integer as a UInt64.
Random.seed!(XOS, 0x1234567)
@show O.getstate(XOS);

O.getstate(XOS) = 0xf2beb5b294d322c2
O.getstate(XOS) = 0x0000000001234567


In [3]:
"""
    mcpi(L=10^9, rng=XOS)

Monte Carlo estimate of π: draw `L` pairs of numbers from `rng`, count the pairs whose
squared sum is at most 1, and return four times the hit fraction.
"""
function mcpi(L=10^9, rng=XOS)
    hits = 0
    for _ in 1:L
        # Two consecutive draws form one sample point.
        x = rand(rng)
        y = rand(rng)
        hits += (x * x + y * y ≤ 1)
    end
    return 4hits / L
end

# Time three runs with the default arguments; the first measurement may include
# compilation time, so the later two are the representative ones.
@time mcpi()
@time mcpi()
@time mcpi()

  3.305454 seconds
  3.308680 seconds
  3.293647 seconds


3.1416377

In [4]:
using LoopVectorization

"""
    isinside(i, rng=XOS)

Draw two numbers from `rng` and return whether the sum of their squares is at most 1.
The index `i` is accepted but not used.
"""
@inline function isinside(i, rng=XOS)
    x = rand(rng)
    y = rand(rng)
    return x * x + y * y ≤ 1
end

# Monte Carlo estimate of π with the counting loop handed to LoopVectorization's `@turbo`.
# `isinsiderng` is called with the loop index and its result is accumulated into `c`;
# the function returns `4c/L`.
# NOTE(review): the default `isinside` ignores its index and advances RNG state as a side
# effect. It is unclear whether `@turbo` calls it once per iteration or once per SIMD
# batch. The recorded result 3.14158016 corresponds to c = 785395040, a multiple of 32,
# which hints that fewer than `L` independent samples are drawn — confirm before
# comparing timings with `mcpi`.
function mcpi_turbo(L=10^9, isinsiderng=isinside)
    c = 0
    @turbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# Time three runs with the default arguments; the first measurement may include
# compilation time.
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()

  0.423676 seconds
  0.424649 seconds
  0.418601 seconds


3.14158016

In [5]:
# Same estimator as `mcpi_turbo`, but the loop is annotated with `@tturbo` (the threaded
# variant of `@turbo`). `isinsiderng` is called with the loop index and its result is
# accumulated into `c`; the function returns `4c/L`.
# NOTE(review): the default `isinside` draws from `XOS`, whose state is read from
# `current_task()`; presumably each worker therefore advances its own state, and the
# sequences depend on how those tasks were seeded — verify. The open question about how
# many times `@turbo` actually calls an index-independent function applies here as well.
function mcpi_tturbo(L=10^9, isinsiderng=isinside)
    c = 0
    @tturbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# Untimed warm-up call first, so the three measurements below exclude compilation.
mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()

  0.088986 seconds
  0.097386 seconds
  0.087289 seconds


3.141736