In [1]:
using Random

module O

export TaskLocalXorshift64, XOS

using Random: Random, AbstractRNG, RandomDevice, SamplerType, SamplerTrivial, CloseOpen01_64

"""
    TaskLocalXorshift64 <: AbstractRNG

Field-less handle for a 64-bit xorshift generator (shift triple 13, 7, 17) whose
state is stored in `current_task().rngState0` rather than in the handle itself.

NOTE(review): `rngState0` is a field of `Task` that this module overwrites;
presumably Julia's own task-local default RNG also uses it, so drawing from or
seeding this generator may perturb the default RNG of the same task — confirm
against the Julia version in use.
"""
struct TaskLocalXorshift64 <: AbstractRNG end

# Shared handle; all state lives in the current task, so one instance suffices.
const XOS = TaskLocalXorshift64()

# State substituted when a seed reduces to zero. The xorshift update maps 0 to 0,
# so a zero state would make every subsequent draw return 0.
const ZERO_STATE_REPLACEMENT = 0x9e3779b97f4a7c15

# Read the generator state held by the current task.
@inline getstate(::TaskLocalXorshift64) = current_task().rngState0

# Store `seed`, reduced modulo 2^64, as the current task's generator state and
# return the handle. This is the raw setter used on the sampling path: it does
# NOT guard against zero (see `seedstate` / `Random.seed!` for that).
@inline function setstate!(xos::TaskLocalXorshift64, seed::Integer)
    current_task().rngState0 = mod(seed, UInt64)
    return xos
end

# Reduce a user-supplied seed to a valid (nonzero) 64-bit state.
@inline function seedstate(seed::Integer)
    state = mod(seed, UInt64)
    return iszero(state) ? ZERO_STATE_REPLACEMENT : state
end

# Draw a UInt64: advance the state by one xorshift step and return the state as
# it was BEFORE the step. Consequently the first draw after seeding is the seed.
@inline function Random.rand(xos::TaskLocalXorshift64, ::SamplerType{UInt64})
    x = res = getstate(xos)
    x ⊻= x << 13
    x ⊻= x >> 7
    x ⊻= x << 17
    setstate!(xos, x)
    return res
end

# Draw a Float64 in [0, 1): the low 52 bits of a UInt64 draw divided by 2^52.
@inline function Random.rand(xos::TaskLocalXorshift64, ::SamplerTrivial{CloseOpen01_64})
    return (rand(xos, UInt64) & UInt64(2^52 - 1)) / 2^52
end

"""
    Random.seed!(xos::TaskLocalXorshift64)
    Random.seed!(xos::TaskLocalXorshift64, seed::Integer)

Seed the current task's generator, either from `RandomDevice()` or from `seed`
reduced modulo 2^64, and return `xos`. A seed that reduces to zero is replaced
by a fixed nonzero constant, because zero is a fixed point of the xorshift
update and would yield an all-zero stream.
"""
Random.seed!(xos::TaskLocalXorshift64) =
    setstate!(xos, seedstate(rand(RandomDevice(), UInt64)))

Random.seed!(xos::TaskLocalXorshift64, seed::Integer) =
    setstate!(xos, seedstate(seed))

end

using .O

# Print the generator state stored in the current task, then draw 20 samples
# from XOS with the zero-argument `rand` form.
@show O.getstate(XOS)
A = [rand(XOS) for _ in 1:20]

O.getstate(XOS) = 0x252bea83cf16e57e


20-element Vector{Float64}:
 0.7447546090537283
 0.29870176301251394
 0.21951420347560324
 0.8435278214827784
 0.9387687468516317
 0.653040706810184
 0.5066733675803736
 0.490702272775569
 0.9483757694995381
 0.871607106785971
 0.375199756883112
 0.1749593172938071
 0.9091683072740089
 0.6868737297510432
 0.5763157299913575
 0.909301177668359
 0.6578039607802599
 0.8598749543892088
 0.6738401218057901
 0.9892859199254946

In [2]:
# Reseed from RandomDevice and print the resulting state.
Random.seed!(XOS)
@show O.getstate(XOS)
# Reseed with an explicit integer; the stored state is that integer mod 2^64.
Random.seed!(XOS, 0x1234567)
@show O.getstate(XOS);

O.getstate(XOS) = 0x1b69f11c53471434
O.getstate(XOS) = 0x0000000001234567


In [3]:
# Monte Carlo estimate of π: draw `L` points (x, y) from `rng`, count those
# satisfying x² + y² ≤ 1, and return 4 * count / L.
# Two `rand(rng)` calls are made per point, x first and then y.
function mcpi(L=10^9, rng=XOS)
    hits = 0
    for _ in 1:L
        x = rand(rng)
        y = rand(rng)
        hits += (x^2 + y^2 ≤ 1)
    end
    return 4hits / L
end

# Time six consecutive runs of `mcpi` with its default arguments.
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()

  3.499087 seconds
  3.524789 seconds
  3.511741 seconds
  3.506279 seconds
  3.469305 seconds
  3.486716 seconds


3.141649596

In [4]:
using LoopVectorization

# Draw one point (x, y) from `rng` and report whether x² + y² ≤ 1.
# The index argument `i` is accepted but not used by the computation.
@inline function isinside(i, rng=XOS)
    x = rand(rng)
    y = rand(rng)
    return x^2 + y^2 ≤ 1
end

# Monte Carlo estimate of π with the counting loop wrapped in `@turbo`.
# `isinsiderng` is called with the loop index and its result is accumulated;
# the function returns 4 * count / L.
# NOTE(review): the default `isinside` ignores its index and draws from a
# stateful RNG. Confirm whether `@turbo` evaluates it once per iteration or
# once per vectorized batch — the number of independent samples (and hence
# the accuracy of the estimate) depends on that.
function mcpi_turbo(L=10^9, isinsiderng=isinside)
    c = 0
    @turbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# Time six consecutive runs of `mcpi_turbo` with its default arguments.
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()

  0.441369 seconds
  0.441495 seconds
  0.447223 seconds
  0.445339 seconds
  0.446744 seconds
  0.441775 seconds


3.141373088

In [5]:
# Same estimator as `mcpi_turbo`, with the loop wrapped in `@tturbo` instead.
# `isinsiderng` is called with the loop index and its result is accumulated;
# the function returns 4 * count / L.
# NOTE(review): the default predicate draws from XOS, whose state is read from
# `current_task()`. Confirm which tasks `@tturbo` runs the loop body on and how
# their RNG state is initialized, so that workers do not replay the same stream.
function mcpi_tturbo(L=10^9, isinsiderng=isinside)
    c = 0
    @tturbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# One untimed call first (presumably a warm-up so compilation is not timed —
# confirm), followed by six timed runs with the default arguments.
mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()

  0.089596 seconds (2 allocations: 32 bytes)
  0.107096 seconds (4 allocations: 192 bytes)
  0.108801 seconds (4 allocations: 192 bytes)
  0.089094 seconds
  0.113913 seconds (4 allocations: 152 bytes)
  0.106947 seconds (4 allocations: 192 bytes)


3.141572512