In [1]:
using Random

module O

export TaskLocalXorshift64, XOS

using Random: Random, AbstractRNG, RandomDevice, SamplerType, SamplerTrivial, CloseOpen01_64

"""
    TaskLocalXorshift64 <: AbstractRNG

Field-less handle for a 64-bit xorshift generator. The generator's state is not stored
in the object; it is read from and written to the `rngState0` field of the task returned
by `current_task()`.
"""
struct TaskLocalXorshift64 <: AbstractRNG end

"Shared instance of [`TaskLocalXorshift64`](@ref)."
const XOS = TaskLocalXorshift64()

# Zero is a fixed point of the xorshift step (0 ⊻ (0 << k) == 0), so a generator whose
# state is zero would return zero forever. Seeds that reduce to zero are replaced by this
# arbitrary non-zero constant.
const ZERO_STATE_FALLBACK = 0x9e3779b97f4a7c15

# Reduce `seed` modulo 2^64 and make sure the resulting state is usable (non-zero).
@inline function nonzerostate(seed::Integer)
    state = mod(seed, UInt64)
    return iszero(state) ? ZERO_STATE_FALLBACK : state
end

# Current generator state: the `rngState0` field of the running task.
# NOTE(review): `rngState0` is an internal `Task` field, presumably also used by Julia's
# default task-local RNG — confirm that sharing it is intended.
@inline getstate(::TaskLocalXorshift64) = current_task().rngState0

# Store `seed` (reduced modulo 2^64) as the state of the running task and return `xos`.
# This is the raw setter: it performs no zero check. Use `Random.seed!` for seeding.
@inline setstate!(xos::TaskLocalXorshift64, seed::Integer) =
    (current_task().rngState0 = mod(seed, UInt64); xos)

# Produce one `UInt64`: return the current state and advance it by one xorshift step
# (shift constants 13, 7, 17). A non-zero state never becomes zero under this step.
@inline function Random.rand(xos::TaskLocalXorshift64, ::SamplerType{UInt64})
    x = res = getstate(xos)
    x ⊻= x << 13
    x ⊻= x >> 7
    x ⊻= x << 17
    setstate!(xos, x)
    return res
end

# Produce one `Float64` in [0, 1): keep the top 53 bits of a `UInt64` draw and scale
# by 2^-53.
@inline Random.rand(xos::TaskLocalXorshift64, ::SamplerTrivial{CloseOpen01_64}) =
    Float64(rand(xos, UInt64) >>> 11) * 0x1.0p-53

# Seed from `RandomDevice`. Returns `xos`.
Random.seed!(xos::TaskLocalXorshift64) =
    setstate!(xos, nonzerostate(rand(RandomDevice(), UInt64)))

# Seed from an integer. The state becomes `mod(seed, UInt64)`, except that a seed which
# reduces to zero is replaced by `ZERO_STATE_FALLBACK`. Returns `xos`.
Random.seed!(xos::TaskLocalXorshift64, seed::Integer) =
    setstate!(xos, nonzerostate(seed))

end

using .O

# Print the task's current generator state, then draw 20 Float64 samples in order.
@show O.getstate(XOS)
A = map(_ -> rand(XOS), 1:20)

O.getstate(XOS) = 0xcf5cb69b00e5796e


20-element Vector{Float64}:
 0.8100084427286641
 0.7945790302334755
 0.4527537345030621
 0.3883190569924725
 0.7849263208397615
 0.5494120941449921
 0.9967663009734609
 0.20769453475623612
 0.6091616080484087
 0.7785419203933468
 0.45538790238573457
 0.25460736078389556
 0.012481761822934034
 0.6496374599922397
 0.7919924993864689
 0.7456972440390945
 0.7953481040013348
 0.5830927997625643
 0.6205491924943326
 0.7756648510222477

In [2]:
# Reseed with a value drawn from RandomDevice and print the resulting state.
Random.seed!(XOS)
@show O.getstate(XOS)
# Reseed with a fixed integer; the printed state is that integer widened to UInt64.
Random.seed!(XOS, 0x1234567)
@show O.getstate(XOS);

O.getstate(XOS) = 0xef6e5329f86ffb29
O.getstate(XOS) = 0x0000000001234567


In [3]:
"""
    mcpi(L=10^9, rng=XOS)

Monte Carlo estimate of π. Draws `L` points `(x, y)` with `rand(rng)`, counts how many
satisfy `x^2 + y^2 ≤ 1`, and returns four times the hit fraction.
"""
function mcpi(L=10^9, rng=XOS)
    hits = 0
    for _ in 1:L
        # x is drawn before y, matching left-to-right evaluation of the sum.
        x = rand(rng)
        y = rand(rng)
        hits += (x^2 + y^2 ≤ 1)
    end
    return 4hits / L
end

# Time six back-to-back runs with the default arguments; the value returned by the
# last call is the cell's result.
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()
@time mcpi()

  3.363440 seconds
  3.332102 seconds
  3.303405 seconds
  3.307343 seconds
  3.292089 seconds
  3.342178 seconds


3.141454796

In [4]:
using LoopVectorization

# Draw one point (x, y) from `rng` and report whether it satisfies x^2 + y^2 ≤ 1.
# The index `i` is accepted so the function can be called as a per-iteration callback,
# but it is not used.
@inline function isinside(i, rng=XOS)
    x = rand(rng)
    y = rand(rng)
    return x^2 + y^2 ≤ 1
end

# Monte Carlo estimate of π using a `@turbo` loop: sums `isinsiderng(i)` over `1:L`
# and returns four times the sum divided by `L`.
#
# NOTE(review): the default `isinsiderng` (`isinside`) ignores `i` and advances the RNG
# state on every call, i.e. it is stateful. `@turbo` may evaluate it once per SIMD
# vector of iterations rather than once per iteration. The recorded result 3.1417432
# corresponds to a hit count of 785_435_800, an exact multiple of 8, whereas the plain
# loop's recorded count (785_363_699) is odd. If so, the effective number of independent
# samples is smaller than `L` and the speed-up is not a like-for-like comparison.
# TODO confirm against the LoopVectorization documentation before relying on this.
function mcpi_turbo(L=10^9, isinsiderng=isinside)
    c = 0
    @turbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# Time six back-to-back runs with the default arguments; the value returned by the
# last call is the cell's result.
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()
@time mcpi_turbo()

  0.427697 seconds
  0.428507 seconds
  0.446790 seconds
  0.447515 seconds
  0.448349 seconds
  0.438973 seconds


3.1417432

In [5]:
# Monte Carlo estimate of π using a `@tturbo` (threaded) loop: sums `isinsiderng(i)`
# over `1:L` and returns four times the sum divided by `L`.
#
# NOTE(review): the default `isinsiderng` (`isinside`) ignores `i` and is stateful; its
# RNG state is read from `current_task()`, so which state each call uses depends on the
# task that executes it — presumably one state per worker task; verify. As with the
# `@turbo` version, the recorded result 3.141508064 corresponds to a hit count of
# 785_377_016, an exact multiple of 8, which suggests the callback may be evaluated once
# per SIMD vector rather than once per iteration. TODO confirm before relying on the
# timings as a like-for-like comparison.
function mcpi_tturbo(L=10^9, isinsiderng=isinside)
    c = 0
    @tturbo for i in 1:L
        c += isinsiderng(i)
    end
    4c/L
end

# One untimed call first (presumably to keep compilation out of the timings), then six
# timed runs; the value returned by the last call is the cell's result.
mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()
@time mcpi_tturbo()

  0.121960 seconds
  0.125975 seconds (113 allocations: 7.078 KiB)
  0.095930 seconds (4 allocations: 320 bytes)
  0.083965 seconds
  0.093329 seconds
  0.083467 seconds (114 allocations: 7.359 KiB)


3.141508064

In [6]:
# Print the Julia version and platform details for the session that produced this output.
versioninfo()

Julia Version 1.12.1
Commit ba1e628ee4 (2025-10-17 13:02 UTC)
Build Info:
  Official https://julialang.org release
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: 12 × Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
  WORD_SIZE: 64
  LLVM: libLLVM-18.1.7 (ORCJIT, skylake)
  GC: Built with stock GC
Threads: 12 default, 1 interactive, 12 GC (on 12 virtual cores)
Environment:
  JULIA_DEPOT_PATH = D:\.julia
  JULIA_NUM_PRECOMPILE_TASKS = 4
  JULIA_NUM_THREADS = auto
  JULIA_PYTHONCALL_EXE = D:\.julia\conda\3\python.exe
