In [1]:
# Print the Julia version so the timings below can be tied to a specific build.
@show VERSION;

VERSION = v"1.7.0-DEV.650"


In [2]:
# If the working directory contains a Project.toml, activate that project
# environment and load Revise; otherwise keep the current environment as is.
if isfile("Project.toml")
    using Pkg
    Pkg.activate(".")
    # Revise is only loaded in the project case — presumably so edits to the
    # package under development are picked up without restarting; TODO confirm.
    using Revise
end

In [3]:
# MyUtils supplies the @my_threads and @my_distributed macros used below.
using MyUtils
# Base.Threads: nthreads, Atomic, atomic_add!
using Base.Threads
# Distributed: addprocs, rmprocs, procs, workers, @everywhere
using Distributed
# Random: Random.default_rng
using Random

# Report how many threads this Julia session was started with.
@show nthreads();

nthreads() = 8


In [4]:
?MyUtils.@my_threads

```
@my_threads
```

A macro to parallelize a `for` loop to run with multiple threads. It splits the iteration space among multiple tasks with `prebody` and `postbody`. It runs those tasks on threads according to a scheduling policy.

Usage:

```julia
@my_threads [schedule] begin
    prebody
end for ...
    ...
end begin
    postbody
end
```


In [5]:
?MyUtils.@my_distributed

```
@my_distributed
```

A distributed memory, parallel for loop of the form:

```julia
@my_distributed begin
    prebody
end [reducer] for var = range
    body
end
```


In [6]:
"""
    mcpi(N)

Estimate π by Monte Carlo sampling on a single thread.

Draw `N` points `(x, y)` with the default random number generator, count how
many satisfy `x^2 + y^2 ≤ 1`, and return four times the fraction of such points.
"""
function mcpi(N)
    rng = Random.default_rng()
    hits = 0
    for _ in 1:N
        # Draw x first, then y, so the generator is consumed in a fixed order.
        x = rand(rng)
        y = rand(rng)
        # A Bool adds to an Int as 0 or 1, so `hits` stays an integer count.
        hits += (x^2 + y^2 ≤ 1)
    end
    return 4 * hits / N
end

# Time the serial estimator three times on 10^8 samples. Repeating the call
# shows run-to-run variation and separates any one-off cost of the first call.
@time mcpi(10^8)
@time mcpi(10^8)
@time mcpi(10^8)

  0.321139 seconds (12 allocations: 19.656 KiB)
  0.319071 seconds
  0.324314 seconds


3.14172888

In [7]:
"""
    mcpi_my_threads(N)

Estimate π by Monte Carlo sampling, with the loop over `1:N` parallelized by
`MyUtils.@my_threads`.

Each task keeps its own hit counter and folds it into a shared atomic total
once its share of the loop is done. The result is four times the fraction of
sampled points with `x^2 + y^2 ≤ 1`.
"""
function mcpi_my_threads(N)
    total = Atomic{Int}(0)
    MyUtils.@my_threads begin
        # prebody: per-task state (per the macro's help text, run for each task).
        rng = Random.default_rng()
        hits = 0
    end for i in 1:N
        x = rand(rng)
        y = rand(rng)
        hits += (x^2 + y^2 ≤ 1)
    end begin
        # postbody: merge this task's count into the shared total atomically.
        atomic_add!(total, hits)
    end
    return 4 * total[] / N
end

# Time the threaded estimator three times on 10^8 samples. The first call
# includes compilation, so the later runs are the ones to compare.
@time mcpi_my_threads(10^8)
@time mcpi_my_threads(10^8)
@time mcpi_my_threads(10^8)

  0.175083 seconds (54.86 k allocations: 3.421 MiB, 46.41% compilation time)
  0.090665 seconds (60 allocations: 4.750 KiB)
  0.093240 seconds (55 allocations: 4.453 KiB)


3.14175776

In [8]:
# Start from a clean pool: remove every process except the first entry of
# procs(), then launch 8 fresh worker processes.
rmprocs(procs()[2:end])
addprocs(8)
@show workers()

# Run the same setup on every process: activate the local project when a
# Project.toml is present, then load the packages the distributed loop uses.
@everywhere begin
    if isfile("Project.toml")
        using Pkg
        Pkg.activate(".")
    end
    using MyUtils
    using Random
end

workers() = [2, 3, 4, 5, 6, 7, 8, 9]


[32m[1m  Activating[22m[39m[32m[1m  Activating[22m[39m[32m[1m  Activating[22m[39m [32m[1m  Activating[22m[39menvironment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`[32m[1m  Activating[22m[39m[32m[1m  Activating[22m[39m
  environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`
environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml` 
environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`
 environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`
[32m[1m  Activating[22m[39m[32m[1m  Activating[22m[39m environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`
 environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`
 environment at `C:\Users\genkuroki\OneDrive\work\MyUtils.jl\Project.toml`


In [9]:
"""
    mcpi_my_distributed(N)

Estimate π by Monte Carlo sampling, with the loop over `1:N` run through
`MyUtils.@my_distributed` and the per-iteration results combined with `+`.

Each iteration contributes `1` when the sampled point satisfies
`x^2 + y^2 ≤ 1` and `0` otherwise. The result is four times the summed count
divided by `N`.
"""
function mcpi_my_distributed(N)
    hits = MyUtils.@my_distributed begin
        # prebody: obtain the random number generator used by the loop body.
        rng = Random.default_rng()
    end (+) for i in 1:N
        x = rand(rng)
        y = rand(rng)
        # Yield an Int (not a Bool) so the `+` reduction always produces a count.
        x^2 + y^2 ≤ 1 ? 1 : 0
    end
    return 4 * hits / N
end

# Time the distributed estimator three times on 10^8 samples. The first call
# includes compilation, so the later runs are the ones to compare.
@time mcpi_my_distributed(10^8)
@time mcpi_my_distributed(10^8)
@time mcpi_my_distributed(10^8)

  1.677908 seconds (950.04 k allocations: 56.823 MiB, 0.84% gc time, 18.40% compilation time)
  0.093394 seconds (655 allocations: 30.984 KiB)
  0.087794 seconds (651 allocations: 28.047 KiB)


3.14169016

In [10]:
using BenchmarkTools

# Benchmark the serial, threaded and distributed estimators on the same
# workload (10^8 samples). The value displayed after the timings is the result
# of the last expression in the cell, i.e. the distributed estimate.
@btime mcpi(10^8)
@btime mcpi_my_threads(10^8)
@btime mcpi_my_distributed(10^8)

  314.491 ms (0 allocations: 0 bytes)
  89.570 ms (49 allocations: 4.27 KiB)
  86.329 ms (645 allocations: 27.80 KiB)


3.14174876

In [11]:
# Tear down the worker processes: remove every process except the first entry
# of procs(). With no workers left, workers() reports only process 1.
rmprocs(procs()[2:end])
@show workers();

workers() = [1]
