Parallel simulations using MPI.jl #141

Draft: wants to merge 39 commits into base: master

Commits (39)
c2b0db6
double ghost working and solver fix
Apr 23, 2024
07aca6f
working mpi Isend/Irecv! with WaterLily
Jun 9, 2024
ffad91a
add new WaterLily.BC! function
Jun 10, 2024
6d41236
update examples/Project.toml
Jun 10, 2024
c404a52
Merge branch 'weymouth:master' into MPI
marinlauber Jun 10, 2024
66d485b
add BC! function for scalar and vector fields
Jun 11, 2024
ef3316a
update examples/Project.toml
Jun 11, 2024
e168ba2
working conv_diff! but not project!
Jun 12, 2024
8994832
fix loc function for double halos
Jun 12, 2024
054711e
add MPI functions and Poisson working
Jun 24, 2024
6977c1f
add MPI as extension
Jun 24, 2024
511cc44
try and fix extension, not working
Jun 25, 2024
2d4f4bf
merge master into MPI
Jun 25, 2024
cae2d1e
type dispatch for BC! functions
Jun 25, 2024
3294511
working mom_step! with Poisson solver
marinlauber Jul 3, 2024
4d2cc50
running with MG solver, but no convergence
Jul 3, 2024
b94babe
start proper testing
marinlauber Jul 5, 2024
23a96d9
proper testing and psolver issue
Jul 5, 2024
2945552
fix solver inside/inside_u
Jul 6, 2024
cd1948f
fixed inside function for double halos
Jul 7, 2024
312b1b8
working parallel poisson solver
Jul 11, 2024
50c8695
add MPIArray type
Jul 11, 2024
a8abd84
Merge branch 'WaterLily-jl:master' into MPI
marinlauber Jul 11, 2024
1ee9341
fix type dispatch for MPIArray
Jul 11, 2024
f09736d
fix type dispatch for MPIArray
Jul 11, 2024
4a5eb0e
AllReduce in residuals!(pois)
marinlauber Jul 11, 2024
281818b
perBC! in Jacobi
marinlauber Jul 12, 2024
7241d39
tidy solver and MPI
marinlauber Jul 12, 2024
ec72ec1
add exitBC! function
marinlauber Jul 12, 2024
7fdbdd8
MPIArray working with sims
marinlauber Jul 12, 2024
b62cde4
clean MPIArray methods
marinlauber Jul 12, 2024
13388de
add Parallel VTK function
marinlauber Jul 13, 2024
883e17b
fix pvtk export with MPI
marinlauber Jul 15, 2024
8bf893a
MPI as an extension running
Jul 16, 2024
283a46b
MPI in extension and remove old files
Jul 16, 2024
9af4ad2
move all write files to their extensions and tidy examples
marinlauber Jul 17, 2024
0bd4e68
remove unused tests
marinlauber Jul 17, 2024
cdc1ba6
move some tests into proper testing file
marinlauber Jul 17, 2024
5b6de27
reduce mpi swaps in BC!
Jul 17, 2024
3 changes: 3 additions & 0 deletions Project.toml
@@ -20,12 +20,14 @@
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3"
WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"

[extensions]
WaterLilyAMDGPUExt = "AMDGPU"
WaterLilyCUDAExt = "CUDA"
WaterLilyReadVTKExt = "ReadVTK"
WaterLilyWriteVTKExt = "WriteVTK"
WaterLilyMPIExt = "MPI"

[compat]
AMDGPU = "^0.4.13,0.6"
@@ -53,6 +55,7 @@
ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"

[targets]
test = ["Test", "BenchmarkTools", "CUDA", "AMDGPU", "GPUArrays", "WriteVTK", "ReadVTK"]
5 changes: 4 additions & 1 deletion examples/Project.toml
@@ -1,13 +1,16 @@
[deps]
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
Meshing = "e6723b4c-ebff-59f1-b4b7-d97aa5274f73"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
ReadVTK = "dc215faf-f008-4882-a9f7-a79a826fadc3"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
57 changes: 57 additions & 0 deletions examples/TwoD_CircleMPI.jl
@@ -0,0 +1,57 @@
#mpiexecjl --project=. -n 2 julia TwoD_CircleMPI.jl
Review comment (Member):
just FYI --project=. and just --project are equivalent.


using WaterLily
using WriteVTK
using MPI
using StaticArrays

# make a writer with some attributes; we now need to apply the BCs when writing
velocity(a::Simulation) = a.flow.u |> Array;
pressure(a::Simulation) = a.flow.p |> Array;
_body(a::Simulation) = (measure_sdf!(a.flow.σ, a.body);
a.flow.σ |> Array;)
vorticity(a::Simulation) = (@inside a.flow.σ[I] =
WaterLily.curl(3,I,a.flow.u)*a.L/a.U;
WaterLily.perBC!(a.flow.σ,());
a.flow.σ |> Array;)
_vbody(a::Simulation) = a.flow.V |> Array;
mu0(a::Simulation) = a.flow.μ₀ |> Array;
ranks(a::Simulation) = (a.flow.σ.=0;
@inside a.flow.σ[I] = me()+1;
WaterLily.perBC!(a.flow.σ,());
a.flow.σ |> Array;)

custom_attrib = Dict(
"u" => velocity,
"p" => pressure,
"d" => _body,
"ω" => vorticity,
"v" => _vbody,
"μ₀" => mu0,
"rank" => ranks
) # this maps each output name in the file to the function that computes it

"""Flow around a circle"""
function circle(dims,center,radius;Re=250,U=1,psolver=MultiLevelPoisson,mem=Array)
body = AutoBody((x,t)->√sum(abs2, x .- center) - radius)
Simulation(dims, (U,0), radius; ν=U*radius/Re, body, mem=mem, psolver=psolver)
end

# last one standing...
WaterLily.grid_loc() = mpi_grid().global_loc

# local grid size
L = 2^6

# init the MPI grid and the simulation
r = init_mpi((L,L))
sim = circle((L,L),SA[L/2,L/2+2],L/8;mem=MPIArray) #use MPIArray to use extension

wr = vtkWriter("WaterLily-circle-2";attrib=custom_attrib,dir="vtk_data",
extents=get_extents(sim.flow.p))
for _ in 1:50
sim_step!(sim,sim_time(sim)+1.0,verbose=true)
write!(wr,sim)
end
close(wr)
finalize_mpi()
14 changes: 14 additions & 0 deletions examples/TwoD_CircleMPIArray.jl
@@ -0,0 +1,14 @@
using WaterLily
# using MPI

# circle simulation
function circle(n,m;Re=250,U=1)
radius, center = m/8, m/2
body = AutoBody((x,t)->√sum(abs2, x .- center) - radius)
Simulation((n,m), (U,0), radius; ν=U*radius/Re, body, mem=MPIArray)
end

# test on sim
include("TwoD_plots.jl")
sim = circle(3*2^6,2^7)
sim_gif!(sim,duration=10,clims=(-5,5),plotbody=true)
243 changes: 243 additions & 0 deletions ext/WaterLilyMPIExt.jl
@@ -0,0 +1,243 @@
module WaterLilyMPIExt

if isdefined(Base, :get_extension)
using MPI
else
using ..MPI
end

using StaticArrays
using WaterLily
import WaterLily: init_mpi,me,mpi_grid,finalize_mpi,get_extents
import WaterLily: BC!,perBC!,exitBC!,L₂,L∞,loc,grid_loc,_dot,CFL,residual!,sim_step!

const NDIMS_MPI = 3 # Internally, we always set the number of dimensions to 3 for MPI calls. This ensures a fixed size for MPI coords, neighbors, etc., and keeps the code simple and easy to read.
const NNEIGHBORS_PER_DIM = 2

"""
halos(dims,d)

Return the CartesianIndices of the halos in dimension `±d` of an array of size `dims`.
"""
function halos(dims::NTuple{N},j) where N
CartesianIndices(ntuple( i-> i==abs(j) ? j<0 ? (1:2) : (dims[i]-1:dims[i]) : (1:dims[i]), N))
end
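# e.g. for a 10×10 array, `halos` selects the two ghost layers on either side of dimension 1:
#   halos((10,10),-1) == CartesianIndices((1:2,1:10))   # low side
#   halos((10,10),+1) == CartesianIndices((9:10,1:10))  # high side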
"""
buff(dims,d)

Return the CartesianIndices of the buffer in dimension `±d` of an array of size `dims`.
"""
function buff(dims::NTuple{N},j) where N
CartesianIndices(ntuple( i-> i==abs(j) ? j<0 ? (3:4) : (dims[i]-3:dims[i]-2) : (1:dims[i]), N))
end
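# e.g. the buffer is the pair of interior cells adjacent to each halo, i.e. the data that gets sent:
#   buff((10,10),-1) == CartesianIndices((3:4,1:10))    # sent to the low-side neighbor
#   buff((10,10),+1) == CartesianIndices((7:8,1:10))    # sent to the high-side neighbor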

"""
mpi_swap!(send1,recv1,send2,recv2,neighbor,comm)

Swap halo data with the two neighboring ranks in a given dimension: `send1` is sent to `neighbor[1]` and the matching message is received in `recv1`; likewise `send2`/`recv2` for `neighbor[2]`.
The transfers use non-blocking `Isend`/`Irecv!` calls, and the function waits for all of them to complete before returning.
"""
function mpi_swap!(send1,recv1,send2,recv2,neighbor,comm)
reqs=MPI.Request[]
# Send to / receive from neighbor 1 in dimension d
push!(reqs,MPI.Isend(send1, neighbor[1], 0, comm))
push!(reqs,MPI.Irecv!(recv1, neighbor[1], 1, comm))
# Send to / receive from neighbor 2 in dimension d
push!(reqs,MPI.Irecv!(recv2, neighbor[2], 0, comm))
push!(reqs,MPI.Isend(send2, neighbor[2], 1, comm))
# wait for all transfers to complete
MPI.Waitall!(reqs)
end
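# Tag matching: a message sent with tag 0 to neighbor[1] is received with tag 0 by that
# rank's neighbor[2] request (and vice-versa for tag 1), so the two directions never mix,
# even when both neighbors are the same rank.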

"""
perBC!(a,perdir)

Update the halos of the scalar field `a` by swapping buffer data with the neighboring MPI ranks.
"""
perBC!(a::MPIArray,::Tuple{}) = _perBC!(a, size(a), true)
perBC!(a::MPIArray, perdir, N = size(a)) = _perBC!(a, N, true)
_perBC!(a, N, mpi::Bool) = for d ∈ eachindex(N)
# get data to transfer @TODO use @views
send1 = a[buff(N,-d)]; send2 =a[buff(N,+d)]
recv1 = zero(send1); recv2 = zero(send2)
# swap
mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)

# update halos from the received data (only where there is a neighboring rank)
!mpi_wall(d,1) && (a[halos(N,-d)] .= recv1) # halo swap
!mpi_wall(d,2) && (a[halos(N,+d)] .= recv2) # halo swap
end
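# Note: the received buffers only overwrite the halos on internal (processor) boundaries;
# on physical walls (`mpi_wall`) the halo values are left untouched.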

"""
BC!(a,A,saveexit=false,perdir=())

Apply boundary conditions to the vector field `a` using the MPI grid: halos are swapped with neighboring ranks, while physical domain walls receive the Dirichlet flux `A`, zero-gradient, or exit conditions set below.
"""
# function BC!(a::MPIArray,A,saveexit=false,perdir=())
# N,n = WaterLily.size_u(a)
# for i ∈ 1:n, d ∈ 1:n
# # get data to transfer @TODO use @views
# send1 = a[buff(N,-d),i]; send2 = a[buff(N,+d),i]
# recv1 = zero(send1); recv2 = zero(send2)
# # swap
# mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)

# # this sets the BCs on the domain boundary and transfers the data
# if mpi_wall(d,1) # left wall
# if i==d # set flux
# a[halos(N,-d),i] .= A[i]
# a[WaterLily.slice(N,3,d),i] .= A[i]
# else # zero gradient
# a[halos(N,-d),i] .= reverse(send1; dims=d)
# end
# else # neighbor on the left
# a[halos(N,-d),i] .= recv1
# end
# if mpi_wall(d,2) # right wall
# if i==d && (!saveexit || i>1) # convection exit
# a[halos(N,+d),i] .= A[i]
# else # zero gradient
# a[halos(N,+d),i] .= reverse(send2; dims=d)
# end
# else # neighbor on the right
# a[halos(N,+d),i] .= recv2
# end
# end
# end
using EllipsisNotation
function BC!(a::MPIArray,A,saveexit=false,perdir=())
N,n = WaterLily.size_u(a)
for d ∈ 1:n # transfer full halos in each direction
# get data to transfer @TODO use @views
send1 = a[buff(N,-d),:]; send2 = a[buff(N,+d),:]
recv1 = zero(send1); recv2 = zero(send2)
# swap
mpi_swap!(send1,recv1,send2,recv2,neighbors(d),mpi_grid().comm)

# mpi boundary swap
!mpi_wall(d,1) && (a[halos(N,-d),:] .= recv1) # halo swap
!mpi_wall(d,2) && (a[halos(N,+d),:] .= recv2) # halo swap

for i ∈ 1:n # this sets the BCs on the physical boundary
if mpi_wall(d,1) # left wall
if i==d # set flux
a[halos(N,-d),i] .= A[i]
a[WaterLily.slice(N,3,d),i] .= A[i]
else # zero gradient
a[halos(N,-d),i] .= reverse(send1[..,i]; dims=d)
end
end
if mpi_wall(d,2) # right wall
if i==d && (!saveexit || i>1) # convection exit
a[halos(N,+d),i] .= A[i]
else # zero gradient
a[halos(N,+d),i] .= reverse(send2[..,i]; dims=d)
end
end
end
end
end
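# Usage sketch (assuming a velocity field `u::MPIArray` and inflow `U=(1,0)`):
#   BC!(u, U)         # halo swap, Dirichlet flux on normal components, zero gradient elsewhere
#   BC!(u, U, true)   # same, but keep a zero-gradient exit in x so exitBC! can convect it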

function exitBC!(u::MPIArray,u⁰,U,Δt)
N,_ = WaterLily.size_u(u)
exitR = WaterLily.slice(N.-2,N[1]-2,1,3) # exit slice excluding ghosts
# ∮udA = 0
# if mpi_wall(1,2) #right wall
@WaterLily.loop u[I,1] = u⁰[I,1]-U[1]*Δt*(u⁰[I,1]-u⁰[I-δ(1,I),1]) over I ∈ exitR
∮u = sum(u[exitR,1])/length(exitR)-U[1] # mass flux imbalance
# end
# ∮u = MPI.Allreduce(∮udA,+,mpi_grid().comm) # domain imbalance
# mpi_wall(1,2) && (@WaterLily.loop u[I,1] -= ∮u over I ∈ exitR) # correct flux only on right wall
@WaterLily.loop u[I,1] -= ∮u over I ∈ exitR # correct flux only on right wall
end
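# The exit plane is advanced with a first-order convective outflow,
#   u[I] = u⁰[I] - U[1]*Δt*(u⁰[I] - u⁰[I-δ]),
# and then shifted by the mean flux imbalance ∮u so the net outflow matches U[1].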

struct MPIGrid #{I,C<:MPI.Comm,N<:AbstractVector,M<:AbstractArray,G<:AbstractVector}
me::Int # rank
comm::MPI.Comm # communicator
coords::AbstractVector # coordinates
neighbors::AbstractArray # neighbors
global_loc::AbstractVector # the location of the lower left corner in global index space
end
const MPI_GRID_NULL = MPIGrid(-1,MPI.COMM_NULL,[-1,-1,-1],[-1 -1 -1; -1 -1 -1],[0,0,0])

let
global MPIGrid, set_mpi_grid, mpi_grid, mpi_initialized, check_mpi

# allows access to the global mpi grid
_mpi_grid::MPIGrid = MPI_GRID_NULL
mpi_grid()::MPIGrid = (check_mpi(); _mpi_grid::MPIGrid)
set_mpi_grid(grid::MPIGrid) = (_mpi_grid = grid;)
mpi_initialized() = (_mpi_grid.comm != MPI.COMM_NULL)
check_mpi() = !mpi_initialized() && error("MPI not initialized")
end
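# Access pattern: `init_mpi` stores the grid via `set_mpi_grid`, after which `mpi_grid()`
# returns it anywhere in the extension; calling `mpi_grid()` before initialization throws
# "MPI not initialized".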

function init_mpi(Dims::NTuple{D};dims=[0, 0, 0],periods=[0, 0, 0],comm::MPI.Comm=MPI.COMM_WORLD,
disp::Integer=1,reorder::Bool=true) where D
# MPI
MPI.Init()
nprocs = MPI.Comm_size(comm)
# create cartesian communicator
MPI.Dims_create!(nprocs, dims)
comm_cart = MPI.Cart_create(comm, dims, periods, reorder)
me = MPI.Comm_rank(comm_cart)
coords = MPI.Cart_coords(comm_cart)
# make the cart comm
neighbors = fill(MPI.PROC_NULL, NNEIGHBORS_PER_DIM, NDIMS_MPI);
for i = 1:NDIMS_MPI
neighbors[:,i] .= MPI.Cart_shift(comm_cart, i-1, disp);
end
# global index coordinate in grid space
global_loc = SVector([coords[i]*Dims[i] for i in 1:D]...)
set_mpi_grid(MPIGrid(me,comm_cart,coords,neighbors,global_loc))
return me; # this is the most useful MPI variable to have in the local space
end
finalize_mpi() = MPI.Finalize()
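# Typical driver lifecycle (a sketch based on examples/TwoD_CircleMPI.jl, per-rank grid of side L):
#   r = init_mpi((L,L))                              # build the Cartesian communicator
#   sim = Simulation((L,L),(1,0),L/8; mem=MPIArray)  # sketch; body, ν, psolver as usual
#   sim_step!(sim, 1.0)
#   finalize_mpi()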

# global coordinate in grid space
# grid_loc(grid::MPIGrid=mpi_grid()) = grid.global_loc
me()= mpi_grid().me
neighbors(dim) = mpi_grid().neighbors[:,dim]
mpi_wall(dim,i) = mpi_grid().neighbors[i,dim]==MPI.PROC_NULL

L₂(a::MPIArray{T}) where T = MPI.Allreduce(sum(T,abs2,@inbounds(a[I]) for I ∈ inside(a)),+,mpi_grid().comm)
function L₂(p::Poisson{T,S}) where {T,S<:MPIArray{T}} # should work on the GPU
MPI.Allreduce(sum(T,@inbounds(p.r[I]*p.r[I]) for I ∈ inside(p.r)),+,mpi_grid().comm)
end
L∞(a::MPIArray) = MPI.Allreduce(maximum(abs.(a)),Base.max,mpi_grid().comm)
L∞(p::Poisson{T,S}) where {T,S<:MPIArray{T}} = MPI.Allreduce(maximum(abs.(p.r)),Base.max,mpi_grid().comm)
function _dot(a::MPIArray{T},b::MPIArray{T}) where T
MPI.Allreduce(sum(T,@inbounds(a[I]*b[I]) for I ∈ inside(a)),+,mpi_grid().comm)
end

function CFL(a::Flow{D,T,S};Δt_max=10) where {D,T,S<:MPIArray{T}}
@inside a.σ[I] = WaterLily.flux_out(I,a.u)
MPI.Allreduce(min(Δt_max,inv(maximum(a.σ)+5a.ν)),Base.min,mpi_grid().comm)
end
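# the Allreduce takes the global minimum so every rank advances with the same Δt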
# this actually adds a global communication every time residual! is called
function residual!(p::Poisson{T,S}) where {T,S<:MPIArray{T}}
WaterLily.perBC!(p.x,p.perdir)
@inside p.r[I] = ifelse(p.iD[I]==0,0,p.z[I]-WaterLily.mult(I,p.L,p.D,p.x))
# s = sum(p.r)/length(inside(p.r))
s = MPI.Allreduce(sum(p.r)/length(inside(p.r)),+,mpi_grid().comm)
abs(s) <= 2eps(eltype(s)) && return
@inside p.r[I] = p.r[I]-s
end
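# the mean is reduced over all ranks so every rank subtracts the same value,
# keeping the distributed Poisson residual consistent across the full domain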

function sim_step!(sim::Simulation{D,T,S},t_end;remeasure=true,
max_steps=typemax(Int),verbose=false) where {D,T,S<:MPIArray{T}}
steps₀ = length(sim.flow.Δt)
while sim_time(sim) < t_end && length(sim.flow.Δt) - steps₀ < max_steps
sim_step!(sim; remeasure)
(verbose && me()==0) && println("tU/L=",round(sim_time(sim),digits=4),
", Δt=",round(sim.flow.Δt[end],digits=3))
end
end

# helper function for the vtk writer
function get_extents(a::MPIArray)
xs = Tuple(ifelse(x==0,1,x+1):ifelse(x==0,n+4,n+x+4) for (n,x) in zip(size(inside(a)),grid_loc()))
MPI.Allgather(xs, mpi_grid().comm)
end

end # module