introduce functional check and make select_device extension compatible
omlins committed Jan 18, 2024
1 parent 180f060 commit 4d9ebea
Showing 12 changed files with 51 additions and 19 deletions.
1 change: 1 addition & 0 deletions ext/ImplicitGlobalGrid_AMDGPUExt.jl
@@ -1,4 +1,5 @@
module ImplicitGlobalGrid_AMDGPUExt
include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "shared.jl"))
include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "select_device.jl"))
include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "update_halo.jl"))
end
1 change: 1 addition & 0 deletions ext/ImplicitGlobalGrid_CUDAExt.jl
@@ -1,4 +1,5 @@
module ImplicitGlobalGrid_CUDAExt
include(joinpath(@__DIR__, "..", "src", "CUDAExt", "shared.jl"))
include(joinpath(@__DIR__, "..", "src", "CUDAExt", "select_device.jl"))
include(joinpath(@__DIR__, "..", "src", "CUDAExt", "update_halo.jl"))
end
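
Both extension entry points now include a select_device.jl, so the backend-specific device-selection code only exists once the corresponding GPU package is loaded. A minimal, self-contained sketch of the stub/override pattern these files rely on (module and function names are hypothetical, not the package's actual internals):

module MiniIGG
    is_functional(arg) = false                     # generic fallback: no backend available
end

module MiniCUDAExt                                 # stands in for ImplicitGlobalGrid_CUDAExt
    import ..MiniIGG
    MiniIGG.is_functional(::Val{:CUDA}) = true     # extension adds a Val-specific method
end

MiniIGG.is_functional(Val(:CUDA))    # true  -> dispatches to the extension's method
MiniIGG.is_functional(Val(:AMDGPU))  # false -> generic fallback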
6 changes: 6 additions & 0 deletions src/AMDGPUExt/defaults.jl
@@ -3,6 +3,12 @@
is_rocarray(A::GGArray) = false


# select_device.jl

function nb_rocdevices end
function rocdevice! end


# update_halo.jl

function free_update_halo_rocbuffers end
2 changes: 2 additions & 0 deletions src/AMDGPUExt/select_device.jl
@@ -0,0 +1,2 @@
ImplicitGlobalGrid.nb_rocdevices() = length(AMDGPU.devices())
ImplicitGlobalGrid.rocdevice!(device_id) = AMDGPU.device_id!(device_id)
1 change: 1 addition & 0 deletions src/AMDGPUExt/shared.jl
@@ -14,6 +14,7 @@ const ROCField{T,N} = GGField{T,N,ROCArray{T,N}}
## HANDLING OF CUDA AND AMDGPU SUPPORT

ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_AMDGPUExt}) = true
ImplicitGlobalGrid.is_functional(::Val{:AMDGPU}) = AMDGPU.functional()


##-------------
6 changes: 6 additions & 0 deletions src/CUDAExt/defaults.jl
@@ -3,6 +3,12 @@
is_cuarray(A::GGArray) = false


# select_device.jl

function nb_cudevices end
function cudevice! end


# update_halo.jl

function free_update_halo_cubuffers end
2 changes: 2 additions & 0 deletions src/CUDAExt/select_device.jl
@@ -0,0 +1,2 @@
ImplicitGlobalGrid.nb_cudevices() = length(CUDA.devices())
ImplicitGlobalGrid.cudevice!(device_id) = CUDA.device!(device_id)
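
These two new two-line files are thin wrappers that let src/select_device.jl stay free of direct CUDA/AMDGPU references. One detail they forward unchanged: CUDA.device!(id) takes a 0-based device id, while AMDGPU.device_id!(id) takes a 1-based one, which is why select_device() computes me_l+1 only for AMDGPU. A GPU-free mock of the indirection (FakeCUDA and its contents are stand-ins, not real API):

module FakeCUDA                          # stand-in; no GPU or CUDA.jl required
    devices() = 1:4                      # mimics CUDA.devices()
    device!(id) = println("selected CUDA device ", id, " (0-based)")
end

nb_cudevices() = length(FakeCUDA.devices())
cudevice!(id)  = FakeCUDA.device!(id)

nb_cudevices()   # 4
cudevice!(0)     # first device; the AMDGPU analogue would expect 1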
1 change: 1 addition & 0 deletions src/CUDAExt/shared.jl
@@ -14,6 +14,7 @@ const CuField{T,N} = GGField{T,N,CuArray{T,N}}
## HANDLING OF CUDA AND AMDGPU SUPPORT

ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_CUDAExt}) = true
ImplicitGlobalGrid.is_functional(::Val{:CUDA}) = CUDA.functional(true)


##-------------
6 changes: 6 additions & 0 deletions src/defaults_shared.jl
@@ -1,3 +1,9 @@
# shared.jl

is_loaded(arg) = false #TODO: this would not work as it should be the caller module...: (Base.get_extension(@__MODULE__, ext) !== nothing)
is_functional(arg) = false


# update_halo.jl

function gpusendbuf end
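
The generic fallbacks above complement the zero-method stubs added in the defaults.jl files: `function nb_cudevices end` declares a name the package can reference before any extension is loaded, and the extension attaches the actual method later. A hedged sketch of that mechanism (nb_gpudevices is hypothetical):

function nb_gpudevices end       # zero-method stub, as in the defaults.jl files

# Calling the stub before an extension loads is a MethodError, not an UndefVarError:
# nb_gpudevices()                # ERROR: MethodError: no method matching nb_gpudevices()

nb_gpudevices() = 4              # an extension attaches the real method later, e.g.
                                 # ImplicitGlobalGrid.nb_cudevices() = length(CUDA.devices())
nb_gpudevices()                  # 4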
8 changes: 5 additions & 3 deletions src/init_global_grid.jl
@@ -41,7 +41,9 @@ See also: [`finalize_global_grid`](@ref), [`select_device`](@ref)
function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0, dimy::Integer=0, dimz::Integer=0, periodx::Integer=0, periody::Integer=0, periodz::Integer=0, overlaps::Tuple{Int,Int,Int}=(2,2,2), halowidths::Tuple{Int,Int,Int}=max.(1,overlaps.÷2), disp::Integer=1, reorder::Integer=1, comm::MPI.Comm=MPI.COMM_WORLD, init_MPI::Bool=true, device_type::String=DEVICE_TYPE_AUTO, select_device::Bool=true, quiet::Bool=false)
if grid_is_initialized() error("The global grid has already been initialized.") end
set_cuda_loaded()
set_cuda_functional()
set_amdgpu_loaded()
set_amdgpu_functional()
nxyz = [nx, ny, nz];
dims = [dimx, dimy, dimz];
periods = [periodx, periody, periodz];
@@ -71,10 +73,10 @@ function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0
if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMZ") loopvectorization[3] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMZ"]) > 0); end
end
if !(device_type in [DEVICE_TYPE_NONE, DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_NONE, $DEVICE_TYPE_AUTO") end
if ((device_type == DEVICE_TYPE_AUTO) && cuda_loaded() && amdgpu_loaded()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU extensions are loaded. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
if ((device_type == DEVICE_TYPE_AUTO) && cuda_loaded() && cuda_functional() && amdgpu_loaded() && amdgpu_functional()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU extensions are loaded and functional. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
if (device_type != DEVICE_TYPE_NONE)
if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_loaded() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_loaded() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_loaded() && cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_loaded() && amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
end
if (any(nxyz .< 1)) error("Invalid arguments: nx, ny, and nz cannot be less than 1."); end
if (any(dims .< 0)) error("Invalid arguments: dimx, dimy, and dimz cannot be negative."); end
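
With the functional checks wired into init_global_grid, a backend is only auto-enabled when its extension is both loaded and functional, so e.g. importing CUDA.jl on a machine without a working driver no longer selects CUDA. A usage sketch (assuming the documented "auto" default for device_type; grid sizes are arbitrary):

using MPI, ImplicitGlobalGrid
# using CUDA                     # if loaded but CUDA.functional(true) is false,
#                                # cuda_enabled now stays false instead of failing later
me, dims = init_global_grid(64, 64, 64)   # device_type defaults to "auto"
finalize_global_grid()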
14 changes: 7 additions & 7 deletions src/select_device.jl
@@ -13,22 +13,22 @@ Select the device (GPU) corresponding to the node-local MPI rank and return its
See also: [`init_global_grid`](@ref)
"""
function select_device()
check_initialized()
if (cuda_enabled() && amdgpu_enabled()) error("Cannot select a device because both CUDA and AMDGPU are enabled (meaning that both modules were imported before ImplicitGlobalGrid).") end
if cuda_enabled() || amdgpu_enabled()
check_initialized();
if cuda_enabled()
@assert CUDA.functional(true)
nb_devices = length(CUDA.devices())
@assert cuda_functional()
nb_devices = nb_cudevices()
elseif amdgpu_enabled()
@assert AMDGPU.functional()
nb_devices = length(AMDGPU.devices())
@assert amdgpu_functional()
nb_devices = nb_rocdevices()
end
comm_l = MPI.Comm_split_type(comm(), MPI.COMM_TYPE_SHARED, me())
if (MPI.Comm_size(comm_l) > nb_devices) error("More processes have been launched per node than there are GPUs available."); end
me_l = MPI.Comm_rank(comm_l)
device_id = amdgpu_enabled() ? me_l+1 : me_l
if cuda_enabled() CUDA.device!(device_id)
elseif amdgpu_enabled() AMDGPU.device_id!(device_id)
if cuda_enabled() cudevice!(device_id)
elseif amdgpu_enabled() rocdevice!(device_id)
end
return device_id
else
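
The extension-compatibility change leaves the MPI logic of select_device() untouched: each rank still derives a node-local rank via a shared-memory communicator split and maps it to one device. That core step in isolation, using the standard MPI.jl API:

using MPI
MPI.Init()
comm   = MPI.COMM_WORLD
comm_l = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, MPI.Comm_rank(comm))
me_l   = MPI.Comm_rank(comm_l)   # rank within the node: one GPU per local rank
# cudevice!(me_l) / rocdevice!(me_l + 1) would then pick the device (0- vs 1-based ids).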
22 changes: 13 additions & 9 deletions src/shared.jl
@@ -5,16 +5,20 @@ using Base.Threads
##------------------------------------
## HANDLING OF CUDA AND AMDGPU SUPPORT

is_loaded(arg) = false #TODO: this would not work as it should be the caller module...: (Base.get_extension(@__MODULE__, ext) !== nothing)

let
global cuda_loaded, amdgpu_loaded, set_cuda_loaded, set_amdgpu_loaded
_cuda_loaded::Bool = false
_amdgpu_loaded::Bool = false
cuda_loaded()::Bool = _cuda_loaded
amdgpu_loaded()::Bool = _amdgpu_loaded
set_cuda_loaded() = (_cuda_loaded = is_loaded(Val(:ImplicitGlobalGrid_CUDAExt)))
set_amdgpu_loaded() = (_amdgpu_loaded = is_loaded(Val(:ImplicitGlobalGrid_AMDGPUExt)))
global cuda_loaded, cuda_functional, amdgpu_loaded, amdgpu_functional, set_cuda_loaded, set_cuda_functional, set_amdgpu_loaded, set_amdgpu_functional
_cuda_loaded::Bool = false
_cuda_functional::Bool = false
_amdgpu_loaded::Bool = false
_amdgpu_functional::Bool = false
cuda_loaded()::Bool = _cuda_loaded
cuda_functional()::Bool = _cuda_functional
amdgpu_loaded()::Bool = _amdgpu_loaded
amdgpu_functional()::Bool = _amdgpu_functional
set_cuda_loaded() = (_cuda_loaded = is_loaded(Val(:ImplicitGlobalGrid_CUDAExt)))
set_cuda_functional() = (_cuda_functional = is_functional(Val(:CUDA)))
set_amdgpu_loaded() = (_amdgpu_loaded = is_loaded(Val(:ImplicitGlobalGrid_AMDGPUExt)))
set_amdgpu_functional() = (_amdgpu_functional = is_functional(Val(:AMDGPU)))
end


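
The expanded let block keeps the four cached Bools as locals captured by their accessor closures, so nothing but the getter/setter functions escapes into module scope. The pattern in miniature (flag/set_flag! are illustrative names):

let
    global flag, set_flag!          # only these two closures leave the block
    _flag::Bool = false             # hidden state; not a module-level global
    flag()::Bool = _flag
    set_flag!(v::Bool) = (_flag = v)
end

flag()           # false
set_flag!(true)
flag()           # true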
