From 4d9ebea32dab4f6199d5b0b1a4529ac11edcaa3f Mon Sep 17 00:00:00 2001
From: Samuel Omlin
Date: Thu, 18 Jan 2024 19:19:18 +0100
Subject: [PATCH] introduce functional check and make select_device extension
 compatible

---
 ext/ImplicitGlobalGrid_AMDGPUExt.jl |  1 +
 ext/ImplicitGlobalGrid_CUDAExt.jl   |  1 +
 src/AMDGPUExt/defaults.jl           |  6 ++++++
 src/AMDGPUExt/select_device.jl      |  2 ++
 src/AMDGPUExt/shared.jl             |  1 +
 src/CUDAExt/defaults.jl             |  6 ++++++
 src/CUDAExt/select_device.jl        |  2 ++
 src/CUDAExt/shared.jl               |  1 +
 src/defaults_shared.jl              |  6 ++++++
 src/init_global_grid.jl             |  8 +++++---
 src/select_device.jl                | 14 +++++++-------
 src/shared.jl                       | 22 +++++++++++++---------
 12 files changed, 51 insertions(+), 19 deletions(-)
 create mode 100644 src/AMDGPUExt/select_device.jl
 create mode 100644 src/CUDAExt/select_device.jl

diff --git a/ext/ImplicitGlobalGrid_AMDGPUExt.jl b/ext/ImplicitGlobalGrid_AMDGPUExt.jl
index dbc5bf3..5ac806f 100644
--- a/ext/ImplicitGlobalGrid_AMDGPUExt.jl
+++ b/ext/ImplicitGlobalGrid_AMDGPUExt.jl
@@ -1,4 +1,5 @@
 module ImplicitGlobalGrid_AMDGPUExt
     include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "shared.jl"))
+    include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "select_device.jl"))
     include(joinpath(@__DIR__, "..", "src", "AMDGPUExt", "update_halo.jl"))
 end
\ No newline at end of file
diff --git a/ext/ImplicitGlobalGrid_CUDAExt.jl b/ext/ImplicitGlobalGrid_CUDAExt.jl
index 381fd59..58775fd 100644
--- a/ext/ImplicitGlobalGrid_CUDAExt.jl
+++ b/ext/ImplicitGlobalGrid_CUDAExt.jl
@@ -1,4 +1,5 @@
 module ImplicitGlobalGrid_CUDAExt
     include(joinpath(@__DIR__, "..", "src", "CUDAExt", "shared.jl"))
+    include(joinpath(@__DIR__, "..", "src", "CUDAExt", "select_device.jl"))
     include(joinpath(@__DIR__, "..", "src", "CUDAExt", "update_halo.jl"))
 end
\ No newline at end of file
diff --git a/src/AMDGPUExt/defaults.jl b/src/AMDGPUExt/defaults.jl
index 50a1523..9fec08b 100644
--- a/src/AMDGPUExt/defaults.jl
+++ b/src/AMDGPUExt/defaults.jl
@@ -3,6 +3,12 @@
 is_rocarray(A::GGArray) = false
 
 
+# select_device.jl
+
+function nb_rocdevices end
+function rocdevice! end
+
+
 # update_halo.jl
 
 function free_update_halo_rocbuffers end
diff --git a/src/AMDGPUExt/select_device.jl b/src/AMDGPUExt/select_device.jl
new file mode 100644
index 0000000..cb8cce3
--- /dev/null
+++ b/src/AMDGPUExt/select_device.jl
@@ -0,0 +1,2 @@
+ImplicitGlobalGrid.nb_rocdevices() = length(AMDGPU.devices())
+ImplicitGlobalGrid.rocdevice!(device_id) = AMDGPU.device_id!(device_id)
\ No newline at end of file
diff --git a/src/AMDGPUExt/shared.jl b/src/AMDGPUExt/shared.jl
index 50f7d3c..402cdc2 100644
--- a/src/AMDGPUExt/shared.jl
+++ b/src/AMDGPUExt/shared.jl
@@ -14,6 +14,7 @@ const ROCField{T,N} = GGField{T,N,ROCArray{T,N}}
 ## HANDLING OF CUDA AND AMDGPU SUPPORT
 
 ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_AMDGPUExt}) = true
+ImplicitGlobalGrid.is_functional(::Val{:AMDGPU}) = AMDGPU.functional()
 
 
 ##-------------
diff --git a/src/CUDAExt/defaults.jl b/src/CUDAExt/defaults.jl
index 5389086..187f4c5 100644
--- a/src/CUDAExt/defaults.jl
+++ b/src/CUDAExt/defaults.jl
@@ -3,6 +3,12 @@
 is_cuarray(A::GGArray) = false
 
 
+# select_device.jl
+
+function nb_cudevices end
+function cudevice! end
+
+
 # update_halo.jl
 
 function free_update_halo_cubuffers end
diff --git a/src/CUDAExt/select_device.jl b/src/CUDAExt/select_device.jl
new file mode 100644
index 0000000..bcffa29
--- /dev/null
+++ b/src/CUDAExt/select_device.jl
@@ -0,0 +1,2 @@
+ImplicitGlobalGrid.nb_cudevices() = length(CUDA.devices())
+ImplicitGlobalGrid.cudevice!(device_id) = CUDA.device!(device_id)
\ No newline at end of file
diff --git a/src/CUDAExt/shared.jl b/src/CUDAExt/shared.jl
index 3f6930a..93fc6dc 100644
--- a/src/CUDAExt/shared.jl
+++ b/src/CUDAExt/shared.jl
@@ -14,6 +14,7 @@ const CuField{T,N} = GGField{T,N,CuArray{T,N}}
 ## HANDLING OF CUDA AND AMDGPU SUPPORT
 
 ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_CUDAExt}) = true
+ImplicitGlobalGrid.is_functional(::Val{:CUDA}) = CUDA.functional(true)
 
 
 ##-------------
diff --git a/src/defaults_shared.jl b/src/defaults_shared.jl
index 7fe1e81..334dae9 100644
--- a/src/defaults_shared.jl
+++ b/src/defaults_shared.jl
@@ -1,3 +1,9 @@
+# shared.jl
+
+is_loaded(arg) = false #TODO: this would not work as it should be the caller module...: (Base.get_extension(@__MODULE__, ext) !== nothing)
+is_functional(arg) = false
+
+
 # update_halo.jl
 
 function gpusendbuf end
diff --git a/src/init_global_grid.jl b/src/init_global_grid.jl
index 0e3ed41..4f8ba63 100644
--- a/src/init_global_grid.jl
+++ b/src/init_global_grid.jl
@@ -41,7 +41,9 @@ See also: [`finalize_global_grid`](@ref), [`select_device`](@ref)
 function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0, dimy::Integer=0, dimz::Integer=0, periodx::Integer=0, periody::Integer=0, periodz::Integer=0, overlaps::Tuple{Int,Int,Int}=(2,2,2), halowidths::Tuple{Int,Int,Int}=max.(1,overlaps.÷2), disp::Integer=1, reorder::Integer=1, comm::MPI.Comm=MPI.COMM_WORLD, init_MPI::Bool=true, device_type::String=DEVICE_TYPE_AUTO, select_device::Bool=true, quiet::Bool=false)
     if grid_is_initialized() error("The global grid has already been initialized.") end
     set_cuda_loaded()
+    set_cuda_functional()
     set_amdgpu_loaded()
+    set_amdgpu_functional()
     nxyz    = [nx, ny, nz];
     dims    = [dimx, dimy, dimz];
     periods = [periodx, periody, periodz];
@@ -71,10 +73,10 @@ function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0
         if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMZ") loopvectorization[3] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMZ"]) > 0); end
     end
     if !(device_type in [DEVICE_TYPE_NONE, DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_NONE, $DEVICE_TYPE_AUTO") end
-    if ((device_type == DEVICE_TYPE_AUTO) && cuda_loaded() && amdgpu_loaded()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU extensions are loaded. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
+    if ((device_type == DEVICE_TYPE_AUTO) && cuda_loaded() && cuda_functional() && amdgpu_loaded() && amdgpu_functional()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU extensions are loaded and functional. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
     if (device_type != DEVICE_TYPE_NONE)
-        if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO])   cuda_enabled   = cuda_loaded()   end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
-        if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_loaded() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
+        if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO])   cuda_enabled   = cuda_loaded() && cuda_functional()     end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
+        if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_loaded() && amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
     end
     if (any(nxyz .< 1)) error("Invalid arguments: nx, ny, and nz cannot be less than 1."); end
     if (any(dims .< 0)) error("Invalid arguments: dimx, dimy, and dimz cannot be negative."); end
diff --git a/src/select_device.jl b/src/select_device.jl
index fc3d5c0..5df62cf 100644
--- a/src/select_device.jl
+++ b/src/select_device.jl
@@ -13,22 +13,22 @@ Select the device (GPU) corresponding to the node-local MPI rank and return its
 See also: [`init_global_grid`](@ref)
 """
 function select_device()
+    check_initialized()
     if (cuda_enabled() && amdgpu_enabled()) error("Cannot select a device because both CUDA and AMDGPU are enabled (meaning that both modules were imported before ImplicitGlobalGrid).") end
     if cuda_enabled() || amdgpu_enabled()
-        check_initialized();
         if cuda_enabled()
-            @assert CUDA.functional(true)
-            nb_devices = length(CUDA.devices())
+            @assert cuda_functional()
+            nb_devices = nb_cudevices()
         elseif amdgpu_enabled()
-            @assert AMDGPU.functional()
-            nb_devices = length(AMDGPU.devices())
+            @assert amdgpu_functional()
+            nb_devices = nb_rocdevices()
         end
         comm_l = MPI.Comm_split_type(comm(), MPI.COMM_TYPE_SHARED, me())
         if (MPI.Comm_size(comm_l) > nb_devices) error("More processes have been launched per node than there are GPUs available."); end
         me_l      = MPI.Comm_rank(comm_l)
         device_id = amdgpu_enabled() ? me_l+1 : me_l
-        if cuda_enabled()       CUDA.device!(device_id)
-        elseif amdgpu_enabled() AMDGPU.device_id!(device_id)
+        if cuda_enabled()       cudevice!(device_id)
+        elseif amdgpu_enabled() rocdevice!(device_id)
         end
         return device_id
     else
diff --git a/src/shared.jl b/src/shared.jl
index f75a135..1e6a4a9 100644
--- a/src/shared.jl
+++ b/src/shared.jl
@@ -5,16 +5,20 @@ using Base.Threads
 ##------------------------------------
 ## HANDLING OF CUDA AND AMDGPU SUPPORT
 
-is_loaded(arg) = false #TODO: this would not work as it should be the caller module...: (Base.get_extension(@__MODULE__, ext) !== nothing)
-
 let
-    global cuda_loaded, amdgpu_loaded, set_cuda_loaded, set_amdgpu_loaded
-    _cuda_loaded::Bool = false
-    _amdgpu_loaded::Bool = false
-    cuda_loaded()::Bool = _cuda_loaded
-    amdgpu_loaded()::Bool = _amdgpu_loaded
-    set_cuda_loaded() = (_cuda_loaded = is_loaded(Val(:ImplicitGlobalGrid_CUDAExt)))
-    set_amdgpu_loaded() = (_amdgpu_loaded = is_loaded(Val(:ImplicitGlobalGrid_AMDGPUExt)))
+    global cuda_loaded, cuda_functional, amdgpu_loaded, amdgpu_functional, set_cuda_loaded, set_cuda_functional, set_amdgpu_loaded, set_amdgpu_functional
+    _cuda_loaded::Bool       = false
+    _cuda_functional::Bool   = false
+    _amdgpu_loaded::Bool     = false
+    _amdgpu_functional::Bool = false
+    cuda_loaded()::Bool       = _cuda_loaded
+    cuda_functional()::Bool   = _cuda_functional
+    amdgpu_loaded()::Bool     = _amdgpu_loaded
+    amdgpu_functional()::Bool = _amdgpu_functional
+    set_cuda_loaded()       = (_cuda_loaded       = is_loaded(Val(:ImplicitGlobalGrid_CUDAExt)))
+    set_cuda_functional()   = (_cuda_functional   = is_functional(Val(:CUDA)))
+    set_amdgpu_loaded()     = (_amdgpu_loaded     = is_loaded(Val(:ImplicitGlobalGrid_AMDGPUExt)))
+    set_amdgpu_functional() = (_amdgpu_functional = is_functional(Val(:AMDGPU)))
 end
 
 
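
The mechanism this patch relies on is Val-based method overriding: ImplicitGlobalGrid defines fallback stubs (is_functional, and the empty functions nb_cudevices, cudevice!, nb_rocdevices, rocdevice!) and the CUDA/AMDGPU package extensions add the concrete methods, so select_device and init_global_grid can query GPU availability without a hard dependency on CUDA.jl or AMDGPU.jl. Below is a minimal, self-contained sketch of that pattern; the module MiniIGG and the name backend_functional are illustrative only (not part of the package), and it runs in plain Julia without any GPU package installed.

    # Sketch of the fallback-plus-extension-override pattern used in this patch.
    # `MiniIGG` and `backend_functional` are hypothetical names for illustration.
    module MiniIGG
        is_functional(arg) = false                         # fallback: backend assumed not functional
        backend_functional() = is_functional(Val(:CUDA))   # analogous to what set_cuda_functional() evaluates
    end

    MiniIGG.backend_functional()                           # -> false (no "extension" method defined yet)

    # What the CUDA extension would contribute (simulated here without CUDA.jl):
    MiniIGG.is_functional(::Val{:CUDA}) = true

    MiniIGG.backend_functional()                           # -> true (dispatch now hits the override)

Keeping the empty stubs in the extensions' defaults.jl files lets select_device() reference nb_cudevices, cudevice!, nb_rocdevices and rocdevice! unconditionally; the concrete methods exist only once the corresponding extension is loaded, and the new is_functional check ensures they are only reached when the backend is actually usable.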