-
Notifications
You must be signed in to change notification settings - Fork 7
/
cuda.jl
45 lines (34 loc) · 981 Bytes
/
cuda.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
using .CUDAKernels, .CUDAKernels.CUDA
import ModelParameters.Flatten
export CuGPU
"""
    CuGPU <: GPU

    CuGPU()
    CuGPU{threads_per_block}()

Processor option that selects the CUDA methods defined for `CuGPU`.

The type parameter is forwarded by `kernel_setup` as the size tuple `(N, N)`.
Calling `CuGPU()` with no parameter is the same as `CuGPU{32}()`.

# Examples

```julia
ruleset = Ruleset(rule; proc=CuGPU())
# or
output = sim!(output, rule; proc=CuGPU())
```
"""
struct CuGPU{X} <: GPU end

# Zero-argument convenience constructor: the type parameter defaults to 32.
function CuGPU()
    return CuGPU{32}()
end
# CUDA setup
# Return the CUDA device object together with an `(N, N)` size tuple built
# from the `CuGPU{N}` type parameter.
# NOTE(review): `(N, N)` is presumably the kernel workgroup size - confirm
# against the caller of `kernel_setup`.
function kernel_setup(::CuGPU{N}) where {N}
    device = CUDAKernels.CUDADevice()
    return device, (N, N)
end
# _proc_setup
# Adapt `simdata` to `CuArray` storage with `Adapt.adapt` and return the
# adapted object, so the arrays it holds are converted to CuArrays.
# Kept `@noinline`, as in the original definition.
@noinline function _proc_setup(::CuGPU, simdata::AbstractSimData)
    adapted = Adapt.adapt(CuArray, simdata)
    return adapted
end
# Copy the contents of `gridview(grid)` into `outgrid` for any `GPU` processor.
# Returns whatever `copyto!` returns.
function _copyto_output!(outgrid, grid::GridData, proc::GPU)
    source = gridview(grid)
    return copyto!(outgrid, source)
end
# Thread-safe CUDA atomic ops
# For every function named in `atomic_ops`, define a `CuGPU` method that
# forwards to the matching `CUDA.atomic_<name>` function. The second element
# of each `atomic_ops` entry is not needed here.
for (fname, _) in atomic_ops
    cuda_atomic = Symbol(:atomic_, fname)
    @eval function ($fname)(d::WritableGridData{<:Any,R}, ::CuGPU, x, I...) where {R}
        # Work on the parent array behind the destination grid.
        data = parent(dest(d))
        # Shift every index by `R` before flattening to a linear index.
        # NOTE(review): `R` is presumably the grid's padding radius - confirm.
        linear = Base._to_linear_index(data, (I .+ R)...)
        return (CUDA.$cuda_atomic)(pointer(data, linear), x)
    end
end