Numba provides access to some of the atomic operations supported in CUDA, in the numba.cuda.atomic class.

- atomic.add(ary, idx, val) — atomically performs ary[idx] += val
- atomic.compare_and_swap(ary, old, val)
- atomic.max(ary, idx, val)
- atomic.min(ary, idx, val)

In [7]:
from numba import cuda
import math
import numpy as np

# Example: [numba.cuda.atomic.add](https://numba.pydata.org/numba-doc/dev/cuda-reference/kernel.html?highlight=cuda.atomic.add#numba.cuda.atomic.add)

In [14]:
@cuda.jit
def add_example(values):
    """Atomically add 1 to every element of ``values``, in place.

    Each thread handles the single element at its global 1D index
    (``blockIdx.x * blockDim.x + threadIdx.x``).

    Parameters
    ----------
    values : 1D array
        Array updated in place by ``cuda.atomic.add``.
    """
    thread_in_block = cuda.threadIdx.x
    block_in_grid = cuda.blockIdx.x
    block_width = cuda.blockDim.x
    i = thread_in_block + block_in_grid * block_width
    # The grid may contain more threads than there are elements (e.g. when the
    # block count is rounded up); surplus threads must do nothing rather than
    # write past the end of the array.
    if i < values.shape[0]:
        cuda.atomic.add(values, i, 1)

In [11]:
# Ten-element input (0..9). Its length is not a multiple of the block size,
# so the grid computed below has more threads than there are elements.
arr = np.arange(10)

# Launch configuration: round the block count up to cover the whole array.
threads_per_block = 4
blocks_per_grid = math.ceil(len(arr) / threads_per_block)

print("threads per block", threads_per_block)
print("blocks  per grid ", blocks_per_grid)

threads per block 4
blocks  per grid  3


In [15]:
# Show the array before the launch, run the kernel on it, then show it again;
# the kernel updates `arr` in place.
print(arr, "\n")
configured_kernel = add_example[blocks_per_grid, threads_per_block]
configured_kernel(arr)
print(arr)

[0 1 2 3 4 5 6 7 8 9] 

[ 1  2  3  4  5  6  7  8  9 10]


# Example: [numba.cuda.atomic.max](https://numba.pydata.org/numba-doc/dev/cuda-reference/kernel.html#numba.cuda.atomic.max)

In [2]:
@cuda.jit
def max_example(result, values):
    """Find the maximum value in ``values`` and store it in ``result[0]``.

    Each thread folds the element at its global 1D index into ``result[0]``
    with ``cuda.atomic.max``.

    Parameters
    ----------
    result : 1D array
        Accumulator; only slot 0 is used. It should be initialised by the
        caller to a value no greater than the smallest expected element,
        because the atomic max only ever raises it.
    values : 1D array
        Input values to reduce.
    """
    tid = cuda.threadIdx.x
    bid = cuda.blockIdx.x
    bdim = cuda.blockDim.x
    i = (bid * bdim) + tid
    # Guard against grids that contain more threads than elements, so surplus
    # threads never read past the end of `values`.
    if i < values.shape[0]:
        cuda.atomic.max(result, 0, values[i])

In [3]:
# 16384 random samples; np.random.rand draws uniformly from [0, 1).
arr = np.random.rand(16384)
# One-slot accumulator for the maximum. Starting from 0.0 is safe here because
# every sample is non-negative.
result = np.zeros(shape=(1,), dtype=np.float64)

In [4]:
# 256 blocks x 64 threads = 16384 threads, i.e. one thread per element of arr.
max_example[256, 64](result, arr)
print(result[0])  # Found using cuda.atomic.max
print(arr.max())  # NumPy's own reduction, for comparison (should be equal!)

0.9999673589346986
0.9999673589346986
