In [1]:
import math
import numpy as np
import numpy.random as rand
import scipy.stats as stat
from scipy.interpolate import Rbf
# ------------------------------
from numba import cuda, vectorize
# ------------------------------
import matplotlib.pyplot as plt

# Try a custom dtype

### Define the dtype of a point

In [2]:
# Structured dtype for a 2-D point: two float32 fields named "x" and "y".
point_dtype = np.dtype([
    ('x', np.float32),
    ('y', np.float32),
])

### A CUDA kernel function that increments x

In [23]:
@cuda.jit
def increment_x(an_array):
    """Add 1 to the "x" field of the record handled by each thread.

    Each thread uses its index from ``cuda.grid(1)`` to pick one element
    of ``an_array``; the array is modified in place.
    """
    idx = cuda.grid(1)
    # Threads whose index falls past the end of the array have nothing to do.
    if idx >= an_array.size:
        return
    an_array[idx]["x"] += 1

### Specify grid and block size

In [6]:
# Launch configuration: one thread per point, rounded up to whole blocks.
n = 10_000
threadsperblock = 16
blockspergrid = (n + threadsperblock - 1) // threadsperblock  # ceiling division

for label, value in (
    ("threads per block:", threadsperblock),
    ("blocks  per grid: ", blockspergrid),
    ("Total:            ", blockspergrid * threadsperblock),
):
    print(label, value)

threads per block: 16
blocks  per grid:  625
Total:             10000


### prepare data

In [28]:
# Build n random points: fill the "x" field first, then "y", each with
# uniform samples from np.random.random (cast to the float32 fields).
arr_pts = np.zeros(n, dtype=point_dtype)
arr_pts["x"] = np.random.random(n)
arr_pts["y"] = np.random.random(n)

print("num of points", arr_pts.shape)
print("============================")

print("Before...")
print("The first point:", arr_pts[0])
print("The last  point:", arr_pts[-1])

# Launch the kernel on the host array; the printed points below show the
# updated "x" values.
increment_x[blockspergrid, threadsperblock](arr_pts)

print("After...")
print("The first point:", arr_pts[0])
print("The last  point:", arr_pts[-1])

num of points (10000,)
Before...
The first point: (0.14415103, 0.8756483)
The last  point: (0.17451333, 0.43440962)
After...
The first point: (1.144151, 0.8756483)
The last  point: (1.1745133, 0.43440962)


# Try using a scratch NumPy array inside the kernel function (allocated on the host and passed in)

In [42]:
@cuda.jit
def test_arr_in_kernel(an_array, tmp):
    """Fill ``tmp`` with its own indices, then add ``tmp[10]`` to each "x".

    ``tmp`` is a caller-supplied scratch array; it needs at least
    ``an_array.shape[0]`` elements, and at least 11 because index 10 is
    read below.
    """
    # This loop sits outside the bounds check, so every thread that runs
    # the kernel writes tmp[k] = k for all k. The writes are redundant
    # across threads (same value each time); it is only an experiment in
    # using an array inside a kernel.
    count = an_array.shape[0]
    for k in range(count):
        tmp[k] = k

    tid = cuda.grid(1)
    # Threads whose index falls past the end of the array stop here.
    if tid >= an_array.size:
        return
    an_array[tid]["x"] += tmp[10]

In [43]:
# Launch configuration: one thread per point, rounded up to whole blocks.
n = 10_000
threadsperblock = 16
blockspergrid = (n + threadsperblock - 1) // threadsperblock  # ceiling division

for label, value in (
    ("threads per block:", threadsperblock),
    ("blocks  per grid: ", blockspergrid),
    ("Total:            ", blockspergrid * threadsperblock),
):
    print(label, value)

threads per block: 16
blocks  per grid:  625
Total:             10000


In [44]:
# Uninitialised scratch buffer (default float64) that the kernel fills.
tmp = np.empty(n)

# Build n random points: fill the "x" field first, then "y", each with
# uniform samples from np.random.random (cast to the float32 fields).
arr_pts = np.zeros(n, dtype=point_dtype)
arr_pts["x"] = np.random.random(n)
arr_pts["y"] = np.random.random(n)

print("num of points", arr_pts.shape)
print("============================")

print("Before...")
print("The first point:", arr_pts[0])
print("The last  point:", arr_pts[-1])

# Launch the kernel on the host arrays; the printed points below show the
# updated "x" values.
test_arr_in_kernel[blockspergrid, threadsperblock](arr_pts, tmp)

print("After...")
print("The first point:", arr_pts[0])
print("The last  point:", arr_pts[-1])

num of points (10000,)
Before...
The first point: (0.27851704, 0.55893713)
The last  point: (0.46436018, 0.39885944)
After...
The first point: (10.278517, 0.55893713)
The last  point: (10.46436, 0.39885944)
