Skip to content

Commit d665050

Browse files
committed
low and high-level versions of add in CUDA
1 parent b479bda commit d665050

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

cuda/cuda_add.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from numbapro import cuda, void, float32
import numpy as np


@cuda.jit('void(float32[:], float32[:], float32[:])')
def cu_add(a, b, c):
    """CUDA kernel: element-wise add, c[i] = a[i] + b[i].

    Each thread computes its own global index and handles exactly one
    element; threads whose index falls outside the output array exit
    without writing.
    """
    # Global 1-D thread index, spelled out by hand; equivalent to the
    # commented-out convenience call cuda.grid(1) in the original.
    # i = cuda.grid(1)
    tx = cuda.threadIdx.x
    bx = cuda.blockIdx.x
    bw = cuda.blockDim.x
    i = tx + bx * bw

    # BUG FIX: the original guard was `if i > c.size`, which allows the
    # thread with i == c.size to fall through and write one element past
    # the end of the array. Valid indices are 0 .. c.size - 1.
    if i >= c.size:
        return
    c[i] = a[i] + b[i]
15+
16+
if __name__ == '__main__':
17+
gpu = cuda.get_current_device()
18+
19+
n = 100
20+
a = np.arange(n, dtype=np.float32)
21+
b = np.arange(n, dtype=np.float32)
22+
c = np.empty_like(a)
23+
24+
nthreads = gpu.WARP_SIZE
25+
nblocks = int(np.ceil(float(n)/nthreads))
26+
print 'Blocks per grid:', nblocks
27+
print 'Threads per block', nthreads
28+
29+
cu_add[nblocks, nthreads](a, b, c)
30+
print c

cuda/vect_add.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from numbapro import cuda, vectorize, guvectorize
from numbapro import void, int64, float32, float64
import numpy as np


@vectorize(['int64(int64, int64)',
            'float32(float32, float32)',
            'float64(float64, float64)'],
           target='gpu')
def cu_add(a, b):
    """GPU ufunc: element-wise add of two arrays (or scalars).

    Compiled for int64, float32 and float64 operands; NumPy
    broadcasting rules apply at the call site.
    """
    # FIX: the float32 specialization originally declared a float64
    # return type ('float64(float32, float32)'), silently widening
    # float32 inputs — inconsistent with the other signatures, which
    # all return the operand dtype.
    return a + b
11+
12+
if __name__ == '__main__':
13+
n = 100
14+
A = np.arange(n)
15+
B = np.arange(n)
16+
C = cu_add(A, B)
17+
print C
18+

0 commit comments

Comments
 (0)