2 files changed, +48 -0 lines changed
# File 1 of 2: elementwise addition with an explicit CUDA kernel (numbapro.cuda.jit)
from numbapro import cuda, void, float32
import numpy as np

@cuda.jit('void(float32[:], float32[:], float32[:])')
def cu_add(a, b, c):
    # i = cuda.grid(1) would give the same global thread index in one call
    tx = cuda.threadIdx.x
    bx = cuda.blockIdx.x
    bw = cuda.blockDim.x
    i = tx + bx * bw

    # guard: the grid is rounded up to whole blocks, so threads with
    # i >= c.size must not write past the end of the arrays
    if i >= c.size:
        return
    c[i] = a[i] + b[i]

if __name__ == '__main__':
    gpu = cuda.get_current_device()

    n = 100
    a = np.arange(n, dtype=np.float32)
    b = np.arange(n, dtype=np.float32)
    c = np.empty_like(a)

    # one warp per block, and enough blocks to cover all n elements
    nthreads = gpu.WARP_SIZE
    nblocks = int(np.ceil(float(n) / nthreads))
    print 'Blocks per grid:', nblocks
    print 'Threads per block:', nthreads

    cu_add[nblocks, nthreads](a, b, c)
    print c
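
The launch above passes NumPy host arrays straight to the kernel, so the data is copied to the device and back implicitly on every call. For reference (not part of this commit), a minimal sketch of doing the transfers explicitly, reusing the names from the script above and assuming numbapro's cuda module exposes the same to_device / device_array_like / copy_to_host helpers as numba.cuda:

d_a = cuda.to_device(a)          # copy the inputs to the GPU once
d_b = cuda.to_device(b)
d_c = cuda.device_array_like(c)  # uninitialised output buffer on the device

cu_add[nblocks, nthreads](d_a, d_b, d_c)

d_c.copy_to_host(c)              # copy only the result back to the host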
# File 2 of 2: the same addition written as a NumPy ufunc compiled for the GPU (numbapro.vectorize)
from numbapro import cuda, vectorize, guvectorize
from numbapro import void, int64, float32, float64
import numpy as np

# one compiled kernel per type signature; dispatch follows the input dtypes
@vectorize(['int64(int64, int64)',
            'float64(float32, float32)',
            'float64(float64, float64)'],
           target='gpu')
def cu_add(a, b):
    return a + b

if __name__ == '__main__':
    n = 100
    A = np.arange(n)
    B = np.arange(n)
    C = cu_add(A, B)
    print C
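
guvectorize is imported above but never used. For illustration (not part of this commit), the same addition written as a generalized ufunc might look like the sketch below, assuming numbapro's guvectorize takes the same arguments as numba's (a list of type signatures, a layout string, and a target):

from numbapro import guvectorize
import numpy as np

# '(n),(n)->(n)': each call receives whole 1-D slices and fills the output row in place
@guvectorize(['void(float64[:], float64[:], float64[:])'],
             '(n),(n)->(n)', target='gpu')
def gu_add(a, b, c):
    for i in range(a.shape[0]):
        c[i] = a[i] + b[i]

if __name__ == '__main__':
    x = np.arange(100, dtype=np.float64)
    print gu_add(x, x)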