In [2]:
import numpy as np
from numbapro import cuda, vectorize
import numbapro.cudalib.cufft as cufft 

In [3]:
# Show floating-point values with 4 decimal places in displayed and printed output.
%precision 4

u'%.4f'

用 Python 复现 [使用 CUDA 加速 R 应用](http://devblogs.nvidia.com/parallelforall/accelerate-r-applications-cuda/) 一文中的示例
----

### FFT 示例

In [4]:
num = 4
# Draw `num` complex samples: column 0 is the real part, column 1 the imaginary part.
v = np.random.normal(0, 1, (num, 2))
z = v[:, 0] + 1j * v[:, 1]
print("{:<20} {}".format('Original', z))

# Forward transform on the GPU; cufft writes the result into the preallocated buffer.
x_gpu = np.zeros(num, dtype='complex')
cufft.fft(z, x_gpu)
print("{:<20} {}".format('CUDA FFT', x_gpu))

x_cpu = np.fft.fft(z)
print("{:<20} {}".format('CPU  FFT', x_cpu))

# The GPU and CPU forward transforms should agree.
np.testing.assert_allclose(x_gpu, x_cpu)

# The NVIDIA inverse FFT returns unnormalized results, so divide by `num`.
# A separate output buffer is used so the original samples in `z` are not overwritten.
z_gpu = np.zeros(num, dtype='complex')
cufft.ifft(x_gpu, z_gpu)
z_gpu /= num
print("{:<20} {}".format('CUDA IFFT', z_gpu))

# Keep the forward result in `x_cpu`; store the inverse under its own name.
z_cpu = np.fft.ifft(x_cpu)
print("{:<20} {}".format('CPU  IFFT', z_cpu))

# Both round trips should recover the original samples.
np.testing.assert_allclose(z_gpu, z)
np.testing.assert_allclose(z_cpu, z)

Original             [-0.2414+1.2555j -0.0947-2.2246j  0.5452-1.7055j  0.9129+0.8522j]
CUDA FFT             [ 1.1220-1.8225j -3.8633+3.9687j -0.5144+0.9224j  2.2902+1.9534j]
CPU  FFT             [ 1.1220-1.8225j -3.8633+3.9687j -0.5144+0.9224j  2.2902+1.9534j]
CUDA IFFT            [-0.2414+1.2555j -0.0947-2.2246j  0.5452-1.7055j  0.9129+0.8522j]
CPU  IFFT            [-0.2414+1.2555j -0.0947-2.2246j  0.5452-1.7055j  0.9129+0.8522j]


### 向量加法示例

#### 使用 CUDA Python 显式编译到 PTX

In [19]:
@cuda.jit('void(float64[:], float64[:], float64[:], int64)')
def gvectorAdd(A, B, C, n):
    """CUDA kernel for element-wise vector addition: C[i] = A[i] + B[i].

    Each thread handles at most one element, selected by its index in the
    one-dimensional launch grid.

    Parameters
    ----------
    A, B : 1-D float64 arrays
        Input vectors.
    C : 1-D float64 array
        Output vector, written in place.
    n : int64
        Number of elements to process; threads with index >= n do nothing.
    """
    i = cuda.grid(1)
    # Guard: the launch may start more threads than there are elements.
    if i < n:
        C[i] = A[i] + B[i]

In [16]:
n = 50
A = np.arange(n, dtype=np.float64)
B = np.arange(n, dtype=np.float64)
C = np.empty_like(A)

# A kernel launch is indexed as kernel[grid_dim, block_dim], i.e.
# [number of blocks, threads per block]. Use one warp of threads per block
# and enough blocks (ceiling division) to cover all n elements.
block_dim = cuda.get_current_device().WARP_SIZE
# `//` keeps the block count an integer on both Python 2 and Python 3.
grid_dim = (n + block_dim - 1) // block_dim

print(block_dim)
print(grid_dim)

gvectorAdd[grid_dim, block_dim](A, B, C, n)
C

32
2


array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.,
        22.,  24.,  26.,  28.,  30.,  32.,  34.,  36.,  38.,  40.,  42.,
        44.,  46.,  48.,  50.,  52.,  54.,  56.,  58.,  60.,  62.,  64.,
        66.,  68.,  70.,  72.,  74.,  76.,  78.,  80.,  82.,  84.,  86.,
        88.,  90.,  92.,  94.,  96.,  98.])

#### 使用向量化工具

In [17]:
@vectorize('float64(float64, float64)', target='gpu')
def gvectorAdd2(A, B):
    """Return A + B for a pair of float64 values.

    The `vectorize` decorator compiles this scalar function for the
    'gpu' target with the signature float64(float64, float64).
    """
    total = A + B
    return total

In [18]:
# Apply the vectorized function to the arrays A and B; the returned array is the cell's output.
gvectorAdd2(A, B)

array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.,
        22.,  24.,  26.,  28.,  30.,  32.,  34.,  36.,  38.,  40.,  42.,
        44.,  46.,  48.,  50.,  52.,  54.,  56.,  58.,  60.,  62.,  64.,
        66.,  68.,  70.,  72.,  74.,  76.,  78.,  80.,  82.,  84.,  86.,
        88.,  90.,  92.,  94.,  96.,  98.])