# 利用cuBLAS库处理第1级AXPY运算
AXPY 运算即计算 $y \leftarrow a x + y$，其中 $a$ 为标量，$x$、$y$ 为长度相同的向量，结果会写回 $y$。

In [2]:
import pycuda.autoinit
from pycuda import gpuarray
import numpy as np
from skcuda import cublas
from time import time

# AXPY demo: compute a*x + y on the GPU with cuBLAS and compare with NumPy.
a = np.float32(10)
x = np.float32([1, 2, 3])
y = np.float32([-.345, 8.15, -15.867])
# Copy the host vectors to the GPU.
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(y)
# Create the cuBLAS context (handle) that the cuBLAS calls below take as
# their first argument.
cublas_context_h = cublas.cublasCreate()
try:
    # Arguments: handle, element count, scalar a, x data, x stride,
    # y data, y stride. The result is read back from y_gpu below.
    cublas.cublasSaxpy(cublas_context_h, x_gpu.size, a, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
finally:
    # Always release the handle, even if the cuBLAS call raises.
    cublas.cublasDestroy(cublas_context_h)

# Compare the GPU result (copied back with .get()) against the NumPy
# reference computed from the host-side arrays.
print('This is close to the Numpy approximation: %s' % np.allclose(a*x + y, y_gpu.get()))

This is close to the Numpy approximation: True


计算两个单精度浮点向量的点积，也就是逐元素相乘之后求和

In [8]:
# Dot-product demo: compute u . w on the GPU with cuBLAS.
u = np.float32([1, 2, 3])
w = np.float32([1, 2, 3])
# Copy the host vectors to the GPU.
u_gpu = gpuarray.to_gpu(u)
w_gpu = gpuarray.to_gpu(w)

cublas_context_h = cublas.cublasCreate()
try:
    # Arguments: handle, element count, u data, u stride, w data, w stride.
    dot_output = cublas.cublasSdot(cublas_context_h, u_gpu.size, u_gpu.gpudata, 1, w_gpu.gpudata, 1)
finally:
    # Always release the handle, even if the cuBLAS call raises.
    cublas.cublasDestroy(cublas_context_h)

print(dot_output)

14.0


我们也可以像下面这样计算向量的L2范数

In [10]:
# L2-norm demo: compute the Euclidean norm of u on the GPU with cuBLAS.
u = np.float32([1, 2, 3])
# w / w_gpu are not used by the norm call in this cell; they are kept so
# the notebook namespace stays the same as before.
w = np.float32([1, 2, 3])
u_gpu = gpuarray.to_gpu(u)
w_gpu = gpuarray.to_gpu(w)

cublas_context_h = cublas.cublasCreate()
try:
    # Arguments: handle, element count, u data, u stride.
    l2_output = cublas.cublasSnrm2(cublas_context_h, u_gpu.size, u_gpu.gpudata, 1)
finally:
    # Always release the handle, even if the cuBLAS call raises.
    cublas.cublasDestroy(cublas_context_h)

print(l2_output)

3.7416575
