-
Notifications
You must be signed in to change notification settings - Fork 298
Closed
Description
I ran the code from "How can I accelerate a sparse matrix by dense vector product, currently implemented via scipy.sparse.csc_matrix.dot, using CUDA?":
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
from pycuda.sparse.packeted import PacketedSpMV
from pycuda.tools import DeviceMemoryPool
from scipy.sparse import csr_matrix
from time import time
def spmv_cuda(a_sparse, b, count):
    """Run ``count`` sparse matrix-vector products on the GPU and time them.

    Args:
        a_sparse: Sparse matrix handed to ``PacketedSpMV``; its ``dtype`` and
            ``shape[0]`` determine the dtype and length of the result vector.
        b: Host array uploaded to the GPU as the right-hand-side vector.
            NOTE(review): the caller in this file passes an ``(N, 1)`` array;
            confirm ``PacketedSpMV`` accepts that shape.
        count: Number of times the product is repeated inside the timed loop.

    Returns:
        A ``(result, elapsed)`` tuple: the result vector copied back to the
        host and the wall-clock seconds spent in the timed loop.
    """
    dtype = a_sparse.dtype
    m = a_sparse.shape[0]

    print('moving objects to GPU...')
    spmv = PacketedSpMV(a_sparse, is_symmetric=False, dtype=dtype)
    dev_pool = DeviceMemoryPool()
    d_b = gpuarray.to_gpu(b, dev_pool.allocate)
    # Allocate the output with the same allocator that was used for d_b.
    d_c = gpuarray.zeros(m, dtype=dtype, allocator=d_b.allocator)

    print('executing spmv operation...\n')
    tic = time()
    for _ in range(count):
        # Reset the output before every product so each run starts from zero.
        d_c.fill(0)
        d_c = spmv(d_b, d_c)
    # Kernel launches return before the GPU has finished. Wait for all queued
    # work so the elapsed time covers the computation, not just the launches.
    drv.Context.synchronize()
    toc = time()

    return d_c.get(), toc - tic
# Benchmark configuration: repetitions, matrix size, fill probability, dtype.
COUNT = 100
N = 5000
P = 0.1
DTYPE = np.float32

print('Constructing objects...\n\n')
np.random.seed(0)
a_dense = np.random.rand(N, N).astype(DTYPE)
# Zero every entry whose random draw is at least P, leaving roughly a
# fraction P of the entries non-zero.
zero_mask = np.random.rand(N, N) >= P
a_dense[zero_mask] = 0
a_sparse = csr_matrix(a_dense)
b = np.random.rand(N, 1).astype(DTYPE)

# Dense baseline: NumPy matrix-vector product.
tic = time()
for _ in range(COUNT):
    c = np.dot(a_dense, b)
toc = time()
elapsed = toc - tic
print('numpy dense matrix multiplication took {} seconds\n'.format(elapsed))
print('c = {}\n'.format(c[:5, 0]))

# Sparse baseline: SciPy CSR matrix-vector product on the CPU.
tic = time()
for _ in range(COUNT):
    c = a_sparse.dot(b)
toc = time()
elapsed = toc - tic
print('scipy sparse matrix multiplication took {} seconds\n'.format(elapsed))
print('c = {}\n'.format(c[:5, 0]))

# GPU run: PyCUDA sparse matrix-vector product via spmv_cuda.
c, t = spmv_cuda(a_sparse, b, COUNT)
print('pycuda sparse matrix multiplication took {} seconds\n'.format(t))
print('c = {}\n'.format(c[:5]))
When N is small (e.g., 5,000), I get the correct result, but when N is large (e.g., 20,000), the above code raises the following error:
Traceback (most recent call last):
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pytools-2019.1-py3.7.egg/pytools/__init__.py", line 571, in wrapper
return getattr(obj, cache_dict_name)[key]
AttributeError: 'CoordinateSpMV' object has no attribute '_memoize_dic_get_flat_kernel'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test_cuda.py", line 68, in <module>
c, t = spmv_cuda(a_sparse, b, COUNT)
File "test_cuda.py", line 29, in spmv_cuda
d_c = spmv(d_b, d_c)
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/sparse/packeted.py", line 336, in __call__
self.remaining_coo_gpu(x, y)
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/sparse/coordinate.py", line 223, in __call__
flat_func, x_texref = self.get_flat_kernel()
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pytools-2019.1-py3.7.egg/pytools/__init__.py", line 573, in wrapper
result = function(obj, *args, **kwargs)
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/sparse/coordinate.py", line 194, in get_flat_kernel
"warp_size": drv.Context.get_device().warp_size,
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/compiler.py", line 291, in __init__
arch, code, cache_dir, include_dirs)
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/compiler.py", line 254, in compile
return compile_plain(source, options, keep, nvcc, cache_dir, target)
File "/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/compiler.py", line 137, in compile_plain
stderr=stderr.decode("utf-8", "replace"))
pycuda.driver.CompileError: nvcc compilation of /tmp/tmppjocq3v5/kernel.cu failed
[command: nvcc --cubin -arch sm_61 -I/home/lihk11/.virtualenv/pycuda/lib/python3.7/site-packages/pycuda-2018.1.1-py3.7-linux-x86_64.egg/pycuda/cuda kernel.cu]
[stderr:
kernel.cu(13): error: function "atomicAdd(float *, float)" has already been defined
kernel.cu(28): error: function "atomicAdd(double *, double)" has already been defined
2 errors detected in the compilation of "/tmp/tmpxft_00004b93_00000000-6_kernel.cpp1.ii".
]
terminate called after throwing an instance of 'pycuda::error'
what(): explicit_context_dependent failed: invalid device context - no currently active context?
[1] 19294 abort (core dumped) python test_cuda.py
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels