In [None]:
# Install PyCUDA into the environment of the running kernel.
# `%pip` targets the kernel's own interpreter, whereas `!pip3` runs whichever
# pip3 executable happens to be first on PATH.
%pip install pycuda

Collecting pycuda
[?25l  Downloading https://files.pythonhosted.org/packages/5e/3f/5658c38579b41866ba21ee1b5020b8225cec86fe717e4b1c5c972de0a33c/pycuda-2019.1.2.tar.gz (1.6MB)
[K     |████████████████████████████████| 1.6MB 2.5MB/s 
[?25hCollecting pytools>=2011.2
[?25l  Downloading https://files.pythonhosted.org/packages/73/d5/989a1d2bba90f5c085e4929a4b703bbd8cc6b4a4218f1671fadab2abe966/pytools-2020.4.tar.gz (67kB)
[K     |████████████████████████████████| 71kB 8.6MB/s 
Collecting appdirs>=1.4.0
  Downloading https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl
Collecting mako
[?25l  Downloading https://files.pythonhosted.org/packages/a6/37/0e706200d22172eb8fa17d68a7ae22dec7631a0a92266634fb518a88a5b2/Mako-1.1.3-py2.py3-none-any.whl (75kB)
[K     |████████████████████████████████| 81kB 8.7MB/s 
Building wheels for collected packages: pycuda, pytools
  Building wheel for pycuda (setup.py) ... 

In [None]:
import os

In [None]:
# Directory that is later used as the include path for the CUDA kernel source.
# exist_ok=True keeps this cell re-runnable: os.mkdir raises FileExistsError
# when the directory is already present (e.g. on a second "Run All").
os.makedirs("cuda", exist_ok=True)

In [None]:
import math
import numpy as np
import time
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
from pycuda.compiler import SourceModule

In [None]:
import pycuda.autoinit

In [None]:
# Absolute path of the "cuda" directory under the current working directory;
# passed below as the include directory when the kernel module is compiled.
cuda_file_path = os.path.abspath(os.path.join(os.curdir, "cuda"))

In [None]:
# Compile a CUDA module whose entire source is a single #include of the kernel
# file, resolved through the include directory `cuda_file_path`.
# NOTE(review): none of the visible cells writes kernel_functions_for_math_1d.cu
# into that directory - presumably it is placed there by hand; confirm.
module = SourceModule(
    '\n#include "kernel_functions_for_math_1d.cu"\n',
    include_dirs=[cuda_file_path],
)

In [None]:
# Fetch the function named "plus_one_kernel" from the compiled module; the
# returned handle is what the later cells call to launch the kernel.
plus_one_kernel = module.get_function("plus_one_kernel")

In [None]:
# Problem size (one million, stored as int32) and the host input array
# holding the int32 values 0 .. num_components - 1.
num_components = np.int32(10 ** 6)
x = np.arange(0, num_components, 1, dtype=np.int32)

In [None]:
# GPU arrays: `y_gpu` is a zero-filled int32 array with num_components
# elements, `x_gpu` is built from the host array `x`.
y_gpu = gpuarray.zeros(shape=num_components, dtype=np.int32)
x_gpu = gpuarray.to_gpu(x)

In [None]:
# Launch configuration: 256 threads per block along x, and the number of
# blocks rounded up so that blocks * threads >= num_components.
threads_per_block = (256, 1, 1)
# -(-a // b) is integer ceiling division and yields a plain Python int.
blocks_per_grid = (-(-int(num_components) // threads_per_block[0]), 1, 1)

In [None]:
# Launch the kernel once with the configured block/grid dimensions.
# Presumably the arguments are (element count, output array, input array) -
# the kernel source is not visible here, so confirm against the .cu file.
plus_one_kernel(
    num_components,
    y_gpu,
    x_gpu,
    block=threads_per_block,
    grid=blocks_per_grid,
)

In [None]:
# Preview only the first ten results instead of displaying the whole
# num_components-element array; this is also what the recorded output shows.
y_gpu.get()[:10]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int32)

In [None]:
# Time the NumPy "+ 1" on the CPU.
# time.perf_counter() is the clock meant for measuring short intervals;
# time.time() is wall-clock time and may have coarse resolution or jump.
# NOTE: this rebinds `x`, so re-running the cell increments the array again.
time_start_cpu = time.perf_counter()
x = x + 1
time_end_cpu = time.perf_counter()

In [None]:
# Report the CPU timing; the timestamps are in seconds, hence the factor 1000.
print("CPU calculation {0} [msec]".format((time_end_cpu - time_start_cpu) * 1000))

CPU calculation 3.698587417602539 [msec]


In [None]:
# Two CUDA events used as the start / end markers when timing GPU work.
time_start_gpu, time_end_gpu = drv.Event(), drv.Event()

In [None]:
# Bracket a single kernel launch with the start/end events, then synchronize
# on the end event before the elapsed time is read in the next cell.
time_start_gpu.record()
plus_one_kernel(
    num_components, y_gpu, x_gpu,
    block=threads_per_block, grid=blocks_per_grid,
)
time_end_gpu.record()
time_end_gpu.synchronize()

<pycuda._driver.Event at 0x7f35f7736030>

In [None]:
# Elapsed time between the two events around the kernel launch
# (spelling of "calculation" corrected in the message).
print("GPU calculation {0} [msec]".format(time_start_gpu.time_till(time_end_gpu)))

GPU calulation 0.2502079904079437 [msec]


In [None]:
# Reuse the same two events, this time around y_gpu.get() rather than a kernel
# launch, so the measured interval is the cost of fetching the result array.
time_start_gpu.record()
# The returned array is discarded on purpose; only the duration matters here.
y_gpu.get()
time_end_gpu.record()
# Synchronize on the end event before the elapsed time is read in the next cell.
time_end_gpu.synchronize()

<pycuda._driver.Event at 0x7f35f7736030>

In [None]:
# The events were last recorded around y_gpu.get(), not around a kernel launch,
# so label the figure as the GPU-to-CPU transfer rather than a calculation.
print("GPU to CPU transfer {0} [msec]".format(time_start_gpu.time_till(time_end_gpu)))

GPU calulation 2.6345279216766357 [msec]
