<a href="https://colab.research.google.com/github/chiyanglin-AStar/2025_physics_note/blob/main/02_Pycuda_ex0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook is based on [Linking Python to CUDA with PyCUDA: A Beginner’s Guide](https://medium.com/@mahmoudalyosify/linking-python-to-cuda-with-pycuda-a-beginners-guide-d128da0ed460).

## PyCUDA references

[PyCUDA Tutorial(翻譯)](https://hackmd.io/@shaoeChen/SkbmZOXbB/https%3A%2F%2Fhackmd.io%2F%40shaoeChen%2FSkKb0fX-H)

[pycuda tutorial](https://documen.tician.de/pycuda/tutorial.html)

[PyCUDA Tutorial Introduction](https://github.com/berlinguyinca/pycuda/blob/master/doc/source/tutorial.rst)

[GPU程式設計(5) -- Python](https://ithelp.ithome.com.tw/articles/10283144)

In [1]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.2.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━[0m [32m1.0/1.7 MB[0m [31m28.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.21-py3-none-any.whl.metadata (2.9 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2024.1.21-py3-none-any.whl (92 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.4/92.4 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading M

In [None]:
import pycuda.autoinit
import pycuda.driver as cuda

In [None]:
# Explicitly initialise the CUDA driver through PyCUDA.
# NOTE(review): `pycuda.autoinit` is imported in the previous cell and is
# documented to perform driver initialisation on import, so this call is
# presumably redundant — confirm before removing.
cuda.init()

In [None]:
# Reuse the device and context that `import pycuda.autoinit` already created.
# Calling `cuda.Device(0).make_context()` here would push a second context on
# top of the autoinit one; nothing in this notebook pops it, and the `context`
# name is rebound by a later cell, so that extra context would be leaked.
device = pycuda.autoinit.device
context = pycuda.autoinit.context

In [None]:
import numpy as np
from pycuda import driver, compiler, gpuarray

# Problem size and launch geometry live in one place so the array length and
# the kernel launch configuration cannot drift apart.
N_ELEMENTS = 100
THREADS_PER_BLOCK = 128
NUM_BLOCKS = (N_ELEMENTS + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK  # ceiling division

# Initialize PyCUDA
driver.init()

# Create a CUDA context on device 0
device = driver.Device(0)
context = device.make_context()

# Everything that needs the context runs inside try/finally so the context is
# popped even when compilation, allocation or the kernel launch raises.
try:
    # Define the CUDA kernel: element-wise c = a + b over n floats.
    # The global index combines block and thread indices, and the `i < n`
    # guard keeps surplus threads in the last block from writing out of bounds.
    kernel_code = """
__global__ void add_arrays(const float *a, const float *b, float *c, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}
"""

    # Compile the CUDA kernel
    module = compiler.SourceModule(kernel_code)

    # Build the inputs on the host first so the GPU result can be checked later.
    a_host = np.random.randn(N_ELEMENTS).astype(np.float32)
    b_host = np.random.randn(N_ELEMENTS).astype(np.float32)

    # Allocate memory on the GPU and upload the inputs
    a_gpu = gpuarray.to_gpu(a_host)
    b_gpu = gpuarray.to_gpu(b_host)
    c_gpu = gpuarray.empty_like(a_gpu)

    # Launch the CUDA kernel; the element count is passed as a 32-bit int to
    # match the kernel's `int n` parameter.
    add_arrays = module.get_function("add_arrays")
    add_arrays(
        a_gpu, b_gpu, c_gpu, np.int32(N_ELEMENTS),
        block=(THREADS_PER_BLOCK, 1, 1), grid=(NUM_BLOCKS, 1))

    # Copy the result back to the CPU
    c_cpu = c_gpu.get()

    # Check the GPU result against the same sum computed by NumPy on the host.
    np.testing.assert_allclose(c_cpu, a_host + b_host)
finally:
    # Clean up: always remove the context created above from the context stack.
    context.pop()

## GPU hello example
Adapted from the [PyCUDA `hello_gpu.py` example](https://github.com/inducer/pycuda/blob/main/examples/hello_gpu.py).


In [2]:
import pycuda.driver as drv
import pycuda.tools
import pycuda.autoinit
import numpy
import numpy.linalg as la
from pycuda.compiler import SourceModule

In [4]:
# Compile a kernel computing the element-wise product dest = a * b over n floats.
# The global index combines block and thread indices, and the `i < n` guard
# stops surplus threads in the last block from touching memory past the arrays.
mod = SourceModule("""
__global__ void multiply_them(float *dest, const float *a, const float *b, int n)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    dest[i] = a[i] * b[i];
}
""")

multiply_them = mod.get_function("multiply_them")

# Two random float32 input vectors of equal length.
a = numpy.random.randn(400).astype(numpy.float32)
b = numpy.random.randn(400).astype(numpy.float32)

# Host buffer that receives the kernel output.
dest = numpy.zeros_like(a)

# Derive the launch geometry from the array length instead of hard-coding it,
# so changing the input size cannot silently under- or over-launch the kernel.
threads_per_block = 256
num_blocks = (a.size + threads_per_block - 1) // threads_per_block  # ceiling division

# drv.In / drv.Out wrap the host arrays as kernel arguments (inputs / output);
# the element count is passed as a 32-bit int to match the kernel's `int n`.
multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b), numpy.int32(a.size),
        block=(threads_per_block, 1, 1), grid=(num_blocks, 1))

print(a)

[-6.00312710e-01  1.09082997e+00 -9.61279511e-01  1.99801815e+00
  1.87168762e-01 -1.59787643e+00 -9.38770175e-01  5.30920684e-01
 -7.77707770e-02  2.08425581e-01 -4.17834461e-01 -6.47751331e-01
 -1.22867239e+00 -2.46032029e-01  4.85852957e-01 -5.05015373e-01
 -1.38649687e-01 -3.74396116e-01 -3.27335000e-02 -6.45646632e-01
 -3.40562202e-02 -1.66247821e+00 -4.00329679e-02 -4.69341457e-01
  6.75082505e-01  2.11158895e+00  1.31349897e+00  2.51191115e+00
 -1.58552969e+00 -1.26076207e-01  4.60679889e-01 -7.52355933e-01
  1.08525026e+00  3.84548247e-01 -4.64157254e-01 -2.32166588e-01
 -4.17279929e-01  8.80857766e-01  4.21863377e-01  1.14033341e+00
  7.99457133e-01 -1.09655213e+00  9.80055153e-01  7.16505885e-01
  1.00676727e+00 -1.34893492e-01 -1.50890112e+00  9.36189771e-01
 -8.51261139e-01  8.16078484e-01  4.14352000e-01  1.02352965e+00
 -1.29581606e+00  7.14482605e-01  4.67086852e-01 -3.43307525e-01
 -3.01929712e-02 -1.38214099e+00 -9.14232954e-02  1.80774188e+00
  1.10560811e+00  1.40999

  globals().clear()


In [5]:
# Show the second input vector `b` (400 random float32 values created in the
# multiply_them cell above).
print(b)

[ 5.06887324e-02  1.14537150e-01  1.53834403e+00  1.54906189e+00
  1.59669197e+00  4.22016084e-01 -1.17549181e+00  5.26516736e-01
 -3.50963980e-01  3.59771580e-01  8.80639628e-02 -9.42193687e-01
 -1.19373310e+00 -8.93168032e-01  7.19354972e-02  5.94703615e-01
 -2.81113327e-01 -1.00331254e-01  3.60397100e-01 -3.81318629e-02
  1.21545327e+00  4.11112040e-01 -1.57357585e+00  2.53348440e-01
  7.41853774e-01 -1.70689747e-01  1.45721543e+00  4.49277222e-01
  1.30490506e+00 -1.78812504e+00  3.04293990e-01 -4.86791253e-01
 -9.03780106e-03  1.84671611e-01  8.97028804e-01  1.19769104e-01
  1.36696982e+00  1.23333538e+00  7.37150788e-01 -3.12371373e-01
  1.03842330e+00  1.48633289e+00  1.10025287e+00 -7.69540489e-01
 -9.33151171e-02 -1.05599463e+00  2.73652583e-01  1.51185119e+00
  2.80223280e-01 -7.29162514e-01  3.84301215e-01 -1.09107411e+00
 -8.79253149e-01 -2.89905161e-01 -7.67439604e-01  2.65317678e-01
  5.18317111e-02  1.49157822e-01  6.24176860e-01 -1.59957373e+00
  8.90029490e-01 -1.98903

In [6]:
# Show the buffer the multiply_them kernel wrote into; each entry is the
# kernel's result for a[i] * b[i].
print(dest)

[-3.04290894e-02  1.24940552e-01 -1.47877860e+00  3.09505367e+00
  2.98850864e-01 -6.74329579e-01  1.10351670e+00  2.79538631e-01
  2.72947419e-02  7.49856010e-02 -3.67961600e-02  6.10307217e-01
  1.46670687e+00  2.19747946e-01  3.49500738e-02 -3.00334454e-01
  3.89762744e-02  3.75636332e-02 -1.17970584e-02  2.46197097e-02
 -4.13937457e-02 -6.83464825e-01  6.29949123e-02 -1.18906923e-01
  5.00812531e-01 -3.60426575e-01  1.91405094e+00  1.12854445e+00
 -2.06896567e+00  2.25440025e-01  1.40182123e-01  3.66240293e-01
 -9.80827585e-03  7.10151419e-02 -4.16362435e-01 -2.78063845e-02
 -5.70409060e-01  1.08639300e+00  3.10976923e-01 -3.56207520e-01
  8.30174923e-01 -1.62984145e+00  1.07830846e+00 -5.51380277e-01
 -9.39466059e-02  1.42446801e-01 -4.12914693e-01  1.41537964e+00
 -2.38543183e-01 -5.95053852e-01  1.59235984e-01 -1.11674666e+00
  1.13935041e+00 -2.07132190e-01 -3.58460963e-01 -9.10855532e-02
 -1.56495336e-03 -2.06157148e-01 -5.70643060e-02 -2.89161634e+00
  9.84023809e-01 -2.80453

In [7]:
# Residual between the GPU result and the same product computed by NumPy on
# the host; every entry should be zero.
print(dest - a * b)

# Fail loudly on a mismatch instead of relying on a visual scan of the
# printed residuals.
numpy.testing.assert_allclose(dest, a * b)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.