# PyCUDA Environment Test

This notebook verifies that your PyCUDA installation and GPU environment work correctly before you start the GPU programming course. If every cell runs without errors and the final cell prints its success message, your setup is ready for the course.

In [None]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

# PyCUDA exposes two different CUDA versions, and an environment check should
# show both so a toolkit/driver mismatch is visible:
#   * get_version()        -> CUDA version PyCUDA was compiled against, as a tuple
#   * get_driver_version() -> version of the installed driver, as one integer
#                             encoded as 1000 * major + 10 * minor
compiled_version = ".".join(str(part) for part in cuda.get_version())
driver_version = cuda.get_driver_version()
driver_major = driver_version // 1000
driver_minor = (driver_version % 1000) // 10
print(f"CUDA version PyCUDA was compiled against: {compiled_version}")
print(f"CUDA driver version: {driver_major}.{driver_minor}")

# Query the device count once and reuse it for both the summary and the loop.
device_count = cuda.Device.count()
print(f"Number of CUDA devices: {device_count}")
for i in range(device_count):
    dev = cuda.Device(i)
    print(f"Device {i}: {dev.name()} (Compute Capability: {dev.compute_capability()})")

In [None]:
# Problem size: 2**25 elements per vector.
N = 1 << 25

# Host-side int32 inputs (a is all ones, b is all twos) plus a zeroed result
# buffer with the same length and dtype.
h_a = np.ones(N, dtype=np.int32)
h_b = np.full_like(h_a, 2)
h_c = np.zeros(N, dtype=np.int32)

print(f"Working with {len(h_a):,} elements with {h_a.nbytes:,} bytes.")

# CUDA C source for the kernel: each thread derives one global index from its
# block/thread coordinates and, if that index is below N, writes
# c[idx] = 2 * a[idx] + b[idx].
kernel_source = """
__global__ void add_vectors(int *a, int *b, int *c, int N) {
    int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx < N) {
        c[idx] = 2 * a[idx] + b[idx];
    }
}
"""
mod = SourceModule(kernel_source)

# Handle used to launch the compiled kernel from Python.
add_vectors = mod.get_function("add_vectors")

In [None]:
import pycuda.gpuarray as gpuarray
import time

# Upload both inputs to the GPU and allocate a device output array shaped like d_a.
d_a, d_b = (gpuarray.to_gpu(host_vec) for host_vec in (h_a, h_b))
d_c = gpuarray.empty_like(d_a)

# Launch geometry: one thread per element, with the block count rounded up
# (ceiling division) so a partial final block still covers the tail.
block_size = 256
grid_size = -(-N // block_size)

# Events recorded immediately before and after the launch bracket the kernel.
kernel_start, kernel_stop = cuda.Event(), cuda.Event()

kernel_start.record()
add_vectors(
    d_a, d_b, d_c, np.int32(N),
    block=(block_size, 1, 1),
    grid=(grid_size, 1),
)
kernel_stop.record()
# Block until the stop event has completed before reading the elapsed time.
kernel_stop.synchronize()
elapsed_ms = kernel_start.time_till(kernel_stop)

# Copy the result back into host memory for verification.
h_c = d_c.get()
print(f"Kernel execution time: {elapsed_ms:.4f} ms")

In [None]:
def compute_linear_equations_cpu(a, b):
    """Return ``2 * a + b`` computed on the host.

    Args:
        a: Operand that is doubled (scalar or array-like supporting ``*``).
        b: Operand added to the doubled ``a``.

    Returns:
        The result of ``2 * a + b`` using the operands' own arithmetic.
    """
    doubled_a = 2 * a
    return doubled_a + b

# Recompute the expected values on the host and require an exact element-wise
# match with the array copied back from the GPU; a mismatch raises AssertionError.
cpu_reference = compute_linear_equations_cpu(h_a, h_b)
np.testing.assert_array_equal(cpu_reference, h_c)

# Only reached when the comparison above passed.
print("If this message got printed in the output cell then everything worked correctly for PyCUDA.")