# Low-level Python

# PyCUDA (and PyOpenCL): arbitrary GPU programming

The lowest of low-level programming—programming and running accelerator devices—can be done from the comfort of a notebook.

In [None]:
import pycuda
import pycuda.autoinit
import pycuda.driver
import pycuda.compiler

# CUDA C source for the kernel. Each thread derives a global index i from its
# block and thread coordinates and, if i is below the hard-coded length of
# 1000000, writes sqrt(px[i]^2 + py[i]^2 + pz[i]^2) into p[i].
kernel_source = """
__global__ void runs_on_gpu(float* p, float* px, float* py, float* pz) {
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < 1000000) {
        p[i] = sqrt(px[i]*px[i] + py[i]*py[i] + pz[i]*pz[i]);
    }
}"""

# Compile the source into a module, then look the kernel up by name to get a
# handle that can be launched from Python.
compiled_cuda = pycuda.compiler.SourceModule(kernel_source)
runs_on_gpu = compiled_cuda.get_function("runs_on_gpu")

In [None]:
import numpy

# Three float32 input arrays of normally distributed values (sigma 10, 10 and
# 100), each wrapped in pycuda.driver.In so it is handed to the kernel as input.
# The generator is consumed in order, so the random draws happen px, py, pz.
px, py, pz = (
    pycuda.driver.In(numpy.random.normal(0, sigma, 1000000).astype(numpy.float32))
    for sigma in (10, 10, 100)
)

# Host array that receives the result; it is wrapped in pycuda.driver.Out below.
p = numpy.zeros(1000000, dtype=numpy.float32)

# 1024 threads per block; one block more than the integer quotient so that the
# remainder is covered. The kernel's own `i < 1000000` check skips the surplus.
threads_per_block = 1024
blocks_per_grid = 1000000 // threads_per_block + 1

runs_on_gpu(
    pycuda.driver.Out(p), px, py, pz,
    block=(threads_per_block, 1, 1),
    grid=(blocks_per_grid, 1),
)

In [None]:
# Display the host array that was passed to the kernel as pycuda.driver.Out(p).
p

Working with NumPy arrays is already GPU-like: each array expression applies one operation to every element at once.

In [None]:
import pycuda.gpuarray

# Draw the three component arrays on the host (sigma 10, 10 and 100) and hand
# each one to pycuda.gpuarray.to_gpu. The comprehension runs in order, so the
# random draws happen for px first, then py, then pz.
px, py, pz = [
    pycuda.gpuarray.to_gpu(numpy.random.normal(0, sigma, 1000000))
    for sigma in (10, 10, 100)
]

# Show what kind of object to_gpu returned.
type(px)

In [None]:
# px, py and pz were returned by pycuda.gpuarray.to_gpu, so this NumPy-looking
# expression goes through their operator overloads rather than NumPy's.
p = (px**2 + py**2 + pz**2)**0.5
# Show the type of the result of that arithmetic.
type(p)

In [None]:
# Fetch the contents of the GPU-side result so they can be displayed here
# (GPUArray.get() is documented to return a host NumPy array).
p.get()

Isn't that what CuPy does? Yes, yes it is.

Once, there was also a "gnumpy" (U Toronto CS dept). GPU array libraries are still an active, unsettled area of development, much as CPU arrays were in the days of Numeric and numarray, before NumPy unified them.

# Numba-CUDA: compile *Python* code for the GPU

Numba can compile a subset of Python for CPUs with LLVM or GPUs with CUDA.

In [None]:
import math
import numba.cuda

@numba.cuda.jit
def runs_on_gpu(p, px, py, pz):
    """CUDA kernel: write sqrt(px[i]**2 + py[i]**2 + pz[i]**2) into p[i].

    Each thread handles the single element whose index equals the thread's
    absolute position in the one-dimensional launch grid.

    Parameters
    ----------
    p : one-dimensional array that receives the result.
    px, py, pz : one-dimensional arrays of components, indexed in step with p.
    """
    i = numba.cuda.grid(1)
    # A launch usually rounds the thread count up to a whole number of blocks,
    # so some threads can get an index past the end of the arrays. They must
    # do nothing; otherwise they read and write out of bounds.
    if i < p.size:
        p[i] = math.sqrt(px[i]**2 + py[i]**2 + pz[i]**2)

In [None]:
# Host-side inputs and the output array; Numba transfers NumPy arrays to the
# device for the launch and copies them back afterwards.
px = numpy.random.normal(0, 10, 1000000)
py = numpy.random.normal(0, 10, 1000000)
pz = numpy.random.normal(0, 100, 1000000)
p = numpy.zeros(1000000)

# A Numba CUDA kernel has to be launched with an explicit configuration,
# kernel[blocks_per_grid, threads_per_block](...). Calling it bare does not
# start one thread per element.
# 64 threads per block divides 1000000 exactly, so the grid below contains
# precisely one thread per element and none index past the end of the arrays.
threads_per_block = 64
blocks_per_grid = (p.size + threads_per_block - 1) // threads_per_block

runs_on_gpu[blocks_per_grid, threads_per_block](p, px, py, pz)

In [None]:
# Display the array that was passed to the Numba kernel as its output argument.
p

# 