In [3]:
!pip install pycuda

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycuda
  Downloading pycuda-2022.2.2.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2
  Downloading pytools-2022.1.14.tar.gz (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.6/74.6 KB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 KB[0m [31m10.9 MB/s[0m eta [36

In [4]:
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt

# CUDA kernel code
kernel = """
__global__ void mandelbrot(float *image, int width, int height, float xmin, float xmax, float ymin, float ymax)
{
    int gid_x = blockIdx.x * blockDim.x + threadIdx.x;
    int gid_y = blockIdx.y * blockDim.y + threadIdx.y;
    int offset = gid_x + gid_y * width;
    float x = xmin + (xmax - xmin) * gid_x / width;
    float y = ymin + (ymax - ymin) * gid_y / height;
    float real = x;
    float imag = y;
    float r2, i2, temp;
    int value = 0;
    for (int i = 0; i < 256; i++) {
        r2 = real * real;
        i2 = imag * imag;
        temp = 2 * real * imag + y;
        imag = i2 - r2 + x;
        real = temp;
        if (r2 + i2 > 4.0f) {
            value = i;
            break;
        }
    }
    image[offset] = (float)value / 256.0f;
}
"""

# Compile the kernel
mod = SourceModule(kernel)
mandelbrot = mod.get_function("mandelbrot")

# Set grid and block dimensions
block = (16, 16, 1)
grid = (32, 32)

# Create input and output arrays
width = 1024
height = 1024
image = np.zeros((width * height,), dtype=np.float32)

# Allocate memory on the GPU
image_gpu = cuda.mem_alloc(image.nbytes)

# Copy input data to the GPU
cuda.memcpy_htod(image_gpu, image)

# Execute the kernel
mandelbrot(image_gpu, np.int32(width), np.int32(height), np.float32(-2.0), np.float32(1.0), np.float32(-1.5), np.float32(1.5), block=block, grid=grid)

# Copy the result back to the host
cuda.memcpy_dtoh(image, image_gpu)

# Save the output to file
plt.savefig('fractal.jpg', format='jpg')

<Figure size 432x288 with 0 Axes>