In [None]:
# Image resolution in pixels, passed to the prepare(...) and run_pycuda(...) calls below.
height, width = 2_000, 3_000

In [None]:
import cupy as cp
import numpy as np
import math
import matplotlib.pyplot as plt

In [None]:
def prepare(height, width, xp=np):
    """Build the complex sample grid and a zeroed iteration-count buffer.

    Parameters
    ----------
    height, width : int
        Number of samples along axis 0 and axis 1 of the grid.
    xp : module, optional
        Array module supplying ``ogrid`` and ``zeros``. Defaults to ``numpy``;
        the notebook also passes ``cupy``.

    Returns
    -------
    c : complex array, shape ``(height, width)``
        The imaginary part runs from -1.5 to 1.5 along axis 0 and the real
        part runs from -2 to 2 along axis 1.
    fractal : int32 array, same shape as ``c``
        All zeros; meant to receive the per-point iteration counts.
    """
    # An imaginary "step" asks ogrid for that many evenly spaced samples
    # (endpoints included) instead of a stride.
    imag_axis, real_axis = xp.ogrid[-1.5j:1.5j:height*1j, -2:2:width*1j]

    # Broadcasting the (height, 1) column against the (1, width) row gives the
    # full grid without materialising two dense coordinate arrays.
    c = imag_axis + real_axis
    return c, xp.zeros(c.shape, dtype=xp.int32)

In [None]:
def fractal_x(c, f, maxiterations):
    """Fill ``f`` with per-point Mandelbrot iteration counts for the grid ``c``.

    Starting from ``z = c``, the map ``z <- z**2 + c`` is applied
    ``maxiterations`` times. For every point, ``f`` ends up holding the last
    iteration number (1-based) at which the orbit had not yet left the
    radius-2 disc; points that escape on the first iteration keep 0.

    Only operators and array methods are used, so the same code runs on NumPy
    and CuPy arrays without looking up the array module.

    Parameters
    ----------
    c : complex array
        Sample points. Not modified.
    f : integer array with the same shape as ``c``
        Output buffer. Overwritten in place.
    maxiterations : int
        Number of iterations to run.

    Returns
    -------
    The same ``f`` object that was passed in.
    """
    f[...] = 0          # plain assignment; `f *= 0` would leave NaN/inf behind
    z = c.copy()

    # All-False boolean mask with the shape (and device) of f.
    # Escape must be sticky: the reset value below can map back inside the
    # radius for some c (e.g. c = -6 gives 2**2 + c = -2), which would
    # otherwise make an already escaped point look bounded again.
    escaped = f.astype(bool)

    for i in range(1, maxiterations + 1):
        z = z**2 + c                       # advance every orbit one step
        escaped |= abs(z**2) > 2**2        # |z|**2 > 4  <=>  |z| > 2

        z[escaped] = 2                     # keep escaped values small (no overflow)
        f[~escaped] = i                    # still bounded after iteration i

    return f

Let's try a NumPy run:

In [None]:
# Host-side inputs: complex grid and count buffer as NumPy arrays.
c, fractal = prepare(height, width, xp=np)

In [None]:
%%timeit
_ = fractal_x(c, fractal, 20)
cp.cuda.Stream.null.synchronize()

Now, let's try a CuPy run:

In [None]:
# Same grid and count buffer, this time built with CuPy (rebinds `c` and `fractal`).
c, fractal = prepare(height, width, xp=cp)

In [None]:
%%timeit
# Same function as the NumPy run; here `c` and `fractal` are the CuPy arrays
# from the preceding cell (when the notebook is run in order).
_ = fractal_x(c, fractal, 20)
# Block on the null stream inside the timed body so the measurement covers the
# completion of the queued GPU work, not just its submission.
cp.cuda.Stream.null.synchronize()

Now, let's try a custom elementwise kernel.

In [None]:
# Element-wise CUDA kernel: one invocation per grid point, no temporaries.
# Input  : cpx (complex128 sample point), maxiterations (int32)
# Output : res (int32) - last 1-based iteration at which |z| <= 2, or 0 when
#          the orbit escapes on the very first iteration.
# The loop is 1-based so the counts use the same convention as fractal_x;
# with a 0-based `res = i`, points that survive exactly one iteration were
# indistinguishable from points that escape immediately.
cupy_single = cp.ElementwiseKernel(
    "complex128 cpx, int32 maxiterations",
    "int32 res",
    """
    res = 0;
    complex<double> z = cpx;

    for (int i = 1; i <= maxiterations; i++) {
        z = z*z + cpx;

        // |z|^2 > 4  <=>  |z| > 2, without taking a square root
        if (z.real()*z.real() + z.imag()*z.imag() > 4)
            break;

        res = i;   // still bounded after iteration i
    }

    """,
    "fract_el")

In [None]:
%%timeit
# Time the element-wise kernel on `c` with 20 iterations; the result is discarded.
_ = cupy_single(c, 20)
# Block on the null stream so the timing includes kernel completion.
cp.cuda.Stream.null.synchronize()

We could also try writing everything ourselves with a pure, raw CUDA kernel:

In [None]:
# Hand-written CUDA kernel: one thread per grid point.
# Arguments: c (interleaved real/imag doubles), fractal (int32 output),
#            height, width (grid extents), maxiterations.
# Each thread writes the last 1-based iteration at which |z| <= 2 (0 when the
# orbit escapes on the first iteration), the same convention as fractal_x.
cupy_kernel = cp.RawKernel("""
extern "C"
__global__ void fractal(const double* c, int* fractal, int height, int width, int maxiterations) {
    const int x = threadIdx.x + blockIdx.x*blockDim.x;
    const int y = threadIdx.y + blockIdx.y*blockDim.y;

    // The launch grid is rounded up to whole blocks, so threads on the edge
    // can fall outside the data. Without this guard they would read and
    // write past the end of both buffers.
    if (x >= height || y >= width) {
        return;
    }

    // For x in [0, height) and y in [0, width), x + height*y visits every
    // flat offset in [0, height*width) exactly once. The computation is
    // purely element-wise (read c[idx], write fractal[idx]), so any
    // one-to-one enumeration of the offsets gives the same result.
    // 64-bit arithmetic keeps the doubled offset below from overflowing int.
    const long long idx = x + (long long)height * y;

    // c is the complex input viewed as doubles: (real, imag) pairs.
    const double creal = c[2 * idx];
    const double cimag = c[2 * idx + 1];
    double zreal = creal;
    double zimag = cimag;

    // Accumulate in a local and store to global memory once at the end.
    int count = 0;
    for (int i = 1; i <= maxiterations; i++) {
        const double zreal2 = zreal*zreal - zimag*zimag + creal;
        const double zimag2 = 2*zreal*zimag + cimag;
        zreal = zreal2;
        zimag = zimag2;
        if (zreal*zreal + zimag*zimag > 4) {
            break;
        }
        count = i;   // still bounded after iteration i
    }
    fractal[idx] = count;
}
""", "fractal")

In [None]:
def run_pycuda(height, width, maxiterations=20):
    """Build the launch configuration and argument list for ``cupy_kernel``.

    Despite the name, nothing is launched here and PyCUDA is not involved:
    the function only allocates the CuPy arrays and returns what the raw
    kernel call needs.

    Parameters
    ----------
    height, width : int
        Number of samples along axis 0 (imaginary part, -1 to 0) and
        axis 1 (real part, -1.5 to 0).
    maxiterations : int, optional
        Iteration limit forwarded to the kernel; also the initial fill value
        of the output buffer.

    Returns
    -------
    tuple ``(grid, block, args)``
        ``grid`` and ``block`` are the CUDA launch dimensions; ``args`` is the
        list ``[c_as_doubles, fractal, height, width, maxiterations]`` with
        the scalars wrapped as int32 to match the kernel signature.
    """
    y, x = cp.ogrid[-1:0:height*1j, -1.5:0:width*1j]
    c = x + y*1j

    # Deterministic initial contents. The previous `cp.empty(...) + n` added n
    # to uninitialised memory and allocated the buffer twice.
    fractal = cp.full(c.shape, maxiterations, dtype=cp.int32)

    block = (32, 32, 1)
    # Round up so partially filled blocks still cover the edges. The kernel's
    # x index spans `height` and its y index spans `width`.
    grid = (math.ceil(height / block[0]), math.ceil(width / block[1]))

    return (grid, block,
                [c.view(cp.double),   # complex128 seen as interleaved (real, imag) doubles
                fractal,
                cp.int32(height),
                cp.int32(width),
                cp.int32(maxiterations)])

In [None]:
# (grid, block, kernel arguments) for the raw kernel; maxiterations keeps its default.
args = run_pycuda(height=height, width=width)

In [None]:
%%timeit
# Launch the raw kernel with the prepared (grid, block, arguments) triple.
cupy_kernel(*args)
# Block on the null stream so the timing includes kernel completion.
cp.cuda.Stream.null.synchronize()

In [None]:
# The kernel wrote its counts into the second launch argument; copy that
# buffer to host memory so matplotlib can draw it.
fractal = args[2][1]
plt.imshow(cp.asnumpy(fractal))