# CuPy

CuPy is a GPU array backend that implements a subset of the NumPy interface. In the following code, `cp` is an abbreviation of `cupy`, following the standard convention of abbreviating `numpy` as `np`:

<a href="https://colab.research.google.com/github/Ziaeemehr/workshop_hpcpy/blob/main/notebooks/cupy/note.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import os
import sys

# Detect Google Colab: the `google.colab` package is only importable there.
try:
    from google.colab import drive
    IN_COLAB = True
    print("Running on Google Colab")
except ImportError:
    IN_COLAB = False
    print("Running locally")

# On Colab, make sure the workshop repository is present and work from the
# notebook's own directory.
if IN_COLAB:
    REPO_URL = 'https://github.com/Ziaeemehr/workshop_hpcpy.git'
    REPO_DIR = '/content/workshop_hpcpy'

    if not os.path.exists(REPO_DIR):
        print("Cloning workshop_hpcpy repository...")
        clone_status = os.system(f'git clone {REPO_URL} {REPO_DIR}')
        # os.system only reports failure through its return value; stop here
        # instead of letting os.chdir below fail with a less helpful error.
        if clone_status != 0:
            raise RuntimeError(
                f"git clone of {REPO_URL} failed (exit status {clone_status})"
            )

    # Change to notebook directory
    os.chdir(os.path.join(REPO_DIR, 'notebooks', 'cupy'))
    print(f"Working directory: {os.getcwd()}")

Running locally


In [8]:
import numpy as np

# Probe for CuPy. When the import fails, `cp` is bound to None and an
# installation hint is printed.
try:
    import cupy as cp
except ImportError:
    print("CuPy not available. Install with: pip install cupy-cuda12x (or cupy-cuda11x)")
    cp = None
else:
    print(f"CuPy version: {cp.__version__}")

    # Query the CUDA runtime for visible devices; any failure here is
    # reported rather than raised.
    try:
        device_count = cp.cuda.runtime.getDeviceCount()
        if device_count <= 0:
            print("CUDA available but no GPU devices found")
        else:
            print(f"CUDA available: {device_count} GPU device(s) found")
            # One line per device with its name
            for i in range(device_count):
                props = cp.cuda.runtime.getDeviceProperties(i)
                print(f"GPU {i}: {props['name'].decode('utf-8')}")
    except Exception as err:
        print(f"CUDA not available: {err}")

CuPy version: 13.3.0
CUDA available: 1 GPU device(s) found
GPU 0: NVIDIA RTX A5000


In [9]:
# Host side: a small NumPy vector and its L2 norm
x_cpu = np.array([1, 2, 3])
l2_cpu = np.linalg.norm(x_cpu)

# Device side: the same values as a CuPy array, and its L2 norm
x_gpu = cp.array([1, 2, 3])
l2_gpu = cp.linalg.norm(x_gpu)

# Compare the container types (including what .get() hands back) and the dtype
print(f"{type(x_cpu)=}")
print(f"{type(x_gpu.get())=}")
print(f"{type(x_gpu)=}")
print(f"{x_gpu.dtype=}")

type(x_cpu)=<class 'numpy.ndarray'>
type(x_gpu.get())=<class 'numpy.ndarray'>
type(x_gpu)=<class 'cupy.ndarray'>
x_gpu.dtype=dtype('int64')


In [10]:
def tohost(x):
    """
    Move data to the CPU.

    Delegates to ``cp.asnumpy`` and returns its result.
    """
    host_data = cp.asnumpy(x)
    return host_data


def todevice(x):
    """
    Move data to the GPU.

    Delegates to ``cp.asarray`` and returns its result.
    """
    device_data = cp.asarray(x)
    return device_data

In [18]:
import tqdm
import numpy as np
import cupy as cp

def run_neural_simulation(use_gpu=True, nn=68, ns=10_000, nt=20_000, dt=0.01, seed=2, dtype=np.float32, record_every=1000):
    """
    Run neural simulation on CPU or GPU

    Every sample evolves ``nn`` coupled units with a forward-Euler step of
    size ``dt``::

        dx = tau * (x - x**3 / 3 + y)
        dy = (1 / tau) * (eta - x + 1e-2 * (SC @ x))

    where ``tau = 3.0`` and ``SC``, ``x``, ``y`` and ``eta`` are drawn from
    the seeded random generator (``eta`` is shifted by 1.01).

    Parameters:
    use_gpu (bool): Whether to use GPU (CuPy) or CPU (NumPy)
    nn (int): Number of neurons
    ns (int): Number of samples
    nt (int): Number of time steps
    dt (float): Time step size
    seed (int): Random seed
    dtype: Data type for arrays; anything ``np.dtype`` accepts (e.g. np.float32,
        np.dtype("float64") or "float32")
    record_every (int): Store a snapshot of x every this many time steps
        (default 1000)

    Returns:
    list: NumPy arrays of shape (nn, ns) holding x after the update at steps
        t = 0, record_every, 2 * record_every, ...

    Raises:
    ValueError: If record_every is smaller than 1
    RuntimeError: If use_gpu is True but the module-level ``cp`` is None
    """
    if record_every < 1:
        raise ValueError(f"record_every must be a positive integer, got {record_every}")

    # Normalise so scalar types, dtype instances and strings are all accepted;
    # `.name` exists on every np.dtype, whereas `__name__` only exists on classes.
    dtype = np.dtype(dtype)

    if use_gpu:
        if cp is None:
            raise RuntimeError("use_gpu=True requires CuPy, but CuPy is not available")
        xp = cp
        print(f"Running simulation on GPU with {dtype.name}")
    else:
        xp = np
        print(f"Running simulation on CPU with {dtype.name}")

    # Set seeds
    np.random.seed(seed)
    if use_gpu:
        cp.random.seed(seed)

    # Initialize parameters
    SC = xp.random.randn(nn, nn).astype(dtype)
    x = xp.random.randn(nn, ns).astype(dtype)
    y = xp.random.randn(nn, ns).astype(dtype)
    eta = xp.random.randn(nn, ns).astype(dtype) + 1.01
    tau = 3.0
    rtau = 1 / tau

    xs = []

    for t in tqdm.trange(nt):
        gx = SC @ x  # (nn x nn) (nn x ns) = (nn x ns)
        # Both derivatives are evaluated from the current state before either
        # variable is updated.
        dx = tau * (x - x**3 / 3 + y)
        dy = rtau * (eta - x + 1e-2 * gx)
        x += dt * dx
        y += dt * dy
        if t % record_every == 0:
            # Store a host-side copy: x is updated in place on later steps
            xs.append(x.get() if use_gpu else x.copy())

    return xs



In [19]:
# Example usage: GPU run with 10,000 samples (other arguments left at defaults)
xs_gpu = run_neural_simulation(
    use_gpu=True,
    ns=10_000,
)

Running simulation on GPU with float32


100%|██████████| 20000/20000 [00:08<00:00, 2241.42it/s]
100%|██████████| 20000/20000 [00:08<00:00, 2241.42it/s]


In [None]:
# CPU run with the same settings -- about 5 minutes
xs_cpu = run_neural_simulation(
    use_gpu=False,
    ns=10_000,
)