In [1]:
#References
#https://www.youtube.com/watch?v=uUEHuF5i_qI
#https://github.com/olcf-tutorials/vector_addition_cuda

import numpy as np
from numba import cuda
import time

def vector_add_python(a, b):
    """Add two sequences element by element using a plain Python loop.

    This is the interpreted baseline: every element is read, summed and
    stored individually instead of through a vectorized NumPy expression.

    Args:
        a: First operand; also the template for the result's shape and dtype.
        b: Second operand, indexed at the same positions as ``a``.

    Returns:
        A new array shaped like ``a`` holding ``a[i] + b[i]`` at each index.
    """
    result = np.zeros_like(a)
    for position, left in enumerate(a):
        result[position] = left + b[position]
    return result

# Numba CUDA Implementation
@cuda.jit
def vector_add_kernel(a, b, c):
    """CUDA kernel storing ``a[i] + b[i]`` into ``c[i]`` for one index per thread.

    Args:
        a: First input array; its length bounds the valid indices.
        b: Second input array, read at the same index as ``a``.
        c: Output array receiving the sums.
    """
    thread_index = cuda.grid(1)
    # The grid is rounded up to whole blocks by the launcher, so threads whose
    # index falls past the end of ``a`` must do nothing.
    if thread_index >= len(a):
        return
    c[thread_index] = a[thread_index] + b[thread_index]

def vector_add_numba(a, b, *, threads_per_block=32):
    """Add two equally sized 1-D arrays on the GPU via ``vector_add_kernel``.

    Args:
        a: First input array (host memory).
        b: Second input array (host memory); must have the same length as ``a``.
        threads_per_block: Number of CUDA threads per block used for the launch.

    Returns:
        A new host NumPy array with the same shape and dtype as ``a`` holding
        the element-wise sum.

    Raises:
        ValueError: If ``a`` and ``b`` differ in length. The kernel only
            bounds-checks against ``a``, so a shorter ``b`` would otherwise
            be read out of bounds on the device.
    """
    if len(a) != len(b):
        raise ValueError(
            f"input lengths differ: len(a)={len(a)}, len(b)={len(b)}"
        )
    if len(a) == 0:
        # Nothing to compute; also avoids launching a grid with zero blocks.
        return np.zeros_like(a)

    # Transfer the inputs to the device.
    a_device = cuda.to_device(a)
    b_device = cuda.to_device(b)
    # Allocate the output directly on the device. Passing a host array to the
    # kernel would make Numba copy it to the device and back implicitly on
    # every launch, including a pointless upload of the zero-filled buffer.
    c_device = cuda.device_array_like(a_device)

    # Round up so that the final, partially filled block is still launched.
    blocks_per_grid = (len(a) + threads_per_block - 1) // threads_per_block
    vector_add_kernel[blocks_per_grid, threads_per_block](
        a_device, b_device, c_device
    )

    # Copy the result back to host memory.
    return c_device.copy_to_host()
size = 100000
# Generate random input arrays
a = np.random.rand(size)
b = np.random.rand(size)

# Warm-up launch (not timed): the first call of a @cuda.jit kernel triggers
# JIT compilation and CUDA context creation, which would otherwise dominate
# the measurement below. A one-element slice has the same dtype and layout,
# so it compiles the same kernel specialization that the timed call uses.
vector_add_numba(a[:1], b[:1])

# Numba CUDA implementation
# perf_counter is a monotonic, high-resolution clock meant for timing intervals.
start_time = time.perf_counter()
numba = vector_add_numba(a, b)
numba_time = time.perf_counter() - start_time
print(f"Numba CUDA implementation took {numba_time:.6f} seconds with input size {size}")

# Regular Python implementation
start_time = time.perf_counter()
python = vector_add_python(a, b)
python_time = time.perf_counter() - start_time
print(f"Regular Python implementation took {python_time:.6f} seconds with input size {size}")






CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
: