In [37]:
import sys
import time
import numba as nb
import numpy as np


def ser_divisors(num: np.uint32) -> np.uint32:
    """Count the positive divisors of ``num`` by trial division.
    (serial version)

    Every integer from 1 to ``num`` (inclusive) is tried as a divisor, so the
    running time grows linearly with ``num``. For ``num < 1`` the candidate
    range is empty and the result is 0.
    """
    return sum(1 for candidate in range(1, num + 1) if num % candidate == 0)


@nb.jit
def chatgpt_divisors(n):
    """Count the divisors of ``n`` using whole-array NumPy operations.

    Builds the array of candidates 1..n, takes the remainder of ``n`` by each
    candidate, and sums the boolean mask of zero remainders. A temporary array
    of length ``n`` is allocated on every call.
    """
    candidates = np.arange(1, n + 1)
    is_divisor = np.mod(n, candidates) == 0
    return np.sum(is_divisor)


@nb.vectorize([nb.uint32(nb.uint32)], nopython=True, fastmath=True, target="parallel")
def chatgpt_divisors_vec(n):
    """Scalar kernel of a ufunc that counts the divisors of ``n``.

    For one element ``n`` it builds the candidates 1..n, computes the remainder
    of ``n`` by each of them, and sums the mask of zero remainders. The
    ``nb.vectorize`` decorator applies this kernel to every element of the
    input array.
    """
    candidates = np.arange(1, n + 1)
    is_divisor = n % candidates == 0
    return np.sum(is_divisor)


@nb.njit(nb.uint32(nb.uint32), parallel=True, fastmath=True)
def par_divisors(num: np.uint32) -> np.uint32:
    """Count the divisors of a single number.
    (parallel version)

    The trial-division loop over the candidates 1..num is a ``prange`` loop;
    the ``+=`` on ``total`` is the loop's reduction variable.
    """
    total = 0
    for candidate in nb.prange(1, num + 1):
        if num % candidate == 0:
            total += 1
    return total


@nb.njit(nb.uint32[:](nb.uint32[:]), parallel=True, fastmath=True)
def para_divisors(num):
    """Returns an array with the number of divisors of each integer in ``num``.
    (parallel version)

    ``num`` is a 1-D ``uint32`` array; the result is a ``uint32`` array of the
    same length whose i-th entry is the divisor count of ``num[i]``. An empty
    input yields an empty output.

    The outer loop over the elements runs in parallel with ``prange``. Each
    iteration writes only its own slot of a preallocated output array, so no
    two threads ever touch the same memory and the output order always matches
    the input order. (Appending to a shared list inside ``prange`` is not
    thread-safe and does not preserve order.)
    """
    size = num.shape[0]
    result = np.empty(size, dtype=np.uint32)
    for idx in nb.prange(size):
        n = num[idx]
        count = 0
        # Serial trial division for this element.
        for val in range(1, n + 1):
            if n % val == 0:
                count += 1
        result[idx] = count
    return result


@nb.vectorize([nb.uint32(nb.uint32)], nopython=True, fastmath=True, target="parallel")
def vec_divisors(num: np.uint32) -> np.uint32:
    """Scalar kernel that counts the divisors of one integer.
    (vectorized [ufunc] version)

    ``nb.vectorize`` turns this kernel into a ufunc, so calling it on an array
    of integers returns an array with the divisor count of each element.
    """
    hits = 0
    candidate = 1
    # Try every candidate from 1 up to and including num.
    while candidate <= num:
        if num % candidate == 0:
            hits += 1
        candidate += 1
    return hits


@nb.vectorize([nb.uint32(nb.uint32)], target="cuda")
def gpu_divisors(num: np.uint32) -> np.uint32:
    """Scalar kernel that counts the divisors of one integer.
    (gpu version)

    ``nb.vectorize`` with ``target="cuda"`` turns this kernel into a ufunc, so
    calling it on an array of integers returns an array with the divisor count
    of each element.
    """
    found = 0
    for divisor in range(1, num + 1):
        remainder = num % divisor
        if remainder == 0:
            found += 1
    return found

In [38]:
# detect Google's GPU hardware capabilities 
nb.cuda.detect()

Found 1 CUDA devices
id 0             b'Tesla T4'                              [SUPPORTED]
                      Compute Capability: 7.5
                           PCI Device ID: 4
                              PCI Bus ID: 0
                                    UUID: GPU-9eca05b3-f109-9d8c-a0a6-0d214465aee3
                                Watchdog: Disabled
             FP32/FP64 Performance Ratio: 32
Summary:
	1/1 devices are supported


True

In [39]:
# generate the integers whose divisor counts will be computed
start_number, end_number = 1, int(1e4)
x = np.arange(start_number, end_number + 1, dtype=np.uint32)

In [40]:
# Warm-up: call every function once so that JIT compilation time is not
# included in the %%timeit measurements in the cells below.
chatgpt_divisors_vec(np.array([1], dtype=np.uint32))
vec_divisors(np.array([1], dtype=np.uint32))
para_divisors(np.array([1], dtype=np.uint32))
par_divisors(1)
# chatgpt_divisors has no explicit signature, so Numba compiles it lazily for
# each new argument type. Warm it up with np.uint32 (the element type of x used
# in the benchmark); a plain Python int would compile a different
# specialization and leave the uint32 compilation inside the timed cell.
chatgpt_divisors(np.uint32(28))

6

In [41]:
%%timeit
# chatgpt code
chatgpt_divisors_vec(x)

170 ms ± 2.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [42]:
%%timeit
# chatgpt code
[chatgpt_divisors(n) for n in x]

231 ms ± 7.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [43]:
%%timeit
# my cpu code (parallel)
[par_divisors(n) for n in x]

184 ms ± 2.16 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [35]:
%%timeit
# vectorized ufunc version on the CPU (parallel target)
vec_divisors(x)

129 ms ± 20.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [36]:
%%timeit
# nested-loop version on the CPU (outer loop parallelized with prange)
para_divisors(x)

174 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
