## Reduction: the sum of the elements of an array

In [5]:
import os
import numpy as np
import time
from multiprocessing import Pool, cpu_count
from numba import njit, prange

def get_env_int(name, default):
    """Read an integer setting from the environment.

    Parameters
    ----------
    name : str
        Name of the environment variable to look up.
    default : int
        Value returned when the variable is unset or cannot be parsed.

    Returns
    -------
    int
        The parsed value. Plain integer strings are converted exactly;
        float spellings such as ``"5e7"`` or ``"1000.0"`` are accepted too
        and truncated toward zero.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        # Exact path first: going through float() would round any integer
        # above 2**53 to the nearest representable double.
        return int(raw)
    except ValueError:
        pass
    try:
        # Fallback for scientific / decimal notation.
        return int(float(raw))
    except (ValueError, OverflowError):
        # Not a number at all, NaN (ValueError) or infinite (OverflowError):
        # keep the caller's default.
        return default

# Run parameters: both can be overridden through environment variables.
NPROCS = get_env_int("NPROCS", 1)
VALUE = get_env_int("VALUE", 50_000_000)

print(f"Tamaño del array (VALUE) = {VALUE:,}")
print(f"Número de procesos (NPROCS) = {NPROCS}")

# Fixed seed so that every run reduces the same data.
rng = np.random.default_rng(seed=42)
X = rng.random(size=VALUE, dtype=np.float64)

Tamaño del array (VALUE) = 50,000,000
Número de procesos (NPROCS) = 1


In [6]:
import numpy as np

def reduc_operation(A):
    """Naive reduction: add up the elements of ``A`` one index at a time.

    ``A`` is indexed from 0 to ``A.size - 1``. The running total starts at
    ``0.0``, which is also the result for an empty array.
    """
    accumulator = 0.0
    length = A.size
    for position in range(length):
        accumulator = accumulator + A[position]
    return accumulator

# Naive measurement: wall-clock time of the pure-Python loop over X.
t0 = time.perf_counter()
s_naive = reduc_operation(X)
t1 = time.perf_counter()
print(f"[Naive] tiempo={t1 - t0:.3f}s | suma={s_naive:.6f}")

# Reference measurement: the same reduction performed by numpy.sum.
t0 = time.perf_counter()
s_np = np.sum(X)
t1 = time.perf_counter()
print(f"[numpy.sum] tiempo={t1 - t0:.3f}s | suma={s_np:.6f}")

[Naive] tiempo=4.932s | suma=24998883.211768
[numpy.sum] tiempo=0.019s | suma=24998883.211760


## Paralelización con multiprocessing.Pool

In [10]:
# %%
import time
import numpy as np
from multiprocessing import Pool, cpu_count

def reduc_chunk(chunk):
    """Return the sum of the items of ``chunk``, accumulated one by one from 0.0."""
    partial = 0.0
    for value in chunk:
        partial = partial + value
    return partial

def reduc_operation_pool(A, nproc=None):
    """Sum the elements of ``A`` with a pool of worker processes.

    ``A`` is split into ``nproc`` chunks with ``np.array_split``; each chunk
    is reduced by ``reduc_chunk`` in a worker and the partial sums are added
    in the parent process.

    Parameters
    ----------
    A : array-like
        Data to reduce.
    nproc : int, optional
        Number of worker processes (and of chunks). When omitted, the number
        of CPUs this process is allowed to run on is used.

    Returns
    -------
    tuple of (float, float)
        The total and the elapsed wall-clock time in seconds. The timed
        region covers pool start-up, the map and the final combination, but
        not the splitting of ``A``.
    """
    if nproc is None:
        import os  # local import keeps this cell runnable on its own

        try:
            # cpu_count() reports every CPU in the machine; the affinity mask
            # gives the CPUs this process may actually use, which can be
            # fewer (restricted CPU sets, job schedulers).
            nproc = len(os.sched_getaffinity(0))
        except AttributeError:
            # sched_getaffinity does not exist on every platform.
            nproc = cpu_count()
    chunks = np.array_split(A, nproc)
    t0 = time.perf_counter()
    with Pool(processes=nproc) as pool:
        partials = pool.map(reduc_chunk, chunks)
    total = float(sum(partials))
    t1 = time.perf_counter()
    return total, (t1 - t0)

# Run the pool benchmark only when this code executes as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    s_pool, t_pool = reduc_operation_pool(X, nproc=NPROCS)
    print(f"Pool nproc={NPROCS}: {t_pool:.3f} s | suma={s_pool:.6f}")

Pool nproc=1: 3.681 s | suma=24998883.211768


## Numba: versión @njit y @njit(parallel=True) con prange

In [8]:
# %%
import time
from numba import njit, prange

@njit
def reduc_operation_numba(A):
    """Sequential sum of ``A`` along its first axis, compiled with ``@njit``."""
    n_items = A.shape[0]
    total = 0.0
    for idx in range(n_items):
        total += A[idx]
    return total

@njit(parallel=True)
def reduc_operation_numba_par(A):
    """Sum of ``A`` along its first axis using a ``prange`` loop.

    The accumulator is updated with ``+=`` inside the ``prange`` loop.
    """
    n_items = A.shape[0]
    total = 0.0
    for idx in prange(n_items):
        total += A[idx]
    return total

# Initial compilation (warm-up): call both functions once on a small slice
# before the timed runs below.
_ = reduc_operation_numba(X[:1000])
_ = reduc_operation_numba_par(X[:1000])

# Timed run of the sequential @njit version on the full array.
t0 = time.perf_counter()
s_nb = reduc_operation_numba(X)
t1 = time.perf_counter()

# Timed run of the parallel (prange) version on the full array.
t2 = time.perf_counter()
s_nb_par = reduc_operation_numba_par(X)
t3 = time.perf_counter()
print(f"Numba @njit (sec): {t1 - t0:.3f} s | suma={s_nb:.6f}")
print(f"Numba @njit(parallel=True): {t3 - t2:.3f} s | suma={s_nb_par:.6f}")

Numba @njit (sec): 0.050 s | suma=24998883.211768
Numba @njit(parallel=True): 0.012 s | suma=24998883.211760


Celda de resultados 

### Resultados de ejecución en la cola mendel

```text
===============================================
VALUE=100000000 | NPROCS=1 | srun -c 1
===============================================
Tamaño del array (VALUE) = 100,000,000
Número de procesos (NPROCS) = 1
[Naive] tiempo=19.696s | suma=49999287.316669
[numpy.sum] tiempo=0.065s | suma=49999287.316659
Pool nproc=1: 13.218 s | suma=49999287.316669
Numba @njit (sec): 0.115 s | suma=49999287.316669
Numba @njit(parallel=True): 0.115 s | suma=49999287.316669
===============================================
VALUE=100000000 | NPROCS=2 | srun -c 2
===============================================
Tamaño del array (VALUE) = 100,000,000
Número de procesos (NPROCS) = 2
[Naive] tiempo=19.445s | suma=49999287.316669
[numpy.sum] tiempo=0.066s | suma=49999287.316659
Pool nproc=2: 7.366 s | suma=49999287.316664
Numba @njit (sec): 0.115 s | suma=49999287.316669
Numba @njit(parallel=True): 0.058 s | suma=49999287.316664
===============================================
VALUE=100000000 | NPROCS=4 | srun -c 4
===============================================
Tamaño del array (VALUE) = 100,000,000
Número de procesos (NPROCS) = 4
[Naive] tiempo=19.719s | suma=49999287.316669
[numpy.sum] tiempo=0.065s | suma=49999287.316659
Pool nproc=4: 4.246 s | suma=49999287.316658
Numba @njit (sec): 0.118 s | suma=49999287.316669
Numba @njit(parallel=True): 0.030 s | suma=49999287.316658
===============================================
VALUE=100000000 | NPROCS=8 | srun -c 8
===============================================
Tamaño del array (VALUE) = 100,000,000
Número de procesos (NPROCS) = 8
[Naive] tiempo=20.231s | suma=49999287.316669
[numpy.sum] tiempo=0.066s | suma=49999287.316659
Pool nproc=8: 2.702 s | suma=49999287.316656
Numba @njit (sec): 0.115 s | suma=49999287.316669
Numba @njit(parallel=True): 0.025 s | suma=49999287.316656
===============================================
VALUE=1000000000 | NPROCS=1 | srun -c 1
===============================================
Tamaño del array (VALUE) = 1,000,000,000
Número de procesos (NPROCS) = 1
[Naive] tiempo=210.146s | suma=499993610.450011
[numpy.sum] tiempo=0.654s | suma=499993610.450247
Pool nproc=1: 133.287 s | suma=499993610.450011
Numba @njit (sec): 1.161 s | suma=499993610.450011
Numba @njit(parallel=True): 1.154 s | suma=499993610.450011
===============================================
VALUE=1000000000 | NPROCS=2 | srun -c 2
===============================================
Tamaño del array (VALUE) = 1,000,000,000
Número de procesos (NPROCS) = 2
[Naive] tiempo=210.283s | suma=499993610.450011
[numpy.sum] tiempo=0.654s | suma=499993610.450247
Pool nproc=2: 71.606 s | suma=499993610.450243
Numba @njit (sec): 1.170 s | suma=499993610.450011
Numba @njit(parallel=True): 0.580 s | suma=499993610.450243
===============================================
VALUE=1000000000 | NPROCS=4 | srun -c 4
===============================================
Tamaño del array (VALUE) = 1,000,000,000
Número de procesos (NPROCS) = 4
[Naive] tiempo=191.392s | suma=499993610.450011
[numpy.sum] tiempo=0.655s | suma=499993610.450247
Pool nproc=4: 42.376 s | suma=499993610.450168
Numba @njit (sec): 1.199 s | suma=499993610.450011
Numba @njit(parallel=True): 0.296 s | suma=499993610.450168
===============================================
VALUE=1000000000 | NPROCS=8 | srun -c 8
===============================================
Tamaño del array (VALUE) = 1,000,000,000
Número de procesos (NPROCS) = 8
[Naive] tiempo=190.346s | suma=499993610.450011
[numpy.sum] tiempo=0.655s | suma=499993610.450247
Pool nproc=8: 27.331 s | suma=499993610.450273
Numba @njit (sec): 1.215 s | suma=499993610.450011
Numba @njit(parallel=True): 0.225 s | suma=499993610.450273
END=Wed Dec 17 11:34:03 CET 2025