# Santos Dumont (SD) - Numba CPU threads B710

Numba sequencial usando 1 thread, e paralelo usando vários threads

In [1]:
def nbheat(tr, n=2400, energy=1.0, niters=250):
    """Run a 2-D heat-diffusion stencil using a Numba parallel kernel.

    Two (n + 2) x (n + 2) float64 grids are swept alternately; after every
    sweep, ``energy`` is added to three fixed source cells of the grid that
    was just written. The outermost rows/columns are never written by the
    kernel and therefore stay at zero.

    Note: the kernel is decorated with an explicit signature inside this
    function, so it is compiled again on every call. Wall-clock timings of
    ``nbheat`` therefore include JIT compilation, not only the sweeps.

    Parameters
    ----------
    tr : int
        Number of threads, passed to ``numba.set_num_threads``. This changes
        Numba's thread count for the process and is not restored on return.
    n : int, optional
        Interior grid size per dimension (default 2400). Must be >= 3 so that
        every source cell lies inside the interior.
    energy : float, optional
        Amount added to each source cell after every sweep (default 1.0).
    niters : int, optional
        Number of sweeps (default 250). Each loop pass performs two sweeps,
        so an odd value is effectively rounded up to the next even number.

    Returns
    -------
    tuple
        ``(heat, aold, layer, nthreads)``: the sum over the interior of the
        final grid, the final grid itself (border included), the value of
        ``numba.threading_layer()`` and the value of
        ``numba.get_num_threads()``.

    Raises
    ------
    ValueError
        If ``n < 3``. Errors raised by ``numba.set_num_threads`` for an
        unsupported ``tr`` are propagated unchanged.
    """
    import numpy as np
    from numba import get_num_threads, njit, set_num_threads, threading_layer

    if n < 3:
        # With n < 3 the source at (n//3, n//3) would land on border row 0.
        raise ValueError("n must be >= 3")

    anew = np.zeros((n + 2, n + 2), np.float64)
    aold = np.zeros((n + 2, n + 2), np.float64)

    # Source coordinates; intp (instead of int16) cannot overflow for large n.
    sources = np.array(
        [[n // 2, n // 2], [n // 3, n // 3], [n * 4 // 5, n * 8 // 9]],
        dtype=np.intp,
    )
    src_rows = sources[:, 0]
    src_cols = sources[:, 1]

    set_num_threads(tr)    # set the number of threads

    # part executed by Numba
    # ----------------------------------------
    @njit('(float64[:,:],float64[:,:])', parallel=True,
          fastmath=True, nogil=True)
    def kernel(anew, aold):
        # New interior value: half of the cell plus one eighth of each of
        # its four direct neighbours.
        anew[1:-1, 1:-1] = (aold[1:-1, 1:-1] / 2.0
                            + (aold[2:, 1:-1] + aold[:-2, 1:-1]
                               + aold[1:-1, 2:] + aold[1:-1, :-2]) / 8.0)
    # ----------------------------------------

    # Two sweeps per pass: aold -> anew, then anew -> aold, so the latest
    # state is always in aold when the loop ends.
    for _ in range(0, niters, 2):
        kernel(anew, aold)
        anew[src_rows, src_cols] += energy
        kernel(aold, anew)
        aold[src_rows, src_cols] += energy

    heat = np.sum(aold[1:-1, 1:-1])

    return heat, aold, threading_layer(), get_num_threads()

1 thread (sequencial)

# Execução no nó de login

Obs.: nos nós de login o Intel Hyperthreading está ligado, então o total de CPUs é 22

## 1 thread (sequencial)

In [2]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(1)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: tbb | Thread count: 1
17 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## 2 threads (paralelo)

In [3]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(2)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: tbb | Thread count: 2
3.05 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## 12 threads (paralelo)

In [4]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(12)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: tbb | Thread count: 12
1.68 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## 24 threads (paralelo)

In [5]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(24)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: tbb | Thread count: 24
1.71 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
