# Google Colab (GC) - Numba CPU *thread*

In [1]:
def nbheat(tr) :
    import numpy as np
    from numba import set_num_threads, get_num_threads, \
                      threading_layer, njit, config

    n            = 2400
    energy       = 1.0
    niters       = 250
    anew         = np.zeros((n + 2,  n + 2), np.float64)
    aold         = np.zeros((n + 2,  n + 2), np.float64)
    sources      = np.empty((3, 2), np.int16)
    sources[:,:] = [ [n//2, n//2], [n//3, n//3], [n*4//5, n*8//9] ]

    config.THREADING_LAYER = 'omp'    # OpenMP
    set_num_threads(tr)               # seta a qtde. de threads

# parte executada por Numba
#----------------------------------------
    @njit('(float64[:,:],float64[:,:])', parallel=True, \
            fastmath=True, nogil=True)
    def kernel(anew, aold) :
        anew[1:-1,1:-1] = (aold[1:-1,1:-1]/2.0
                           +(aold[2:,1:-1]+aold[:-2,1:-1]
                           +aold[1:-1,2:]+aold[1:-1,:-2])/8.0)
#----------------------------------------

    for _ in range(0, niters, 2) :
        kernel(anew, aold)
        anew[sources[:, 0], sources[:, 1]] += energy
        kernel(aold, anew)
        aold[sources[:, 0], sources[:, 1]] += energy

    heat = np.sum(aold[1:-1, 1:-1])
    
    return heat, aold, threading_layer(), get_num_threads()

Obs.: no GC apenas um núcleo físico e um virtual estão disponíveis.

## 1 thread
* na segunda vez que roda é mais rápido, pois Numba usa o que já está no cache

In [2]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(1)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: omp | Thread count: 1
4.77 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [4]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(1)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: omp | Thread count: 1
3.47 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## 2 trheads
* usa um núcleo físico e um virtual (Intel hyperthreading)

In [3]:
%%timeit -n1 -r1
heat, result, TL, NT = nbheat(2)
print(f"Heat: {heat:.4f}", end=" | ")
print(f"Threading layer: {TL}", end=" | ")
print(f"Thread count: {NT}")

Heat: 750.0000 | Threading layer: omp | Thread count: 2
3.24 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
