<a href="https://colab.research.google.com/github/jcvasquezbetancur/DS-colab/blob/master/numba_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upgrade tbb and numba in the runtime; -q keeps pip's output short.
# NOTE(review): presumably tbb is installed so numba can use its TBB threading
# layer for the parallel=True examples in this notebook — confirm.
# NOTE(review): no versions are pinned, so a re-run may pull different releases.
!pip install --upgrade tbb -q
!pip install --upgrade numba -q

[K     |████████████████████████████████| 3.9MB 6.6MB/s 
[K     |████████████████████████████████| 3.4MB 6.6MB/s 
[K     |████████████████████████████████| 25.3MB 115kB/s 
[?25h

In [12]:
from numba import jit,njit
import numpy as np
# Sample inputs shared by the benchmark cells: a flat float64 vector holding
# 0.0 .. 99.0, and a 10x10 two-dimensional view onto that same buffer.
A = np.arange(100, dtype=np.float64)
x = A.reshape((10, 10))

In [5]:
def go_slow(a):
    """Add the summed tanh of the leading diagonal of ``a`` to every element.

    Plain Python loop with no JIT decorator.

    Args:
        a: Two-dimensional NumPy array; ``a[i, i]`` is read for every
            ``i`` in ``range(a.shape[0])``.

    Returns:
        A new array equal to ``a`` plus the accumulated scalar. ``a`` itself
        is not modified.
    """
    n_rows = a.shape[0]
    diag_tanh_sum = 0.0
    for idx in range(n_rows):
        # Accumulate strictly left to right, one diagonal entry at a time.
        diag_tanh_sum = diag_tanh_sum + np.tanh(a[idx, idx])
    return a + diag_tanh_sum


In [6]:
%%timeit
print(go_slow(x))

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97.  98.]
 [ 99. 100. 101. 102. 103. 104. 105. 106. 107. 108.]]
[[  9.  10.  11.  12.  13.  14.  15.  16.  17.  18.]
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91. 

In [7]:
@jit(nopython=True)  # nopython mode (what @njit is shorthand for)
def go_fast(a):
    """Add the summed tanh of the leading diagonal of ``a`` to every element.

    Compiled to machine code by numba the first time it is called. The body
    uses an explicit loop, a NumPy ufunc and NumPy broadcasting.

    Args:
        a: Two-dimensional NumPy array; ``a[i, i]`` is read for every
            ``i`` in ``range(a.shape[0])``.

    Returns:
        A new array equal to ``a`` plus the accumulated scalar.
    """
    n_rows = a.shape[0]
    diag_tanh_sum = 0.0
    for idx in range(n_rows):
        diag_tanh_sum += np.tanh(a[idx, idx])
    return a + diag_tanh_sum

In [8]:
%%timeit
print(go_fast(x))

[[  9.  10.  11.  12.  13.  14.  15.  16.  17.  18.]
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97.  98.]
 [ 99. 100. 101. 102. 103. 104. 105. 106. 107. 108.]]
[[  9.  10.  11.  12.  13.  14.  15.  16.  17.  18.]
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97

In [3]:
@njit(parallel=True)  # nopython mode with numba's automatic parallelization enabled
def go_fast_par(a):
    """Add the summed tanh of the leading diagonal of ``a`` to every element.

    Compiled by numba on first call. The diagonal loop uses a plain ``range``
    (not ``prange``); the parallel diagnostics recorded in this notebook
    attribute the only parallel loop to the final ``a + <scalar>`` expression.

    Args:
        a: Two-dimensional NumPy array; ``a[i, i]`` is read for every
            ``i`` in ``range(a.shape[0])``.

    Returns:
        A new array equal to ``a`` plus the accumulated scalar.
    """
    n_rows = a.shape[0]
    diag_tanh_sum = 0.0
    for idx in range(n_rows):
        diag_tanh_sum += np.tanh(a[idx, idx])
    return a + diag_tanh_sum

In [4]:
# First call of go_fast_par: numba compiles on first call, so this also warms
# the function up before it is timed. The returned array is the cell output.
go_fast_par(x)



array([[  9.,  10.,  11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.],
       [ 19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,  27.,  28.],
       [ 29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.],
       [ 39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.],
       [ 49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.],
       [ 59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.],
       [ 69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,  78.],
       [ 79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.],
       [ 89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.],
       [ 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.]])

In [20]:
# Print numba's parallel-transform report for go_fast_par at verbosity level 4.
# NOTE(review): the report presumably covers only signatures that have already
# been compiled, so run this after go_fast_par has been called — confirm.
go_fast_par.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function go_fast_par, <ipython-
input-3-25894253b3bb> (1)  


Parallel loop listing for  Function go_fast_par, <ipython-input-3-25894253b3bb> (1) 
-----------------------------------------------------------------------------------------|loop #ID
@njit(parallel=True) # Set "nopython" mode for best performance, equivalent to @njit     | 
def go_fast_par(a): # Function is compiled to machine code when called the first time    | 
    trace = 0.0                                                                          | 
    for i in range(a.shape[0]):   # Numba likes loops                                    | 
        trace += np.tanh(a[i, i]) # Numba likes NumPy functions                          | 
    return a + trace              # Numba likes NumPy broadcasting-----------------------| #0
--------------------------------- Fusing loops ---------------------------------
Attempting fusion of parallel loops (combines loops with similar properties)...
--

In [5]:
%%timeit
print(go_fast_par(x))

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97.  98.]
 [ 99. 100. 101. 102. 103. 104. 105. 106. 107. 108.]]
[[  9.  10.  11.  12.  13.  14.  15.  16.  17.  18.]
 [ 19.  20.  21.  22.  23.  24.  25.  26.  27.  28.]
 [ 29.  30.  31.  32.  33.  34.  35.  36.  37.  38.]
 [ 39.  40.  41.  42.  43.  44.  45.  46.  47.  48.]
 [ 49.  50.  51.  52.  53.  54.  55.  56.  57.  58.]
 [ 59.  60.  61.  62.  63.  64.  65.  66.  67.  68.]
 [ 69.  70.  71.  72.  73.  74.  75.  76.  77.  78.]
 [ 79.  80.  81.  82.  83.  84.  85.  86.  87.  88.]
 [ 89.  90.  91.  92.  93.  94.  95.  96.  97.  98.]
 [ 99. 100. 101. 

In [8]:
from numba import prange
@njit(parallel=True)
def do_sum_parallel(A):
    """Return the sum of ``sqrt(A[i])`` over every index of ``A``.

    The loop runs under ``prange``: each thread accumulates its own partial
    sum, and a cross-thread reduction then combines them into the value
    that is returned.
    """
    total = 0.
    for idx in prange(len(A)):
        total += np.sqrt(A[idx])
    return total

@njit(parallel=True, fastmath=True)
def do_sum_parallel_fast(A):
    """Return the sum of ``sqrt(A[i])`` over every index of ``A``.

    Same ``prange`` reduction loop as ``do_sum_parallel``; the only difference
    is that ``fastmath=True`` is also passed to ``njit``.
    """
    total = 0.
    for idx in prange(len(A)):
        total += np.sqrt(A[idx])
    return total

In [19]:
# Print numba's parallel-transform report for do_sum_parallel at verbosity level 4.
# NOTE(review): the report presumably covers only signatures that have already
# been compiled, so run this after do_sum_parallel has been called — confirm.
do_sum_parallel.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function do_sum_parallel, <ipython-
input-8-0fda09521d4a> (2)  


Parallel loop listing for  Function do_sum_parallel, <ipython-input-8-0fda09521d4a> (2) 
--------------------------------------------------------------------------|loop #ID
@njit(parallel=True)                                                      | 
def do_sum_parallel(A):                                                   | 
    # each thread can accumulate its own partial sum, and then a cross    | 
    # thread reduction is performed to obtain the result to return        | 
    n = len(A)                                                            | 
    acc = 0.                                                              | 
    for i in prange(n):---------------------------------------------------| #1
        acc += np.sqrt(A[i])                                              | 
    return acc                                                            | 
-----------------------------

In [18]:
%%timeit
print(do_sum_parallel(A))

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
6

In [17]:
%%timeit
print(do_sum_parallel_fast(A))

[1;30;43mSe truncaron las últimas líneas 5000 del resultado de transmisión.[0m
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
661.4629471031477
6

In [10]:
from numba.pycc import CC

# Ahead-of-time compilation target: functions registered with cc.export are
# built into an extension module named 'my_module'.
cc = CC('my_module')
# Verbose mode: the build logs the code-generation and C compiler steps it runs.
cc.verbose = True

@cc.export('multf', 'f8(f8, f8)')
@cc.export('multi', 'i4(i4, i4)')
def mult(a, b):
    """Return the product of ``a`` and ``b``.

    Registered for export twice: as ``multf`` with signature ``f8(f8, f8)``
    and as ``multi`` with signature ``i4(i4, i4)``.
    """
    product = a * b
    return product

@cc.export('square', 'f8(f8)')
def square(a):
    """Return ``a`` raised to the power 2.

    Registered for export as ``square`` with signature ``f8(f8)``.
    """
    squared = a ** 2
    return squared

# Build the extension module, but only when this code runs as the main program
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    cc.compile()

generating LLVM code for 'my_module' into /tmp/pycc-build-my_module-xf1gy5xe/my_module.cpython-37m-x86_64-linux-gnu.o
C compiler: x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fdebug-prefix-map=/build/python3.7-OGiuun/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -g -fdebug-prefix-map=/build/python3.7-OGiuun/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC

creating /tmp/pycc-build-my_module-xf1gy5xe/usr
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local/lib
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local/lib/python3.7
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local/lib/python3.7/dist-packages
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local/lib/python3.7/dist-packages/numba
creating /tmp/pycc-build-my_module-xf1gy5xe/usr/local/lib/python3.7/dist-packages/numba/pycc


In [11]:
# Import the module produced by cc.compile() and call the 'multi' export
# (the i4(i4, i4) signature of mult).
import my_module
my_module.multi(3, 4)

12

In [12]:
# Square sqrt(2) through the compiled 'square' export. The recorded result is
# 2.0000000000000004 rather than exactly 2.0 because of floating-point rounding.
my_module.square(np.sqrt(2))

2.0000000000000004

# TODO
- Cover the [`@jitclass` decorator](https://numba.pydata.org/numba-doc/latest/user/jitclass.html#jitclass).