In [1]:
import numpy as np
import numba
import time

In [2]:
# Problem size and operands used by the benchmark cells in this notebook.
SEED = 42  # fixed seed so a fresh "Restart & Run All" regenerates the same inputs
n = 1_000_000  # number of elements in each operand vector
rng = np.random.default_rng(SEED)
a = rng.standard_normal(n)
b = rng.standard_normal(n)
# Uninitialised float64 buffer that the timed cells write their result into.
c = np.empty(n, dtype=np.float64)

In [9]:
%%time
# Write the sum into the preallocated buffer `c` instead of building a new
# array: the timing then covers only the addition (no fresh 8 MB allocation),
# like the loop-based cells that also fill an existing buffer.
# The result is assigned rather than left as a bare expression so that the
# cell does not echo the whole array; np.add returns the `out` array itself.
c = np.add(a, b, out=c)

CPU times: user 2.54 ms, sys: 7.97 ms, total: 10.5 ms
Wall time: 12 ms


In [12]:
%%time
# Baseline: element-by-element addition driven by the Python interpreter.
# Reads the notebook globals n, a and b and overwrites c[i] in place.
for i in range(n):
    c[i] = a[i] + b[i]

CPU times: user 475 ms, sys: 11.9 ms, total: 487 ms
Wall time: 486 ms


In [27]:
%%time
def numba_fun(in1, in2, out):
    """Add ``in1`` and ``in2`` element-wise and store the result in ``out``.

    This is the plain-Python version of the kernel (no JIT decorator); it
    serves as the interpreted baseline for the compiled variants.

    Parameters
    ----------
    in1, in2 : indexable sequences
        Operands; each must provide at least ``len(out)`` elements.
    out : indexable, writable sequence
        Destination; every element ``out[i]`` is overwritten.

    Returns
    -------
    None
        The result is written into ``out``.
    """
    # The trip count comes from the destination itself rather than from a
    # notebook-level global, so the function works for any input length.
    for i in range(len(out)):
        out[i] = in1[i] + in2[i]

# Run the function defined above once on the notebook arrays; the cell-level
# %%time therefore covers both the definition and this single call.
numba_fun(a, b, c)

CPU times: user 382 ms, sys: 16.9 ms, total: 399 ms
Wall time: 397 ms


In [28]:
%%time
@numba.njit
def numba_fun(in1, in2, out):
    """Serial JIT-compiled element-wise addition: ``out[i] = in1[i] + in2[i]``.

    Parameters
    ----------
    in1, in2 : array-like
        Operands; each must hold at least ``len(out)`` elements.
    out : array-like
        Destination; every element is overwritten.

    Raises
    ------
    ValueError
        If either operand is shorter than ``out``.
    """
    size = len(out)
    # Compiled code does not bounds-check indexing unless that is explicitly
    # enabled, so reject short operands instead of reading past their end.
    if len(in1) < size or len(in2) < size:
        raise ValueError('in1 and in2 must have at least as many elements as out')

    # Plain `range`: without parallel=True a `prange` loop runs serially
    # anyway, and this cell is the single-threaded baseline. The bound is
    # taken from `out` because a global `n` would be baked in at compile time.
    for i in range(size):
        out[i] = in1[i] + in2[i]

# First call of the freshly decorated function. Numba compiles on first use,
# so the %%time figure for this cell is expected to include compilation.
numba_fun(a, b, c)

CPU times: user 100 ms, sys: 1.03 ms, total: 101 ms
Wall time: 99.7 ms


In [29]:
# Time ten further calls. numba_fun has already been called once in the
# previous cell, so these measurements reflect the compiled function only.
for i in range(10):
    tic = time.perf_counter_ns()
    numba_fun(a, b, c)
    elapsed_ns = time.perf_counter_ns() - tic
    # Nanoseconds -> microseconds for display.
    print(f'Time after compiling {round(elapsed_ns/1000)} μs')

Time after compiling 2025 μs
Time after compiling 1872 μs
Time after compiling 1784 μs
Time after compiling 1683 μs
Time after compiling 1633 μs
Time after compiling 1600 μs
Time after compiling 1567 μs
Time after compiling 1529 μs
Time after compiling 1558 μs
Time after compiling 1508 μs


In [39]:
@numba.njit(parallel=True)
def numba_fun(in1, in2, out):
    """Multi-threaded JIT-compiled addition: ``out[i] = in1[i] + in2[i]``.

    Parameters
    ----------
    in1, in2 : array-like
        Operands; each must hold at least ``len(out)`` elements.
    out : array-like
        Destination; every element is overwritten.

    Raises
    ------
    ValueError
        If either operand is shorter than ``out``.
    """
    size = len(out)
    # Compiled code does not bounds-check indexing unless that is explicitly
    # enabled, so reject short operands instead of reading past their end.
    if len(in1) < size or len(in2) < size:
        raise ValueError('in1 and in2 must have at least as many elements as out')

    # Iterations are independent, so prange may split them across threads.
    # The bound comes from `out`; a global `n` would be frozen at compile time.
    for i in numba.prange(size):
        out[i] = in1[i] + in2[i]

# Ask for 4 worker threads, capped at the size of Numba's thread pool:
# set_num_threads raises ValueError when given more than NUMBA_NUM_THREADS,
# which would otherwise break this cell on machines with fewer than 4 cores.
numba.set_num_threads(min(4, numba.config.NUMBA_NUM_THREADS))

# First invocation of the function defined in this cell, reported in
# milliseconds (perf_counter returns seconds).
tic = time.perf_counter()
numba_fun(a, b, c)
first_call_s = time.perf_counter() - tic
print(f'Time first call {round(first_call_s*1000)} ms')

# Ten follow-up calls, reported in microseconds (perf_counter_ns returns ns).
for i in range(10):
    tic = time.perf_counter_ns()
    numba_fun(a, b, c)
    elapsed_ns = time.perf_counter_ns() - tic
    print(f'Time after compiling {round(elapsed_ns/1000)} μs')

Time first call 386 ms
Time after compiling 563 μs
Time after compiling 476 μs
Time after compiling 435 μs
Time after compiling 422 μs
Time after compiling 371 μs
Time after compiling 403 μs
Time after compiling 395 μs
Time after compiling 360 μs
Time after compiling 340 μs
Time after compiling 337 μs
