In [33]:
from pylab import *
import numba as nb
# Set Numba's optimisation-level config (`nb.config.OPT`) to level 0.
# NOTE(review): presumably done so that the timing differences between the
# variants below are not optimised away -- confirm. `_OptLevel` is a private
# Numba name (leading underscore), so this line may break on a Numba upgrade.
nb.config.OPT = nb.core.config._OptLevel(0)

In [34]:
@nb.njit()
def sss_energy(u, v, w, e):  # Slowest implementation
  """Write (u**2 + v**2 + w**2) / 2 element-wise into ``e``.

  Whole-array variant that squares with ``** 2.0`` and halves with a
  division by ``2.0``.

  Parameters
  ----------
  u, v, w : arrays holding the three components
      (assumed to be NumPy arrays of matching shape -- TODO confirm).
  e : output array; its contents are overwritten in place via ``e[:] = ...``.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  e[:] = (u**2.0 + v**2.0 + w**2.0)/2.0

@nb.njit()
def ss_energy(u, v, w, e):  # better implementation
  """Write (u**2 + v**2 + w**2) * 0.5 element-wise into ``e``.

  Same whole-array expression as ``sss_energy`` except that the final
  halving is a multiplication by ``0.5`` instead of a division by ``2.0``.

  Parameters
  ----------
  u, v, w : arrays holding the three components
      (assumed to be NumPy arrays of matching shape -- TODO confirm).
  e : output array; its contents are overwritten in place via ``e[:] = ...``.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  e[:] = (u**2.0 + v**2.0 + w**2.0)*0.5

@nb.njit()
def s_energy(u, v, w, e):  # best implementation
  """Fill ``e[k]`` with (u[k]*u[k] + v[k]*v[k] + w[k]*w[k]) * 0.5.

  Explicit-loop variant: squares are written as plain multiplications and
  the halving as a multiplication by ``0.5``.

  Parameters
  ----------
  u, v, w : input arrays, indexed with a single integer
      (so assumed one-dimensional -- TODO confirm).
  e : output array; entries ``0 .. u.size - 1`` are overwritten in place.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  n_points = u.size
  for k in range(n_points):
    e[k] = (u[k]*u[k] + v[k]*v[k] + w[k]*w[k])*0.5

In [35]:
N = int(8192)
u, v, w = randn(3, N)
e = empty(N)

%timeit sss_energy(u, v, w, e)
%timeit ss_energy(u, v, w, e)
%timeit s_energy(u, v, w, e)

873 µs ± 15.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.32 ms ± 80 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
584 µs ± 34.2 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [42]:
## Example of if: branch evaluated inside the loop
@nb.njit()
def if_energy_slow(u, v, w, e, flag):  # slow implementation
  """Fill ``e`` element by element, testing ``flag[0]`` on every iteration.

  This variant used to be named ``if_energy`` as well, so the definition
  below replaced it and it could never be called. It has its own name now so
  that both variants can be run and timed side by side.

  Parameters
  ----------
  u, v, w : input arrays, indexed with a single integer.
  e : output array; entries ``0 .. u.size - 1`` are overwritten in place.
  flag : indexable; only ``flag[0]`` is read. When truthy the plain
      (u^2 + v^2 + w^2) * 0.5 is stored, otherwise the w term is doubled.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  for i in range(u.size):
    if flag[0]:
      e[i] = (u[i]**2.0 + v[i]**2.0 + w[i]**2.0)*0.5
    else:
      e[i] = (u[i]**2.0 + v[i]**2.0 + 2*w[i]**2.0)*0.5


## Example of if: branch hoisted out of the loop
@nb.njit()
def if_energy(u, v, w, e, flag):  # fast implementation
  """Fill ``e`` element by element, testing ``flag[0]`` once up front.

  Produces the same values as ``if_energy_slow``; the only difference is
  that the flag is checked a single time and each branch owns its own loop.

  Parameters
  ----------
  u, v, w : input arrays, indexed with a single integer.
  e : output array; entries ``0 .. u.size - 1`` are overwritten in place.
  flag : indexable; only ``flag[0]`` is read. When truthy the plain
      (u^2 + v^2 + w^2) * 0.5 is stored, otherwise the w term is doubled.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  if flag[0]:
    for i in range(u.size):
      e[i] = (u[i]**2.0 + v[i]**2.0 + w[i]**2.0)*0.5
  else:
    for i in range(u.size):
      e[i] = (u[i]**2.0 + v[i]**2.0 + 2*w[i]**2.0)*0.5

## Example of if COMPILED FLAG
# Module-level switch read by comp_energy below (not passed as an argument).
compiled_flag = True
@nb.njit()
def comp_energy(u, v, w, e):  # branch on a global flag inside the loop
  """Fill ``e`` element by element, branching on the global ``compiled_flag``.

  Same arithmetic as ``if_energy``, but the branch condition is the
  module-level name ``compiled_flag`` rather than a runtime argument.

  NOTE(review): Numba's documentation describes global variables as
  compile-time constants, so changing ``compiled_flag`` after the first call
  is presumably not picked up without recompiling -- confirm before toggling.

  Parameters
  ----------
  u, v, w : input arrays, indexed with a single integer.
  e : output array; entries ``0 .. u.size - 1`` are overwritten in place.

  Returns
  -------
  None; the result is delivered through ``e``.
  """
  for i in range(u.size):
    if compiled_flag:
      e[i] = (u[i]**2.0 + v[i]**2.0 + w[i]**2.0)*0.5
    else:
      e[i] = (u[i]**2.0 + v[i]**2.0 + 2*w[i]**2.0)*0.5

In [40]:
flag = array([True])

%timeit ss_energy(u, v, w, e)
%timeit if_energy(u, v, w, e, flag)
%timeit comp_energy(u, v, w, e)


992 µs ± 153 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.14 ms ± 68.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
5.58 ms ± 18.4 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [46]:
# Time comp_energy again on its own, repeating the measurement from the
# comparison cell above.
%timeit comp_energy(u, v, w, e)

1.14 ms ± 22.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [45]:
# Time if_energy again on its own, repeating the measurement from the
# comparison cell above.
%timeit if_energy(u, v, w, e, flag)

1.15 ms ± 98.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [48]:
# Run comp_energy once (it fills `e` in place) and show the first ten entries
# as a quick look at the values it produced.
comp_energy(u, v, w, e)
print(e[:10])

[0.30761302 1.9128611  2.61123757 0.79664866 4.41064793 4.17681159
 0.63614227 1.93865137 0.90335328 0.79187701]


In [51]:
# Clear the flag in place (same array object, new contents) so that if_energy
# takes its else branch, where the w term is weighted by 2.
# Note: this mutates `flag` for every later cell that reads it.
flag[:] = False
if_energy(u, v, w, e, flag)
print(e[:10])

[0.30941148 2.24824024 2.76105696 0.94804036 5.54046399 4.1988025
 0.94361235 2.64916022 1.11792026 1.31951074]


In [52]:
@nb.njit()
def fast_sum0(A):
  """Return the sum of every element of the 2-D array ``A``.

  The first index is the outer loop and the second index is the inner loop,
  so consecutive inner iterations step along the last axis of ``A``.

  Parameters
  ----------
  A : array indexed as ``A[row, col]``.

  Returns
  -------
  The accumulated total, starting from integer ``0``.
  """
  n_rows = A.shape[0]
  n_cols = A.shape[1]
  total = 0
  for row in range(n_rows):
    for col in range(n_cols):
      total += A[row, col]
  return total

@nb.njit()
def fast_sum1(A):
  """Return the sum of every element of the 2-D array ``A``, column by column.

  The second index is the outer loop and the first index is the inner loop,
  so consecutive inner iterations step along the first axis of ``A`` -- the
  opposite traversal order to ``fast_sum0``.

  The loop bounds follow the axis each index is used on (columns from
  ``A.shape[1]``, rows from ``A.shape[0]``). With the bounds the other way
  round, a non-square ``A`` would be indexed outside its extent.

  Parameters
  ----------
  A : array indexed as ``A[row, col]``; may be non-square.

  Returns
  -------
  The accumulated total, starting from integer ``0``.
  """
  s = 0
  for i in range(A.shape[1]):      # i: column index
    for j in range(A.shape[0]):    # j: row index
      s += A[j, i]
  return s

In [53]:
A = randn(2048, 2048)

%timeit fast_sum0(A)
%timeit fast_sum1(A)

86.6 ms ± 4.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
334 ms ± 48.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
