In [1]:
from numba import jit
import numpy as np
import time

def go_slow(a):
    trace = 0
    for i in range(a.shape[0]):
        trace += np.tanh(a[i, i])
    return a + trace

go_fast = jit(go_slow, nopython=True)

In [2]:
x = np.arange(100).reshape(10, 10)
%timeit go_slow(x)

45 µs ± 2.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
x = np.arange(100).reshape(10, 10)
go_fast(x)
%timeit go_fast(x)

1.42 µs ± 72.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [4]:
from dis import dis
dis(go_slow)

  6           0 LOAD_CONST               1 (0)
              2 STORE_FAST               1 (trace)

  7           4 SETUP_LOOP              44 (to 50)
              6 LOAD_GLOBAL              0 (range)
              8 LOAD_FAST                0 (a)
             10 LOAD_ATTR                1 (shape)
             12 LOAD_CONST               1 (0)
             14 BINARY_SUBSCR
             16 CALL_FUNCTION            1
             18 GET_ITER
        >>   20 FOR_ITER                26 (to 48)
             22 STORE_FAST               2 (i)

  8          24 LOAD_FAST                1 (trace)
             26 LOAD_GLOBAL              2 (np)
             28 LOAD_METHOD              3 (tanh)
             30 LOAD_FAST                0 (a)
             32 LOAD_FAST                2 (i)
             34 LOAD_FAST                2 (i)
             36 BUILD_TUPLE              2
             38 BINARY_SUBSCR
             40 CALL_METHOD              1
             42 INPLACE_ADD
             44 STORE_F

In [5]:
for code in go_fast.inspect_llvm().values():
    print(code)

; ModuleID = 'go_slow'
source_filename = "<string>"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@"_ZN08NumbaEnv8__main__11go_slow$241E5ArrayIxLi2E1C7mutable7alignedE" = common local_unnamed_addr global i8* null
@.const.picklebuf.140504373705856 = internal constant { i8*, i32 } { i8* getelementptr inbounds ([137 x i8], [137 x i8]* @.const.pickledata.140504373705856, i32 0, i32 0), i32 137 }
@.const.pickledata.140504373705856 = internal constant [137 x i8] c"\80\04\95~\00\00\00\00\00\00\00\8C\08builtins\94\8C\0AValueError\94\93\94\8C[array is too big; `arr.size * arr.dtype.itemsize` is larger than the maximum possible size.\94\85\94N\87\94."
@.const.go_slow = internal constant [8 x i8] c"go_slow\00"
@PyExc_RuntimeError = external global i8
@".const.missing Environment" = internal constant [20 x i8] c"missing Environment\00"
@PyExc_TypeError = external global i8
@".const.can't unbox array from PyObject into native value.  The obj