# [Numba: Flexible analytics written in Python with machine code speeds and avoiding](https://www.youtube.com/watch?v=QpaapVaL8Fw&list=PLcGdsSGfQXigXVXUNHWvO13l-Z_ecvPro)

Numba compiles Python code to machine code using the LLVM compiler infrastructure.

Demonstration

In [10]:
from numba import jit, vectorize
import numpy as np

In [6]:
def simple():
    """Return the sum of ``i / j`` for 0 <= i < 999 and 1 <= j < 999.

    Pure-Python baseline for the timing comparison: the double loop runs
    entirely in the interpreter.
    """
    acc = 0.0
    for numerator in range(999):
        for denominator in range(1, 999):
            # Accumulate term by term so the float rounding order is fixed.
            acc = acc + numerator / denominator
    return acc

In [7]:
# nopython=True is requested explicitly: if Numba cannot type the function it
# raises an error instead of silently compiling a slow object-mode version.
@jit(nopython=True)
def simple_jit():
    """Return the sum of ``i / j`` for 0 <= i < 999 and 1 <= j < 999.

    Same double loop as ``simple``, compiled by Numba so the loop body runs
    as machine code rather than in the interpreter.
    """
    total = 0.0
    for i in range(999):
        for j in range(1, 999):
            total += i / j
    return total

In [8]:
%timeit simple()

37.8 ms ± 28 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%timeit simple_jit()

955 µs ± 3.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# How Numba works

- @jit and @vectorize
- LLVM
```
The LLVM compiler infrastructure project is a "collection of modular and reusable compiler and toolchain technologies"[3] used to develop compiler front ends and back ends.
```
- **Features**
    - Supports Windows, OS X and Linux
    - CPUs and NVIDIA GPUs
    - Python 2 and 3
    - NumPy versions 1.6 through 1.9
    
- **Numba Modes**
    - object mode:
        - Compiled code operates on Python objects. The only significant performance improvement comes from compiling loops that can run in nopython mode (see below).
    - nopython mode: (-> machine code)
        - Compiled code operates on "machine native" data. Usually within 25% of the performance of equivalent C or FORTRAN

# The Basics

In [11]:
@jit(nopython=True)
def nan_compact(x):
    """Return the non-NaN elements of ``x`` in their original order.

    A buffer shaped like ``x`` is allocated once, the kept values are packed
    at its front, and the filled prefix is returned.
    Assumes ``x`` is a 1-D numeric array -- TODO confirm against callers.
    """
    packed = np.empty_like(x)
    n_kept = 0
    for value in x:
        if np.isnan(value):
            continue  # drop NaN entries
        packed[n_kept] = value
        n_kept += 1
    # Only the first n_kept slots were written; the rest is uninitialised.
    return packed[:n_kept]

In [27]:
# Draw 10,000 uniform random samples, then overwrite every value below 0.2
# with NaN so there is something to filter out.
a = np.random.uniform(size = 10000)
a[a < 0.2] = np.nan

In [26]:
a[np.logical_not(np.isnan(a))]

array([0.23903402, 0.37750529, 0.80901637, ..., 0.41273596, 0.81624855,
       0.60124823])

In [25]:
nan_compact(a)

array([0.23903402, 0.37750529, 0.80901637, ..., 0.41273596, 0.81624855,
       0.60124823])

In [28]:
# Verify that nan_compact yields the same values as NumPy boolean-mask filtering.
np.testing.assert_equal(nan_compact(a), a[np.logical_not(np.isnan(a))])

In [29]:
%timeit a[np.logical_not(np.isnan(a))]
%timeit nan_compact(a)

33.8 µs ± 337 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
5.72 µs ± 56.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


# Making UFuncs

ufuncs are universal functions: functions that operate element-wise on NumPy arrays and support broadcasting.

In [30]:
def game_wins(win_prob, max_wins, max_losses):
    """Simulate one run of rounds and return the number of wins.

    Rounds are played until either ``max_wins`` wins or ``max_losses``
    losses have been reached. Each round is won when a fresh
    ``np.random.rand()`` draw is below ``win_prob``.
    """
    won = 0
    lost = 0
    while True:
        still_playing = (won < max_wins) and (lost < max_losses)
        if not still_playing:
            break
        round_won = np.random.rand() < win_prob
        if round_won:
            won += 1
        else:
            lost += 1
    return won

In [42]:
@vectorize(nopython=True)
def game_wins_vec(win_prob, max_wins, max_losses):
    """Element-wise version of the win simulation, compiled as a ufunc.

    For each scalar triple, rounds are played until ``max_wins`` wins or
    ``max_losses`` losses are reached; a round is won when
    ``np.random.rand()`` is below ``win_prob``. Returns the win count.
    """
    won = 0
    lost = 0
    while True:
        still_playing = (won < max_wins) and (lost < max_losses)
        if not still_playing:
            break
        if np.random.rand() < win_prob:
            won += 1
        else:
            lost += 1
    return won

In [39]:
# Build a row of 100 evenly spaced win probabilities from 0.0 to 1.0 and
# repeat it 1000 times, then display the resulting shape.
sim_input = np.tile(np.linspace(0.0, 1.0, 100), (1000, 1))
sim_input.shape

(1000, 100)

In [43]:
sim_results = game_wins(sim_input, 12, 3)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [44]:
sim_results = game_wins_vec(sim_input, 12, 3)

In [45]:
%timeit sim_results = game_wins_vec(sim_input, 12, 3)

6.26 ms ± 31.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
