In [1]:
import numpy as np

# 50 evenly spaced sample points covering [0, 1]; 50 is linspace's default count.
x = np.linspace(0.0, 1.0, num=50)

In [None]:
# Element-wise expression evaluated on the whole array at once (no Python loop).
np.square(x) + np.square(np.sin(x))/np.exp(x)

## Beyond numpy
Some calculations cannot be performed efficiently with NumPy:
* NumPy needs a lot of memory (large temporary arrays)
* The operation is not implemented

Examples:
* Calculation of $\pi$ (with a very, very slowly converging formula!)
$$ \frac{\pi}{4} = \sum_{i=0}^{\infty} \frac{(-1)^i}{2i+1} = 1 - \frac{1}{3} + \frac{1}{5} - \frac{1}{7} + \ldots $$

* Operations similar to `cumsum`, where each output depends on the previous one
$$ y_n = f(y_{n-1}, x_n) $$

In [13]:
# Implementation in pure python
def calc_pi(N):
    """Approximate pi with the first N terms of the Leibniz series.

    Pure-Python reference implementation: the alternating terms
    1, -1/3, 1/5, ... are added one at a time, in order, and the
    partial sum is multiplied by 4.

    Parameters
    ----------
    N : int
        Number of series terms to add.

    Returns
    -------
    float
        The approximation of pi. When N <= 0 no term is added and the
        integer 0 is returned.
    """
    partial_sum = 0
    # The odd denominators 1, 3, 5, ... ; there are exactly N of them.
    for index, denominator in enumerate(range(1, 2*N, 2)):
        # Even-indexed terms are added, odd-indexed terms are subtracted.
        numerator = -1 if index % 2 else 1
        partial_sum += numerator/denominator
    return 4*partial_sum

# Sanity check with 10**4 terms, then time the function with 10**6 terms.
print(calc_pi(10000))
%timeit calc_pi(1000000)

3.1414926535900345
110 ms ± 1.23 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
# numpy
def calc_pi_numpy(N):
    """Approximate pi with the first N Leibniz-series terms, vectorized.

    Every term is built with whole-array NumPy operations and the terms
    are reduced with a single sum, so several temporary arrays of
    length N are allocated.

    Parameters
    ----------
    N : int
        Number of series terms to add.

    Returns
    -------
    numpy.float64
        The approximation of pi.
    """
    index = np.arange(N)
    # -0.5 for even indices, +0.5 for odd ones; the factor -8 applied to
    # the sum below turns these into the +4 / -4 weights of the series.
    half_signs = np.mod(index, 2) - .5
    odd_denominators = 2*index + 1
    terms = half_signs/odd_denominators
    return -8*terms.sum()

# Time the NumPy version with 10**6 terms.
%timeit calc_pi_numpy(1000000)
    

24.8 ms ± 200 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## ctypes
* Interface between Python and shared libraries (`.dll`, `.so`)
* Accelerate your code (this method is not recommended)
* Reuse existing code!
* Use closed-source libraries

No magic: you have to know C and deal with pointers, memory allocation, ...

In [15]:
import ctypes
# Load the compiled shared library using the C calling convention.
# The path is relative, so it depends on the current working directory.
lib = ctypes.CDLL('../calc_pi/libpi.so')

# Raw foreign function. No argtypes/restype are declared here, so ctypes
# falls back to its default argument conversion and an int return type.
_calc_pi = lib.calc_pi

# Wrapper to be python friendly
def calc_pi_ctypes(N):
    """Python-friendly wrapper around the raw `_calc_pi` library function.

    A C double initialised to 0 is passed by reference as the second
    argument of `_calc_pi`; the value it holds after the call is
    multiplied by 4 and returned.

    Parameters
    ----------
    N : int
        Forwarded unchanged as the first argument of `_calc_pi`
        (presumably the number of series terms -- confirm against the
        C source).

    Returns
    -------
    float
        Four times the value the library stored in the output double.
    """
    result = ctypes.c_double(0)
    # The C function reports its result through this pointer argument.
    result_ref = ctypes.byref(result)
    _calc_pi(N, result_ref)
    return 4*result.value

# Time the ctypes wrapper with 10**6 terms.
%timeit calc_pi_ctypes(10**6)

4.45 ms ± 19.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Numba 
Compile your Python code for free: just add a decorator.

In [19]:
import numba

# nopython=True is requested explicitly so that a typing problem raises an
# error instead of silently falling back to slow object mode, which older
# Numba releases do for a bare @jit. The explicit signature makes Numba
# compile the function at decoration time for a 32-bit integer argument
# and a float64 result.
@numba.jit(numba.float64(numba.int32), nopython=True)
def calc_pi_numba(N):
    """Approximate pi with the first N terms of the Leibniz series (Numba).

    Same loop as the pure-Python version, compiled to machine code by
    Numba.

    Parameters
    ----------
    N : int
        Number of series terms to add; declared as a 32-bit integer in
        the signature.

    Returns
    -------
    float
        The approximation of pi (0.0 when N <= 0).
    """
    # Start from a float so the accumulator has a single type in the loop.
    out = 0.0
    sgn = 1
    for i in range(N):
        out = out + sgn/(2*i + 1)
        sgn = -sgn
    return 4*out

# Time the Numba-compiled version with 10**6 terms.
%timeit calc_pi_numba(10**6)

4.49 ms ± 40.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
