In [1]:
%load_ext cython

In [2]:
import numpy as np

In [4]:
def make_arrays(n=1000):
    """Create the four random input arrays used by the benchmarks.

    Each array holds ``n`` samples drawn with ``np.random.rand`` and the
    draws happen in the order ``a``, ``b``, ``c``, ``x``.

    Parameters
    ----------
    n : int, optional
        Length of every array (default 1000).

    Returns
    -------
    tuple
        ``(a, b, c, x)``, four independent 1-D arrays of length ``n``.
    """
    # The generator is consumed left to right, so the global RNG is advanced
    # in exactly the a, b, c, x order.
    return tuple(np.random.rand(n) for _ in range(4))

### The numpy implementation

In [5]:
def using_numpy(a, b, c, x):
    """Evaluate the quadratic ``a*x**2 + b*x + c`` using array operators.

    When the arguments are NumPy arrays, every operation is elementwise and
    each step allocates a temporary array.
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

### The Cython version (explicit typed loop)

In [7]:
%%cython
cimport cython
import numpy as np

# Element-wise kernel: answer[i] = a[i]*x[i]**2 + b[i]*x[i] + c[i].
#
# The loop length is taken from `a`. Bounds checking and negative-index
# wraparound are both disabled, so the caller MUST pass five buffers with at
# least a.shape[0] elements each; otherwise this reads/writes out of bounds.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void fn(double[:] a, double[:] b, double[:] c,
             double[:] x, double[:] answer):
    # Py_ssize_t is the type of memoryview shapes/indices; a plain C int
    # would truncate the length for arrays longer than INT_MAX.
    cdef Py_ssize_t n = a.shape[0]
    cdef Py_ssize_t i  # declared explicitly so the loop is a pure C loop
    cdef double xi
    for i in range(n):
        xi = x[i]  # read x[i] once instead of twice
        answer[i] = a[i] * xi**2 + b[i]*xi + c[i]
        
def using_cython(a, b, c, x):
    """Evaluate ``a*x**2 + b*x + c`` element-wise with the compiled loop ``fn``.

    Parameters
    ----------
    a, b, c, x : array-like of float64, 1-D
        Coefficient and input arrays. All four must have the same shape.

    Returns
    -------
    numpy.ndarray
        New array shaped like ``x`` holding the result.

    Raises
    ------
    ValueError
        If ``a``, ``b`` or ``c`` does not have the same shape as ``x``.
    """
    # fn() runs with bounds checking disabled and iterates over a.shape[0],
    # while the output is sized from x; mismatched lengths would therefore
    # read or write out of bounds. Reject them up front.
    expected_shape = np.shape(x)
    for name, arr in (("a", a), ("b", b), ("c", c)):
        if np.shape(arr) != expected_shape:
            raise ValueError(
                "shape mismatch: %s has shape %s but x has shape %s"
                % (name, np.shape(arr), expected_shape)
            )
    # Every element is overwritten by fn(), so zero-filling is unnecessary.
    answer = np.empty_like(x)
    fn(a, b, c, x, answer)
    return answer

# Comparisons

### At n = 1k

In [10]:
a,b,c,x = make_arrays(n=1000)
%timeit result = using_numpy(a, b, c, x)
%timeit result = using_cython(a, b, c, x)

The slowest run took 7.44 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 8.33 µs per loop
10000 loops, best of 3: 20 µs per loop


### At n = 100k

In [11]:
# Rebind a, b, c, x to fresh random arrays of 100,000 elements each, then
# time both implementations on those same inputs.
a,b,c,x = make_arrays(n=100000)
%timeit result = using_numpy(a, b, c, x)
%timeit result = using_cython(a, b, c, x)

The slowest run took 4.92 times longer than the fastest. This could mean that an intermediate result is being cached 
1000 loops, best of 3: 474 µs per loop
1000 loops, best of 3: 396 µs per loop


### At n = 1M

In [12]:
# Rebind a, b, c, x to fresh random arrays of 1,000,000 elements each, then
# time both implementations on those same inputs.
a,b,c,x = make_arrays(n=1000000)
%timeit result = using_numpy(a, b, c, x)
%timeit result = using_cython(a, b, c, x)

100 loops, best of 3: 19.1 ms per loop
100 loops, best of 3: 6.97 ms per loop


### At n = 10M

In [13]:
# Rebind a, b, c, x to fresh random arrays of 10,000,000 elements each, then
# time both implementations on those same inputs.
a,b,c,x = make_arrays(n=10000000)
%timeit result = using_numpy(a, b, c, x)
%timeit result = using_cython(a, b, c, x)

10 loops, best of 3: 192 ms per loop
10 loops, best of 3: 69.4 ms per loop


### At n = 100M (need about 5 GB memory free)

In [15]:
# int(100e6) turns the float literal into the integer 100000000 before it is
# used as the array length. Four float64 arrays of that length take roughly
# 3.2 GB on their own; each timed call allocates a result array on top.
a,b,c,x = make_arrays(n=int(100e6))
%timeit result = using_numpy(a, b, c, x)
%timeit result = using_cython(a, b, c, x)

1 loops, best of 3: 1.88 s per loop
1 loops, best of 3: 637 ms per loop
