# Algorithms... are regularly used for performance benchmarks

## Loops are of most importance in Finance

In [1]:
import random
def average_py(n):
    s = 0
    for i in range(n):
        s += random.random()
    return s / n

In [2]:
n = 100000000

In [3]:
%time average_py(n)

CPU times: user 9.16 s, sys: 11.3 ms, total: 9.17 s
Wall time: 9.19 s


0.5000347979995398

In [4]:
# Times the function several times for a more reliable estimate
%timeit average_py(n) 

9.26 s ± 60.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Alt. uses list comprehension instead of a function
%time sum([random.random() for _ in range(n)]) / n

CPU times: user 10.7 s, sys: 1.13 s, total: 11.8 s
Wall time: 11.9 s


0.4999947240859887

### Vectorization with Numpy

#### Note to self -- It is tempting to wrie vectorized code with NumPy whenever possible due to concise syntax and speed improvements typically observed. However, these benifits often come at the proce of a much higher memory footprint.

In [6]:
import numpy as np

In [7]:
def average_np(n):
    s = np.random.random(n)
    return s.mean()

In [8]:
%time average_np(n)

CPU times: user 999 ms, sys: 191 ms, total: 1.19 s
Wall time: 1.22 s


0.5000093768085916

In [9]:
%timeit average_np(n)

1.14 s ± 16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Numba 

#### Numba (numba.pydata.org) is a package that allows the dynamic compiling of pure Python code by the use of LLVM. 

In [10]:
import numba

In [11]:
average_nb = numba.jit(average_py)
%time average_nb(n)

CPU times: user 700 ms, sys: 39.5 ms, total: 739 ms
Wall time: 799 ms


0.49998987802442635

In [12]:
# Second execution should be much faster
%time average_nb(n)

CPU times: user 570 ms, sys: 2.16 ms, total: 572 ms
Wall time: 573 ms


0.500047261872069

In [13]:
# Very good average because the code was compiled one and then reused
%timeit average_nb(n)

586 ms ± 23.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Cython

In [14]:
%load_ext Cython

In [15]:
%%cython -a
import random 

def average_cy1(int n):
    cdef int i
    cdef float s = 0
    for i in range(n):
        s += random.random()
    return s / n
    

In [16]:
%time average_cy1(n)

CPU times: user 4.94 s, sys: 40.3 ms, total: 4.98 s
Wall time: 5.01 s


0.16777215898036957

In [17]:
%timeit average_cy1(n)

4.81 s ± 41.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%cython 
from libc.stdlib cimport rand
cdef extern from 'limits.h':
    int INT_MAX
cdef int i
cdef float rn
for i in range(5):
    rn = rand() / INT_MAX
    print(rn)

0.6792964339256287
0.934692919254303
0.3835020661354065
0.5194163918495178
0.8309653401374817


In [19]:
%%cython -a
from libc.stdlib cimport rand
cdef extern from 'limits.h':
    int INT_MAX

def average_cy2(int n):
    cdef int i 
    cdef float s = 0
    for i in range(n):
        s += rand() / INT_MAX
    return s / n

In [20]:
%time average_cy2(n)

CPU times: user 498 ms, sys: 1.69 ms, total: 500 ms
Wall time: 499 ms


0.16777215898036957

In [21]:
%timeit average_cy2(n)

512 ms ± 16.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Prime numbers factorization

In [22]:
# Base case
def is_prime(I):
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True

In [23]:
n = int(1e8 + 3)

In [24]:
n

100000003

In [25]:
%time is_prime(n)

CPU times: user 24 µs, sys: 4 µs, total: 28 µs
Wall time: 28.8 µs


False

In [26]:
p1 = int(1e8 + 7)

In [27]:
p1

100000007

In [28]:
%time is_prime(p1)

CPU times: user 317 µs, sys: 0 ns, total: 317 µs
Wall time: 320 µs


True

In [29]:
p2 = 100109100129162907

In [30]:
p2.bit_length()

57

In [31]:
%time is_prime(p2)

CPU times: user 14 s, sys: 34.2 ms, total: 14.1 s
Wall time: 14.1 s


True

### Numba

In [32]:
is_prime_nb = numba.jit(is_prime)

In [33]:
%time is_prime_nb(n)

CPU times: user 74.5 ms, sys: 3.58 ms, total: 78.1 ms
Wall time: 83 ms


False

In [34]:
%time is_prime_nb(n)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 7.15 µs


False

In [35]:
%time is_prime_nb(p1)

CPU times: user 18 µs, sys: 1 µs, total: 19 µs
Wall time: 18.8 µs


True

In [36]:
%time is_prime_nb(p1)

CPU times: user 18 µs, sys: 0 ns, total: 18 µs
Wall time: 20.3 µs


True

In [37]:
%time is_prime_nb(p2)

CPU times: user 1.28 s, sys: 4.14 ms, total: 1.28 s
Wall time: 1.28 s


True

In [38]:
%time is_prime_nb(p2)

CPU times: user 1.38 s, sys: 16 ms, total: 1.39 s
Wall time: 1.43 s


True

### Cython 

In [39]:
%%cython 

def is_prime_cy1(I):
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) + 1, 2):
        if I % i == 0:
            return False
    return True

In [40]:
%timeit is_prime(p1)

287 µs ± 4.72 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [41]:
%timeit is_prime_cy1(p1)

177 µs ± 3.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [42]:
%%cython 

def is_prime_cy2(long I):
    cdef long i
    if I % 2 == 0:
        return False
    for i in range(3, int(I ** 0.5) +1, 2):
        if I % i == 0:
            return False
    return True

In [43]:
%timeit is_prime_cy2(p1)

37.9 µs ± 97.2 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [44]:
%time is_prime_nb(p2)

CPU times: user 1.28 s, sys: 4.43 ms, total: 1.28 s
Wall time: 1.28 s


True

In [45]:
%time is_prime_cy2(p2)

CPU times: user 1.2 s, sys: 3.98 ms, total: 1.2 s
Wall time: 1.21 s


True

### Multiplocessing 

In [46]:
import multiprocessing as mp

In [47]:
pool = mp.Pool(processes=4)

In [48]:
%time pool.map(is_prime, 10 * [p1])

CPU times: user 1.52 ms, sys: 2.09 ms, total: 3.61 ms
Wall time: 4.87 ms


[True, True, True, True, True, True, True, True, True, True]

In [49]:
%time pool.map(is_prime_nb, 10 * [p2])

CPU times: user 6.21 ms, sys: 2.9 ms, total: 9.11 ms
Wall time: 5.67 s


[True, True, True, True, True, True, True, True, True, True]

In [50]:
%time pool.map(is_prime_cy2, 10 * [p2])

CPU times: user 3.92 ms, sys: 1.96 ms, total: 5.88 ms
Wall time: 5.15 s


[True, True, True, True, True, True, True, True, True, True]

### Fibonacci Numbers

#### Recursive

In [52]:
def fib_rec_py1(n):
    if n < 2:
        return n
    else:
        return fib_rec_py1(n-1) + fib_rec_py1(n-2)


In [53]:
%time fib_rec_py1(35)

CPU times: user 2.75 s, sys: 15.9 ms, total: 2.76 s
Wall time: 2.78 s


9227465

In [54]:
fib_rec_nb = numba.jit(fib_rec_py1)

In [55]:
%time fib_rec_nb(35)

CPU times: user 2.8 s, sys: 25.6 ms, total: 2.83 s
Wall time: 2.84 s


9227465

In [56]:
%%cython
def fib_rec_cy(int n):
    if n < 2:
        return n
    else:
        return fib_rec_cy(n-1) + fib_rec_cy(n-2)
    

In [57]:
%time fib_rec_cy(35)

CPU times: user 562 ms, sys: 3.47 ms, total: 565 ms
Wall time: 565 ms


9227465

#### The major problem with recursice algorithm is that intermediate results are not cached but rather recalculated. To avoid this particular problem, a decorator can be used that takes care of the caching of the intermediate results. This speeds up the execution by multiple orders of magnitude:

In [58]:
from functools import lru_cache as cache

In [59]:
@cache(maxsize=None)
def fib_rec_py2(n):
    if n < 2:
        return n 
    else:
        return fib_rec_py2(n-1) + fib_rec_py2(n-2)

In [60]:
%time fib_rec_py2(35)

CPU times: user 19 µs, sys: 14 µs, total: 33 µs
Wall time: 37.2 µs


9227465

In [61]:
%time fib_rec_py2(80)

CPU times: user 36 µs, sys: 5 µs, total: 41 µs
Wall time: 45.1 µs


23416728348467685

#### Iterative

In [62]:
def fib_it_py(n):
    x, y = 0, 1
    for i in range(1, n + 1):
        x, y = y, x + y 
        return x

In [63]:
%time fib_it_py(80)

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 6.91 µs


1