In [69]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline

### <font color="blue"> Demo: *just-in-time* compilation</font>

In [70]:
from numba import jit, vectorize, float64

In [71]:
@jit(nopython=True)
def step_numba():
    """Draw one random step: +1.0 if random.random() exceeds 0.5, otherwise -1.0."""
    if random.random() > .5:
        return 1.
    return -1.

In [72]:
@jit(nopython=True)
def walk_numba(n):
    """Simulate an ``n``-point random walk that resets when it climbs too high.

    The walk starts at 0 and moves by ``step_numba() / n`` at every step.
    Whenever the proposed position exceeds 5e-3, the walker is put back
    to 0 instead of taking the step.

    Returns the array of the ``n`` visited positions.
    """
    path = np.zeros(n)
    step_size = 1. / n
    for idx in range(1, n):
        proposal = path[idx - 1] + step_size * step_numba()
        # Reset to the origin once the threshold is crossed.
        path[idx] = 0. if proposal > 5e-3 else proposal
    return path

In [73]:
# Number of positions in the random walk that is timed in the next cell.
n = 100000

In [74]:
%%timeit
walk_numba(n)

The slowest run took 86.08 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 2.42 ms per loop


In [75]:
@jit
def primes_jit(kmax):
    """Return the first ``kmax`` primes as a list (``kmax`` is capped at 1000).

    Each candidate, starting from 2, is trial-divided by every prime found
    so far; it is kept when none of them divides it.
    """
    limit = min(kmax, 1000)
    found = []
    count = 0
    candidate = 2
    while count < limit:
        idx = 0
        while idx < count:
            if candidate % found[idx] == 0:
                break
            idx += 1
        # idx only reaches count when no known prime divided the candidate.
        if idx == count:
            found.append(candidate)
            count += 1
        candidate += 1
    return found

In [76]:
%%timeit
p = primes_jit(100)
#print p

The slowest run took 2221.95 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 64.9 µs per loop


### <font color="blue"> Demo: the Mandelbrot fractal</font>

In [77]:
# Side length of the square result array (the grid is size x size points).
size = 200
# Maximum number of z -> z*z + c iterations evaluated per grid point.
iterations = 100

#### First cython attempt

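First, just add the `%%cython` cell magic. The Cython extension must already be loaded (`%load_ext Cython`) for this cell to run on a fresh kernel.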

In [78]:
%%cython -a
import numpy as np

def mandelbrot_cython(m, size, iterations):
    """Fill ``m`` in place with Mandelbrot iteration counts on a size x size grid.

    Grid point (row, col) corresponds to
    ``c = -2 + 3/size*col + 1j*(1.5 - 3/size*row)``. Starting from z = 0,
    ``z = z*z + c`` is applied at most ``iterations`` times, stopping as
    soon as ``|z|`` exceeds 10. ``m[row, col]`` ends up holding the index of
    the last iteration that was carried out. Nothing is returned.
    """
    for row in range(size):
        for col in range(size):
            c = -2 + 3./size*col + 1j*(1.5-3./size*row)
            z = 0
            for step in range(iterations):
                # Escaped: leave m[row, col] at the last completed step.
                if np.abs(z) > 10:
                    break
                z = z*z + c
                m[row, col] = step

In [79]:
%%timeit -n1 -r1 
m = np.zeros((size, size), dtype=np.int32)
mandelbrot_cython(m, size, iterations)

1 loop, best of 1: 1.76 s per loop


Small speedup

#### Second attempt

Now add type information, use *memory views* for NumPy arrays, and replace `np.abs()` with an explicit test on the squared magnitude

In [80]:
%%cython -a
import numpy as np

def mandelbrot_cython(int[:,::1] m,
                      int size,
                      int iterations):
    """Typed variant: fill the C-contiguous int memoryview ``m`` in place.

    Grid point (row, col) corresponds to
    ``c = -2 + 3/size*col + 1j*(1.5 - 3/size*row)``. Starting from z = 0,
    ``z = z*z + c`` is applied at most ``iterations`` times, stopping as
    soon as the squared magnitude of z exceeds 100. ``m[row, col]`` ends up
    holding the index of the last iteration that was carried out.
    """
    cdef int row, col, step
    cdef complex z, c
    for row in range(size):
        for col in range(size):
            c = -2 + 3./size*col + 1j*(1.5-3./size*row)
            z = 0
            for step in range(iterations):
                # Compare squared magnitudes to avoid a square root per step.
                if z.real**2 + z.imag**2 > 100:
                    break
                z = z*z + c
                m[row, col] = step

In [81]:
%%timeit -n1 -r1 m = np.zeros((size, size), dtype=np.int32)
mandelbrot_cython(m, size, iterations)

1 loop, best of 1: 3.31 ms per loop


Huge speedup!

### <font color="red"> *Exercise: calculate primes*</font>

In [82]:
def primes(kmax):
    """Return the first ``kmax`` primes as a list (``kmax`` is capped at 1000).

    Pure-Python baseline: each candidate, starting from 2, is trial-divided
    by every prime found so far and kept when none of them divides it.
    """
    limit = min(kmax, 1000)
    found = []
    candidate = 2
    while len(found) < limit:
        for prime in found:
            if candidate % prime == 0:
                break
        else:
            # No known prime divides the candidate, so it is prime.
            found.append(candidate)
        candidate += 1
    return found

In [83]:
%%timeit
p = primes(100)

100 loops, best of 3: 2.13 ms per loop


In [84]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [85]:
%%cython
def primes_simplecython(kmax):
    """Return the first ``kmax`` primes as a list (``kmax`` is capped at 1000).

    Same untyped algorithm as the pure-Python version, only compiled through
    the ``%%cython`` cell magic: each candidate, starting from 2, is
    trial-divided by every prime found so far.
    """
    limit = min(kmax, 1000)
    found = []
    candidate = 2
    while len(found) < limit:
        for prime in found:
            if candidate % prime == 0:
                break
        else:
            # No known prime divides the candidate, so it is prime.
            found.append(candidate)
        candidate += 1
    return found

In [86]:
%%timeit
p = primes_simplecython(100)

1000 loops, best of 3: 387 µs per loop


In [87]:
%%cython
#%%cython --compile-args=-fopenmp --link-args=-fopenmp
def primes_cython(int kmax):  # The argument is converted to a C int or a TypeError is raised.
    """Return the first ``kmax`` primes as a Python list (``kmax`` is capped at 1000).

    The primes found so far are kept in a fixed-size C array, which is why
    the request is clamped to the array length before the search starts.
    """
    cdef int candidate, count, idx  # Loop state lives in C variables.
    cdef int found[1000]            # C array holding the primes found so far.
    result = []                     # Python list handed back to the caller.
    if kmax > 1000:
        kmax = 1000
    count = 0
    candidate = 2
    while count < kmax:
        for idx in range(count):
            if candidate % found[idx] == 0:
                break
        else:
            # No stored prime divides the candidate, so record it.
            found[count] = candidate
            count += 1
            result.append(candidate)
        candidate += 1
    return result

In [88]:
%%timeit
p = primes_cython(100)

10000 loops, best of 3: 31.8 µs per loop


In [89]:
# NOTE(review): primes_jit is also defined in the just-in-time demo section
# earlier in this notebook; this cell re-defines the same function.
@jit
def primes_jit(kmax):
    """Return the first ``kmax`` primes as a list (``kmax`` is capped at 1000).

    Each candidate, starting from 2, is trial-divided by every prime found
    so far; it is kept when none of them divides it.
    """
    limit = min(kmax, 1000)
    found = []
    count = 0
    candidate = 2
    while count < limit:
        idx = 0
        while idx < count:
            if candidate % found[idx] == 0:
                break
            idx += 1
        # idx only reaches count when no known prime divided the candidate.
        if idx == count:
            found.append(candidate)
            count += 1
        candidate += 1
    return found

In [90]:
%%timeit
p = primes_jit(100)


The slowest run took 1489.29 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 80.4 µs per loop


`cache=True` stores the compiled function in a file and avoids re-compilation on re-running a Python program

In [100]:
@jit(cache=True)
def primes_jit2(kmax):
    """Return the first ``kmax`` primes as a list (``kmax`` is capped at 1000).

    Same trial-division algorithm as ``primes_jit``; the only difference is
    that the decorator is given ``cache=True``.
    """
    limit = min(kmax, 1000)
    found = []
    count = 0
    candidate = 2
    while count < limit:
        idx = 0
        while idx < count:
            if candidate % found[idx] == 0:
                break
            idx += 1
        # idx only reaches count when no known prime divided the candidate.
        if idx == count:
            found.append(candidate)
            count += 1
        candidate += 1
    return found

In [101]:
%%timeit
p = primes_jit2(100)


The slowest run took 178.45 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 65.2 µs per loop
