In [None]:
# from multi_threading import threads

In [1]:
# from __future__ import print_function
%load_ext cython
import Cython
print(Cython.__version__)

0.29.23


### Data Synthesis
- 10 million incomes drawn uniformly at random from [5k, 500k)
- calculate the total tax

In [2]:
import random
import numpy as np
# 10,000,000 integer incomes drawn from [5000, 500000) -- randint's upper bound is
# exclusive -- then converted to float64.
# NOTE(review): no random seed is set, so the data (and every total computed from it)
# changes on each fresh run.
array1 = np.random.randint(5000, 500000, size=(10000000)).astype(np.float64)

In [3]:
# from tax import tottax_python
def tax_python(amount):
    """Return the tax owed on one income under a five-bracket progressive schedule.

    Brackets are checked from the lowest ceiling upwards; each taxed bracket
    is a fixed base amount plus a marginal rate applied to the part of
    ``amount`` above that bracket's floor.

    Args:
        amount: A single income value (any real number type).

    Returns:
        The integer ``0`` when ``amount`` is at most 18200, otherwise the
        computed tax as a float.
    """
    # Tax-free bracket.
    if amount <= 18200:
        return 0

    # 19% on everything above 18200.
    if amount <= 37000:
        over_floor = amount - 18200
        return 0.19 * over_floor

    # 3572 base plus 32.5% on everything above 37000.
    if amount <= 80000:
        over_floor = amount - 37000
        return 3572 + 0.325 * over_floor

    # 17547 base plus 37% on everything above 80000.
    if amount <= 180000:
        over_floor = amount - 80000
        return 17547 + 0.37 * over_floor

    # Top bracket: 54547 base plus 45% on everything above 180000.
    over_floor = amount - 180000
    return 54547 + 0.45 * over_floor

def tottax_python(incomes):
    """Add up the tax owed on every entry of ``incomes`` in pure Python.

    Args:
        incomes: An indexable object exposing ``shape[0]`` (the notebook
            passes a 1-D NumPy array).

    Returns:
        The running sum of ``tax_python(incomes[k])`` taken in index order,
        starting from the integer ``0``.
    """
    count = incomes.shape[0]
    running_total = 0
    # Accumulate strictly left to right so the floating-point result is the
    # sequential sum that the faster variants are compared against.
    for position in range(count):
        running_total = running_total + tax_python(incomes[position])
    return running_total

In [4]:
# Reference total from the pure-Python implementation; print_report compares
# every other implementation against this value.
py_result = tottax_python(array1)
# `-o` makes %timeit return its result object so the measurement can be reused.
timeit_result = %timeit -o tottax_python(array1)
# Mean time per call; this is the baseline passed to compare_time.
py_time_avg = timeit_result.average

8.12 s ± 106 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Time our code 
#### Credit: [Cython for NumPy users](https://github.com/cython/cython/blob/master/docs/examples/userguide/numpy_tutorial/numpy_and_cython.ipynb)

In [5]:
def compare_time(current, reference, name):
    """Print how many times faster or slower ``current`` is than ``reference``.

    Args:
        current: Run time of the implementation being reported.
        reference: Run time of the baseline implementation.
        name: Label of the baseline, inserted into the printed sentence.

    The speed-up is ``reference / current``. A value above 1 is reported as
    "faster"; anything else is inverted and reported as "slower".
    """
    speedup = reference / current
    if speedup > 1:
        direction = "faster"
        factor = speedup
    else:
        # Invert so the printed factor is always expressed as "N times ...".
        direction = "slower"
        factor = 1 / speedup

    print("We are", "{0:.1f}".format(factor), "times", direction, "than the", name, "version.")

def print_report(compute_function):
    if np.all(compute_function(array1) == py_result):
        print("exactly same result!")
    else:
        print("not same answer!, the difference between them is {0:.5f}".format(compute_function(array1) - py_result))
    timeit_result = %timeit -o compute_function(array1)
    run_time = timeit_result.average
    compare_time(run_time, py_time_avg, "pure Python")

In [6]:
%%cython 
# from tax_c import tottax_python
cdef double tax_cython(double amount):
    # C-level tax for a single income. Bracket ceilings are tested from the
    # lowest upwards and the function returns as soon as `amount` fits one.
    # Being `cdef`, it is only callable from Cython code in this same cell.
    if amount <= 18200:
        return 0
    if amount <= 37000:
        return 0.19 * (amount - 18200)
    if amount <= 80000:
        return 3572 + 0.325 * (amount - 37000)
    if amount <= 180000:
        return 17547 + 0.37 * (amount - 80000)
    # Top bracket: everything above 180000.
    return 54547 + 0.45 * (amount - 180000)

def tottax_python(incomes):
    """Sum the tax over ``incomes`` using the compiled ``tax_cython`` helper.

    Only the per-income tax logic is typed C code here; the loop, the index
    and the accumulator are still untyped Python objects.

    NOTE(review): this reuses the name of the pure-Python ``tottax_python``
    defined earlier in the notebook, so running this cell replaces it.

    Args:
        incomes: An indexable object exposing ``shape[0]``.

    Returns:
        The sum of ``tax_cython(incomes[k])`` taken in index order.
    """
    count = incomes.shape[0]
    running_total = 0
    for position in range(count):
        running_total = running_total + tax_cython(incomes[position])
    return running_total

In [7]:
# `tottax_python` now refers to the function compiled by the %%cython cell above,
# which replaced the pure-Python definition of the same name.
print_report(tottax_python)

exactly same result!
812 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
We are 10.0 times faster than the pure Python version.


In [8]:
# Run the same report a second time (repeat measurement).
print_report(tottax_python)

exactly same result!
833 ms ± 16.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
We are 9.7 times faster than the pure Python version.


In [18]:
# Same report, but using `tottax_python` imported from the `tax_c` module
# instead of the inline %%cython cell.
# NOTE(review): this import rebinds `tottax_python` yet again; `tax_c` is
# presumably the compiled file version of the cell above -- confirm.
from tax_c import tottax_python
print_report(tottax_python)

809 ms ± 4.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
We are 10.2 times faster than the pure Python version.


In [None]:
# Both the tax logic and the summation loop are written in Cython.

In [40]:
# NOTE(review): `tottax_cython` is not defined by any cell above this one; this
# presumably relies on `from tax_c_c import tottax_cython` (executed in a later
# cell) having been run first -- confirm the intended cell order.
print_report(tottax_cython)

exactly same!
43.6 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
We are 188.5 times faster than the pure Python version.


In [None]:
# %load tax_c_c.pyx

cpdef double tax_cython(double amount):
    """Return the tax owed on one income under the five-bracket schedule.

    Declared ``cpdef`` so it can be called both from Python and, without
    Python call overhead, from other Cython code in this module.
    """
    cdef double owed
    if amount <= 18200:
        # Tax-free bracket.
        owed = 0
    elif amount <= 37000:
        owed = 0.19 * (amount - 18200)
    elif amount <= 80000:
        owed = 3572 + 0.325 * (amount - 37000)
    elif amount <= 180000:
        owed = 17547 + 0.37 * (amount - 80000)
    else:
        # Top bracket: everything above 180000.
        owed = 54547 + 0.45 * (amount - 180000)
    return owed

cpdef double tottax_cython(double[:] incomes):
    """Sum the tax over a 1-D float64 memoryview with a fully typed loop.

    Args:
        incomes: One-dimensional typed memoryview of doubles (for example a
            float64 NumPy array).

    Returns:
        The sum of ``tax_cython(incomes[i])`` taken in index order.
    """
    # Memoryview shapes are Py_ssize_t; a C `int` index/length would be
    # truncated for arrays longer than INT_MAX elements.
    cdef Py_ssize_t i
    cdef Py_ssize_t n = incomes.shape[0]
    cdef double tot = 0
    for i in range(n):
        tot += tax_cython(incomes[i])
    return tot


In [27]:
## Both logic and loop are written in Cython (tax_c_c); the GIL is NOT released in this version.
from tax_c_c import tottax_cython
print_report(tottax_cython)

exactly same result!
43.1 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
We are 188.3 times faster than the pure Python version.


In [None]:
# %load tax_c_nogil.pyx

cpdef double tax_cython(double amount) nogil:
    """Return the tax owed on one income; callable without holding the GIL.

    Bracket ceilings are tested from the lowest upwards and the function
    returns as soon as ``amount`` fits under one.
    """
    if amount <= 18200:
        return 0
    if amount <= 37000:
        return 0.19 * (amount - 18200)
    if amount <= 80000:
        return 3572 + 0.325 * (amount - 37000)
    if amount <= 180000:
        return 17547 + 0.37 * (amount - 80000)
    # Top bracket: everything above 180000.
    return 54547 + 0.45 * (amount - 180000)

cpdef double tottax_cython(double[:] incomes):
    """Sum the tax over a 1-D float64 memoryview with the GIL released.

    The loop touches only C-typed values and the ``nogil`` function
    ``tax_cython``, so it can run inside ``with nogil`` and execute in
    parallel with other Python threads.

    Args:
        incomes: One-dimensional typed memoryview of doubles (for example a
            float64 NumPy array).

    Returns:
        The sum of ``tax_cython(incomes[i])`` taken in index order.
    """
    # Memoryview shapes are Py_ssize_t; a C `int` index/length would be
    # truncated for arrays longer than INT_MAX elements.
    cdef Py_ssize_t i
    cdef Py_ssize_t n = incomes.shape[0]
    cdef double tot = 0
    with nogil:
        for i in range(n):
            tot += tax_cython(incomes[i])
    # Return after the GIL has been re-acquired rather than from inside the
    # nogil block.
    return tot


In [33]:
## Both logic and loop are written in Cython, and the loop runs with the GIL released (tax_c_nogil).

from tax_c_nogil import tottax_cython
print_report(tottax_cython)

exactly same result!
45.9 ms ± 879 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
We are 176.9 times faster than the pure Python version.


### multithreading

In [34]:
from concurrent.futures import ThreadPoolExecutor
# from tax_c_nogil import tottax_cython

In [35]:
def threads(incomes, n_workers=10):
    """Compute the total tax by summing chunks of ``incomes`` on a thread pool.

    ``incomes`` is split into ``n_workers`` nearly equal sections, each
    section is passed to ``tottax_cython`` on its own worker thread, and the
    partial totals are added together. Threads only run in parallel here if
    ``tottax_cython`` releases the GIL (the notebook imports it from
    ``tax_c_nogil``, whose loop runs under ``with nogil``).

    Because the partial sums are combined afterwards, the result can differ
    from a single sequential sum in the last few floating-point digits.

    Args:
        incomes: 1-D NumPy array of incomes.
        n_workers: Number of sections and worker threads. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        The sum of the per-section totals, added in section order.
    """
    sections = np.array_split(incomes, n_workers)
    with ThreadPoolExecutor(max_workers=n_workers) as exe:
        jobs = [exe.submit(tottax_cython, s) for s in sections]

    # Leaving the `with` block waits for every job, so result() does not block.
    return sum(job.result() for job in jobs)

In [36]:
# Quick check: total tax computed across the worker threads.
threads(array1)

900765351089.0531

In [None]:
# Pure-Python reference total, displayed for comparison with the threaded total above.
py_result

In [37]:
# threads() adds up ten partial sums, so its floating-point rounding can differ
# slightly from the single sequential sum used as the reference.
print_report(threads)

not same answer!, the difference between them is -0.03491
7.84 ms ± 132 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
We are 1035.4 times faster than the pure Python version.


Inspired by the talk [Easy wins with Cython: fast and multi-core by Caleb Hattingh](https://www.youtube.com/watch?v=NfnMJMkhDoQ&ab_channel=PyConAU) and the [Cython for NumPy users](https://github.com/cython/cython/blob/master/docs/examples/userguide/numpy_tutorial/numpy_and_cython.ipynb) notebook. This notebook contains both the file-based (`.pyx`) implementations and a plain IPython-style (`%%cython`) implementation.

In [None]:
# Multithreading is normally suited to I/O-bound tasks (e.g. network, disk I/O, or user interaction).
# Multiprocessing is normally suited to CPU-bound tasks; threads only helped above because the Cython loop releases the GIL.