# Making Your Code Faster: Cython and parallel processing in the Jupyter Notebook  
  
### PyData DC 2016  
  
*Gustavo A. Patino*  
*Department of Biomedical Sciences*  
*Department of Neurology*  
*Oakland University William Beaumont School of Medicine*  
*Rochester, MI*  
  
patino@oakland.edu  
https://github.com/gapatino/Making-Your-Code-Faster-Cython-and-parallel-processing-in-the-Jupyter-Notebook  

### Problem:  
The function $y=x^{2}$ can be approximated using its derivative $y'=2x$ through the Euler method:  
$y_{(n+1)} = y_n + (step*y')$  
The precision of the approximation depends on the step being very small.  
We want to find the step size that gives a difference < 1e-5 when comparing the values obtained  
using the $y=x^{2}$ formula and the Euler method after evaluating a million points  
  
*Note how a step size of 1 means we will evaluate values of x between 0 and 1000000, while a step size*  
*of 0.001 means that x ranges from 0 to 1000*

In [None]:
# Define the function that will calculate values of y using both the y=x**2 formula and y'=2x using the Euler method
# Input is a step size for the Euler method; 1000001 values of x (0 to 1000000 steps) will be evaluated
# Function returns the absolute difference between the last value of both sets of calculations

def errorEuler(step_size, n_steps=1000000):
    """Return how far Euler integration of y' = 2x ends from the exact y = x**2.

    The grid is x_k = k * step_size for k = 0..n_steps.  The exact curve is
    evaluated on the whole grid, and the Euler recurrence
    y_(n+1) = y_n + step_size * 2 * x_n is run across the same grid.

    Args:
        step_size: Spacing between consecutive x values.  A value of 0 raises
            ZeroDivisionError when 1/step_size is computed.
        n_steps: Number of Euler steps to take; n_steps + 1 grid points are
            evaluated.  Defaults to the one million steps used in the rest of
            the notebook.

    Returns:
        The absolute difference between the exact value and the Euler value at
        the last grid point.
    """
    # Range only produces integers, so the grid is obtained by dividing the
    # integers 0..n_steps by (1/step_size)
    scale = 1 / step_size
    values0 = [k / scale for k in range(n_steps + 1)]
    # Exact values of x**2 on the grid
    values_squared = [x**2 for x in values0]
    # Euler trajectory, seeded with the first increment (zero, because x_0 = 0)
    values_euler = [2 * values0[0] * step_size]
    # Each new value must build on the value appended just before it
    # (values_euler[-1]) using the slope at the previous grid point; the slopes
    # needed are therefore those at x_0 .. x_(n_steps-1)
    for x in values0[:-1]:
        values_euler.append(values_euler[-1] + 2 * x * step_size)
    return abs(values_squared[-1] - values_euler[-1])


In [None]:
# Baseline: final-point difference between x**2 and the Euler estimate when the step size is 1
print(errorEuler(1))

In [None]:
%%timeit
# Shrink the Euler step by factors of ten, starting from 1, until the final difference
# returned by errorEuler is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

### Cython version

In [None]:
%load_ext Cython

In [None]:
%%cython

# Make Cython version of errorEuler
def errorEuler_cython(double step_size, int n_steps=1000000):
    """Return how far Euler integration of y' = 2x ends from the exact y = x**2.

    Cython counterpart of errorEuler: the grid is x_k = k * step_size for
    k = 0..n_steps, and the Euler recurrence y_(n+1) = y_n + step_size*2*x_n
    is run across it.

    step_size is declared as a C double (not float) so that it carries the
    same precision as the Python float passed in by the caller.
    n_steps is the number of Euler steps (n_steps + 1 grid points); it
    defaults to the one million steps used in the rest of the notebook.

    Returns the absolute difference between the exact and the Euler value at
    the last grid point.
    """
    # Range function only accepts integer values, to create list of values to evaluate will need to divide
    # the list of integers by (1/step_size)
    cdef list values0 = list(range(0, n_steps + 1))
    # Initialize variable that will keep results of x**2
    cdef list values_squared = []
    for index, value in enumerate(values0):
        values0[index] = value / (1/step_size)
        values_squared.append(values0[index]**2)
    # Calculate values of x**2 using Euler method and y'=2x
    # Start with initializing the variable that will contain those results
    cdef list values_euler = [(2*values0[0]*step_size)]
    # Each new value builds on the value appended just before it (values_euler[-1])
    # and the slope at the previous grid point values0[index]
    for index in range(n_steps):
        values_euler.append(values_euler[-1]+(2*values0[index]*step_size))
    return abs(values_squared[-1] - values_euler[-1])

In [None]:
%%timeit
# Shrink the Euler step by factors of ten, starting from 1, until the final difference
# returned by errorEuler_cython is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler_cython(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

Maybe the first time was slow while doing the initial compilation?

In [None]:
%%timeit
# Second timing of the same search with errorEuler_cython:
# shrink the Euler step by factors of ten, starting from 1, until the final difference is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler_cython(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

### Numpy version

In [None]:
import numpy as np

In [None]:
def errorEuler_numpy(step_size, n_steps=1000000):
    """Return how far Euler integration of y' = 2x ends from the exact y = x**2.

    NumPy counterpart of errorEuler: the grid is x_k = k * step_size for
    k = 0..n_steps, the exact curve is evaluated on the whole grid, and the
    Euler recurrence y_(n+1) = y_n + step_size * 2 * x_n is run across it.

    Args:
        step_size: Spacing between consecutive x values.
        n_steps: Number of Euler steps to take; n_steps + 1 grid points are
            evaluated.  Defaults to the one million steps used in the rest of
            the notebook.

    Returns:
        The absolute difference between the exact value and the Euler value at
        the last grid point.
    """
    # arange only produces integers here, so the grid is obtained by dividing
    # the integers 0..n_steps by (1/step_size)
    values0 = np.arange(0, n_steps + 1) / (1/step_size)
    # Exact values of x**2 on the grid
    values_squared = values0**2
    # Euler trajectory: the first entry is the seed increment (zero, because
    # x_0 = 0); every later entry adds the increment computed from the slope at
    # the previous grid point, i.e. it is a running sum of those increments
    values_euler = np.empty_like(values0)
    values_euler[0] = 2*values0[0]*step_size
    values_euler[1:] = values_euler[0] + np.cumsum(2*values0[:-1]*step_size)
    return abs(values_squared[-1] - values_euler[-1])

In [None]:
%%timeit
# Shrink the Euler step by factors of ten, starting from 1, until the final difference
# returned by errorEuler_numpy is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler_numpy(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

### Scipy version

In [None]:
from scipy.integrate import ode

In [None]:
# define a function with the differential equation
# y=x**2  y'=2x  x(0)=0  y(0)=0
def EulerSquare(t, y):
    """Right-hand side of the differential equation y' = 2x.

    The independent variable is called t instead of x.  The current value y is
    accepted but not used, because the derivative depends only on t.
    """
    derivative = 2 * t
    return derivative

# Initial conditions of the problem: y(0) = 0, i.e. the curve y = x**2 starts at the origin
y0 = 0
x0 = 0

# Create the ODE object around the derivative function and place it at the initial point
# (set_initial_value is given the initial y first, then the initial value of the independent variable)
solver = ode(EulerSquare)
solver.set_initial_value(y0, x0)
# Display the solver's current position and value as the cell output
solver.t, solver.y

To solve the differential equation for a given value we simply use the integrate method with the value as input

In [None]:
# Advance the solver from its current position to x = 20 (the exact solution there is 20**2 = 400)
solver.integrate(20)
# Display the position reached and the value computed as the cell output
solver.t, solver.y

*Note: solver.y is an np.ndarray*  
  
solver.integrate() does not accept a list or np.array as input. For this reason we need a for loop to plug in each  
of the 1000000 values we want to evaluate

In [None]:
# Define the errorEuler function for scipy
def errorEuler_scipy(step_size):
    """Compare x**2 with the value produced by the notebook-level ODE solver.

    The grid is the integers 0..1000000 divided by (1/step_size).  The global
    `solver` object is advanced to every grid point after the first, starting
    from whatever state it is in when the function is called, so the caller
    is expected to reset it with set_initial_value beforehand.

    Returns the absolute difference between the exact square of the last grid
    point and the last value returned by the solver.
    """
    # Grid of x values: integers scaled by the step size
    grid = np.arange(0, 1000001) / (1/step_size)
    # Exact curve on the grid
    exact = grid**2
    # Solver trajectory: the current solver value first, then one integrate call per remaining grid point
    trajectory = [solver.y]
    trajectory.extend(solver.integrate(x) for x in grid[1:])
    return abs(exact[-1] - trajectory[-1])

We will only run one iteration of the integration, and display the difference between the calculated values for the last element

In [None]:
%%timeit
# Put the solver back at the initial point first: errorEuler_scipy continues from the solver's
# current state, and this cell is executed repeatedly by %%timeit
solver.set_initial_value(y0, x0)
# Final difference between x**2 and the solver result for a step size of 1
print(errorEuler_scipy(1))

Notice how the algorithm returns more exact results, but is fairly slow

In [None]:
%%timeit
# Put the solver back at the initial point first: errorEuler_scipy continues from the solver's
# current state, and this cell is executed repeatedly by %%timeit
solver.set_initial_value(y0, x0)
# Final difference between x**2 and the solver result for a step size of 0.0001
print(errorEuler_scipy(0.0001))

In [None]:
%%timeit
# Put the solver back at the initial point first: errorEuler_scipy continues from the solver's
# current state, and this cell is executed repeatedly by %%timeit
solver.set_initial_value(y0, x0)
# Switch the shared solver object to a different integrator
# NOTE(review): the solver is a notebook-level object, so this choice presumably stays in effect for any
# cell run afterwards - confirm before re-running the earlier timing cells
solver.set_integrator('dopri5') # Runge-Kutta method of order 4(5)
# Final difference between x**2 and the solver result for a step size of 1
print(errorEuler_scipy(1))

### Numba version

In [None]:
from numba import jit

In [None]:
# Wrap errorEuler with Numba's jit; the wrapped function gets its own name
# and errorEuler itself stays bound to the original
errorEuler_numba = jit(errorEuler)

In [None]:
%%timeit
# Shrink the Euler step by factors of ten, starting from 1, until the final difference
# returned by errorEuler_numba is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler_numba(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

Note: Numba sometimes has problems compiling empty lists

In [None]:
# Wrap errorEuler_numpy with Numba's jit; the wrapped function gets its own name
# and errorEuler_numpy itself stays bound to the original
errorEuler_numpy_numba = jit(errorEuler_numpy)

In [None]:
%%timeit
# Shrink the Euler step by factors of ten, starting from 1, until the final difference
# returned by errorEuler_numpy_numba is no longer above 1e-5
# The difference and the step both start at 1, so the loop body always runs at least once
difference0, step_size = 1, 1
while difference0 > 1e-5:
    difference0 = errorEuler_numpy_numba(step_size)
    step_size = step_size / 10
# The step was divided once more after the last evaluation; scale it back to report the step just tested
print(step_size*10)

### Parallel processing version  

Note: Need to start engines in the terminal:  
$ ipcluster start

In [None]:
from ipyparallel import Client

In [None]:
# Create the ipyparallel.Client instance
rc = Client()
# Create a view of the instance that includes all cores
v = rc[:]
# Display the identities of all the cores the instance has access to
rc.ids

In [None]:
def errorEuler_parallel(step_size, n_steps=1000000):
    """Return how far Euler integration of y' = 2x ends from the exact y = x**2.

    Same computation as the serial errorEuler, written without references to
    any notebook-level name.
    NOTE(review): presumably kept as a separate self-contained copy so it can
    be shipped to the ipyparallel engines - confirm before merging the two.

    The grid is x_k = k * step_size for k = 0..n_steps, and the Euler
    recurrence y_(n+1) = y_n + step_size * 2 * x_n is run across it.

    Args:
        step_size: Spacing between consecutive x values.  A value of 0 raises
            ZeroDivisionError when 1/step_size is computed.
        n_steps: Number of Euler steps to take; n_steps + 1 grid points are
            evaluated.  Defaults to the one million steps used in the rest of
            the notebook.

    Returns:
        The absolute difference between the exact value and the Euler value at
        the last grid point.
    """
    # Range only produces integers, so the grid is obtained by dividing the
    # integers 0..n_steps by (1/step_size)
    scale = 1 / step_size
    values0 = [k / scale for k in range(n_steps + 1)]
    # Exact values of x**2 on the grid
    values_squared = [x**2 for x in values0]
    # Euler trajectory, seeded with the first increment (zero, because x_0 = 0)
    values_euler = [2 * values0[0] * step_size]
    # Each new value must build on the value appended just before it
    # (values_euler[-1]) using the slope at the previous grid point; the slopes
    # needed are therefore those at x_0 .. x_(n_steps-1)
    for x in values0[:-1]:
        values_euler.append(values_euler[-1] + 2 * x * step_size)
    return abs(values_squared[-1] - values_euler[-1])

Asynchronous execution

In [None]:
%%timeit
# Run the errorEuler_parallel function with different step sizes to find one that returns a final difference
# lower than 1e-5
# Initialize the difference value
difference0 = 1
# Initialize step size; it has to be a list of 4 values so that errorEuler_parallel is run with a different one
# on each engine (assumes 4 engines are available)
# Because a plain list cannot be rescaled in one operation after the loop (we are not using Numpy here), the
# values are stored pre-multiplied by 10000 and the first thing the loop does is divide them by 10000
# The batch covers four consecutive decades (1, 0.1, 0.01, 0.001 on the first pass), so every division by
# 10000 moves on to the next four decades without skipping or repeating a step size
step_size = [10000, 1000, 100, 10]
# Use a while loop to decrease step_size until we are below the desired difference
while difference0 > 1e-5:
    step_size = [value/10000 for value in step_size]
    asynch_job = v.map(errorEuler_parallel, step_size)  # Run function on each engine with a different step size
    asynch_results = asynch_job.get()                   # Collect results from each engine
    difference0 = min(asynch_results)
# Print the batch of step sizes in which the smallest difference first dropped to the threshold
print(step_size)

Optimize the while loop with Numpy

In [None]:
%%timeit
# Run the errorEuler_parallel function with different step sizes to find one that returns a final difference
# lower than 1e-5
# Initialize the difference value
difference0 = 1
# Initialize step size; it has to hold 4 values so that errorEuler_parallel is run with a different one
# on each engine (assumes 4 engines are available)
# The batch covers four consecutive decades, so every division by 10000 moves on to the next four decades
# without skipping or repeating a step size
step_size = np.array([1, 0.1, 0.01, 0.001])
# Use a while loop to decrease step_size until we are below the desired difference
while difference0 > 1e-5:
    asynch_job = v.map(errorEuler_parallel, step_size)  # Run function on each engine with a different step size
    asynch_results = asynch_job.get()                   # Collect results from each engine
    difference0 = min(asynch_results)
    step_size /= 10000
# The batch was divided once more after the last evaluation; scale it back to print the batch just tested
print(step_size*10000)

Use Numpy also in the function

In [None]:
%px import numpy as np # %px is how we execute a command in the individual cores. In this form executes in all cores
                       # for running a command in only core 1 precede the "%px command" line by: 
                       # %px --targets 1                Can also use slice indexing, e.g. --targets [1:2]
                       # To execute code blocks use:
                       # %%px --targets n
                       #     commands

In [None]:
def errorEuler_parallel_numpy(step_size, n_steps=1000000):
    """Return how far Euler integration of y' = 2x ends from the exact y = x**2.

    NumPy implementation whose only outside reference is the name `np`.
    NOTE(review): presumably meant to run on the ipyparallel engines, where
    `np` has to be imported separately - confirm.

    The grid is x_k = k * step_size for k = 0..n_steps, and the Euler
    recurrence y_(n+1) = y_n + step_size * 2 * x_n is run across it.

    Args:
        step_size: Spacing between consecutive x values.
        n_steps: Number of Euler steps to take; n_steps + 1 grid points are
            evaluated.  Defaults to the one million steps used in the rest of
            the notebook.

    Returns:
        The absolute difference between the exact value and the Euler value at
        the last grid point.
    """
    # arange only produces integers here, so the grid is obtained by dividing
    # the integers 0..n_steps by (1/step_size)
    values0 = np.arange(0, n_steps + 1) / (1/step_size)
    # Exact values of x**2 on the grid
    values_squared = values0**2
    # Euler trajectory: the first entry is the seed increment (zero, because
    # x_0 = 0); every later entry adds the increment computed from the slope at
    # the previous grid point, i.e. it is a running sum of those increments
    values_euler = np.empty_like(values0)
    values_euler[0] = 2*values0[0]*step_size
    values_euler[1:] = values_euler[0] + np.cumsum(2*values0[:-1]*step_size)
    return abs(values_squared[-1] - values_euler[-1])

In [None]:
%%timeit
# Run the errorEuler_parallel_numpy function with different step sizes to find one that returns a final
# difference lower than 1e-5
# Initialize the difference value
difference0 = 1
# Initialize step size; it has to hold 4 values so that errorEuler_parallel_numpy is run with a different one
# on each engine (assumes 4 engines are available)
# The batch covers four consecutive decades, so every division by 10000 moves on to the next four decades
# without skipping or repeating a step size
step_size = np.array([1, 0.1, 0.01, 0.001])
# Use a while loop to decrease step_size until we are below the desired difference
while difference0 > 1e-5:
    asynch_job = v.map(errorEuler_parallel_numpy, step_size)  # Run function on each engine w/ different step size
    asynch_results = asynch_job.get()                         # Collect results from each engine
    difference0 = min(asynch_results)
    step_size /= 10000
# The batch was divided once more after the last evaluation; scale it back to print the batch just tested
print(step_size*10000)

Synchronous execution

In [None]:
%%timeit
# Run the errorEuler_parallel_numpy function with different step sizes to find one that returns a final
# difference lower than 1e-5
# Initialize the difference value
difference0 = 1
# Initialize step size; it has to hold 4 values so that errorEuler_parallel_numpy is run with a different one
# on each engine (assumes 4 engines are available)
# The batch covers four consecutive decades, so every division by 10000 moves on to the next four decades
# without skipping or repeating a step size
step_size = np.array([1, 0.1, 0.01, 0.001])
# Use a while loop to decrease step_size until we are below the desired difference
while difference0 > 1e-5:
    synch_results = v.map_sync(errorEuler_parallel_numpy, step_size)  # Results don't need to be collected separately
    difference0 = min(synch_results)
    step_size /= 10000
# The batch was divided once more after the last evaluation; scale it back to print the batch just tested
print(step_size*10000)

In [None]:
# Wrap errorEuler_parallel_numpy with Numba's jit; the wrapped function gets its own name
# and errorEuler_parallel_numpy itself stays bound to the original
errorEuler_parallel_numpy_numba = jit(errorEuler_parallel_numpy)

In [None]:
%%timeit
# Run the errorEuler_parallel_numpy_numba function with different step sizes to find one that returns a final
# difference lower than 1e-5
# Initialize the difference value
difference0 = 1
# Initialize step size; it has to hold 4 values so that the function is run with a different one
# on each engine (assumes 4 engines are available)
# The batch covers four consecutive decades, so every division by 10000 moves on to the next four decades
# without skipping or repeating a step size
step_size = np.array([1, 0.1, 0.01, 0.001])
# Use a while loop to decrease step_size until we are below the desired difference
while difference0 > 1e-5:
    asynch_job = v.map(errorEuler_parallel_numpy_numba, step_size)  # Run function on each engine w/ different step size
    asynch_results = asynch_job.get()                               # Collect results from each engine
    difference0 = min(asynch_results)
    step_size /= 10000
# The batch was divided once more after the last evaluation; scale it back to print the batch just tested
print(step_size*10000)