In [None]:
%load_ext cython

In [None]:
%%cython -a

import numpy as np
import pandas as pd

def f(x):
    """Integrand used throughout the notebook: ``x ** 2 - x``."""
    squared = x ** 2
    return squared - x

def integrate_f(a, b, N):
    """Approximate the integral of ``f`` over [a, b] with a left Riemann sum.

    The interval is cut into ``N`` equal-width slices and ``f`` is sampled at
    the left edge of each slice.
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        total += f(a + k * width)
    return total * width

def apply_integrate_f(col_a, col_b, col_N):
    """Call ``integrate_f`` once per position of three parallel sequences.

    Returns a float64 NumPy array with ``len(col_N)`` entries, where entry
    ``k`` is ``integrate_f(col_a[k], col_b[k], col_N[k])``.
    """
    count = len(col_N)
    out = np.empty(count, dtype=np.float64)
    for row in range(count):
        out[row] = integrate_f(col_a[row], col_b[row], col_N[row])
    return out


To pass our Series data to the Cython function (expecting `double[:]`), we pass the underlying NumPy array, which works nicely with Cython.

In [None]:
# Import explicitly so this cell does not rely on `np`/`pd` having been
# injected into the notebook namespace by the `%%cython` cell above.
import numpy as np
import pandas as pd

# Fixed seed so the benchmark inputs are identical from one kernel run to the
# next. The legacy RandomState API is kept on purpose: its `randint` has the
# same default integer dtype as the module-level `np.random.randint`, so the
# dtype of column "N" is unchanged.
N_ROWS = 1000
rng = np.random.RandomState(42)

# Columns "a" and "b" are the integration bounds; "N" is the number of slices
# (drawn from [100, 1000), so it is never zero).
df = pd.DataFrame({"a": rng.randn(N_ROWS),
                   "b": rng.randn(N_ROWS),
                   "N": rng.randint(100, 1000, N_ROWS)})

# Baseline timing: the untyped implementation, fed plain NumPy arrays
# extracted from the DataFrame columns with `.to_numpy()`.
%timeit apply_integrate_f(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

Let's type-annotate the functions' input parameters to get a first-order performance gain:

In [None]:
%%cython -a

import numpy as np
import pandas as pd

def f(double x):
    """Integrand ``x ** 2 - x``; the argument is declared as a C double."""
    squared = x ** 2
    return squared - x

def integrate_f(double a, double b, long N):
    """Left Riemann sum of ``f`` over [a, b] using ``N`` equal-width slices.

    Only the parameters are typed at this stage; the locals are left
    undeclared.
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        total += f(a + k * width)
    return total * width

def apply_integrate_f(double[:] col_a, double[:] col_b, long[:] col_N):
    """Call ``integrate_f`` once per position of three typed memoryviews.

    Returns a float64 NumPy array with ``len(col_N)`` entries, where entry
    ``k`` is ``integrate_f(col_a[k], col_b[k], col_N[k])``.
    """
    count = len(col_N)
    out = np.empty(count, dtype=np.float64)
    for row in range(count):
        out[row] = integrate_f(col_a[row], col_b[row], col_N[row])
    return out

In [None]:
%%timeit

# Same call as the baseline timing, now run against the version whose
# parameters are typed (`double[:]` / `long[:]` memoryviews).
apply_integrate_f(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

Now we move on to type-annotating the functions themselves. There are three ways of declaring functions:

`def` - Python style:

- Called by Python or Cython code, and both input/output are Python objects.

`cdef` - C style:

- Called from Cython and C, but not from Python code. Cython treats the function as a pure C function, which can take arguments of any type, including non-Python types such as pointers.

`cpdef` - Python/C mixed:

- A `cpdef` function combines both `def` and `cdef`: Cython generates a `cdef` function for C types and a `def` function for Python types. It may be as fast as a function declared with `cdef`, and it might be as slow as a function declared with `def`.

In [None]:
%%cython -a

import numpy as np
import pandas as pd

cdef f(double x):
    """Integrand ``x ** 2 - x``; C-level function with no declared return type."""
    squared = x ** 2
    return squared - x

cpdef integrate_f(double a, double b, long N):
    """Left Riemann sum of ``f`` over [a, b] using ``N`` equal-width slices.

    Declared ``cpdef`` with typed parameters; the locals are still undeclared.
    """
    width = (b - a) / N
    total = 0
    for k in range(N):
        total += f(a + k * width)
    return total * width

cpdef apply_integrate_f(double[:] col_a, double[:] col_b, long[:] col_N):
    """Call ``integrate_f`` once per position of three typed memoryviews.

    Returns a float64 NumPy array with ``len(col_N)`` entries, where entry
    ``k`` is ``integrate_f(col_a[k], col_b[k], col_N[k])``.
    """
    count = len(col_N)
    out = np.empty(count, dtype=np.float64)
    for row in range(count):
        out[row] = integrate_f(col_a[row], col_b[row], col_N[row])
    return out

In [None]:
# Time the `cdef`/`cpdef` variant with the same call on the same DataFrame columns.
%timeit apply_integrate_f(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())

The final step is type-annotating the variables in the function bodies:

In [None]:
%%cython -a

import numpy as np
import pandas as pd

cdef double f(double x):
    """Integrand ``x ** 2 - x``, taking and returning a C double."""
    cdef double squared = x ** 2
    return squared - x

cpdef double integrate_f(double a, double b, long N):
    """Left Riemann sum of ``f`` over [a, b] using ``N`` equal-width slices.

    Parameters, locals and the return value are all declared as C types.
    """
    cdef long k
    cdef double width = (b - a) / N
    cdef double total = 0

    for k in range(N):
        total += f(a + k * width)
    return total * width

cpdef double[:] apply_integrate_f(double[:] col_a, double[:] col_b, long[:] col_N):
    """Call ``integrate_f`` once per position of three typed memoryviews.

    Entry ``i`` of the result is ``integrate_f(col_a[i], col_b[i], col_N[i])``;
    the result has as many entries as ``col_N``.

    The declared return type is ``double[:]``, so Python callers receive a
    typed memoryview over a freshly allocated float64 NumPy array rather than
    the array itself; wrap the result in ``np.asarray(...)`` if an ndarray is
    needed.
    """
    # Py_ssize_t is the C type Python uses for lengths and indices. The
    # previous `long` is only 32 bits wide on some platforms (e.g. 64-bit
    # Windows), so storing a length in it could silently narrow the value.
    cdef Py_ssize_t n = col_N.shape[0]
    cdef Py_ssize_t i
    cdef double[:] res = np.empty(n, dtype=np.float64)

    for i in range(n):
        res[i] = integrate_f(col_a[i], col_b[i], col_N[i])
    return res

In [None]:
# Time the fully typed variant with the same call on the same DataFrame columns.
%timeit apply_integrate_f(df['a'].to_numpy(), df['b'].to_numpy(), df['N'].to_numpy())