In [1]:
%load_ext Cython
import os,tempfile
os.chdir(tempfile.mkdtemp())
tempfile.mkstemp()

(57, '/var/folders/r7/n9dny1wj46q8njz2gds66kqr0000gp/T/tmp_g68zgzx')

# pyx compilation

put Cython codes into a file `cythoncode.pyx`

create `setup.py`, either
1. with default `cythonize`:

```python
from distutils.core import setup
from Cython.Build import cythonize
setup( name="cythoncode", ext_modules = cythonize("cythoncode.pyx") )
```

2. With custom C compilation flags:

```python
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

setup(  name = "cythoncode",
        cmdclass = {"build_ext": build_ext},
        ext_modules = [ Extension("cythoncode",
                                  sources=["cythoncode.pyx"],
                                  language='c++',   #using C++
                                  libraries=["m"],  #for using C's math lib
                                  extra_compile_args = ["-ffast-math"])])
```

3. With `cimport numpy`:

```python
import os
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

setup(cmdclass = {'build_ext': build_ext},
      name="cythoncode",
      ext_modules=[Extension("cythoncode",
                             sources=["cythoncode.pyx"])],
      include_dirs=[numpy.get_include(),
                    os.path.join(numpy.get_include(), 'numpy')])
```

To compile, either
1. in commandline, run `python setup.py build_ext --inplace`;
2. in main program, `import pyximport; pyximport.install()` (don't need `setup.py`)

To load the built module in the main program, `from cythoncode import *`

In [5]:
%%file cythoncode.pyx
# cython: language_level=3, boundscheck=False
## directives above: indicate python3 syntax, disable boundscheck for arrays
from libc.math cimport pow, log

cdef double square_and_add (double x): #cdef function can only be called from Cython, but not from Python.
    return log(x)

cpdef print_result (double x):         #cpdef function can be called from Python; def only python
    print(f"({x} ^ 2) + {x} = {square_and_add(x)}")

Overwriting cythoncode.pyx


In [6]:
%%file setup.py
from distutils.core import Extension, setup
from Cython.Build import cythonize
from Cython.Distutils import build_ext
ext = Extension(name="cythoncode", sources=["cythoncode.pyx"])  # define an extension that will be cythonized and compiled
setup(
#     ext_modules=cythonize(ext)
    ext_modules=[ ext ],cmdclass = {'build_ext': build_ext}
     )

Overwriting setup.py


In [8]:
# import pyximport; pyximport.install()  #either this
!python3 setup.py build_ext --inplace    #or this
from cythoncode import print_result
print_result(3)

running build_ext
cythoning cythoncode.pyx to cythoncode.c
building 'cythoncode' extension
creating build
creating build/temp.macosx-10.7-x86_64-3.7
gcc -Wno-unused-result -Wsign-compare -Wunreachable-code -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -I/Users/hoi/opt/include -arch x86_64 -I/Users/hoi/opt/include -arch x86_64 -I/Users/hoi/opt/include/python3.7m -c cythoncode.c -o build/temp.macosx-10.7-x86_64-3.7/cythoncode.o
gcc -bundle -undefined dynamic_lookup -L/Users/hoi/opt/lib -arch x86_64 -L/Users/hoi/opt/lib -arch x86_64 -arch x86_64 build/temp.macosx-10.7-x86_64-3.7/cythoncode.o -o /private/var/folders/r7/n9dny1wj46q8njz2gds66kqr0000gp/T/tmphur_clfu/cythoncode.cpython-37m-darwin.so
(3.0 ^ 2) + 3.0 = 1.0986122886681098


## `pxd` header file
if the code is to be `cimport`ed by another cython file, need to put declarations (of `cpdef` and `cdef` methods/variables) in a same-name pxd file and implementation only in pyx

In [None]:
%%file mcts.pxd
# cython: language_level=3
cdef class Node:
    cdef public int visits
    cdef public double wins
    cdef public dict childNodes # {move:nextnode} the move that get *into* next node
    cdef public int viewpoint
    cpdef int UCTSelectChild(self,double explore=?)
    cpdef void AddChild(self, int move, n)
    cpdef void Update(self, double result)

In [None]:
%%file mcts.pyx

cdef class Node:
    def __init__(self, Game game):
        self.wins = 0. 
        self.visits = 0
        self.childNodes={}
        self.viewpoint = 3-game.playerToMove
        
    cpdef int UCTSelectChild(self,double explore=1):
        return bestmove
    
    cpdef void AddChild(self, int move, n):
        self.childNodes[move] = n
    
    cpdef void Update(self, double result):
        self.visits += 1
        self.wins += result # game results in the range [0.0, 1.0]

## [in notebook](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#compiling-with-a-jupyter-notebook)

* add `%%cython` in cell defining cython functions (those in `.pyx`), then the functions can be used as normal (treat each cell with `%%cython` as a file)
* `%%cython -+` for codes in C++

## `cython` function that works even if `gcc` compilation fails

In [83]:
%%file particle_extern.cpp
#include "particle_extern.h"

Particle::Particle(float m, float c, float *p, float *v) :
    mass(m), charge(c) 
{
    for (int i=0; i<3; ++i) {
        pos[i] = p[i]; vel[i] = v[i];
    }
}

void Particle::applyImpulse(float *f, float t)
{
    float newvi;
    for(int i=0; i<3; ++i) {
        newvi = vel[i] + t / mass * f[i];
        pos[i] = (newvi + vel[i]) * t / 2.;
        vel[i] = newvi;
    }
}


Writing particle_extern.cpp


In [2]:
%%cython
import cython
@cython.locals(n=cython.int)
def fib_pure_python(n):
    cython.declare(a=cython.int, b=cython.int, i=cython.int)
    a, b = 1, 1
    for i in range(n):
        a, b = a + b, a
    return a

# using C/C++ libraries
for standard [C libraries](https://github.com/cython/cython/tree/master/Cython/Includes/libc) or [C++ libraries](https://github.com/cython/cython/tree/master/Cython/Includes/libcpp):
```python 
from libc.math cimport sqrt```

For custom library (in header file `custom.h`):
```cython
cdef extern from "custom.h":
    double sin(double x)
```
To allow it to be used by other python codes, add `cpdef` before `double sin`



*    `def` - Python, Cython 
*    `cdef` - faster, Cython only, can use pointers
*    `cpdef` - faster, Python, Cython, no pointers


## [Using std C functions](https://cython.readthedocs.io/en/latest/src/tutorial/external.html)

In [61]:
%%cython
from libc.stdlib cimport atoi
cdef parse_charptr_to_py_int(char* s):
    assert s is not NULL, "byte string value is NULL"
    return atoi(s)  # note: atoi() has no error detection!

access C code function:

In [69]:
%%cython
cdef extern from "math.h":
    cpdef double sin(double)

In [70]:
type(sin)

cython_function_or_method

Given name to the signature declaration allows calling the function with keyword arguments (cdef function):

In [81]:
%%cython
cdef extern from "string.h":
    char* strstr(const char *haystack, const char *needle)

cdef char* data = "hfvcakdfagbcffvschvxcdfgccbcfhvgcsnfxjh"

cdef char* pos = strstr(needle='akd', haystack=data)
print(pos is not NULL)

## STL

template argument in the [] brackets

In [None]:
%%cython -I . -+ -a
cdef extern from "<algorithm>" namespace "std":
    iter std_find "std::find" [iter, T](iter first, iter last, const T& val)
cdef extern from "<algorithm>" namespace "std":
    iter std_random_shuffle "std::random_shuffle" [iter](iter first, iter last)
# std_find[vector[int].iterator, int](vec.begin(), vec.end(), m)
# std_random_shuffle[vector[int].iterator]( vec.begin(), vec.end() )

## [Using self-defined C classes (externs)](https://cython.readthedocs.io/en/latest/src/userguide/wrapping_CPlusPlus.html)

In [None]:
https://cython.readthedocs.io/en/latest/src/tutorial/cdef_classes.html

## [define class in cython](https://cython.readthedocs.io/en/latest/src/tutorial/cdef_classes.html)
*        All attributes must be pre-declared at compile-time
*        Attributes are by default only accessible from Cython (typed access)
*        Properties can be declared to expose dynamic attributes to Python-space

If you don’t do anything special, a function declared with cdef that does not return a Python object has no way of reporting Python exceptions to its caller. If an exception is detected in such a function, a warning message is printed and the exception is ignored.

*  ```cython
cdef int spam() except -1: ...
``` 
indicates that if -1 is returned by `spam`, an error has occured 
*  ```cython
cdef int spam() except? -1: ...
``` 
indicates that if -1 is returned by `spam`, Cython should call `PyErr_Occurred()` to make sure it really is an error.
*  ```cython
cdef void spam() except *: ...
``` 
indicates Cython should call `PyErr_Occurred()` after every call, to make sure it really is an error.
*  ```cython
cdef int spam() except +
```
for external C++ function that may raise an exception

In [58]:
%%cython -+
from libc.math cimport sin

cdef class Function_cy:
    cpdef double evaluate(self, double x) except *:
        return 0

cdef class SinOfSquareFunction_cy(Function_cy):             #inherits just to demonstrate overriding
    cpdef double evaluate(self, double x) except *:
        return sin(x ** 2)
    
def integrate_cy(Function_cy f, double a, double b, int N):
    cdef int i
    cdef double s, dx
    dx = (b - a) / N
    if f is None: raise ValueError("f cannot be None") #Since the argument is typed, we need to check whether it is None. In Python, this would have resulted in an AttributeError when the evaluate method was looked up, but Cython would instead try to access the (incompatible) internal structure of None as if it were a Function, leading to a crash or data corruption.
    s = 0
    for i in range(N):
        s += f.evaluate(a + i * dx)
    return s * dx

In [59]:
from math import sin
class SinOfSquareFunction(Function_cy):  #must interit from this to pass to integrate_cy
    def evaluate(self, x):
        return sin(x ** 2)
    
def integrate(f, a, b, N):
    dx = (b - a) / N
    s=0
    for i in range(N):
        s += f.evaluate(a + i * dx)
    return s * dx

In [60]:
%timeit integrate_cy(SinOfSquareFunction_cy(), 0, 1, 10000)
%timeit integrate_cy(SinOfSquareFunction(), 0, 1, 10000)   # pass python-function to cython integrate
%timeit integrate(SinOfSquareFunction_cy(), 0, 1, 10000)   # pass Cy-function to python integrate
%timeit integrate(SinOfSquareFunction(), 0, 1, 10000)

131 µs ± 8.54 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.82 ms ± 250 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.34 ms ± 95.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.39 ms ± 143 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


public/private attributes:

In [None]:
%%cython
cdef class WaveFunction(Function):

    # Not available in Python-space:
    cdef double offset

    # Available in Python-space:
    cdef public double freq

    # Available in Python-space, but only for reading:
    cdef readonly double scale

    # Available in Python-space:
    @property
    def period(self):
        return 1.0 / self.freq
    @period.setter
    def period(self, value):
        self.freq = 1.0 / value

# Examples

* [Compiler directives](https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#compiler-directives): add `@cython.xxx(False)` to turn on/off checks (must `cimport cython` first)
* `from libc.math cimport pow` for importing C's lib; `from libcpp.vector cimport vector` for importing C++'s lib

In [2]:
%%cython
cimport cython
@cython.cdivision(False)
def primes_c(int nb_primes):
    cdef int n=2, len_p=0, i   #variales defined in cdef are C's objects (cannot be returned)
    cdef int p[1000]           #C-static array (cannot be p[nb_primes])

    while len_p < nb_primes:
        for i in p[:len_p]:
            if n % i == 0:
                break
        else:
            p[len_p] = n
            len_p += 1
        n += 1

    return [prime for prime in p[:len_p]]    #  return as python list
#     return result_as_list

In [3]:
%%cython -+
from libcpp.vector cimport vector
cimport cython

def primes_cpp(unsigned int nb_primes):
    cdef int n=2, i
    cdef vector[int] p
    p.reserve(nb_primes)

    while p.size() < nb_primes:
        for i in p:
            if n % i == 0:
                break
        else:
            p.push_back(n)
        n += 1
    return p

In [44]:
%%cython
def primes_cy(nb_primes):  #cythonize a python code without adapting variables to C
    p=[]
    n=2
    while len(p) < nb_primes:
        for i in p:
            if n % i == 0:
                break
        else:
            p+=n,
        n += 1
    return p

In [45]:
def primes(nb_primes):
    p=[]
    n=2
    while len(p) < nb_primes:
        for i in p:
            if n % i == 0:
                break
        else:
            p+=n,
        n += 1
    return p
%timeit primes_c(1000)
%timeit primes_cpp(1000)
%timeit primes_cy(1000)
%timeit primes(1000)

1.61 ms ± 12 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.63 ms ± 30.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
11.9 ms ± 48 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
20.7 ms ± 94 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%%cython
from libc.stdlib cimport rand
cpdef int test():
    return rand()

In [10]:
test()

596516649

# openmp

In [13]:
%%cython

from cython.parallel import prange

cdef int i
cdef int n = 12345678
cdef int sum = 0

for i in prange(n, nogil=True):
    sum += i

print(sum)

-2023233221


# Using [Numpy](http://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html)

```cython
cimport numpy as np```

in `setup.py`
```python
import os
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

setup(cmdclass = {'build_ext': build_ext},
      name="cythoncode",
      ext_modules=[Extension("cythoncode",
                             sources=["cythoncode.pyx"])],
      include_dirs=[numpy.get_include(),
                    os.path.join(numpy.get_include(), 'numpy')])
```

## Pairwise Distances (numpy list)

In [3]:
import numpy as np
X = np.random.random((500, 3))

In [4]:
def pairwise_v1(X):
    X = np.asarray(X)    
    n_samples, n_dim = X.shape
    D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = np.sqrt(np.sum((X[i] - X[j]) ** 2))
    return D

In [5]:
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython

#no bounds check for array; disable negative (count from end) indexing
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None): #mode='c' indicates C-ordered (contiguous in memory)
    
    cdef np.intp_t i, j, n_samples, N # np.intp: Integer used for indexing (unsigned). _t means the type of it
    cdef double tmp, d    
    n_samples = X.shape[0]
    N = X.shape[1]

    cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            d = 0
            for k in range(N):
                tmp = X[i,k] - X[j,k]
                d += tmp * tmp
            D[i, j] = sqrt(d)

    return D

[Typed Memoryview](http://cython.readthedocs.io/en/latest/src/userguide/memoryviews.html) - faster passing arrays as arguments

In [6]:
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v3(double[:, ::1] X not None): # ::1 means 2nd dim inc one element apart in memory. can be replaced by : since np array is of this form
    
    cdef np.intp_t i, j, n_samples, N
    cdef double tmp, d    
    n_samples = X.shape[0]
    N = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            d = 0
            for k in range(N):
                tmp = X[i,k] - X[j,k]
                d += tmp * tmp
            D[i, j] = sqrt(d)

    return np.asarray(D)  #convert from memory view back to ordinary numpy array

In [7]:
%timeit pairwise_v1(X)
%timeit pairwise_v2(X)
%timeit pairwise_v3(X)

1.61 s ± 106 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
870 µs ± 35.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
771 µs ± 29.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
