## Cython入門

### 計算向量集的距離矩陣

In [4]:
import numpy as np
# Seed NumPy's global random generator so the sample data is reproducible.
np.random.seed(42)
# Sample data: 200 vectors with 3 components each.
X = np.random.rand(200, 3)

In [14]:
def pairwise_dist_python(X):
    """Return the full Euclidean distance matrix of the rows of ``X``.

    Pure-Python reference implementation, used as the baseline that the
    Cython versions are compared against.

    Parameters
    ----------
    X : 2-D array
        Object exposing ``.shape == (m, n)`` and ``X[i, k]`` indexing:
        ``m`` vectors with ``n`` components each.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` float64 array ``D`` in which ``D[i, j]`` is the Euclidean
        distance between row ``i`` and row ``j`` of ``X``.
    """
    m, n = X.shape
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same dtype on every version.
    D = np.empty((m, m), dtype=np.float64)
    # ``range`` replaces the Python-2-only ``xrange`` so the baseline also
    # runs on Python 3.
    for i in range(m):
        # The distance is symmetric: compute the upper triangle (diagonal
        # included) and mirror each value into the lower triangle.
        for j in range(i, m):
            d = 0.0
            for k in range(n):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = D[j, i] = d ** 0.5
    return D

In [15]:
from scipy.spatial.distance import pdist, squareform
# Reference timing: SciPy's pdist, expanded to a square matrix by squareform.
%timeit squareform(pdist(X))
# Timing of the pure-Python triple loop on the same data.
%timeit pairwise_dist_python(X)
# Sanity check: both approaches must give numerically equal matrices.
np.allclose(squareform(pdist(X)), pairwise_dist_python(X))

1000 loops, best of 3: 443 μs per loop
10 loops, best of 3: 92.7 ms per loop


True

In [21]:
%%cython
import numpy as np

def pairwise_dist_cython(X):
    """Return the full Euclidean distance matrix of the rows of ``X``.

    Same algorithm as the pure-Python baseline, deliberately left without
    any type declarations so that it shows what compiling unchanged Python
    code with Cython gives.

    Parameters
    ----------
    X : 2-D array
        Object exposing ``.shape == (m, n)`` and ``X[i, k]`` indexing.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` float64 array ``D`` in which ``D[i, j]`` is the Euclidean
        distance between row ``i`` and row ``j`` of ``X``.
    """
    m, n = X.shape
    # ``np.float`` has been removed from NumPy and would raise AttributeError
    # at call time; ``np.float64`` is the identical dtype.
    D = np.empty((m, m), dtype=np.float64)
    # ``range`` is used for consistency with the other implementations.
    for i in range(m):
        # Symmetric result: fill the upper triangle and mirror it.
        for j in range(i, m):
            d = 0.0
            for k in range(n):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = D[j, i] = d ** 0.5
    return D

In [22]:
# Timing of the Cython-compiled but untyped version.
%timeit pairwise_dist_cython(X)
# It must still agree with the pure-Python baseline.
np.allclose(pairwise_dist_cython(X), pairwise_dist_python(X))

10 loops, best of 3: 72.9 ms per loop


True

In [11]:
%%cython
import numpy as np
import cython
from libc.math cimport sqrt

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_dist_cython2(double[:, ::1] X):
    """Return the Euclidean distance matrix of the rows of ``X``.

    ``X`` is received as a C-contiguous 2-D memoryview of doubles. The
    result is a new ``(m, m)`` float64 NumPy array, ``m`` being the number
    of rows of ``X``. Bounds checking and negative-index wraparound are
    disabled by the decorators, so every index used below has to stay
    inside the buffers.
    """
    cdef int n_rows, n_cols, row, other, col
    cdef double diff, sq_sum, dist
    n_rows = X.shape[0]
    n_cols = X.shape[1]
    cdef double[:, ::1] out = np.empty((n_rows, n_rows), dtype=np.float64)
    for row in range(n_rows):
        # Symmetric result: compute the upper triangle (diagonal included)
        # and mirror each value into the lower triangle.
        for other in range(row, n_rows):
            sq_sum = 0.0
            for col in range(n_cols):
                diff = X[row, col] - X[other, col]
                sq_sum += diff * diff
            dist = sqrt(sq_sum)
            out[row, other] = dist
            out[other, row] = dist
    # Hand the memoryview back to the caller as a regular NumPy array.
    return np.asarray(out)

In [24]:
# Timing of the fully typed Cython version.
%timeit pairwise_dist_cython2(X)
# It must agree with the pure-Python baseline.
np.allclose(pairwise_dist_cython2(X), pairwise_dist_python(X))

10000 loops, best of 3: 196 μs per loop


True

### 將Cython程式編譯成延伸模組

In [25]:
import sys
# Look up the module object in which pairwise_dist_cython2 was defined.
sys.modules[pairwise_dist_cython2.__module__]

<module '_cython_magic_f9e6211d48d0b874fa7ae6ce345d297b' from 'C:\Users\RY\Dropbox\scipybook2\settings\.ipython\cython\_cython_magic_f9e6211d48d0b874fa7ae6ce345d297b.pyd'>

> **TIP**

> 可以透過`%%cython`指令的`-n`參數指定編譯之後的延伸模組名，例如`%%cython -n fast_pdist`。

In [26]:
%%file setup_fast_pdist.py
"""Build script that compiles ``fast_pdist.pyx`` into the ``fast_pdist`` extension.

Expects ``fast_pdist.pyx`` to be present in the working directory.
NOTE(review): this script relies on ``distutils``; confirm it is available
in the target Python environment.
"""
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy as np

# The single extension to build; NumPy's header directory is put on the
# include path.
fast_pdist_extension = Extension(
    "fast_pdist",
    sources=["fast_pdist.pyx"],
    include_dirs=[np.get_include()],
)

ext_modules = [fast_pdist_extension]

setup(
    name='a faster version of pdist',
    # Cython's build_ext handles the .pyx source.
    cmdclass={'build_ext': build_ext},
    ext_modules=ext_modules,
)

Overwriting setup_fast_pdist.py


In [27]:
#%hide_output
# Run the build script; --inplace puts the built extension next to the sources.
!python setup_fast_pdist.py build_ext --inplace

running build_ext
skipping 'fast_pdist.c' Cython extension (up-to-date)


In [26]:
# Import the separately built extension module and check its result
# against the pure-Python baseline.
import fast_pdist
np.allclose(fast_pdist.pairwise_dist_cython2(X), pairwise_dist_python(X))

True

### C語系中的Python物件型態

In [3]:
%%language cpp
/* C-level layout of a Python float object as presented in this section. */
typedef struct {
    Py_ssize_t ob_refcnt;           /* reference count (per the field name) */
    struct _typeobject *ob_type;    /* pointer to the object's type struct */
    double ob_fval;                 /* the C double holding the float's value */
} PyFloatObject;

In [28]:
import sys
# Size in bytes of a Python float object, as reported by the interpreter.
sys.getsizeof(1.0)

16

In [29]:
%%cython -a
# No type declarations: a, b and c are ordinary Python objects, so the
# addition goes through the Python object protocol.
a = 1.0
b = 2.0
c = a + b

### 使用cdef關鍵字宣告變數型態

> **WARNING**

> 請注意這裡使用`cdef`定義的三個全局變數為C語系的全局變數，並不能在Python中透過編譯之後的延伸模組取得它們的值。

In [16]:
%%cython -a
# Same arithmetic as the untyped cell, but a, b and c are declared with
# cdef as C doubles at module level.
cdef double a = 1.0
cdef double b = 2.0
cdef double c = a + b

In [27]:
%%cython -a

# s is a C double; a has no declaration and is therefore a Python object.
cdef double s = 0
a = 3.0
# ❶ adds the Python object a to the C double s without an explicit cast.
s = s + a #❶
# ❷ casts a to a C double with <double> before adding.
s = s + <double>a #❷

In [22]:
%%cython -a
# Typed variant: clist is declared as a list and cindex as a C int.
cdef list clist = [1000, 2, 3]
cdef int cindex = 0
clist[cindex] #❶

# Untyped variant: pylist and pyindex are plain Python objects.
pylist = [1000, 2, 3]
pyindex = 0
pylist[pyindex] #❷

### 使用def定義函數

In [18]:
%%cython -a

def py_square_add(double x, double y):
    """Return ``x*x + y*y``; both arguments are converted to C doubles on entry."""
    cdef double x_sq = x * x
    cdef double y_sq = y * y
    return x_sq + y_sq

In [25]:
%%cython -a

def sum_list(list alist):
    """Return the sum of the items of ``alist`` as a float.

    Each item is cast to a C double before it is added, and the running
    total is kept in a C double.
    """
    cdef double total = 0
    cdef int idx = 0
    cdef Py_ssize_t count = len(alist)
    for idx in range(count):
        total += <double>alist[idx]
    return total

### 使用cdef定義C語系函數

> **TIP**

> 如果不宣告`cdef`函數的傳回值型態，則其型態為Python物件。

In [23]:
%%cython -a
cdef double c_square_add(double x, double y):
    # C-level function: takes two C doubles and returns a C double.
    cdef double x_sq = x * x
    cdef double y_sq = y * y
    return x_sq + y_sq

# Module-level C double initialised by calling the C function.
cdef double a = c_square_add(1.0, 2.0)

In [24]:
%%cython -a
cpdef double cp_square_add(double x, double y):
    """Return ``x*x + y*y`` as a C double; declared with ``cpdef``."""
    cdef double x_sq = x * x
    cdef double y_sq = y * y
    return x_sq + y_sq

# Call the cpdef function from module-level code.
cp_square_add(1.0, 2.0)