In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%load_ext Cython
%timeit

import cython
import numpy as np
import matplotlib
matplotlib.style.use('ggplot')
import pandas


In [None]:
cython.__version__

# Example 1

In [None]:
def vectorized_sum_allocating_array(n=1_000_000):
    x = np.arange(n)
    return np.sum(x)

In [None]:
vectorized_sum_allocating_array(n=1_000_000)

In [None]:
%timeit vectorized_sum_allocating_array(n=1_000_000)

In [None]:
def sum_up_to_n(n=1_000_000):
    s = 0
    for i in range(n):
        s +=i
    return s

In [None]:
%timeit sum_up_to_n(1_000_000)

In [None]:
%%cython 

cpdef long sum_up_to_n_cython(long n):
    cdef:
        long s = 0
        long i

    for i in range(n):
        s +=i
    return s

In [None]:
%timeit sum_up_to_n_cython(1_000_000)

In [None]:
%timeit sum_up_to_n_cython(100_000_000)

In [None]:
sum_up_to_n_cython(100_000_000)

#### Pure python mode

In [None]:

def sum_up_to_n_cython_pure_python(n: cython.long):
    s: cython.long = 0
    n: cython.long 
    i: cython.long 
                
    for i in range(n):
        s +=i
    return s

In [None]:
sum_up_to_n_cython_pure_python(1_000_000)

In [None]:
py_time = %timeit -o sum_up_to_n_cython_pure_python(1_000_000)

In [None]:
%%cython

import cython

def sum_up_to_n_cython_pure_python(n: cython.long):
    s: cython.long = 0
    n: cython.long 
    i: cython.long 
        
    for i in range(n):
        s +=i
    return s

In [None]:
sum_up_to_n_cython_pure_python(1_000_000)

In [None]:
cy_time = %timeit -o sum_up_to_n_cython_pure_python(1_000_000)

Another way to do it

In [None]:
%%cython 
import cython

@cython.returns(cython.long)
@cython.locals(s=cython.long, i=cython.long, n=cython.long)
def sum_up_to_n_cython_pure_python(n):
    s = 0
    for i in range(n):
        s +=i
    return s

In [None]:
sum_up_to_n_cython_pure_python(1_000_000)

In [None]:
cy_time2 = %timeit -o sum_up_to_n_cython_pure_python(1_000_000)

In [None]:
py_time.average / cy_time.average 

In [None]:
py_time.average, cy_time.average 

In [None]:
py_time.average/ cy_time.average 

## Example 2

In [None]:
n_samples = 1_000_000
n_queries = 1
n_features = 10

query_vector = np.random.random((1, n_features))
X = np.random.random((n_samples,n_features))
X.shape, query_vector.shape

In [None]:
def euclidean_naive(x,B):
    return np.sqrt(np.sum((x-B)**2,axis=1))

In [None]:
%timeit euclidean_naive(query_vectors, X)

In [None]:
euclidean_naive(query_vectors, X)

In [None]:
xB = np.empty(X.shape)

def euclidean(x,B):
    result = np.empty(B.shape[0])
    np.subtract(x,B,out=xB)
    np.square(xB,out=xB)
    np.sum(xB,axis=1,out=result)
    np.sqrt(result,out=result)
    return result

In [None]:
%timeit euclidean_vectorized(query_vector, X, xB)

In [None]:
?np.sum

In [None]:
%%cython 
import numpy as np
import cython 
from libc.math cimport pow, sqrt

@cython.boundscheck(False)  # Deactivate bounds checking
def cy_euclidean(double[:,:] q,double[:,:] X):
    cdef int n_samples = X.shape[0]
    cdef int n_features = q.shape[1]
    cdef double res=0
    cdef double[:] result = np.zeros(len(X), dtype="double")

    for m in range(n_samples):
        res = 0.
        for i in range(n_features):
            res += pow(q[0,i]- X[m, i],2)
        result[m] = sqrt(res)
    
    return np.array(result)

In [None]:
cy_time = %timeit -o cy_euclidean(query_vectors, X)

In [None]:
py_distances = euclidean_naive(query_vectors, X)
cy_distances = cy_euclidean(query_vectors, X)
np.testing.assert_allclose(py_distances,cy_distances, rtol=1e-6, atol=0.00)

## Example 3

In [59]:
import scipy.sparse as sp
import numpy as np
np.random.seed(123)
n_features =1000
n_samples = 100

X = sp.random(n_samples, n_features, density=0.01, format='csr')

In [60]:
X.nnz

1000

In [61]:
%%timeit
s = sp.csr_matrix(X.sum(axis=0, dtype=np.int32))

156 µs ± 1.2 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [62]:
s = sp.csr_matrix(X.sum(axis=0, dtype=np.int32))
s.nnz, s.shape

(170, (1, 1000))

In [80]:
def efficient_mean_over_rows(X_sparse):
    new_data = []
    unique_indices = np.unique(X_sparse.indices)
    for k in unique_indices:
        val = X_sparse.data[X_sparse.indices==k].sum()
        new_data.append(val)
        
    X_sum = sp.csr_matrix((new_data, unique_indices, [0,len(new_data)]))
    return X_sum

In [64]:
t1 = %timeit -o efficient_mean_over_rows(X)

2.71 ms ± 34.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [65]:
t2 = %timeit -o sp.csr_matrix(X.sum(axis=0, dtype=np.int32))

156 µs ± 1.73 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [74]:
t2.average / t1.average

0.05769121182082981

In [81]:
r1 = sp.csr_matrix(X.sum(axis=0, dtype=np.int32))
r2 = efficient_mean_over_rows(X)

In [82]:
r1

<1x1000 sparse matrix of type '<class 'numpy.int32'>'
	with 170 stored elements in Compressed Sparse Row format>

In [83]:
r2

<1x999 sparse matrix of type '<class 'numpy.float64'>'
	with 632 stored elements in Compressed Sparse Row format>

In [204]:
aux = sp.csr_matrix([[1,0,0,1,0,0,0,0,4],[3,0,0,2,0,0,0,0,1]])
aux.toarray()

array([[1, 0, 0, 1, 0, 0, 0, 0, 4],
       [3, 0, 0, 2, 0, 0, 0, 0, 1]])

In [205]:
aux.data

array([1, 1, 4, 3, 2, 1])

In [206]:
aux.indices

array([0, 3, 8, 0, 3, 8], dtype=int32)

In [207]:
aux.indptr

array([0, 3, 6], dtype=int32)

Note that the resulting matrix of aggregating the nonzero values squashing all rows into one will have at index K the sum of all values in `.data` that have index K.

- For index 0 the result will be adding 1+3
- For index 3 the result will be 2
-For index 8 the result will be 5

In [104]:
unique_indices = np.unique(aux.indices)
unique_indices

array([0, 3, 8], dtype=int32)

In [105]:
new_data = []
for k in unique_indices:
    val = aux.data[aux.indices==k].sum()
    new_data.append(val)

In [106]:
new_data

[4, 2, 5]

In [107]:
aux_mean = sp.csr_matrix((new_data, unique_indices, [0,len(new_data)]))
aux_mean

<1x9 sparse matrix of type '<class 'numpy.int64'>'
	with 3 stored elements in Compressed Sparse Row format>

In [117]:
aux_mean.todense()

matrix([[4, 0, 0, 2, 0, 0, 0, 0, 5]])

In [192]:
from collections import defaultdict
def efficient_mean_over_rows(X_sparse):
    
    s = defaultdict(int)

    for i,d in zip(X_sparse.indices, X_sparse.data):
        s[i] += d
        
    new_data = list(s.values())
    
    X_sum = sp.csr_matrix((new_data, list(s.keys()), [0,len(new_data)]))
    return X_sum

In [147]:
t1 = %timeit -o efficient_mean_over_rows(X)

354 µs ± 853 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [151]:
t2 = %timeit -o sp.csr_matrix(X.sum(axis=0))

170 µs ± 883 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [171]:
efficient_mean_over_rows(X)

<1x999 sparse matrix of type '<class 'numpy.float64'>'
	with 632 stored elements in Compressed Sparse Row format>

In [172]:
sp.csr_matrix(X.sum(axis=0))

<1x1000 sparse matrix of type '<class 'numpy.float64'>'
	with 632 stored elements in Compressed Sparse Row format>

In [165]:
sp.csr_matrix(aux.sum(axis=0))

<1x9 sparse matrix of type '<class 'numpy.int64'>'
	with 3 stored elements in Compressed Sparse Row format>

In [166]:
efficient_mean_over_rows(aux)

<1x9 sparse matrix of type '<class 'numpy.int64'>'
	with 3 stored elements in Compressed Sparse Row format>

In [220]:
from collections import defaultdict
def efficient_mean_over_rows2(X_sparse):
    
    s = defaultdict(int)
    indices = X_sparse.indices
    data = X_sparse.data
    
    unique_data_indices = np.unique(indices)
    new_data = np.zeros(len(unique_data_indices))
    index_to_pos = {index:k for k,index in enumerate(unique_data_indices)}
    
    for i,d in zip(indices, data):
        new_data[index_to_pos[i]] += d 
        
    
    X_sum = sp.csr_matrix((new_data, unique_data_indices, [0,len(new_data)]))
    return X_sum

In [221]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [223]:
%timeit efficient_mean_over_rows2(X)

383 µs ± 3.52 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Example 4

In [None]:
ids = [str(i) for i in range(1_000_000)]
q = '900000'

In [None]:
ids.index(q)

In [None]:
%%cython -a
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef int index_cython(list l, str q):
    cdef:
        int pos = 0, k = 0
        int n_l = len(l)
        str u

    for k in range(n_l):
        u = l[k]
        if u == q:
            return pos
        else:
            pos = pos +1
    raise ValueError

In [None]:
%timeit index_cython(ids,q)

In [None]:
%timeit ids.index(q)

## Example 4

In [None]:
def dart():
    x,y = np.random.random(), np.random.random()
    return (x*x + y*y <= 1)

def pi(n):
    dart_counts = sum((dart() for _ in range(n)))
    return dart_counts*4/n

In [None]:
N = 100_000
py_time = %timeit -o pi(N)

In [None]:
def np_pi(n):
    x = np.random.random(n)
    y = np.random.random(n)
    dart_counts = (x*x + y*y <= 1).sum()
    
    return dart_counts*4/n

In [None]:
np_time = %timeit -o np_pi(N)

In [None]:
print(f'vectorized version is {round(py_time.average /np_time.average,1)}x faster')

In [None]:
%%cython -a
import numpy as np
import cython 

#@cython.boundscheck(False)  # Deactivate bounds checking
cpdef float cy_pi(int n):
    cdef:
        int counter = 0
        int flag
        float x, y

    for i in range(n):
        x = np.random.random()
        y = np.random.random()
        counter += x*x + y*y <= 1
        
    return counter * 4 / n

In [None]:
cy_time = %timeit -o cy_pi(N)

In [None]:
print(f'vectorized version is {round(py_time.average /np_time.average,1)}x faster')

In [None]:
%%cython -a
import numpy as np
import cython 
from libc.stdlib cimport rand, RAND_MAX

@cython.cdivision(True)
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate bounds checking
cpdef float cy_pi(n: cython.int ):
    counter: cython.int  
    flag: cython.int 
    x: cython.float
    y: cython.float
    i: cython.int 
        
    for i in range(n):
        x = rand() / RAND_MAX 
        y = rand() / RAND_MAX 
        counter += x*x + y*y <= 1
        
    return counter * 4 / n



In [None]:
cy_time = %timeit -o cy_pi(N)

In [None]:
%%cython -a
import numpy as np
import cython 
from cython.parallel import prange
from libc.stdlib cimport rand, RAND_MAX

@cython.cdivision(True)
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate bounds checking
cpdef float cy_par_pi(n: cython.int ):
    counter: cython.int  
    flag: cython.int 
    i: cython.int
    counter = 0
    
    cdef double[:] x=np.random.random(n)
    cdef double[:] y=np.random.random(n)

    for i in range(n):
        x_ = x[i]
        y_ = y[i]
        counter += x_*x_ + y_*y_ <= 1
        
    return counter * 4 / n

In [None]:
cy_time = %timeit -o cy_par_pi(N)