<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Todo:-think-vectorization" data-toc-modified-id="Todo:-think-vectorization-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Todo: think vectorization</a></span></li></ul></div>

In [8]:
import cython
import numpy as np
%load_ext cython

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [9]:
# Shapes for the pairwise Euclidean distance computation:
#   input  q_emb.shape                  : (19, 128)
#   input  self._embedding_matrix.shape : (52, 128)
#   output euclidean_dist.shape         : (19, 52)



In [14]:
# Reference computation: 19 query vectors against 52 stored vectors, 128-d each.
Q = np.random.random(size=(19, 128))
E = np.random.random(size=(52, 128))
# Result has shape (19, 52): entry [i, j] is the Euclidean distance d(Q[i], E[j]).
# Inserting a middle axis into Q lets it broadcast against every row of E,
# giving a (19, 52, 128) difference tensor that is reduced over its last axis.
euclidean_dist = np.linalg.norm(Q[:, np.newaxis, :] - E, axis=2)


In [37]:

def _get_ones(x, y):
    return np.ones((x, y))

def _ext_A(A):
    """Build the left-hand factor ``[1 | A | A**2]`` for the dot-product distance trick.

    For an input of shape ``(nA, dim)`` the result has shape ``(nA, 3 * dim)``:
    the first ``dim`` columns are ones, the middle ``dim`` columns are ``A``
    and the last ``dim`` columns are the element-wise squares of ``A``.
    """
    n_rows, n_cols = A.shape
    extended = _get_ones(n_rows, 3 * n_cols)
    # Columns [0, n_cols) keep their initial value of 1.
    linear = slice(n_cols, 2 * n_cols)
    squared = slice(2 * n_cols, None)
    extended[:, linear] = A
    extended[:, squared] = A ** 2
    return extended

def _ext_B(B):
    """Build the right-hand factor for the dot-product distance trick.

    For an input of shape ``(nB, dim)`` the result has shape ``(3 * dim, nB)``
    and is laid out, top to bottom, as::

        rows [0, dim)        -> (B ** 2).T
        rows [dim, 2*dim)    -> -2 * B.T
        rows [2*dim, 3*dim)  -> ones

    Multiplying ``_ext_A(A)`` (columns ``[1 | A | A**2]``) by this matrix
    therefore yields ``sum(B**2) - 2 * A.B + sum(A**2)`` for every pair of
    rows, i.e. the squared Euclidean distance.
    """
    nB, dim = B.shape
    B_ext = _get_ones(dim * 3, nB)
    B_ext[:dim] = (B ** 2).T
    B_ext[dim : 2 * dim] = -2.0 * B.T
    # The last `dim` rows keep their initial value of 1.
    # (A former `del B` here was dropped: it only unbound the local name right
    # before returning and released no memory, since the caller still holds B.)
    return B_ext

def _euclidean(A_ext, B_ext):
    sqdist = A_ext.dot(B_ext).clip(min=0)
    return np.sqrt(sqdist)

def euclidean( _query_vectors, raw_B):
    """Distances between already-extended queries and the raw rows of ``raw_B``.

    ``_query_vectors`` is passed to ``_euclidean`` unchanged, so it must already
    be in the layout produced by ``_ext_A``; ``raw_B`` is extended here.
    """
    return _euclidean(_query_vectors, _ext_B(raw_B))

def euclidean_vectorized(query_vectors, raw_B):
    """Pairwise Euclidean distances computed with a single matrix product.

    Extends the raw ``query_vectors`` with ``_ext_A`` and delegates to
    ``euclidean``, which extends ``raw_B`` and evaluates the distances.
    """
    return euclidean(_ext_A(query_vectors), raw_B)

def _norm(A):
    return A / np.linalg.norm(A, ord=2, axis=1, keepdims=True)

def _cosine(A_norm_ext, B_norm_ext):
    return A_norm_ext.dot(B_norm_ext).clip(min=0) / 2

def cosine( _query_vectors, raw_B):
    """Cosine distances between prepared queries and the raw rows of ``raw_B``.

    ``_query_vectors`` is used as-is, so it must already be L2-normalised and
    extended (``_ext_A(_norm(queries))``). ``raw_B`` is normalised row-wise and
    extended here.

    The rows must have unit length because ``_cosine`` evaluates
    ``||a - b||**2 / 2``, which equals ``1 - cos(a, b)`` only for unit vectors.
    Without the normalisation the result is half the squared Euclidean
    distance, not a cosine distance. Normalising rows that are already unit
    length leaves them unchanged, so pre-normalised input still works.

    A zero row in ``raw_B`` has no direction; ``_norm`` divides by zero for it.
    """
    data = _ext_B(_norm(raw_B))
    return _cosine(_query_vectors, data)

def cosine_vectorized( query_vectors, raw_B):
    """Pairwise cosine distances ``1 - cos(q, b)`` via a single matrix product.

    ``query_vectors`` has shape ``(n_queries, dim)`` and ``raw_B`` has shape
    ``(n_samples, dim)``; the result has shape ``(n_queries, n_samples)``.
    The queries are L2-normalised before being extended; ``cosine`` does the
    same for ``raw_B``.
    """
    _query_vectors = _ext_A(_norm(query_vectors))
    return cosine(_query_vectors, raw_B)


In [38]:
euclidean_dist.shape, euclidean_vectorized(Q , E).shape

((19, 52), (19, 52))

In [40]:
cosine_vectorized(Q , E).shape

(19, 52)

In [18]:
# Benchmark problem size: one query against a million 10-dimensional samples.
n_samples = 1000000
n_queries = 1
n_features = 10

# Uniform random data in [0, 1); the query array is drawn first, then the samples.
query_vectors = np.random.random(size=(n_queries, n_features))
X = np.random.random(size=(n_samples, n_features))

In [20]:
%%timeit
euclidean_vectorized(query_vectors, X)

153 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [98]:
# NOTE(review): `_` is IPython's "last displayed output" variable. This chained
# assignment copies whatever `_` currently holds into
# `timeit_timeit_euclidean_naive` and rebinds `_` to the same object.
# The doubled "timeit_timeit" prefix, and the fact that this cell sits before
# the definition of `euclidean_naive`, suggest it is a leftover from
# out-of-order editing. TODO: confirm and remove.
timeit_timeit_euclidean_naive = _ = _

In [99]:
def euclidean_naive(x,B):
    """Row-wise Euclidean distance between ``x`` and ``B`` using plain broadcasting.

    ``x - B`` is formed by NumPy broadcasting, squared element-wise, summed
    over axis 1, and the square root of each sum is returned.
    """
    diff = x - B
    squared_sums = (diff ** 2).sum(axis=1)
    return np.sqrt(squared_sums)

In [100]:
%%timeit 
euclidean_naive(query_vectors, X)

50.5 ms ± 2.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [92]:
# NOTE(review): relies on IPython's `_` (last displayed output). The `%%timeit`
# cell for `euclidean_naive` above was run without `-o`, so `_` is presumably
# not that cell's TimeitResult here. TODO: confirm; the later `cy_euclidean`
# timing uses `%%timeit -o` before capturing `_`.
timeit_euclidean_naive = _

In [101]:
# Sanity check: the broadcasting version and the matrix-product version must
# agree to a relative tolerance of 1e-6 (no absolute slack).
a1 = euclidean_naive(query_vectors, X)
# The vectorized result is 2-D (n_queries, n_samples); flatten it to compare.
a2 = euclidean_vectorized(query_vectors, X).flatten()
np.testing.assert_allclose(actual=a1, desired=a2, rtol=1e-6, atol=0.0)

In [102]:
%%cython -a
import numpy as np
import cython 
from libc.math cimport pow, sqrt

@cython.boundscheck(False)  # Deactivate bounds checking (shapes are validated below)
@cython.wraparound(False)   # Indices are never negative
def cy_euclidean(double[:,:] q,double[:,:] X):
    """Euclidean distance from the first row of ``q`` to every row of ``X``.

    Parameters
    ----------
    q : 2-D float64 buffer, shape (n_queries, n_features)
        Only row 0 is read; any further rows are ignored.
    X : 2-D float64 buffer, shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        ``result[m]`` is the distance between ``q[0]`` and ``X[m]``.

    Raises
    ------
    ValueError
        If ``q`` has no rows, or if ``X`` and ``q`` differ in column count.
    """
    # Py_ssize_t matches the type of memoryview shapes and avoids int truncation.
    cdef Py_ssize_t n_samples = X.shape[0]
    cdef Py_ssize_t n_features = q.shape[1]
    # Typed loop indices keep both loops in plain C.
    cdef Py_ssize_t m, i
    cdef double res = 0
    cdef double[:] result = np.zeros(n_samples, dtype="double")

    # Bounds checking is disabled, so a shape mismatch would otherwise read
    # past the end of a buffer instead of raising.
    if q.shape[0] < 1:
        raise ValueError("q must contain at least one query row")
    if X.shape[1] != n_features:
        raise ValueError("q and X must have the same number of columns")

    for m in range(n_samples):
        res = 0.
        for i in range(n_features):
            res += pow(q[0, i] - X[m, i], 2)
        result[m] = sqrt(res)

    # Wrap the freshly allocated buffer as an ndarray without copying it.
    return np.asarray(result)

In [103]:
cy_euclidean(query_vectors, X)

array([1.53791115, 1.1018229 , 1.56553229, ..., 1.41525492, 1.25051464,
       1.13014243])

In [104]:
%%timeit -o
cy_euclidean(query_vectors, X)

11.7 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


<TimeitResult : 11.7 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)>

In [105]:
# Keep the TimeitResult shown by the preceding `%%timeit -o` cell; `_` is
# IPython's last displayed output, so this only works when that cell has just
# been run immediately before this one.
timeit_cy_euclidean = _

In [106]:
timeit_cy_euclidean.average

0.011683433771429138

### Todo: think vectorization

In [323]:
%%cython -a -c=-DUSE_XSIMD -c=-march=native

import numpy as np
cimport cython 
from cython.parallel import prange

from libc.math cimport pow, sqrt
cimport numpy as np

@cython.boundscheck(False)  # shapes are validated below
@cython.wraparound(False)   # indices are never negative
def cy_euclidean3(double[:,:] q,double[:,:] X):
    """Euclidean distance from the first row of ``q`` to every row of ``X``.

    Squares each difference with a plain multiplication rather than a call to
    ``pow``.

    Parameters
    ----------
    q : 2-D float64 buffer, shape (n_queries, n_features)
        Only row 0 is read; any further rows are ignored.
    X : 2-D float64 buffer, shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        ``result[m]`` is the distance between ``q[0]`` and ``X[m]``.

    Raises
    ------
    ValueError
        If ``q`` has no rows, or if ``X`` and ``q`` differ in column count.
    """
    # Py_ssize_t matches the type of memoryview shapes and avoids int truncation.
    cdef Py_ssize_t n_samples = X.shape[0]
    cdef Py_ssize_t n_features = q.shape[1]
    # Typed loop indices keep both loops in plain C.
    cdef Py_ssize_t m, i
    cdef double partial = 0
    cdef double res = 0
    cdef double[:] result = np.zeros(n_samples, dtype="double")

    # Bounds checking is disabled, so a shape mismatch would otherwise read
    # past the end of a buffer instead of raising.
    if q.shape[0] < 1:
        raise ValueError("q must contain at least one query row")
    if X.shape[1] != n_features:
        raise ValueError("q and X must have the same number of columns")

    for m in range(n_samples):
        res = 0.

        for i in range(n_features):
            partial = q[0, i] - X[m, i]
            res = res + partial * partial
        result[m] = sqrt(res)

    # Wrap the freshly allocated buffer as an ndarray without copying it.
    return np.asarray(result)

In [312]:
cy_euclidean3(query_vectors, X)

array([3.1783345 , 2.46369614, 2.33293514, ..., 2.78822995, 2.97800318,
       2.31306567])

In [313]:
%%timeit
cy_euclidean3(query_vectors, X)

466 µs ± 33.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
