# Playing with `numexpr`

In [1]:
# import os
# os.environ['NUMEXPR_NUM_THREADS'] = '12'
import numpy as np
import numexpr as ne

In [2]:
# Random (5000, 50) array of floats in [0, 1); shared input for the timing cells below.
x = np.random.random((5000, 50))

Check out the difference in timing between the next two equivalent numpy expressions. The main difference comes from the implementation of the array's `__pow__` method, which tends to be slower than the equivalent multiplications.

In [3]:
%timeit ((x + 1.) * x + 1.) * x + 1.

729 µs ± 634 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [4]:
%timeit x**3 + x**2 + x + 1.

15.9 ms ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Let's do the same using `numexpr`. While the cells run, check out its use of multiple threads with the `top` command!

In [5]:
%timeit ne.evaluate('((x + 1.) * x + 1.) * x + 1.')

139 µs ± 4.19 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
%timeit ne.evaluate('x**3 + x**2 + x + 1.')

137 µs ± 609 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


Let's try other expressions. Notice that we replace `np.sin` with `sin` inside the expression string.

In [None]:
%timeit np.sin(x) + np.cos(x)

In [None]:
%timeit ne.evaluate('sin(x) + cos(x)')

Comparison operators such as `>` can be used inside the expression as well.

In [8]:
%timeit np.sin(x) + np.cos(x) > x

2.77 ms ± 32.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
%timeit ne.evaluate('sin(x) + cos(x) > x')

281 µs ± 2.01 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


We can see the configuration of `numexpr` with the following command:

In [10]:
ne.show_config()

mkl_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/apps/daint/UES/6.0.UP04/sandboxes/sarafael/miniconda-pythonhpc/lib']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/apps/daint/UES/6.0.UP04/sandboxes/sarafael/miniconda-pythonhpc/include']


# `euclidean_trick` with numexpr

<mark>Question</mark>  Modify the `euclidean_trick_numexpr` function in the next cell to use `numexpr`. Time it and check that its result is the same as that of `euclidean_trick`.

In [29]:
def euclidean_trick_numexpr(x, y):
    """Pairwise squared Euclidean distances between rows, using numexpr.

    Relies on the expansion
        ||x_i - y_j||^2 = ||x_i||^2 + ||y_j||^2 - 2 x_i . y_j
    The row norms and the final combination are evaluated by numexpr;
    the cross term is a regular numpy matrix product.

    Inputs:
    x: (N, m) numpy array
    y: (M, m) numpy array (M may equal N)

    Output:
    (N, M) matrix with r_ij = ||x_i - y_j||^2. The absolute value of the
    expansion is returned, so slightly negative round-off results come out
    positive.
    """
    # Squared norm of each row (same quantity as np.einsum('ij,ij->i', a, a)).
    x_row_norms = ne.evaluate('sum(x*x, axis=1)')
    y_row_norms = ne.evaluate('sum(y*y, axis=1)')

    # Cross term x_i . y_j for every pair of rows.
    xy = np.dot(x, y.T)

    # NOTE: x2, y2 and xy are referenced by name inside the expression string
    # below, so these locals must keep exactly these names.
    x2 = x_row_norms[:, None]  # column vector, shape (N, 1)
    y2 = y_row_norms[None, :]  # row vector, shape (1, M)

    return ne.evaluate('abs(x2 + y2 - 2. * xy)')

In [11]:
def euclidean_trick(x, y):
    """Pairwise squared Euclidean distances between the rows of `x` and `y`.

    Relies on the expansion
        ||x_i - y_j||^2 = ||x_i||^2 + ||y_j||^2 - 2 x_i . y_j
    so that the cross term is a single matrix product.

    Inputs:
    x: (N, m) numpy array
    y: (M, m) numpy array (M may equal N)

    Output:
    (N, M) matrix with r_ij = ||x_i - y_j||^2. The absolute value of the
    expansion is returned, so slightly negative round-off results come out
    positive.
    """
    # Squared norm of each row: sum_j a_ij * a_ij.
    x_sqnorm = np.einsum('ij,ij->i', x, x)
    y_sqnorm = np.einsum('ij,ij->i', y, y)

    # Cross term x_i . y_j for every pair of rows.
    cross = np.dot(x, y.T)

    # Column vector + row vector broadcasts to the full (N, M) grid.
    sq_dist = x_sqnorm[:, None] + y_sqnorm[None, :] - 2. * cross
    return np.abs(sq_dist)

In [12]:
# Problem size for the distance-matrix comparison.
nsamples = 6000  # number of rows (points)
nfeat = 50  # number of columns (features per point)

# Uniform random values scaled by 10; this rebinds the `x` used in the timing cells above.
x = 10. * np.random.random([nsamples, nfeat])

In [18]:
%lsmagic

Available line magics:
%alias  %alias_magic  %autoawait  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %conda  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %pip  %popd  %pprint  %precision  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%debug  %%file  %%html  %%javascript  %%js  %%latex  %%markdown  %%perl  %%prun  %%pypy  %%

In [27]:
%%time
y_slow = euclidean_trick(x, x)

CPU times: user 2.6 s, sys: 427 ms, total: 3.03 s
Wall time: 776 ms


In [30]:
%%time
y_fast = euclidean_trick_numexpr(x, x)

CPU times: user 2.54 s, sys: 314 ms, total: 2.86 s
Wall time: 396 ms


In [28]:
np.abs(y_fast - y_slow)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])