In [1]:
import numpy as np

In [2]:
def euclidean_broadcast(x, y):
    """Euclidean square distance matrix.
    
    Inputs:
    x: (N, m) numpy array
    y: (N, m) numpy array
    
    Ouput:
    (N, N) Euclidean square distance matrix:
    r_ij = x_ij^2 - y_ij^2
    """
    diff = x[:, np.newaxis, :] - y[np.newaxis, :, :]

    return (diff * diff).sum(axis=2)

In [3]:
def euclidean_trick(x, y):
    """Euclidean square distance matrix.
    
    Inputs:
    x: (N, m) numpy array
    y: (N, m) numpy array
    
    Ouput:
    (N, N) Euclidean square distance matrix:
    r_ij = x_ij^2 - y_ij^2
    """
    x2 = np.einsum('ij,ij->i', x, x)[:, np.newaxis]
    y2 = np.einsum('ij,ij->i', y, y)[np.newaxis, :]

    xy = np.dot(x, y.T)

    return np.abs(x2 + y2 - 2. * xy)

In [4]:
nsamples = 2000
nfeat = 50

x = 10. * np.random.random([nsamples, nfeat])

%timeit euclidean_broadcast(x, x)
%timeit euclidean_trick(x, x)

1.06 s ± 8.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
51.2 ms ± 34.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## `linear_profiler`

In [5]:
%load_ext line_profiler

In [6]:
%lprun?

[0;31mDocstring:[0m
Execute a statement under the line-by-line profiler from the
line_profiler module.

Usage:
  %lprun -f func1 -f func2 <statement>

The given statement (which doesn't require quote marks) is run via the
LineProfiler. Profiling is enabled for the functions specified by the -f
options. The statistics will be shown side-by-side with the code through the
pager once the statement has completed.

Options:

-f <function>: LineProfiler only profiles functions and methods it is told
to profile.  This option tells the profiler about these functions. Multiple
-f options may be used. The argument may be any expression that gives
a Python function or method object. However, one must be careful to avoid
spaces that may confuse the option parser.

-m <module>: Get all the functions/methods in a module

One or more -f or -m options are required to get any useful results.

-D <filename>: dump the raw statistics out to a pickle file on disk. The
usual extension for this is ".lprof".

In [7]:
%lprun -f euclidean_trick euclidean_trick(x, x)

Timer unit: 1e-06 s

Total time: 0.049864 s
File: <ipython-input-3-40126e716187>
Function: euclidean_trick at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def euclidean_trick(x, y):
     2                                               """Euclidean square distance matrix.
     3                                               
     4                                               Inputs:
     5                                               x: (N, m) numpy array
     6                                               y: (N, m) numpy array
     7                                               
     8                                               Ouput:
     9                                               (N, N) Euclidean square distance matrix:
    10                                               r_ij = x_ij^2 - y_ij^2
    11                                               """
    12         1        187.0    187.0      0.4

In [8]:
%lprun -f euclidean_broadcast euclidean_broadcast(x,x)

Timer unit: 1e-06 s

Total time: 1.02312 s
File: <ipython-input-2-d2c447ba8ccb>
Function: euclidean_broadcast at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def euclidean_broadcast(x, y):
     2                                               """Euclidean square distance matrix.
     3                                               
     4                                               Inputs:
     5                                               x: (N, m) numpy array
     6                                               y: (N, m) numpy array
     7                                               
     8                                               Ouput:
     9                                               (N, N) Euclidean square distance matrix:
    10                                               r_ij = x_ij^2 - y_ij^2
    11                                               """
    12         1     726577.0 726577.0  

## cProfiler

In [13]:
%prun euclidean_trick(x, x)

 

         29 function calls in 0.054 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    0.030    0.010    0.030    0.010 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.021    0.021    0.051    0.051 <ipython-input-3-40126e716187>:1(euclidean_trick)
        1    0.003    0.003    0.054    0.054 <string>:1(<module>)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core._multiarray_umath.c_einsum}
        1    0.000    0.000    0.054    0.054 {built-in method builtins.exec}
        2    0.000    0.000    0.000    0.000 einsumfunc.py:1004(einsum)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(einsum)
       10    0.000    0.000    0.000    0.000 einsumfunc.py:995(_einsum_dispatcher)
        1    0.000    0.000    0.030    0.030 <__array_function__ internals>:2(dot)
        2    0.000    0.000    0.000    0.000 {method 'get' of '

In [8]:
%load_ext memory_profiler

In [9]:
%mprun?

[0;31mDocstring:[0m
Execute a statement under the line-by-line memory profiler from the
memory_profiler module.

Usage, in line mode:
  %mprun -f func1 -f func2 <statement>

Usage, in cell mode:
  %%mprun -f func1 -f func2 [statement]
  code...
  code...

In cell mode, the additional code lines are appended to the (possibly
empty) statement in the first line. Cell mode allows you to easily
profile multiline blocks without having to put them in a separate
function.

The given statement (which doesn't require quote marks) is run via the
LineProfiler. Profiling is enabled for the functions specified by the -f
options. The statistics will be shown side-by-side with the code through
the pager once the statement has completed.

Options:

-f <function>: LineProfiler only profiles functions and methods it is told
to profile.  This option tells the profiler about these functions. Multiple
-f options may be used. The argument may be any expression that gives
a Python function or method object.

In [10]:
%mprun -f euclidean_broadcast euclidean_broadcast(x,x)

ERROR: Could not find file <ipython-input-2-d2c447ba8ccb>
NOTE: %mprun can only be used on functions defined in physical files, and not in the IPython environment.





In [11]:
! which python

/apps/daint/UES/6.0.UP04/sandboxes/sarafael/miniconda-pythonhpc/bin/python
