# Profiling

In [1]:
import numpy as np

In [2]:
def euclidean_broadcast(x, y):
    """Euclidean square distance matrix.
    
    Inputs:
    x: (N, m) numpy array
    y: (N, m) numpy array
    
    Ouput:
    (N, N) Euclidean square distance matrix:
    r_ij = (x_ij - y_ij)^2
    """
    diff = x[:, np.newaxis, :] - y[np.newaxis, :, :]

    return (diff * diff).sum(axis=2)

In [3]:
def euclidean_trick(x, y):
    """Euclidean square distance matrix.
    
    Inputs:
    x: (N, m) numpy array
    y: (N, m) numpy array
    
    Ouput:
    (N, N) Euclidean square distance matrix:
    r_ij = (x_ij - y_ij)^2
    """
    x2 = np.einsum('ij,ij->i', x, x)[:, np.newaxis]
    y2 = np.einsum('ij,ij->i', y, y)[np.newaxis, :]

    xy = x @ y.T

    return np.abs(x2 + y2 - 2. * xy)

In [4]:
nsamples = 2000
nfeat = 50

rng = np.random.default_rng()
x = 10. * rng.random((nsamples, nfeat))

## 1. `timeit`

In [5]:
%timeit euclidean_broadcast(x, x)
%timeit euclidean_trick(x, x)

1.71 s ± 4.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
71.4 ms ± 344 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 2. `line_profiler`
***
much more usefull from a user prospective !!
***

In [6]:
%load_ext line_profiler

In [7]:
%lprun -f euclidean_broadcast euclidean_broadcast(x,x)

Timer unit: 1e-09 s

Total time: 1.72591 s
File: /tmp/ipykernel_3727/3677175976.py
Function: euclidean_broadcast at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def euclidean_broadcast(x, y):
     2                                               """Euclidean square distance matrix.
     3                                               
     4                                               Inputs:
     5                                               x: (N, m) numpy array
     6                                               y: (N, m) numpy array
     7                                               
     8                                               Ouput:
     9                                               (N, N) Euclidean square distance matrix:
    10                                               r_ij = (x_ij - y_ij)^2
    11                                               """
    12         1  810187721.0 8101877

In [8]:
%lprun -f euclidean_trick euclidean_trick(x, x)

Timer unit: 1e-09 s

Total time: 0.0604921 s
File: /tmp/ipykernel_3727/2952558958.py
Function: euclidean_trick at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def euclidean_trick(x, y):
     2                                               """Euclidean square distance matrix.
     3                                               
     4                                               Inputs:
     5                                               x: (N, m) numpy array
     6                                               y: (N, m) numpy array
     7                                               
     8                                               Ouput:
     9                                               (N, N) Euclidean square distance matrix:
    10                                               r_ij = (x_ij - y_ij)^2
    11                                               """
    12         1     335442.0 335442.0     

## 3. `cProfile`
***
 Nice that it can be integrated tu jupyter notebook, use `snakeviz` for nice and thourough vizualusation
 ***

In [9]:
%prun -r euclidean_trick(x, x)

 

<pstats.Stats at 0x155548169d90>

         22 function calls in 0.074 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.071    0.071    0.071    0.071 2952558958.py:1(euclidean_trick)
        1    0.003    0.003    0.074    0.074 <string>:1(<module>)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core._multiarray_umath.c_einsum}
        1    0.000    0.000    0.074    0.074 {built-in method builtins.exec}
        2    0.000    0.000    0.001    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
        2    0.000    0.000    0.001    0.000 <__array_function__ internals>:177(einsum)
        2    0.000    0.000    0.001    0.000 einsumfunc.py:1009(einsum)
       10    0.000    0.000    0.000    0.000 einsumfunc.py:1001(_einsum_dispatcher)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}