# Performance considerations

In [2]:
from time import clock
from timeit import timeit
import memory_profiler
%load_ext memory_profiler

import numpy as np
from scipy.linalg import eigh
from scipy.spatial.distance import squareform, pdist

import pandas as pd
from matplotlib.pyplot import plot, scatter
import seaborn as sns
%matplotlib inline

from sklearn.datasets import make_blobs
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import adjusted_mutual_info_score, accuracy_score, v_measure_score, normalized_mutual_info_score


In [3]:
from copac import COPAC

## Timeit

In [8]:
clstr = COPAC()
for n in [20, 100, 1000, 10000]:
    X = np.random.rand(n, 50)
    %timeit clstr.fit_predict(X)

21.9 ms ± 5.49 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
48.7 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
658 ms ± 31.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
42.1 s ± 3.85 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Memit

In [12]:
clstr = COPAC()
for n in range(1, 11):
    n = 1000 * n
    X = np.random.rand(n, 50)
    %memit clstr.fit_predict(X)

peak memory: 347.40 MiB, increment: 0.00 MiB
peak memory: 347.40 MiB, increment: 0.00 MiB
peak memory: 447.65 MiB, increment: 100.25 MiB
peak memory: 636.50 MiB, increment: 289.10 MiB
peak memory: 787.20 MiB, increment: 439.80 MiB
peak memory: 1155.91 MiB, increment: 802.97 MiB
peak memory: 1416.86 MiB, increment: 1063.93 MiB
peak memory: 1725.25 MiB, increment: 1372.31 MiB
peak memory: 2141.61 MiB, increment: 1788.67 MiB
peak memory: 2574.90 MiB, increment: 2221.97 MiB


## Profiling

Runtime for a matrix of shape (5000, 20): about 15 sec.
* initial k-NN: 1 sec.
* calculate correlation dimension: 4 sec.
* correlation distance triu: 4.4 sec.
* correlation distance tril: 4.6 sec.
* correlation distance rest: 0.7 sec. (incl. DBSCAN)

In [20]:
# Single profiling run on a (5000, 20) uniform random matrix.
# NOTE(review): relies on `clstr` created in an earlier cell.
X = np.random.random_sample((5000, 20))
# Kept as the cell's last expression so the returned array is displayed.
clstr.fit_predict(X)

init took 0.000 sec.
k-NN took 0.957 sec.
corr dim took 4.134 sec.
grouping took 0.001 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.002 sec.
triu took 0.076 sec.
tril took 0.082 sec.
... took 0.005 sec.
dbscan took 0.008 sec.
rest took 0.000 sec.
init took 0.249 sec.
triu took 4.363 sec.
tril took 4.568 sec.
... took 0.225 sec.
dbscan took 0.154 sec.
rest took 0.000 sec.
init took 0.003 sec.
triu took 0.002 sec.
tril took 0.001 sec.
... took 0.008 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec

array([-1, -1, -1, ..., -1, -1, -1])

Runtime for a matrix of shape (10000, 20): about 49 sec.
* initial k-NN: 4 sec.
* calculate correlation dimension: 10 sec.
* correlation distance triu: 14.6 sec.
* correlation distance tril: 18.0 sec.
* correlation distance rest: 1.6 sec. (incl. DBSCAN)

In [21]:
# Single profiling run on a (10000, 20) uniform random matrix.
# NOTE(review): relies on `clstr` created in an earlier cell.
X = np.random.random_sample((10000, 20))
# Kept as the cell's last expression so the returned array is displayed.
clstr.fit_predict(X)

init took 0.000 sec.
k-NN took 3.784 sec.
corr dim took 9.646 sec.
grouping took 0.002 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.000 sec.
triu took 0.000 sec.
tril took 0.000 sec.
... took 0.000 sec.
dbscan took 0.001 sec.
rest took 0.000 sec.
init took 0.011 sec.
triu took 0.356 sec.
tril took 0.382 sec.
... took 0.042 sec.
dbscan took 0.029 sec.
rest took 0.000 sec.
init took 0.789 sec.
triu took 14.611 sec.
tril took 17.959 sec.
... took 0.875 sec.
dbscan took 0.628 sec.
rest took 0.000 sec.
init took 0.028 sec.
triu took 0.014 sec.
tril took 0.009 sec.
... took 0.096 sec.
dbscan took 0.003 sec.
rest took 0.000 sec.
init took 0.000 s

array([-1, -1, -1, ..., -1, -1, -1])

Performance is reasonable. Most of the runtime is spent computing the correlation distance (the triu and tril steps), followed by the correlation dimension and the initial k-NN search.