# Description

Compares the runtime of two ccc implementations: one using the new, optimized adjusted Rand index (ARI) implemented with numba, and the other using the ARI from scikit-learn.

# Modules

In [1]:
import numpy as np

from ccc import coef

# Data

In [2]:
# Dimensions of the random benchmark matrix: rows (genes) x columns (samples).
n_genes, n_samples = 100, 1000

In [3]:
# Fix NumPy's global RNG seed so the random input data is the same on every run.
np.random.seed(0)

In [4]:
# Random values in [0, 1) with shape (n_genes, n_samples); input for both benchmarks.
data = np.random.rand(n_genes, n_samples)

In [5]:
data.shape

(100, 1000)

# Improved implementation (ARI implemented in numba)

In [6]:
def func():
    """Run ``coef.ccc`` on the notebook-level ``data`` array.

    The call uses internal cluster counts 2 through 10 (inclusive) and
    passes ``precompute_parts=True``. Returns whatever ``coef.ccc`` returns.
    """
    cluster_counts = range(2, 11)
    result = coef.ccc(
        data,
        internal_n_clusters=cluster_counts,
        precompute_parts=True,
    )
    return result

In [7]:
%%timeit func()
# In cell mode, the statement on the %%timeit line is setup code (executed but
# not timed); only the cell body below is timed.
# NOTE(review): the untimed setup call presumably warms up numba compilation — confirm.
func()

52.5 s ± 92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%prun -s cumulative -l 20 -T 04-cm_ari_numba.txt
# Profile a single call: sort stats by cumulative time (-s), limit the report
# to 20 lines (-l), and save the printout to a text file (-T).
func()

 
*** Profile printout saved to text file '04-cm_ari_numba.txt'. 


# Original implementation (ARI from sklearn)

In [9]:
from sklearn.metrics import adjusted_rand_score

In [10]:
# Monkey-patch: rebind the `ari` attribute of the `coef` module to scikit-learn's
# adjusted_rand_score, so the benchmark below exercises that ARI implementation.
# NOTE(review): assumes coef.ccc resolves `ari` at call time — confirm.
# The rebinding persists for the rest of the kernel session.
coef.ari = adjusted_rand_score

In [11]:
def func():
    """Run ``coef.ccc`` on the notebook-level ``data`` array.

    Same call as in the numba section (cluster counts 2 through 10 inclusive,
    ``precompute_parts=True``). By this point in the notebook ``coef.ari`` has
    been rebound to scikit-learn's ``adjusted_rand_score``.
    """
    ccc_options = dict(internal_n_clusters=range(2, 11), precompute_parts=True)
    return coef.ccc(data, **ccc_options)

In [12]:
%%timeit func()
# In cell mode, the statement on the %%timeit line is setup code (executed but
# not timed); only the cell body below is timed.
func()

5min 6s ± 499 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%prun -s cumulative -l 20 -T 04-cm_ari_sklearn.txt
# Profile a single call: sort stats by cumulative time (-s), limit the report
# to 20 lines (-l), and save the printout to a text file (-T).
func()

 
*** Profile printout saved to text file '04-cm_ari_sklearn.txt'. 
