# Description

Compares the runtime of two ccc implementations: one that precomputes the internal clusterings (`precompute_parts=True`), and the original one, which does not perform this precomputation (`precompute_parts=False`).

# Modules

In [1]:
import numpy as np

from ccc.coef import ccc

# Data

In [2]:
# Dimensions of the synthetic input matrix (genes as rows, samples as columns).
n_genes = 100
n_samples = 1000

In [3]:
# Seed NumPy's global RNG so the random input matrix is the same on every run.
np.random.seed(0)

In [4]:
# Synthetic benchmark input: uniform random values in [0, 1), shape (n_genes, n_samples).
data = np.random.rand(n_genes, n_samples)

In [5]:
# Sanity check: the shape should be (n_genes, n_samples).
data.shape

(100, 1000)

# Improved implementation (`precompute_parts=True`)

In [6]:
def func():
    """Benchmark target: call ccc on the global `data` with `precompute_parts=True`.

    `internal_n_clusters` is passed the integers 2 through 10 (inclusive).
    Returns whatever `ccc` returns.
    """
    k_values = range(2, 11)
    return ccc(data, internal_n_clusters=k_values, precompute_parts=True)

In [7]:
%%timeit func()
# The statement on the %%timeit line is setup code: it runs func() once, untimed,
# before the measurements (presumably as a warm-up). Only the cell body below is timed.
func()

49.4 s ± 98.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%prun -s cumulative -l 20 -T 05-cm_precompute_parts_true.txt
# Profile a single call: sort by cumulative time (-s), limit the report to 20 lines (-l),
# and save a text copy of the report to the given file (-T).
func()

 
*** Profile printout saved to text file '05-cm_precompute_parts_true.txt'. 


# Original implementation (`precompute_parts=False`)

In [9]:
def func():
    """Benchmark target: call ccc on the global `data` with `precompute_parts=False`.

    `internal_n_clusters` is passed the integers 2 through 10 (inclusive).
    Returns whatever `ccc` returns.

    Note: this intentionally reuses the name `func`, replacing the definition
    from the `precompute_parts=True` section for all cells that follow.
    """
    k_values = range(2, 11)
    return ccc(data, internal_n_clusters=k_values, precompute_parts=False)

In [10]:
%%timeit func()
# The statement on the %%timeit line is setup code: it runs func() once, untimed,
# before the measurements (presumably as a warm-up). Only the cell body below is timed.
func()

1min 42s ± 158 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%prun -s cumulative -l 20 -T 05-cm_precompute_parts_false.txt
# Profile a single call: sort by cumulative time (-s), limit the report to 20 lines (-l),
# and save a text copy of the report to the given file (-T).
func()

 
*** Profile printout saved to text file '05-cm_precompute_parts_false.txt'. 
