# Description

Similar to `06`, but it calls `ccc` on each pair of genes separately instead of on the whole data matrix.

# Remove pycache dir

In [1]:
# show the value of CODE_DIR; the cleanup commands below operate on ${CODE_DIR}/libs
!echo ${CODE_DIR}

/opt/code


In [2]:
# list every path under libs whose name ends in __pycache__ (before removal)
!find ${CODE_DIR}/libs -regex '^.*\(__pycache__\)$' -print

/opt/code/libs/ccc/pytorch/__pycache__
/opt/code/libs/ccc/utils/__pycache__
/opt/code/libs/ccc/scipy/__pycache__
/opt/code/libs/ccc/sklearn/__pycache__
/opt/code/libs/ccc/__pycache__
/opt/code/libs/ccc/coef/__pycache__


In [3]:
# delete every matching __pycache__ directory under libs; -prune keeps find from
# descending into a directory that is about to be removed
!find ${CODE_DIR}/libs -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \;

In [4]:
# verify the cleanup: this should print nothing if all __pycache__ dirs were removed
!find ${CODE_DIR}/libs -regex '^.*\(__pycache__\)$' -print

# Modules

In [5]:
import numpy as np

from ccc.coef import ccc

In [6]:
# Warm-up call: run ccc once on tiny inputs so numba compiles the code here,
# keeping compilation time out of the timings and profiles below.
# NOTE(review): this runs before np.random.seed(0) is set, so the value
# displayed by this cell is not reproducible across runs.
ccc(np.random.rand(10), np.random.rand(10))

0.28

# Data

In [7]:
# dimensions of the random data matrix: few genes (rows), many samples (columns)
n_genes, n_samples = 10, 30000

In [8]:
# fix the global NumPy seed so the random data generated below is reproducible
np.random.seed(0)

In [9]:
# uniform random values in [0, 1); one row per gene, one column per sample
data = np.random.rand(n_genes, n_samples)

In [10]:
# sanity check: expected to be (n_genes, n_samples)
data.shape

(10, 30000)

# With default `internal_n_clusters`

In [11]:
def func():
    """
    Compute the CCC for every unique pair of rows (genes) of the global `data`.

    Each pair (i, j) with i < j is passed to `ccc` separately, using
    `internal_n_clusters` from 2 to 10 (inclusive).

    Returns:
        A 1D float array with one coefficient per pair, in row-major order of
        the upper triangle: (0, 1), (0, 2), ..., (n - 2, n - 1). Returning it
        (instead of discarding it) lets the benchmarked results be inspected
        or validated; callers that ignore the return value are unaffected.
    """
    n_rows = data.shape[0]

    # number of unordered pairs; integer division avoids a float round trip
    res = np.full((n_rows * (n_rows - 1)) // 2, np.nan)

    n_clust = list(range(2, 10 + 1))
    idx = 0
    for i in range(n_rows - 1):
        for j in range(i + 1, n_rows):
            res[idx] = ccc(data[i], data[j], internal_n_clusters=n_clust)
            idx += 1

    return res

In [12]:
%%timeit func()
# In cell mode, the `func()` on the magic line above is setup code (executed
# but not timed); only this cell body is timed.
func()

3.06 s ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%prun -s cumulative -l 50 -T 11-cm_many_samples-default_internal_n_clusters.txt
# profile one run of func(); stats are sorted by cumulative time, limited to
# 50 entries, and also saved to the text file given with -T
func()

 
*** Profile printout saved to text file '11-cm_many_samples-default_internal_n_clusters.txt'. 


# With reduced `internal_n_clusters`

In [14]:
def func():
    """
    Compute the CCC for every unique pair of rows (genes) of the global `data`,
    using a reduced range of `internal_n_clusters` (2 to 5, inclusive).

    Note: this intentionally redefines the `func` declared earlier in the
    notebook, so the timing/profiling cells that follow measure this variant.

    Returns:
        A 1D float array with one coefficient per pair, in row-major order of
        the upper triangle: (0, 1), (0, 2), ..., (n - 2, n - 1). Returning it
        (instead of discarding it) lets the benchmarked results be inspected
        or validated; callers that ignore the return value are unaffected.
    """
    n_rows = data.shape[0]

    # number of unordered pairs; integer division avoids a float round trip
    res = np.full((n_rows * (n_rows - 1)) // 2, np.nan)

    n_clust = list(range(2, 5 + 1))
    idx = 0
    for i in range(n_rows - 1):
        for j in range(i + 1, n_rows):
            res[idx] = ccc(data[i], data[j], internal_n_clusters=n_clust)
            idx += 1

    return res

In [15]:
%%timeit func()
# In cell mode, the `func()` on the magic line above is setup code (executed
# but not timed); only this cell body is timed.
func()

1.29 s ± 4.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%prun -s cumulative -l 50 -T 11-cm_many_samples-less_internal_n_clusters.txt
# profile one run of func(); stats are sorted by cumulative time, limited to
# 50 entries, and also saved to the text file given with -T
func()

 
*** Profile printout saved to text file '11-cm_many_samples-less_internal_n_clusters.txt'. 
