# Description

Times and profiles Clustermatch (the `ccc` function from `ccc.coef`) on a dataset with a larger number of samples (10 genes × 30,000 samples).

# Use only one CPU core

In [1]:
%env CM_N_JOBS=1
%env NUMBA_NUM_THREADS=1
%env MKL_NUM_THREADS=1
%env OPEN_BLAS_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
%env OMP_NUM_THREADS=1

env: CM_N_JOBS=1
env: NUMBA_NUM_THREADS=1
env: MKL_NUM_THREADS=1
env: OPEN_BLAS_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: OMP_NUM_THREADS=1


# Remove pycache dir

In [2]:
# Show the directory that the cleanup commands below operate on.
!echo ${CODE_DIR}

/opt/code


In [3]:
# List every path under CODE_DIR that ends in `__pycache__` (before removal).
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print

/opt/code/libs/ccc/coef/__pycache__
/opt/code/libs/ccc/pytorch/__pycache__
/opt/code/libs/ccc/scipy/__pycache__
/opt/code/libs/ccc/utils/__pycache__
/opt/code/libs/ccc/__pycache__
/opt/code/libs/ccc/sklearn/__pycache__


In [4]:
# Delete every `__pycache__` path found under CODE_DIR. `-prune` stops find
# from descending into a matched directory that `rm -rf` is about to remove.
# NOTE(review): if CODE_DIR were unset, find would presumably fall back to the
# current directory -- confirm the variable is always defined in this environment.
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \;

In [5]:
# Repeat the listing; no output means no `__pycache__` paths remain.
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print

# Modules

In [6]:
import numpy as np

from ccc.coef import ccc

In [7]:
# let numba compile all the code before profiling
# NOTE(review): this warm-up calls ccc with two 1-D vectors and default
# arguments, whereas the benchmarked calls below pass a 2-D matrix together
# with `internal_n_clusters` and `use_ari_numba=True`; confirm that it
# triggers compilation of the same code paths.
ccc(np.random.rand(10), np.random.rand(10))

0.0

# Data

In [8]:
# Dimensions of the synthetic dataset: rows are genes, columns are samples.
n_genes = 10
n_samples = 30000

In [9]:
# Fix NumPy's global seed so the random matrix generated next is reproducible.
np.random.seed(0)

In [10]:
# Synthetic input: uniform random values arranged as (n_genes, n_samples).
data = np.random.rand(n_genes, n_samples)

In [11]:
# Display the shape to confirm it is (n_genes, n_samples).
data.shape

(10, 30000)

# With default `internal_n_clusters`

In [12]:
def func():
    """Run `ccc` on the global `data` matrix with internal cluster counts 2..10.

    Returns whatever `ccc` returns for that call.

    NOTE: this notebook defines `func` again further down with a smaller
    cluster range. Re-run this cell before re-running the timing/profiling
    cells that immediately follow it, otherwise they measure the later
    definition.
    """
    cluster_counts = [*range(2, 11)]
    return ccc(data, use_ari_numba=True, internal_n_clusters=cluster_counts)

In [13]:
%%timeit func()
func()

6.02 s ± 4.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%prun -s cumulative -l 50 -T 11-cm_many_samples-default_internal_n_clusters.txt
# Profile a single call: sort by cumulative time (-s), limit the listing to
# 50 lines (-l) and save the report to the text file given with -T.
func()

 
*** Profile printout saved to text file '11-cm_many_samples-default_internal_n_clusters.txt'. 


# With reduced `internal_n_clusters`

In [15]:
def func():
    """Run `ccc` on the global `data` matrix with internal cluster counts 2..5.

    Returns whatever `ccc` returns for that call.

    NOTE: this replaces the `func` defined earlier in the notebook (which uses
    cluster counts 2..10); the timing/profiling cells that follow measure this
    reduced configuration.
    """
    cluster_counts = [*range(2, 6)]
    return ccc(data, use_ari_numba=True, internal_n_clusters=cluster_counts)

In [16]:
%%timeit func()
func()

857 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%prun -s cumulative -l 50 -T 11-cm_many_samples-less_internal_n_clusters.txt
# Profile a single call: sort by cumulative time (-s), limit the listing to
# 50 lines (-l) and save the report to the text file given with -T.
func()

 
*** Profile printout saved to text file '11-cm_many_samples-less_internal_n_clusters.txt'. 
