# Description

Generates a distribution of p-values under the null hypothesis of no association.

This notebook runs CCC on one gene pair at a time (the pairs are distributed across workers) and also parallelizes the permutations within each CCC call.

# Modules loading

In [1]:
import numpy as np
from joblib import Parallel, delayed

from ccc.coef import ccc
from ccc import conf

# Settings

In [2]:
# Fixed-seed generator so the random input matrix is reproducible across runs.
rs = np.random.RandomState(seed=0)

In [3]:
# Number of outer joblib workers (one gene pair per task). Half of the
# configured job count is used -- presumably because every CCC call spawns
# PVALUE_N_JOBS (= 2) workers of its own; confirm if PVALUE_N_JOBS changes.
# The value is clamped to 1 because the integer division yields 0 when the
# configured count is 1, and joblib rejects n_jobs=0.
N_JOBS = max(1, conf.GENERAL["N_JOBS"] // 2)
display(N_JOBS)

# Number of workers passed to each CCC call (see ccc_single).
PVALUE_N_JOBS = 2
display(PVALUE_N_JOBS)

10

2

In [4]:
# Shape of the random input matrix: rows are the objects that get compared
# pairwise, columns are the features of each object.
DATA_N_OBJS = 100
DATA_N_FEATURES = 1000

# Number of permutations requested from CCC for each p-value.
PVALUE_N_PERMS = 1000

# Paths

In [5]:
# Directory that receives the arrays saved at the end of the notebook; it is
# created (including missing parents) if it does not exist yet.
OUTPUT_DIR = conf.RESULTS_DIR.joinpath("ccc_null-pvalues")
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [6]:
OUTPUT_DIR

PosixPath('/opt/data/results/ccc_null-pvalues')

# Generate random data

In [7]:
# Uniform random values in [0, 1): one row per object, one column per feature.
data = rs.random_sample((DATA_N_OBJS, DATA_N_FEATURES))

In [8]:
data.shape

(100, 1000)

# Run CCC

In [9]:
def ccc_single(x, y):
    """Run CCC on a single pair of vectors, requesting a permutation p-value.

    Parameters
    ----------
    x, y
        The two vectors to compare; they are forwarded to ``ccc`` unchanged.

    Returns
    -------
    Whatever ``ccc`` returns for the pair. In this notebook that is a
    ``(coefficient, p-value)`` tuple.
    """
    ccc_options = {
        "pvalue_n_perms": PVALUE_N_PERMS,
        "n_jobs": PVALUE_N_JOBS,
    }
    return ccc(x, y, **ccc_options)

In [10]:
# Run CCC on every unordered pair of rows (row_a < row_b), spreading the pairs
# over N_JOBS joblib workers. Results keep the order in which pairs are
# generated: (0, 1), (0, 2), ..., (1, 2), ...
results = Parallel(n_jobs=N_JOBS)(
    delayed(ccc_single)(data[row_a], data[row_b])
    for row_a in range(len(data) - 1)
    for row_b in range(row_a + 1, len(data))
)

In [11]:
# Sanity check: one result per unordered pair of objects, i.e. n * (n - 1) / 2.
# Floor division keeps the expected count an exact integer (the product of two
# consecutive integers is always even) instead of comparing against a float.
assert len(results) == DATA_N_OBJS * (DATA_N_OBJS - 1) // 2

In [12]:
results[0]

(0.0025468387096774193, 0.5474525474525475)

In [13]:
# First element of every result: the CCC coefficient of that pair.
cm_values = [pair_result[0] for pair_result in results]

In [14]:
# Second element of every result: the permutation p-value of that pair.
cm_pvalues = [pair_result[1] for pair_result in results]

In [15]:
# Sanity check: coefficients and p-values line up one-to-one, and there is one
# entry per unordered pair of objects. Floor division keeps the expected count
# an exact integer instead of comparing against a float.
assert len(cm_values) == len(cm_pvalues) == DATA_N_OBJS * (DATA_N_OBJS - 1) // 2

In [16]:
# Convert both Python lists to NumPy arrays so they can be saved with np.save.
cm_values, cm_pvalues = np.array(cm_values), np.array(cm_pvalues)

In [17]:
cm_values.shape

(4950,)

In [18]:
cm_values

array([0.00254684, 0.00104179, 0.00320558, ..., 0.00426304, 0.00043899,
       0.00321309])

In [19]:
cm_pvalues.shape

(4950,)

In [20]:
cm_pvalues

array([0.54745255, 0.92607393, 0.35264735, ..., 0.14485514, 0.98101898,
       0.35064935])

# Save

In [21]:
# Write the CCC coefficients of all pairs to disk in NumPy's .npy format.
output_file = OUTPUT_DIR / "gene_pairs-cm_values.npy"
display(output_file)

np.save(file=output_file, arr=cm_values)

PosixPath('/opt/data/results/ccc_null-pvalues/gene_pairs-cm_values.npy')

In [22]:
# Write the permutation p-values of all pairs to disk in NumPy's .npy format.
output_file = OUTPUT_DIR / "gene_pairs-cm_pvalues.npy"
display(output_file)

np.save(file=output_file, arr=cm_pvalues)

PosixPath('/opt/data/results/ccc_null-pvalues/gene_pairs-cm_pvalues.npy')