# Description

Generates a distribution of p-values under the null hypothesis of no association.

This notebook uses a data matrix as input for CCC and parallelizes computation across gene pairs.

# Loading modules

In [None]:
import numpy as np

from ccc.coef import ccc
from ccc import conf

# Settings

In [None]:
# Fixed seed so the random data matrix generated below is reproducible.
rs = np.random.RandomState(seed=0)

In [None]:
# Dimensions of the random data matrix (first axis, second axis).
DATA_N_OBJS = 100
DATA_N_FEATURES = 1000

# Value forwarded to ccc() as `pvalue_n_perms`.
PVALUE_N_PERMS = 1000

# Paths

In [None]:
# All artifacts of this notebook go under the project's results directory.
OUTPUT_DIR = conf.RESULTS_DIR / "ccc_null-pvalues"
# Create the directory (and any missing parents); do nothing if it already exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Echo the resolved output directory in the cell output.
OUTPUT_DIR

# Generate random data

In [None]:
# Uniform random values in [0, 1) drawn from the seeded generator; since the
# entries are independent, any association found between them is due to chance.
data = rs.random_sample(size=(DATA_N_OBJS, DATA_N_FEATURES))

In [None]:
# Sanity check: should be (DATA_N_OBJS, DATA_N_FEATURES).
data.shape

# Run CCC

In [None]:
# Run CCC on the whole data matrix in a single call.
# - n_jobs is read from the project configuration.
# - PVALUE_N_PERMS is forwarded as `pvalue_n_perms`
#   (presumably the number of permutations used per p-value; confirm
#   against the ccc documentation).
res = ccc(
    data,
    n_jobs=conf.GENERAL["N_JOBS"],
    pvalue_n_perms=PVALUE_N_PERMS,
)

In [None]:
# The result is unpacked as exactly two items, named here as the CCC values
# and their p-values; unpacking fails if ccc() returns any other number of items.
cm_values, cm_pvalues = res

In [None]:
# Inspect the shape of the CCC values before saving them.
cm_values.shape

In [None]:
# Inspect the shape of the p-values before saving them
# (presumably the same as cm_values.shape; verify in the output).
cm_pvalues.shape

# Save

In [None]:
# Persist the CCC values in NumPy's .npy format.
output_file = OUTPUT_DIR / "data_matrix-cm_values.npy"
display(output_file)  # show the destination path in the cell output

np.save(file=output_file, arr=cm_values)

In [None]:
# Persist the p-values in NumPy's .npy format.
output_file = OUTPUT_DIR / "data_matrix-cm_pvalues.npy"
display(output_file)  # show the destination path in the cell output

np.save(file=output_file, arr=cm_pvalues)