In [1]:
cd ../..

/home/paperspace/time-varying-graphs


In [2]:
from pathlib import Path

import numpy as np
from omegaconf import OmegaConf
from scipy.sparse import load_npz
from sklearn.metrics import *
from tqdm.auto import tqdm

import stereo
from stereo.utils.results import *

# Short aliases for NumPy array/set helpers.
npi, npc, npu, npd = (
    np.intersect1d,  # intersection of two arrays
    np.concatenate,  # join a sequence of arrays
    np.unique,       # sorted unique elements
    np.setdiff1d,    # elements of the first array not in the second
)

In [18]:
# Model / dataset pair this notebook post-processes; both names are
# interpolated into the config, run-list and output paths further down.
model_name, dataset_name = 'TV-DBN', 'PBMC'

In [17]:
# Load the GRN config that matches `dataset_name` and build the dataset from it.
config_path = f'configs/dataset/GRN-{dataset_name}.yaml'
cfg = OmegaConf.load(config_path)
dataset = load_dataset(cfg)

# Names of the genes selected by the `gene_is_tf` mask
# (presumably the transcription factors -- confirm against the dataset class).
tf_mask = dataset.gene_is_tf
training_tfs = dataset.genes[tf_mask]

Keeping 2050 genes.


### Run seeds and IDs

In [24]:
# Run list recorded for this model/dataset pair. `runs` is iterated below as
# (seed, run) pairs. NOTE(review): 'DEG4' is passed straight to `load_run_ids`;
# its meaning is defined there -- confirm.
runs_file = f'results/runs/{dataset_name}/{model_name}.json'
runs = load_run_ids(runs_file, 'DEG4')

In [25]:
def _load_run_adjacencies(run):
    """Return the list of dense adjacency matrices saved for one run.

    Reads `A_0.npz` ... `A_{dataset.n_seq - 1}.npz` from the run's `adjs`
    directory, densifies each matrix and keeps only the columns selected by
    `dataset.gene_is_tf`.
    """
    # NOTE(review): the directory name hard-codes 'PBMC' rather than using
    # `dataset_name` -- confirm whether it should follow the selected dataset.
    adj_dir = Path(f'results/PBMCGeneRegPseudotimeDataset/{run}/adjs')
    return [
        load_npz(adj_dir / f'A_{step}.npz').toarray()[:, dataset.gene_is_tf]
        for step in range(dataset.n_seq)
    ]


# Stack into a single array with axes (run, step, row, selected column).
As = np.stack([_load_run_adjacencies(run) for _seed, run in tqdm(runs.items())])
print(As.shape)

  0%|          | 0/3 [00:00<?, ?it/s]

(3, 3, 2050, 590)


In [26]:
# Selection thresholds handed to `aggregate_As`. Their exact semantics live in
# that helper; presumably `consensus` is the number of runs that must agree and
# the `top_*` values cap how many TFs / genes per TF are kept -- confirm.
selection_params = dict(consensus=2, top_tfs=38, top_genes_per_tf=45)

# Aggregate the stacked per-run matrices into one selection per step
# (indexed by t, as used in the cells below).
t_to_selected = aggregate_As(
    As,
    tfs=dataset.genes[dataset.gene_is_tf],
    genes=dataset.genes,
    **selection_params,
)

t=0	 Selected 575 edges	N. TFs = 14	N. genes = 448	
t=1	 Selected 799 edges	N. TFs = 19	N. genes = 524	
t=2	 Selected 912 edges	N. TFs = 22	N. genes = 546	


In [27]:
# Tally, for t=0, how many selected entries share each first element
# (the TF name, judging by the displayed output).
first_elements_t0 = [entry[0] for entry in t_to_selected[0]]
npu(first_elements_t0, return_counts=True)

(array(['ATRX', 'CEBPD', 'DRAP1', 'FOS', 'FOSB', 'IRF1', 'JUNB', 'KLF10',
        'KLF2', 'KLF6', 'STAT1', 'TRIM22', 'ZFP36', 'ZFP36L1'], dtype='<U7'),
 array([45, 45, 45, 45, 45, 45, 45, 45,  4, 45, 45, 45, 31, 45]))

In [28]:
# Write the per-step selections to a location keyed by model and dataset name.
graphs_out = f'results/graphs/{model_name}-{dataset_name}'
write_graphs(t_to_selected, graphs_out)