In [1]:
import os
import pandas as pd
import scanpy as sc

from arboreto.algo import grnboost2, genie3
from arboreto.utils import load_tf_names
from distributed import Client, LocalCluster

In [2]:
# Switch to the project directory; later cells in this notebook use paths
# that are relative to it.
wdir = "/lustre/groups/ml01/workspace/samantha.bening/Bachelor/"
os.chdir(wdir)

# Read the AnnData object that holds the expression matrix and show its summary.
h5ad_path = 'data2/veo_ibd_balanced.h5ad'
adata = sc.read_h5ad(h5ad_path)
adata

AnnData object with n_obs × n_vars = 176789 × 28162
    obs: 'sampID', 'LMUSampID', 'PatientID', 'AnatomicLocation', 'PoolID', 'Protocol', 'CellsLoaded', 'FreezeProtocol', 'AnatomicLocation_Relation', 'AgeYM', 'Sex', 'Inflammation', 'Viability', 'SampleDate', 'sample_LMU_info_Timepoint', 'Ischemia_time', 'n_counts', 'log_counts', 'n_genes', 'mt_frac', 'ribo_frac', 'hb_frac', 'celltype_l3', 'celltype_l1', 'celltype_l2', 'PatientID_genotype'
    uns: 'PatientID_colors', 'PatientID_genotype_colors', 'celltype_l1_colors', 'celltype_l2_colors', 'celltype_l3_colors', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap', 'unintegrated_X_umap'
    varm: 'PCs'
    layers: 'raw'
    obsp: 'connectivities', 'distances'

In [3]:
# Count cells per `celltype_l1` label.
# NOTE(review): the recorded output lists a blank label with 1050 cells -
# confirm whether these unlabeled cells should be kept for the analysis.
celltype_l1_counts = adata.obs['celltype_l1'].value_counts()
celltype_l1_counts

celltype_l1
T Cell        51695
Stroma        51434
Epithelium    33546
B Cell        31015
Myeloid        8049
               1050
Name: count, dtype: int64

In [3]:
# Keep only genes that are detected in at least MIN_CELLS cells; genes seen in
# fewer cells are removed from `adata` (the call modifies `adata` in place).
MIN_CELLS = 30
sc.pp.filter_genes(adata, min_cells=MIN_CELLS)

# Convert the filtered AnnData to a DataFrame (rows = cells, columns = genes),
# which is the input handed to GRNBoost2 below.
ex_matrix = adata.to_df()
ex_matrix

Unnamed: 0,AL627309.1,AL627309.3,AL669831.5,FAM87B,LINC00115,FAM41C,AL645608.2,LINC02593,SAMD11,NOC2L,...,MAFIP,AC011043.1,AL592183.1,AC007325.4,AC007325.2,AL354822.1,AC004556.1,AC233755.2,AC233755.1,AC240274.1
CID006480-1:ACCTGTCGTGGTCCCA,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.088189,...,0.0,0.0,0.088189,0.0,0.0,0.0,0.0,0.0,0.0,0.088189
CID006480-1:GTAGTACTCGAGCCTG,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.149761,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID006480-1:GTGTGATGTGAGTTTC,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.144580,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID006480-1:TTGGATGTCATCGTAG,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.098530,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID006480-1:ACACCAACAAATCGGG,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.431547,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CID008847-1:TCGCTCATCATTGCGA,0.0,0.0,0.0,0.0,0.316601,0.0,0.0,0.0,0.0,0.316601,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID008847-1:TTCAGGATCGTTCCTG,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID008847-1:CCGTTCAAGCTAGCCC,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
CID008847-1:GTCCTCACAAGCACAG,0.0,0.0,0.0,0.0,0.560699,0.0,0.0,0.0,0.0,0.560699,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


In [4]:
# Load the list of transcription-factor names that is passed to GRNBoost2 as
# `tf_names`. The path is relative to the current working directory.
tf_file = "../data/allTFs_human.txt"
tf_names = load_tf_names(tf_file)

# Number of TF names read from the file.
len(tf_names)

1892

## Create custom Dask client

In [5]:
# Size of the local Dask cluster - adjust both values to the resources that
# were allocated to the job.
# NOTE(review): the recorded output shows 29 workers x 10 threads = 290 threads
# and 10.34 GiB of memory per worker; confirm this matches the cores/memory of
# the job and that one worker can hold the expression matrix.
N_WORKERS = 29
THREADS_PER_WORKER = 10

local_cluster = LocalCluster(
    n_workers=N_WORKERS,
    threads_per_worker=THREADS_PER_WORKER,
)

# Client connected to the cluster above; it is handed to GRNBoost2 later.
custom_client = Client(local_cluster)
custom_client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 29
Total threads: 290,Total memory: 300.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33683,Workers: 29
Dashboard: http://127.0.0.1:8787/status,Total threads: 290
Started: Just now,Total memory: 300.00 GiB

0,1
Comm: tcp://127.0.0.1:46333,Total threads: 10
Dashboard: http://127.0.0.1:35211/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:42311,
Local directory: /tmp/dask-scratch-space/worker-lw6lqafe,Local directory: /tmp/dask-scratch-space/worker-lw6lqafe

0,1
Comm: tcp://127.0.0.1:44433,Total threads: 10
Dashboard: http://127.0.0.1:40499/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40709,
Local directory: /tmp/dask-scratch-space/worker-3rvnqu_1,Local directory: /tmp/dask-scratch-space/worker-3rvnqu_1

0,1
Comm: tcp://127.0.0.1:39025,Total threads: 10
Dashboard: http://127.0.0.1:34835/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:35949,
Local directory: /tmp/dask-scratch-space/worker-i6hhor_m,Local directory: /tmp/dask-scratch-space/worker-i6hhor_m

0,1
Comm: tcp://127.0.0.1:39497,Total threads: 10
Dashboard: http://127.0.0.1:40149/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40899,
Local directory: /tmp/dask-scratch-space/worker-lh1dd29d,Local directory: /tmp/dask-scratch-space/worker-lh1dd29d

0,1
Comm: tcp://127.0.0.1:34195,Total threads: 10
Dashboard: http://127.0.0.1:36503/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:46231,
Local directory: /tmp/dask-scratch-space/worker-53zvfmlh,Local directory: /tmp/dask-scratch-space/worker-53zvfmlh

0,1
Comm: tcp://127.0.0.1:45497,Total threads: 10
Dashboard: http://127.0.0.1:36297/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:35891,
Local directory: /tmp/dask-scratch-space/worker-9930aalx,Local directory: /tmp/dask-scratch-space/worker-9930aalx

0,1
Comm: tcp://127.0.0.1:40247,Total threads: 10
Dashboard: http://127.0.0.1:41161/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40827,
Local directory: /tmp/dask-scratch-space/worker-gck2yzt5,Local directory: /tmp/dask-scratch-space/worker-gck2yzt5

0,1
Comm: tcp://127.0.0.1:34777,Total threads: 10
Dashboard: http://127.0.0.1:44627/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:32881,
Local directory: /tmp/dask-scratch-space/worker-vrjyoq_7,Local directory: /tmp/dask-scratch-space/worker-vrjyoq_7

0,1
Comm: tcp://127.0.0.1:45231,Total threads: 10
Dashboard: http://127.0.0.1:43733/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40585,
Local directory: /tmp/dask-scratch-space/worker-jfb6batp,Local directory: /tmp/dask-scratch-space/worker-jfb6batp

0,1
Comm: tcp://127.0.0.1:39527,Total threads: 10
Dashboard: http://127.0.0.1:37651/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:37399,
Local directory: /tmp/dask-scratch-space/worker-kcr384g0,Local directory: /tmp/dask-scratch-space/worker-kcr384g0

0,1
Comm: tcp://127.0.0.1:45845,Total threads: 10
Dashboard: http://127.0.0.1:46275/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:44881,
Local directory: /tmp/dask-scratch-space/worker-dos50e7y,Local directory: /tmp/dask-scratch-space/worker-dos50e7y

0,1
Comm: tcp://127.0.0.1:36239,Total threads: 10
Dashboard: http://127.0.0.1:43157/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:37913,
Local directory: /tmp/dask-scratch-space/worker-jodnb7bn,Local directory: /tmp/dask-scratch-space/worker-jodnb7bn

0,1
Comm: tcp://127.0.0.1:33731,Total threads: 10
Dashboard: http://127.0.0.1:34865/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:34211,
Local directory: /tmp/dask-scratch-space/worker-qftfukmy,Local directory: /tmp/dask-scratch-space/worker-qftfukmy

0,1
Comm: tcp://127.0.0.1:44435,Total threads: 10
Dashboard: http://127.0.0.1:41689/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40339,
Local directory: /tmp/dask-scratch-space/worker-l1f_z0ii,Local directory: /tmp/dask-scratch-space/worker-l1f_z0ii

0,1
Comm: tcp://127.0.0.1:44873,Total threads: 10
Dashboard: http://127.0.0.1:37849/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:46753,
Local directory: /tmp/dask-scratch-space/worker-_z6ubhoc,Local directory: /tmp/dask-scratch-space/worker-_z6ubhoc

0,1
Comm: tcp://127.0.0.1:34167,Total threads: 10
Dashboard: http://127.0.0.1:45519/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:39731,
Local directory: /tmp/dask-scratch-space/worker-u7e5laje,Local directory: /tmp/dask-scratch-space/worker-u7e5laje

0,1
Comm: tcp://127.0.0.1:42689,Total threads: 10
Dashboard: http://127.0.0.1:44631/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:45449,
Local directory: /tmp/dask-scratch-space/worker-2foymotm,Local directory: /tmp/dask-scratch-space/worker-2foymotm

0,1
Comm: tcp://127.0.0.1:42557,Total threads: 10
Dashboard: http://127.0.0.1:46391/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:34851,
Local directory: /tmp/dask-scratch-space/worker-_x1a9y2f,Local directory: /tmp/dask-scratch-space/worker-_x1a9y2f

0,1
Comm: tcp://127.0.0.1:37167,Total threads: 10
Dashboard: http://127.0.0.1:34315/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40309,
Local directory: /tmp/dask-scratch-space/worker-m4v8j0j0,Local directory: /tmp/dask-scratch-space/worker-m4v8j0j0

0,1
Comm: tcp://127.0.0.1:41663,Total threads: 10
Dashboard: http://127.0.0.1:43601/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:44501,
Local directory: /tmp/dask-scratch-space/worker-7cl76tbf,Local directory: /tmp/dask-scratch-space/worker-7cl76tbf

0,1
Comm: tcp://127.0.0.1:46133,Total threads: 10
Dashboard: http://127.0.0.1:33381/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:43233,
Local directory: /tmp/dask-scratch-space/worker-ky7ayxhm,Local directory: /tmp/dask-scratch-space/worker-ky7ayxhm

0,1
Comm: tcp://127.0.0.1:40665,Total threads: 10
Dashboard: http://127.0.0.1:44727/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:44211,
Local directory: /tmp/dask-scratch-space/worker-o60exlc2,Local directory: /tmp/dask-scratch-space/worker-o60exlc2

0,1
Comm: tcp://127.0.0.1:46769,Total threads: 10
Dashboard: http://127.0.0.1:33319/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:35541,
Local directory: /tmp/dask-scratch-space/worker-r4w70ewh,Local directory: /tmp/dask-scratch-space/worker-r4w70ewh

0,1
Comm: tcp://127.0.0.1:44265,Total threads: 10
Dashboard: http://127.0.0.1:37093/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:38693,
Local directory: /tmp/dask-scratch-space/worker-kznh1pu3,Local directory: /tmp/dask-scratch-space/worker-kznh1pu3

0,1
Comm: tcp://127.0.0.1:46583,Total threads: 10
Dashboard: http://127.0.0.1:44001/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:37105,
Local directory: /tmp/dask-scratch-space/worker-um54s3wz,Local directory: /tmp/dask-scratch-space/worker-um54s3wz

0,1
Comm: tcp://127.0.0.1:34657,Total threads: 10
Dashboard: http://127.0.0.1:46363/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:33151,
Local directory: /tmp/dask-scratch-space/worker-klkf375g,Local directory: /tmp/dask-scratch-space/worker-klkf375g

0,1
Comm: tcp://127.0.0.1:42771,Total threads: 10
Dashboard: http://127.0.0.1:44061/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:40541,
Local directory: /tmp/dask-scratch-space/worker-j9_9dchv,Local directory: /tmp/dask-scratch-space/worker-j9_9dchv

0,1
Comm: tcp://127.0.0.1:44043,Total threads: 10
Dashboard: http://127.0.0.1:41419/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:36877,
Local directory: /tmp/dask-scratch-space/worker-8tgvogu4,Local directory: /tmp/dask-scratch-space/worker-8tgvogu4

0,1
Comm: tcp://127.0.0.1:40835,Total threads: 10
Dashboard: http://127.0.0.1:35383/status,Memory: 10.34 GiB
Nanny: tcp://127.0.0.1:43067,
Local directory: /tmp/dask-scratch-space/worker-gk8z68wi,Local directory: /tmp/dask-scratch-space/worker-gk8z68wi


## Run GRNBoost2

In [6]:
%%time
network = grnboost2(expression_data=ex_matrix,
                    tf_names=tf_names,
                    client_or_address=custom_client)

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2024-03-30 12:51:54,035 - distributed.protocol.core - CRITICAL - Failed to Serialize
Traceback (most recent call last):
  File "/home/icb/samantha.bening/tools/apps/mamba/envs/grnboost/lib/python3.12/site-packages/distributed/protocol/core.py", line 109, in dumps
    frames[0] = msgpack.dumps(msg, default=_encode_default, use_bin_type=True)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/icb/samantha.bening/tools/apps/mamba/envs/grnboost/lib/python3.12/site-packages/msgpack/__init__.py", line 36, in packb
    return Packer(**kwargs).pack(o)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "msgpack/_packer.pyx", line 294, in msgpack._cmsgpack.Packer.pack
  File "msgpack/_packer.pyx", line 300, in msgpack._cmsgpack.Packer.pack
  File "msgpack/_packer.pyx", line 297, in msgpack._cmsgpack.Packer.pack
  File "msgpack/_packer.pyx", line 264, in msgpack._cmsgpack.Packer._pack
  File "msgpack/_packer.pyx", line 231, in msgpack._cmsgpack.Packer._pack
  Fil

CancelledError: finalize-1ee4c30438bd043bbe7a9b628d803fbb

In [None]:
# Keep only links whose importance reaches the threshold, then write them to disk.
IMPORTANCE_THRESHOLD = 0.001
ADJACENCIES_CSV = 'src/SCENICfiles/adj_full_TFs.csv'

network = network[network['importance'] >= IMPORTANCE_THRESHOLD]

# Create the output folder if it does not exist yet, so to_csv cannot fail on a
# missing directory after the long-running inference.
os.makedirs(os.path.dirname(ADJACENCIES_CSV), exist_ok=True)

# NOTE(review): the file is written without a header row; if a downstream tool
# expects column names (e.g. TF, target, importance), confirm this is intended.
network.to_csv(ADJACENCIES_CSV, header=False, index=False)

In [8]:
# Release the Dask resources. The cluster is closed in a `finally` clause so
# that its worker processes are cleaned up even when shutting down the client
# raises (for example after a failed GRNBoost2 run).
try:
    custom_client.shutdown()
finally:
    local_cluster.close()
