# SCENIC Protocol - **DEBUGGING** - Mouse brain data set


__Author:__ Bram Van de Sande

__Date:__ 6 AUG 2019

__Outline:__ Acquisition and cleaning of selected scRNAseq data sets.

_Experiments:_

| Accession ID | Tissue | 
| ------------- | ----------- | 
| GSE60361 | Mouse brain |

https://pyscenic.readthedocs.io/en/latest/tutorial.html

# 1. GRNBOOST2 test (Network inference)

## A. Dask connection and package import

In [1]:
from dask.distributed import Client, progress
# No scheduler address is passed to Client(). The summary displayed below reports a
# direct connection to an already running scheduler (myrelease-dask-scheduler), so the
# address is presumably supplied by the environment / Dask configuration -- TODO confirm.
c = Client()
# Bare last expression: display the client/cluster summary as the cell output.
c

0,1
Connection method: Direct,
Dashboard: http://myrelease-dask-scheduler:8787/status,

0,1
Comm: tcp://10.42.5.28:8786,Workers: 4
Dashboard: http://10.42.5.28:8787/status,Total threads: 24
Started: 7 hours ago,Total memory: 37.25 GiB

0,1
Comm: tcp://10.42.1.254:39701,Total threads: 6
Dashboard: http://10.42.1.254:8790/status,Memory: 9.31 GiB
Nanny: tcp://10.42.1.254:42119,
Local directory: /tmp/dask-worker-space/worker-144ne64p,Local directory: /tmp/dask-worker-space/worker-144ne64p
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.47 MiB,Spilled bytes: 0 B
Read bytes: 286.4842666865531 B,Write bytes: 1.48 kiB

0,1
Comm: tcp://10.42.2.14:32851,Total threads: 6
Dashboard: http://10.42.2.14:8790/status,Memory: 9.31 GiB
Nanny: tcp://10.42.2.14:38511,
Local directory: /tmp/dask-worker-space/worker-5zs4uon4,Local directory: /tmp/dask-worker-space/worker-5zs4uon4
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.72 MiB,Spilled bytes: 0 B
Read bytes: 285.3311034648257 B,Write bytes: 1.47 kiB

0,1
Comm: tcp://10.42.3.239:45117,Total threads: 6
Dashboard: http://10.42.3.239:8790/status,Memory: 9.31 GiB
Nanny: tcp://10.42.3.239:33515,
Local directory: /tmp/dask-worker-space/worker-nvhd9qn8,Local directory: /tmp/dask-worker-space/worker-nvhd9qn8
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.18 MiB,Spilled bytes: 0 B
Read bytes: 285.7081708622106 B,Write bytes: 1.47 kiB

0,1
Comm: tcp://10.42.5.29:40895,Total threads: 6
Dashboard: http://10.42.5.29:8790/status,Memory: 9.31 GiB
Nanny: tcp://10.42.5.29:35749,
Local directory: /tmp/dask-worker-space/worker-q_0xs9eu,Local directory: /tmp/dask-worker-space/worker-q_0xs9eu
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.11 MiB,Spilled bytes: 0 B
Read bytes: 285.49979864954616 B,Write bytes: 1.47 kiB


In [2]:
progress

<function distributed.diagnostics.progressbar.progress(*futures, notebook=None, multi=True, complete=True, **kwargs)>

In [3]:
import os
import glob
import pickle
import pandas as pd
import numpy as np

from dask.diagnostics import ProgressBar

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

#import seaborn as sns
from IPython.core.debugger import set_trace

## B. Input file pre-processing

In [4]:
# --- Root locations --------------------------------------------------------------
# NOTE: DATA_FOLDER and RESOURCES_FOLDER start with "/". os.path.join() discards every
# component that precedes an absolute one, so prefixing them with PROJECT has no effect;
# the paths below are therefore built from these folders directly.
PROJECT = "/home/jovyan/work/dask_k8s/project"
DATA_FOLDER = "/tmp"
RESOURCES_FOLDER = "/resources"
DATABASE_FOLDER = PROJECT + "/databases/"
SCHEDULER = "myrelease-dask-scheduler:8786"

# --- Ranking databases (absolute glob pattern under PROJECT/databases/) -----------
DATABASES_GLOB = DATABASE_FOLDER + "mm9-*.mc9nr.genes_vs_motifs.rankings.feather"

# --- Input resources ---------------------------------------------------------------
# The leading "." turns "/resources/<file>" into "./resources/<file>", i.e. these
# three paths are resolved relative to the current working directory.
MOTIF_ANNOTATIONS_FNAME = "." + os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.mgi-m0.001-o0.0.tbl")
MM_TFS_FNAME = "." + os.path.join(RESOURCES_FOLDER, 'mm_mgi_tfs.txt')
SC_EXP_FNAME = "." + os.path.join(RESOURCES_FOLDER, "GSE60361_C1-3005-Expression.txt")

# --- Output files (written directly below DATA_FOLDER, i.e. /tmp) -----------------
REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.p")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")

In [5]:
# List the database folder via the configured constant instead of repeating the
# literal path, so this check always inspects the folder DATABASES_GLOB points at.
os.listdir(DATABASE_FOLDER)

['mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather',
 'hidden',
 'mm9-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather']

In [6]:
#%debug

In [7]:
SC_EXP_FNAME

'./resources/GSE60361_C1-3005-Expression.txt'

In [8]:
MOTIF_ANNOTATIONS_FNAME

'./resources/motifs-v9-nr.mgi-m0.001-o0.0.tbl'

In [9]:
DATABASES_GLOB

'/home/jovyan/work/dask_k8s/project/databases/mm9-*.mc9nr.genes_vs_motifs.rankings.feather'

In [6]:
#Load expression data
def load_matrix(fname=None):
    """Read a tab-separated expression table and return it transposed.

    Parameters
    ----------
    fname : str or path-like, optional
        File to read. When omitted, the notebook-level ``SC_EXP_FNAME`` is used,
        which keeps the original zero-argument call working.

    Returns
    -------
    pandas.DataFrame
        The table parsed with its first row as header and its first column as
        index, then transposed (rows of the file become columns of the result).
    """
    if fname is None:
        # Resolved at call time so a later change of SC_EXP_FNAME is picked up.
        fname = SC_EXP_FNAME
    return pd.read_csv(fname, sep='\t', header=0, index_col=0).T

ex_matrix = load_matrix()
ex_matrix.shape

(3005, 19972)

In [7]:
#Reducing matrix: keep only the first 12000 columns.
# NOTE(review): the cut is positional (column order of the input file), not a selection
# by expression level or variability -- confirm this is the intended subset.
# NOTE(review): the recorded .info() output further down still reports 19972 columns,
# so it appears to come from a run in which this cell had not been executed.
ex_matrix = ex_matrix.iloc[:, :12000]

In [12]:
#df memory usage
	
ex_matrix.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
Index: 3005 entries, 1772071015_C02 to 1772058148_F03
Columns: 19972 entries, Tspan12 to Gm20738_loc3
dtypes: int64(19972)
memory usage: 458.1 MB


In [8]:
#Load tf names
tf_names = load_tf_names(MM_TFS_FNAME)

In [14]:
len(tf_names)

1721

In [15]:
type(tf_names)

list

In [16]:
c

0,1
Connection method: Direct,
Dashboard: http://myrelease-dask-scheduler:8787/status,

0,1
Comm: tcp://10.42.5.21:8786,Workers: 24
Dashboard: http://10.42.5.21:8787/status,Total threads: 24
Started: 7 minutes ago,Total memory: 51.41 GiB

0,1
Comm: tcp://10.42.1.247:36391,Total threads: 1
Dashboard: http://10.42.1.247:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.1.247:39255,
Local directory: /tmp/dask-worker-space/worker-cjnwb3wb,Local directory: /tmp/dask-worker-space/worker-cjnwb3wb
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.33 MiB,Spilled bytes: 0 B
Read bytes: 270.04786051605674 B,Write bytes: 132.02339847451663 B

0,1
Comm: tcp://10.42.1.248:34263,Total threads: 1
Dashboard: http://10.42.1.248:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.1.248:44975,
Local directory: /tmp/dask-worker-space/worker-as5u0l2p,Local directory: /tmp/dask-worker-space/worker-as5u0l2p
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.35 MiB,Spilled bytes: 0 B
Read bytes: 270.8213540132588 B,Write bytes: 132.40155085092655 B

0,1
Comm: tcp://10.42.1.249:33103,Total threads: 1
Dashboard: http://10.42.1.249:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.1.249:44647,
Local directory: /tmp/dask-worker-space/worker-x4djk6c1,Local directory: /tmp/dask-worker-space/worker-x4djk6c1
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.86 MiB,Spilled bytes: 0 B
Read bytes: 271.4505694482213 B,Write bytes: 132.70916728579707 B

0,1
Comm: tcp://10.42.1.250:34879,Total threads: 1
Dashboard: http://10.42.1.250:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.1.250:42037,
Local directory: /tmp/dask-worker-space/worker-xxwv12dk,Local directory: /tmp/dask-worker-space/worker-xxwv12dk
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.41 MiB,Spilled bytes: 0 B
Read bytes: 270.02631792725117 B,Write bytes: 132.01286654221167 B

0,1
Comm: tcp://10.42.2.10:42167,Total threads: 1
Dashboard: http://10.42.2.10:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.10:35357,
Local directory: /tmp/dask-worker-space/worker-8qu16wi_,Local directory: /tmp/dask-worker-space/worker-8qu16wi_
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.81 MiB,Spilled bytes: 0 B
Read bytes: 271.241178517827 B,Write bytes: 132.6067983864932 B

0,1
Comm: tcp://10.42.2.11:44765,Total threads: 1
Dashboard: http://10.42.2.11:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.11:35139,
Local directory: /tmp/dask-worker-space/worker-m3_chppk,Local directory: /tmp/dask-worker-space/worker-m3_chppk
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.79 MiB,Spilled bytes: 0 B
Read bytes: 270.12841581281384 B,Write bytes: 132.06278106404233 B

0,1
Comm: tcp://10.42.2.12:35073,Total threads: 1
Dashboard: http://10.42.2.12:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.12:42425,
Local directory: /tmp/dask-worker-space/worker-8f901imv,Local directory: /tmp/dask-worker-space/worker-8f901imv
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.78 MiB,Spilled bytes: 0 B
Read bytes: 270.04533249990806 B,Write bytes: 132.02216255551062 B

0,1
Comm: tcp://10.42.2.13:45769,Total threads: 1
Dashboard: http://10.42.2.13:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.13:46457,
Local directory: /tmp/dask-worker-space/worker-yrvwhfmu,Local directory: /tmp/dask-worker-space/worker-yrvwhfmu
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.66 MiB,Spilled bytes: 0 B
Read bytes: 271.48391647080507 B,Write bytes: 132.72547027461582 B

0,1
Comm: tcp://10.42.2.7:43223,Total threads: 1
Dashboard: http://10.42.2.7:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.7:33819,
Local directory: /tmp/dask-worker-space/worker-awk_u31a,Local directory: /tmp/dask-worker-space/worker-awk_u31a
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 118.94 MiB,Spilled bytes: 0 B
Read bytes: 270.1764782289756 B,Write bytes: 132.086278245277 B

0,1
Comm: tcp://10.42.2.8:43579,Total threads: 1
Dashboard: http://10.42.2.8:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.8:45225,
Local directory: /tmp/dask-worker-space/worker-63kpa5yh,Local directory: /tmp/dask-worker-space/worker-63kpa5yh
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 119.51 MiB,Spilled bytes: 0 B
Read bytes: 270.13880434972236 B,Write bytes: 132.0678599043087 B

0,1
Comm: tcp://10.42.2.9:37507,Total threads: 1
Dashboard: http://10.42.2.9:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.2.9:41463,
Local directory: /tmp/dask-worker-space/worker-hgbhmkv4,Local directory: /tmp/dask-worker-space/worker-hgbhmkv4
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 118.58 MiB,Spilled bytes: 0 B
Read bytes: 270.8174315972628 B,Write bytes: 132.39963322532847 B

0,1
Comm: tcp://10.42.3.232:42149,Total threads: 1
Dashboard: http://10.42.3.232:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.232:35875,
Local directory: /tmp/dask-worker-space/worker-ylp0dhor,Local directory: /tmp/dask-worker-space/worker-ylp0dhor
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.52 MiB,Spilled bytes: 0 B
Read bytes: 270.20466658898147 B,Write bytes: 132.10005922127982 B

0,1
Comm: tcp://10.42.3.233:45383,Total threads: 1
Dashboard: http://10.42.3.233:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.233:41367,
Local directory: /tmp/dask-worker-space/worker-30tn22fs,Local directory: /tmp/dask-worker-space/worker-30tn22fs
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 118.45 MiB,Spilled bytes: 0 B
Read bytes: 270.0223098594874 B,Write bytes: 132.01090704241605 B

0,1
Comm: tcp://10.42.3.234:38149,Total threads: 1
Dashboard: http://10.42.3.234:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.234:32969,
Local directory: /tmp/dask-worker-space/worker-tpknjoio,Local directory: /tmp/dask-worker-space/worker-tpknjoio
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 6.0%,Last seen: Just now
Memory usage: 118.10 MiB,Spilled bytes: 0 B
Read bytes: 271.2691559577222 B,Write bytes: 132.62047624599754 B

0,1
Comm: tcp://10.42.3.235:36051,Total threads: 1
Dashboard: http://10.42.3.235:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.235:38893,
Local directory: /tmp/dask-worker-space/worker-xwmblxj0,Local directory: /tmp/dask-worker-space/worker-xwmblxj0
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.88 MiB,Spilled bytes: 0 B
Read bytes: 353.17803259719875 B,Write bytes: 215.49846056778227 B

0,1
Comm: tcp://10.42.3.236:34377,Total threads: 1
Dashboard: http://10.42.3.236:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.236:36129,
Local directory: /tmp/dask-worker-space/worker-ldu2w7io,Local directory: /tmp/dask-worker-space/worker-ldu2w7io
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.50 MiB,Spilled bytes: 0 B
Read bytes: 270.04672629877837 B,Write bytes: 132.02284396829165 B

0,1
Comm: tcp://10.42.3.237:39205,Total threads: 1
Dashboard: http://10.42.3.237:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.237:36849,
Local directory: /tmp/dask-worker-space/worker-2onah08r,Local directory: /tmp/dask-worker-space/worker-2onah08r
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.39 MiB,Spilled bytes: 0 B
Read bytes: 354.9280759747835 B,Write bytes: 216.56628364563062 B

0,1
Comm: tcp://10.42.3.238:45007,Total threads: 1
Dashboard: http://10.42.3.238:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.3.238:36557,
Local directory: /tmp/dask-worker-space/worker-wgscmhv4,Local directory: /tmp/dask-worker-space/worker-wgscmhv4
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.39 MiB,Spilled bytes: 0 B
Read bytes: 270.59189589700446 B,Write bytes: 132.2893713274244 B

0,1
Comm: tcp://10.42.5.20:45739,Total threads: 1
Dashboard: http://10.42.5.20:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.20:43621,
Local directory: /tmp/dask-worker-space/worker-30clqd5d,Local directory: /tmp/dask-worker-space/worker-30clqd5d
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.13 MiB,Spilled bytes: 0 B
Read bytes: 270.909516391459 B,Write bytes: 132.44465245804662 B

0,1
Comm: tcp://10.42.5.22:38193,Total threads: 1
Dashboard: http://10.42.5.22:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.22:46053,
Local directory: /tmp/dask-worker-space/worker-m3pitc0g,Local directory: /tmp/dask-worker-space/worker-m3pitc0g
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 116.73 MiB,Spilled bytes: 0 B
Read bytes: 270.19602884132405 B,Write bytes: 132.0958363224251 B

0,1
Comm: tcp://10.42.5.24:33943,Total threads: 1
Dashboard: http://10.42.5.24:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.24:45087,
Local directory: /tmp/dask-worker-space/worker-832lgjoh,Local directory: /tmp/dask-worker-space/worker-832lgjoh
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.27 MiB,Spilled bytes: 0 B
Read bytes: 270.0710978386319 B,Write bytes: 132.03475894333116 B

0,1
Comm: tcp://10.42.5.25:32843,Total threads: 1
Dashboard: http://10.42.5.25:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.25:37729,
Local directory: /tmp/dask-worker-space/worker-c1pf9dqx,Local directory: /tmp/dask-worker-space/worker-c1pf9dqx
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.50 MiB,Spilled bytes: 0 B
Read bytes: 270.5082449558536 B,Write bytes: 132.24847531175064 B

0,1
Comm: tcp://10.42.5.26:40021,Total threads: 1
Dashboard: http://10.42.5.26:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.26:37023,
Local directory: /tmp/dask-worker-space/worker-cs358iu7,Local directory: /tmp/dask-worker-space/worker-cs358iu7
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 117.10 MiB,Spilled bytes: 0 B
Read bytes: 0.0 B,Write bytes: 0.0 B

0,1
Comm: tcp://10.42.5.27:34341,Total threads: 1
Dashboard: http://10.42.5.27:8790/status,Memory: 2.14 GiB
Nanny: tcp://10.42.5.27:41873,
Local directory: /tmp/dask-worker-space/worker-q5ak6gju,Local directory: /tmp/dask-worker-space/worker-q5ak6gju
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 117.04 MiB,Spilled bytes: 0 B
Read bytes: 269.9208246650183 B,Write bytes: 131.9612920584534 B


## C. Setting the start timestamp

In [17]:
from datetime import datetime
# Start-of-run timestamp, built in one step so the name is only ever bound to the
# formatted string (YYYY_MM_DD_HH-MM-SS) that later cells use in file names.
currentDateAndTime = datetime.now().strftime("%Y_%m_%d_%H-%M-%S")
print(currentDateAndTime)

2023_02_25_21-50-51


In [18]:
#Start date 
!date -u +"%c"

Sat 25 Feb 2023 09:50:51 PM UTC


In [19]:
#Start date Unix
!date +"%s"

1677361852


## D. GRNboost2 run

### i. Running

In [20]:
#Run GRNboost2 in debug mode
#
#if __name__ == '__main__':
#    #c = Client()
#    from dask.distributed import performance_report
#    with performance_report(filename="dask-report_%s.html"%(currentDateAndTime)):
#        def grn(var1, var2, var3):
#            set_trace() #Activating debug mode
#            return grnboost2(var1, verbose=var2, client_or_address=var3)
#        adjacencies2 = grn(ex_matrix, var2=True, var3=c)

In [21]:
#GRNBoost2 in normal mode
# The whole inference runs inside a Dask performance report; the report file name
# carries the start timestamp computed in the previous section.
if __name__ == '__main__':
    from dask.distributed import performance_report

    with performance_report(filename="dask-report_%s.html" % (currentDateAndTime)):
        adjacencies = grnboost2(
            ex_matrix,
            tf_names=tf_names,
            client_or_address=c,  # reuse the client created at the top of the notebook
            verbose=True,
        )

preparing dask client
parsing input
creating dask graph
24 partitions
computing dask graph
not shutting down client, client was created externally
finished


In [22]:
#end date 
!date -u +"%c"

Sat 25 Feb 2023 10:57:38 PM UTC


### ii. Saving file

In [24]:
adjacencies.head()

Unnamed: 0,TF,target,importance
65,Gm14305,OTTMUSG00000016609_loc4,399.775712
65,Gm14305,OTTMUSG00000016609_loc3,242.122851
868,Rpl35,Rpl32,148.716645
868,Rpl35,Rpl18a,145.586963
82,Mef2c,Camk2n1,142.32189


In [23]:
#Getting adjacencies dimensions
adjacencies.shape

(5915026, 3)

In [5]:
#Dumping adjacencies for later
# Requires `adjacencies` from the GRNBoost2 cell in the same kernel session; running
# this cell on a fresh kernel raises NameError.
# to_csv does not create missing directories, so make sure the target folder exists.
os.makedirs("intermediate_results", exist_ok=True)
adjacencies.to_csv("intermediate_results/adjacencies_full-matrix.csv")

NameError: name 'adjacencies' is not defined

### iii. Re-loading file

In [9]:
adjacencies_dump = pd.read_csv("intermediate_results/adjacencies_12000.csv", index_col=0)

In [10]:
adjacencies_dump.head()

Unnamed: 0,TF,target,importance
6248,Zfp433_loc2,Zfp433_loc1,664.614695
7831,Srp54b_loc2,Srp54b_loc1,599.16242
7829,1700047I17Rik2_loc2,Fam177a_loc1,581.545982
7827,Fam177a_loc1,1700047I17Rik2_loc2,571.602001
9251,Gm16677_loc2,Gm16677_loc1,560.672568


In [11]:
adjacencies_dump.shape

(6986837, 3)

# 2. Regulon prediction

**Note**: All nodes of the cluster need access to a shared network drive on which the ranking databases are stored.

## A. Obtaining the modules

In [12]:
#Obtaining module list
modules = list(modules_from_adjacencies(adjacencies_dump, ex_matrix))


2023-02-27 21:23:13,736 - pyscenic.utils - INFO - Calculating Pearson correlations.

	Dropout masking is currently set to [False].

2023-02-27 21:26:07,650 - pyscenic.utils - INFO - Creating modules.


In [15]:
#Checking object type
type(modules)

list

## B. Inspecting the Feather Ranking database object

In [16]:
#Loading ranking databases
# glob.glob() returns matches in arbitrary order; sort them so that positional
# access such as dbs[0] refers to the same database on every run and machine.
db_fnames = sorted(glob.glob(DATABASES_GLOB))
def name(fname):
    """Return the base name of `fname` with its last extension stripped."""
    base = os.path.basename(fname)
    stem, _extension = os.path.splitext(base)
    return stem
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

[FeatherRankingDatabase(name="mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings"),
 FeatherRankingDatabase(name="mm9-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings")]

In [17]:
#Finding out what type of object is created by the RankingDatabase() function
type(dbs[0])

ctxcore.rnkdb.FeatherRankingDatabase

In [18]:
#Finding what is stored in db_fnames
db_fnames

['/home/jovyan/work/dask_k8s/project/databases/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather',
 '/home/jovyan/work/dask_k8s/project/databases/mm9-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather']

In [19]:
#Finding the name() function output
name(db_fnames[0])

'mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings'

## C. Debugging database loading

In [29]:
#Debugging the load method
def debug_database_load(module):
    """Break into the interactive debugger, then load `module` from the first database.

    Calls ``set_trace()`` and then returns ``dbs[0].load(module)``, so the
    ``load`` call can be stepped into from the debugger prompt.
    """
    set_trace()
    return dbs[0].load(module)
#
# Direct (non-debug) equivalent of the call below:
#dbs[0].load(modules[0])
debug_database_load(modules[0])

> [0;32m/tmp/ipykernel_1001/4019392965.py[0m(4)[0;36mdebug_database_load[0;34m()[0m
[0;32m      2 [0;31m[0;32mdef[0m [0mdebug_database_load[0m[0;34m([0m[0mmodule[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m    [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 4 [0;31m    [0;32mreturn[0m [0mdbs[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m.[0m[0mload[0m[0;34m([0m[0mmodule[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m[0;31m#[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m[0;31m#dbs[0].load(modules[0])[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


--Call--
> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(128)[0;36mload[0;34m()[0m
[0;32m    126 [0;31m        )
[0m[0;32m    127 [0;31m[0;34m[0m[0m
[0m[0;32m--> 128 [0;31m    [0;32mdef[0m [0mload[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mgs[0m[0;34m:[0m [0mGeneSignature[0m[0;34m)[0m [0;34m->[0m [0mpd[0m[0;34m.[0m[0mDataFrame[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    129 [0;31m        [0;31m# For some genes in the signature there might not be a rank available in the database.[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    130 [0;31m        [0mgene_set[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mgeneset[0m[0;34m.[0m[0mintersection[0m[0;34m([0m[0mset[0m[0;34m([0m[0mgs[0m[0;34m.[0m[0mgenes[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(130)[0;36mload[0;34m()[0m
[0;32m    128 [0;31m    [0;32mdef[0m [0mload[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mgs[0m[0;34m:[0m [0mGeneSignature[0m[0;34m)[0m [0;34m->[0m [0mpd[0m[0;34m.[0m[0mDataFrame[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    129 [0;31m        [0;31m# For some genes in the signature there might not be a rank available in the database.[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 130 [0;31m        [0mgene_set[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mgeneset[0m[0;34m.[0m[0mintersection[0m[0;34m([0m[0mset[0m[0;34m([0m[0mgs[0m[0;34m.[0m[0mgenes[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    131 [0;31m[0;34m[0m[0m
[0m[0;32m    132 [0;31m        return self.ct_db.subset_to_pandas(
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(132)[0;36mload[0;34m()[0m
[0;32m    130 [0;31m        [0mgene_set[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mgeneset[0m[0;34m.[0m[0mintersection[0m[0;34m([0m[0mset[0m[0;34m([0m[0mgs[0m[0;34m.[0m[0mgenes[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    131 [0;31m[0;34m[0m[0m
[0m[0;32m--> 132 [0;31m        return self.ct_db.subset_to_pandas(
[0m[0;32m    133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m    134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(133)[0;36mload[0;34m()[0m
[0;32m    131 [0;31m[0;34m[0m[0m
[0m[0;32m    132 [0;31m        return self.ct_db.subset_to_pandas(
[0m[0;32m--> 133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m    134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    135 [0;31m                [0mregions_or_genes_type[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mct_db[0m[0;34m.[0m[0mall_region_or_gene_ids[0m[0;34m.[0m[0mtype[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(134)[0;36mload[0;34m()[0m
[0;32m    132 [0;31m        return self.ct_db.subset_to_pandas(
[0m[0;32m    133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m--> 134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    135 [0;31m                [0mregions_or_genes_type[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mct_db[0m[0;34m.[0m[0mall_region_or_gene_ids[0m[0;34m.[0m[0mtype[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    136 [0;31m            )
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(135)[0;36mload[0;34m()[0m
[0;32m    133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m    134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 135 [0;31m                [0mregions_or_genes_type[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mct_db[0m[0;34m.[0m[0mall_region_or_gene_ids[0m[0;34m.[0m[0mtype[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    136 [0;31m            )
[0m[0;32m    137 [0;31m        )
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(133)[0;36mload[0;34m()[0m
[0;32m    131 [0;31m[0;34m[0m[0m
[0m[0;32m    132 [0;31m        return self.ct_db.subset_to_pandas(
[0m[0;32m--> 133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m    134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    135 [0;31m                [0mregions_or_genes_type[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mct_db[0m[0;34m.[0m[0mall_region_or_gene_ids[0m[0;34m.[0m[0mtype[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


--Call--
> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/datatypes.py[0m(217)[0;36m__init__[0;34m()[0m
[0;32m    215 [0;31m            )
[0m[0;32m    216 [0;31m[0;34m[0m[0m
[0m[0;32m--> 217 [0;31m    def __init__(
[0m[0;32m    218 [0;31m        [0mself[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    219 [0;31m        [0mregion_or_gene_ids[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mstr[0m[0;34m][0m[0;34m,[0m [0mSet[0m[0;34m[[0m[0mstr[0m[0;34m][0m[0;34m,[0m [0mTuple[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0;34m...[0m[0;34m][0m[0;34m][0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/datatypes.py[0m(230)[0;36m__init__[0;34m()[0m
[0;32m    228 [0;31m        """
[0m[0;32m    229 [0;31m[0;34m[0m[0m
[0m[0;32m--> 230 [0;31m        [0;32mif[0m [0misinstance[0m[0;34m([0m[0mregions_or_genes_type[0m[0;34m,[0m [0mstr[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    231 [0;31m            [0mregions_or_genes_type[0m [0;34m=[0m [0mRegionsOrGenesType[0m[0;34m.[0m[0mfrom_str[0m[0;34m([0m[0mregions_or_genes_type[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    232 [0;31m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/datatypes.py[0m(233)[0;36m__init__[0;34m()[0m
[0;32m    231 [0;31m            [0mregions_or_genes_type[0m [0;34m=[0m [0mRegionsOrGenesType[0m[0;34m.[0m[0mfrom_str[0m[0;34m([0m[0mregions_or_genes_type[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    232 [0;31m[0;34m[0m[0m
[0m[0;32m--> 233 [0;31m        [0;32mif[0m [0misinstance[0m[0;34m([0m[0mregion_or_gene_ids[0m[0;34m,[0m [0mset[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    234 [0;31m            [0mregion_or_gene_ids[0m [0;34m=[0m [0msorted[0m[0;34m([0m[0mregion_or_gene_ids[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    235 [0;31m[0;34m[0m[0m
[0m


ipdb>  r


--Return--
None
> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/datatypes.py[0m(245)[0;36m__init__[0;34m()[0m
[0;32m    243 [0;31m        [0mself[0m[0;34m.[0m[0mids[0m [0;34m=[0m [0mtuple[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mids_dict[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    244 [0;31m        [0mself[0m[0;34m.[0m[0mids_set[0m [0;34m=[0m [0mset[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mids_dict[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 245 [0;31m        [0mself[0m[0;34m.[0m[0mtype[0m [0;34m=[0m [0mregions_or_genes_type[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    246 [0;31m[0;34m[0m[0m
[0m[0;32m    247 [0;31m    [0;32mdef[0m [0m__str__[0m[0;34m([0m[0mself[0m[0;34m)[0m [0;34m->[0m [0mstr[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  r


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/rnkdb.py[0m(132)[0;36mload[0;34m()[0m
[0;32m    130 [0;31m        [0mgene_set[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mgeneset[0m[0;34m.[0m[0mintersection[0m[0;34m([0m[0mset[0m[0;34m([0m[0mgs[0m[0;34m.[0m[0mgenes[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    131 [0;31m[0;34m[0m[0m
[0m[0;32m--> 132 [0;31m        return self.ct_db.subset_to_pandas(
[0m[0;32m    133 [0;31m            region_or_gene_ids=RegionOrGeneIDs(
[0m[0;32m    134 [0;31m                [0mregion_or_gene_ids[0m[0;34m=[0m[0mgene_set[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


--Call--
> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(747)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    745 [0;31m            )
[0m[0;32m    746 [0;31m[0;34m[0m[0m
[0m[0;32m--> 747 [0;31m    def subset_to_pandas(
[0m[0;32m    748 [0;31m        [0mself[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    749 [0;31m        [0mregion_or_gene_ids[0m[0;34m:[0m [0mRegionOrGeneIDs[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  s


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(778)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    776 [0;31m            [0mfound_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    777 [0;31m            [0mnot_found_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 778 [0;31m        ) = self.has_all_region_or_gene_ids(region_or_gene_ids)
[0m[0;32m    779 [0;31m[0;34m[0m[0m
[0m[0;32m    780 [0;31m        [0;32mif[0m [0mcontains_all_input_gene_ids_or_regions_ids[0m [0;32mis[0m [0;32mFalse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(774)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    772 [0;31m        """
[0m[0;32m    773 [0;31m[0;34m[0m[0m
[0m[0;32m--> 774 [0;31m        (
[0m[0;32m    775 [0;31m            [0mcontains_all_input_gene_ids_or_regions_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    776 [0;31m            [0mfound_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(775)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    773 [0;31m[0;34m[0m[0m
[0m[0;32m    774 [0;31m        (
[0m[0;32m--> 775 [0;31m            [0mcontains_all_input_gene_ids_or_regions_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    776 [0;31m            [0mfound_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    777 [0;31m            [0mnot_found_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(776)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    774 [0;31m        (
[0m[0;32m    775 [0;31m            [0mcontains_all_input_gene_ids_or_regions_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 776 [0;31m            [0mfound_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    777 [0;31m            [0mnot_found_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    778 [0;31m        ) = self.has_all_region_or_gene_ids(region_or_gene_ids)
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(777)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    775 [0;31m            [0mcontains_all_input_gene_ids_or_regions_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    776 [0;31m            [0mfound_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 777 [0;31m            [0mnot_found_region_or_gene_ids[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    778 [0;31m        ) = self.has_all_region_or_gene_ids(region_or_gene_ids)
[0m[0;32m    779 [0;31m[0;34m[0m[0m
[0m


ipdb>  p self.ct_db_filename


PosixPath('/home/jovyan/work/dask_k8s/project/databases/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather')


ipdb>  print self.df_cached


*** SyntaxError: Missing parentheses in call to 'print'. Did you mean print(self.df_cached)?


ipdb>  p self.df_cached


None


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(780)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    778 [0;31m        ) = self.has_all_region_or_gene_ids(region_or_gene_ids)
[0m[0;32m    779 [0;31m[0;34m[0m[0m
[0m[0;32m--> 780 [0;31m        [0;32mif[0m [0mcontains_all_input_gene_ids_or_regions_ids[0m [0;32mis[0m [0;32mFalse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    781 [0;31m            raise ValueError(
[0m[0;32m    782 [0;31m                [0;34mf"Not all provided {self.all_region_or_gene_ids.type} are found: {not_found_region_or_gene_ids}"[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(785)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    783 [0;31m            )
[0m[0;32m    784 [0;31m[0;34m[0m[0m
[0m[0;32m--> 785 [0;31m        [0mengine[0m [0;34m=[0m [0mengine[0m [0;32mif[0m [0mengine[0m [0;32melse[0m [0mself[0m[0;34m.[0m[0mengine[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    786 [0;31m[0;34m[0m[0m
[0m[0;32m    787 [0;31m        [0;31m# Fetch scores or rankings for input region IDs or gene IDs from cisTarget database file for region IDs or[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(789)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    787 [0;31m        [0;31m# Fetch scores or rankings for input region IDs or gene IDs from cisTarget database file for region IDs or[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    788 [0;31m        [0;31m# gene IDs which were not prefetched in previous calls.[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 789 [0;31m        [0mself[0m[0;34m.[0m[0mprefetch[0m[0;34m([0m[0mregion_or_gene_ids[0m[0;34m=[0m[0mregion_or_gene_ids[0m[0;34m,[0m [0mengine[0m[0;34m=[0m[0mengine[0m[0;34m,[0m [0msort[0m[0;34m=[0m[0;32mTrue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    790 [0;31m[0;34m[0m[0m
[0m[0;32m    791 [0;31m        [0;32mif[0m [0;32mnot[0m [0mself[0m[0;34m.[0m[0mdf_cached[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


FileNotFoundError: [Errno 2] Failed to open local file '/home/jovyan/work/dask_k8s/project/databases/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather'. Detail: [errno 2] No such file or directory
> [0;32m/opt/conda/lib/python3.8/site-packages/ctxcore/ctdb.py[0m(789)[0;36msubset_to_pandas[0;34m()[0m
[0;32m    787 [0;31m        [0;31m# Fetch scores or rankings for input region IDs or gene IDs from cisTarget database file for region IDs or[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    788 [0;31m        [0;31m# gene IDs which were not prefetched in previous calls.[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 789 [0;31m        [0mself[0m[0;34m.[0m[0mprefetch[0m[0;34m([0m[0mregion_or_gene_ids[0m[0;34m=[0m[0mregion_or_gene_ids[0m[0;34m,[0m [0mengine[0m[0;34m=[0m[0mengine[0m[0;34m,[0m [0msort[0m[0;34m=[0m[0;32mTrue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    790 [0;31m[0;34m[0m[0m
[0m[0;32m    791

ipdb>  c


FileNotFoundError: [Errno 2] Failed to open local file '/home/jovyan/work/dask_k8s/project/databases/mm9-500bp-upstream-7species.mc9nr.genes_vs_motifs.rankings.feather'. Detail: [errno 2] No such file or directory

In [28]:
# Post-mortem debugging: open the interactive debugger on the most recent exception.
%debug

> [0;32m/home/jovyan/work/dask_k8s/project/pyarrow/error.pxi[0m(113)[0;36mpyarrow.lib.check_status[0;34m()[0m



ipdb>  p module


*** NameError: name 'module' is not defined


ipdb>  q


## D. Running prune2df

### i. Normal run

In [81]:
# Calculate a list of enriched motifs and the corresponding target genes for all modules.
# No set_trace() here: a breakpoint in the normal run pauses the cell waiting for
# debugger input, so prune2df never runs to completion. Use the separate debug-run
# cell when stepping through prune2df is required.
with ProgressBar():
    df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME, client_or_address=c)

KeyboardInterrupt: 

### ii. Debug run

In [None]:
%xmode verbose
%pdb on
def prune_debug():
    with ProgressBar():
        #set_trace()
        return prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME, client_or_address=c)
    
df = prune_debug()

Exception reporting mode: Verbose
Automatic pdb calling has been turned ON


  (["('from-delayed-c4019a539b12fc6da49b4942189d12b4 ... b4', 82669)"],)
Consider scattering large objects ahead of time
with client.scatter to reduce scheduler burden and 
keep data on workers

    future = client.submit(func, big_data)    # bad

    big_future = client.scatter(big_data)     # good
    future = client.submit(func, big_future)  # good


ipdb>  s


In [33]:
# Create regulons from this table of enriched motifs.
# `df` must already exist in the kernel (it is assigned by the prune2df cells in
# section D); running this cell without it raises NameError.
regulons = df2regulons(df)

NameError: name 'df' is not defined

2023-02-22 01:17:07,149 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
2023-02-22 01:17:07,152 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [None]:
# Persist the results to disk: the enriched-motif table as CSV and the
# discovered regulons as a binary pickle.
df.to_csv(MOTIFS_FNAME)
with open(REGULONS_FNAME, mode="wb") as regulons_file:
    pickle.dump(regulons, regulons_file)