In [None]:
import glob
import os
import pickle

import anndata as ad
import numpy as np
import pandas as pd
import seaborn as sns

from dask.diagnostics import ProgressBar

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

In [None]:
# Load the expression data.
# The relative paths used below (and the output paths in later cells) are
# resolved against this working directory, so switch into it first.
wdir = "/lustre/groups/ml01/workspace/samantha.bening/Bachelor/"
os.chdir(wdir)

# Read the AnnData file, then expose it as a pandas DataFrame via `to_df()`;
# `ex_matrix` is the expression matrix handed to the later pipeline steps.
adata = ad.read_h5ad('data2/veo_ibd_balanced.h5ad')
ex_matrix = adata.to_df()

In [None]:
# Load the ranking databases.
# glob returns its matches in arbitrary (filesystem-dependent) order; sorting
# keeps the order of `dbs`, and of everything derived from it, stable
# between runs and machines.
db_fnames = sorted(
    glob.glob("/lustre/groups/ml01/workspace/samantha.bening/data/scenic_dbs/hg38_*.genes_vs_motifs.rankings.feather")
)


def name(fname):
    """Return the database name for a ranking file.

    The name is the file's basename with only the final extension removed,
    e.g. ``/x/hg38_a.rankings.feather`` -> ``hg38_a.rankings``.
    """
    return os.path.splitext(os.path.basename(fname))[0]


# One ranking database object per matched feather file.
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

In [None]:
# Build the list of modules from the adjacencies and the expression matrix.
# NOTE(review): `adjacencies` is not assigned in any cell visible here, so on a
# fresh kernel this line raises NameError. Presumably it comes from a GRNBoost2
# run (`grnboost2` / `load_tf_names` are imported but never called) or from a
# saved adjacency file -- TODO confirm and add that step before this line.
modules = [*modules_from_adjacencies(adjacencies, ex_matrix)]

In [None]:
# Make sure the output directory exists *before* the expensive pruning step,
# so a missing directory cannot cause the results to be lost at save time.
# The path is relative to the current working directory.
os.makedirs("SCENICfiles", exist_ok=True)

# Calculate a list of enriched motifs and the corresponding target genes for all modules.
with ProgressBar():
    df = prune2df(dbs, modules, "/lustre/groups/ml01/workspace/samantha.bening/data/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl")

# Create regulons from this table of enriched motifs.
regulons = df2regulons(df)

# Save the enriched motifs (CSV) and the discovered regulons (pickle) to disk.
df.to_csv("SCENICfiles/motifs.csv")
with open("SCENICfiles/regulons.p", "wb") as f:
    pickle.dump(regulons, f)

In [None]:
# AUCell: run `aucell` on the expression matrix with the regulons, using 4 workers.
auc_mtx = aucell(ex_matrix, regulons, num_workers=4)

# Clustered heatmap of the AUCell result. `sns` is seaborn and has to be
# imported in the imports cell for this line to run. The trailing semicolon
# stops the notebook from echoing the returned object's repr under the figure.
sns.clustermap(auc_mtx, figsize=(8,8));