In [None]:
### R
# Load the h5ad object and dump its RNA count matrix, transposed, to CSV
# so the Python step can read it back with rows and columns swapped
# relative to how the counts are stored in the Seurat object.
library(MuDataSeurat)

workdir <- './SCENIC/'

# Create the working directory on first use, then run everything from it;
# both the input h5ad and the output CSV are resolved relative to it.
if (!dir.exists(workdir)) {
  dir.create(workdir, recursive = TRUE)
}
setwd(workdir)

seurat <- ReadH5AD('tumor_bigcell.h5ad')

# write.csv needs a dense matrix, so the counts slot is densified first.
write.csv(
  t(as.matrix(seurat@assays$RNA@counts)),
  file = "input_counts.csv"
)


In [1]:
### Generate input.loom file using Python code
# Reads the CSV exported by the R step and stores it as a loom file whose
# rows are labelled "Gene" and whose columns are labelled "CellID".
import os, sys

import loompy as lp
import numpy as np
import scanpy as sc

# Path of the loom file to write (a file name, despite the variable name).
outdir = "input.loom"

# read_csv maps CSV rows to obs and CSV columns to var.
x = sc.read_csv("input_counts.csv")

# Loom rows are labelled from var_names and loom columns from obs_names,
# so the obs-by-var matrix is transposed before it is written.
col_attrs = {"CellID": np.array(x.obs_names)}
row_attrs = {"Gene": np.array(x.var_names)}
lp.create(
    filename=outdir,
    layers=x.X.T,
    row_attrs=row_attrs,
    col_attrs=col_attrs,
)


In [1]:
dir=/SCENIC/human  # location of database

# Reference files used by the pipeline, all located under $dir.
tfs="$dir/allTFs_hg38.txt"
feather="$dir/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.genes_vs_motifs.rankings.feather"
tbl="$dir/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl"

# List the three reference files; ls reports an error for any that is
# missing (this does not stop the script).
ls "$tfs" "$feather" "$tbl"

# Expression matrix in loom format, passed to every pyscenic step below.
input_loom=input.loom

# Activate the conda environment named "pyscenic".
source activate pyscenic


# Step 1 - grn: infer co-expression modules between the transcription
# factors listed in $tfs and the genes of the expression matrix.
# Writes the result to adj.sample.tsv.
echo "pyscenic grn"
pyscenic grn \
    --num_workers 20 \
    --output adj.sample.tsv \
    --method grnboost2 \
    "$input_loom" \
    "$tfs"


# Step 2 - ctx: TF motif enrichment on the modules from step 1, using the
# ranking database ($feather) and the motif annotation table ($tbl), to
# obtain transcription factors and their direct targets.
# Writes the result to sce.regulons.csv.
echo "pyscenic cistarget"
pyscenic ctx \
    adj.sample.tsv "$feather" \
    --annotations_fname "$tbl" \
    --expression_mtx_fname "$input_loom" \
    --mode "dask_multiprocessing" \
    --output sce.regulons.csv \
    --num_workers 20 \
    --mask_dropouts


# Step 3 - aucell: run AUCell on the expression matrix with the regulons
# from step 2. Writes the result to out_SCENIC.loom.
echo "pyscenic AUCell"
pyscenic aucell \
    "$input_loom" \
    sce.regulons.csv \
    --output out_SCENIC.loom \
    --num_workers 20

echo "end"