## Script to load a processed Seurat object and run pySCENIC

To get your processed Seurat object into AnnData (`.h5ad`) format, you can use conversion functions such as:
```r
#load custom functions
source("/pl/active/CSUClinHeme/users/dylan/repos/scrna-seq/analysis-code/customFunctions_Seuratv5.R")

#load in processed R data and subset on cells of interest
seu.obj <- readRDS("../output/s3/240201_bm_cd34_removed_disconnected.rds")


#stash new idents
seu.obj.sub <- subset(seu.obj, subset = clusterID_integrated.harmony %in% c(5,9,13,14,16,10))

cnts <- seu.obj.sub@assays$RNA$counts
cnts <- orthogene::convert_orthologs(gene_df = cnts,
                                        gene_input = "rownames", 
                                        gene_output = "rownames", 
                                        input_species = "dog",
                                        output_species = "human",
                                        non121_strategy = "drop_both_species") 
rownames(cnts) <- unname(rownames(cnts))

seu.obj <- CreateSeuratObject(cnts, project = "humanConvert", assay = "RNA",
                                  min.cells = 0, min.features = 0, names.field = 1,
                                  names.delim = "_", meta.data = seu.obj.sub@meta.data)

#covert back to an older Seurat object for compatiability with 
seu.obj[["RNA"]] <- as(object = seu.obj[["RNA"]], Class = "Assay")
seu.obj <- NormalizeData(seu.obj)

SaveH5Seurat(seu.obj, filename = "../output/s3/main_branches_hu.h5Seurat", verbose = TRUE, overwrite = T)
Convert("../output/s3/main_branches_hu.h5Seurat", dest = "h5ad", overwrite = T)

```

### Note: TF files were obtained from https://resources.aertslab.org/cistarget/

In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
from matplotlib.pyplot import rc_context
from MulticoreTSNE import MulticoreTSNE as TSNE
from pyscenic.export import export2loom, add_scenic_metadata

import seaborn as sns
import matplotlib.pyplot as plt
import scipy 

import glob

import json
import zlib
import base64

In [2]:
FIGURES_FOLDERNAME="../output/scenic/"
def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Save figure as vector-based SVG image format.
    """
    fig.tight_layout()
    fig.savefig(os.path.join(folder, fname), format='svg')

In [3]:
#load in the data
adata = sc.read_h5ad("../output/s3/main_branches_hu.h5ad")
adata

AnnData object with n_obs × n_vars = 4893 × 11606
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'percent.hbm', 'percent.ppbp', 'RNA_snn_res.0.1', 'seurat_clusters', 'int.clusID', 'pANN_0.25_0.01_274', 'DF.classifications_0.25_0.01_274', 'doublet', 'S.Score', 'G2M.Score', 'Phase', 'clusters', 'pANN_0.25_0.19_108', 'DF.classifications_0.25_0.19_108', 'pANN_0.25_0.02_98', 'DF.classifications_0.25_0.02_98', 'pANN_0.25_0.11_115', 'DF.classifications_0.25_0.11_115', 'pANN_0.25_0.005_95', 'DF.classifications_0.25_0.005_95', 'unintegrated_clusters', 'RNA_snn_res.0.6', 'clusterID_integrated.cca', 'clusterID_integrated.harmony', 'clusterID_integrated.joint', 'clusterID_integrated.rcpa', 'name', 'colz', 'cellSource', 'minorIdent'
    var: 'features'

In [4]:
#convert from numeric to string
anno = adata.obs
anno['clusterID_integrated.harmony'] = anno['clusterID_integrated.harmony'].astype(str)
adata.obs = anno

In [5]:
# Replace the gene index of the raw layer with the gene symbols stored in
# adata.var['features'], then store the relabelled copy back as adata.raw.
# NOTE(review): assumes adata.raw and adata list the same genes in the same
# order — confirm.
tempAdata = adata.raw.to_adata()
tempAdata.var_names = adata.var['features']
adata.raw = tempAdata

In [None]:
# can't run because the UMAP coordinates weren't brought over
# sc.pl.umap(adata, color=['IGHM','majorID_sub2','name'], use_raw = False)

In [6]:
# create basic row and column attributes for the loom file:
row_attrs = {
    "Gene": np.array(adata.var_names) ,
}
col_attrs = {
    "CellID": np.array(adata.obs_names) ,
    "nGene": np.array( np.sum(adata.X.transpose()>0 , axis=0)).flatten() ,
    "nUMI": np.array( np.sum(adata.X.transpose() , axis=0)).flatten() ,
}
lp.create( "../output/s3/main_branches_hu.loom", adata.X.transpose(), row_attrs, col_attrs)

The easiest way to run pySCENIC is through an sbatch script, so generate one with:

In [7]:
# Write the SLURM batch script for the pySCENIC "grn" step, one echoed line at a
# time ('>' starts a fresh file, '>>' appends to it).
!echo "#!/usr/bin/env bash" > cute_pyScenic_grn.sbatch
!echo "#SBATCH --job-name=pySCENIC_grn" >> cute_pyScenic_grn.sbatch

# requested resources: 1 node, 16 tasks, time limit 00:60:00
!echo "#SBATCH --nodes=1" >> cute_pyScenic_grn.sbatch
!echo "#SBATCH --ntasks=16" >> cute_pyScenic_grn.sbatch
!echo "#SBATCH --time=00:60:00" >> cute_pyScenic_grn.sbatch

# partition and quality of service
!echo "#SBATCH --partition=atesting" >> cute_pyScenic_grn.sbatch
!echo "#SBATCH --qos=normal" >> cute_pyScenic_grn.sbatch

# e-mail notification when the job ends, and a per-job log file (%j = job id)
!echo "#SBATCH --mail-type=END" >> cute_pyScenic_grn.sbatch
!echo "#SBATCH --mail-user=dyammons@colostate.edu" >> cute_pyScenic_grn.sbatch
!echo "#SBATCH --output=pySCENIC_grn-%j.log" >> cute_pyScenic_grn.sbatch

!echo "" >> cute_pyScenic_grn.sbatch

!echo "module purge" >> cute_pyScenic_grn.sbatch

!echo "" >> cute_pyScenic_grn.sbatch

# The pyscenic command itself, run inside the singularity image.
# $PWD is expanded when this cell runs, whereas \$SLURM_NTASKS is escaped so the
# variable name is written literally and resolved inside the job.
# Inputs: the expression loom and the TF list; output: the adjacency table adj_main.csv.
!echo "singularity exec -B $PWD/../ ../software/aertslab-pyscenic-scanpy-0.12.1-1.9.1.sif pyscenic grn \\" >> cute_pyScenic_grn.sbatch
!echo "--num_workers \$SLURM_NTASKS \\" >> cute_pyScenic_grn.sbatch
!echo "-o ../output/scenic/adj_main.csv \\" >> cute_pyScenic_grn.sbatch
!echo "../output/s3/main_branches_hu.loom \\" >> cute_pyScenic_grn.sbatch
!echo "./metaData/allTFs_hg38.txt" >> cute_pyScenic_grn.sbatch

In [8]:
# submit the GRN inference job to SLURM
!sbatch cute_pyScenic_grn.sbatch

Submitted batch job 4692977


In [9]:
# Write the SLURM batch script for the pySCENIC "ctx" step, one echoed line at a
# time ('>' starts a fresh file, '>>' appends to it).
!echo "#!/usr/bin/env bash" > cute_pyScenic_ctx.sbatch
!echo "#SBATCH --job-name=pySCENIC_ctx" >> cute_pyScenic_ctx.sbatch

# requested resources: 1 node, 16 tasks, time limit 00:60:00
!echo "#SBATCH --nodes=1" >> cute_pyScenic_ctx.sbatch
!echo "#SBATCH --ntasks=16" >> cute_pyScenic_ctx.sbatch
!echo "#SBATCH --time=00:60:00" >> cute_pyScenic_ctx.sbatch

# partition and quality of service
!echo "#SBATCH --partition=atesting" >> cute_pyScenic_ctx.sbatch
!echo "#SBATCH --qos=normal" >> cute_pyScenic_ctx.sbatch

# e-mail notification when the job ends, and a per-job log file (%j = job id)
!echo "#SBATCH --mail-type=END" >> cute_pyScenic_ctx.sbatch
!echo "#SBATCH --mail-user=dyammons@colostate.edu" >> cute_pyScenic_ctx.sbatch
!echo "#SBATCH --output=pySCENIC_ctx-%j.log" >> cute_pyScenic_ctx.sbatch

!echo "" >> cute_pyScenic_ctx.sbatch

!echo "module purge" >> cute_pyScenic_ctx.sbatch

!echo "" >> cute_pyScenic_ctx.sbatch

# The pyscenic command itself, run inside the singularity image.
# Inputs: the adjacency table from the grn step, two motif ranking databases,
# the motif annotation table and the expression loom; output: reg_main.csv.
# \$SLURM_NTASKS is escaped so the variable name is written literally and
# resolved inside the job.
!echo "singularity exec -B $PWD/../ ../software/aertslab-pyscenic-scanpy-0.12.1-1.9.1.sif pyscenic ctx \\" >> cute_pyScenic_ctx.sbatch
!echo "../output/scenic/adj_main.csv \\" >> cute_pyScenic_ctx.sbatch
!echo "./metaData/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather ./metaData/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather \\" >> cute_pyScenic_ctx.sbatch
!echo "--annotations_fname ./metaData/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl \\" >> cute_pyScenic_ctx.sbatch
!echo "--expression_mtx_fname ../output/s3/main_branches_hu.loom \\" >> cute_pyScenic_ctx.sbatch
!echo "--output ../output/scenic/reg_main.csv \\" >> cute_pyScenic_ctx.sbatch
!echo "--mask_dropouts \\" >> cute_pyScenic_ctx.sbatch
!echo "--num_workers \$SLURM_NTASKS" >> cute_pyScenic_ctx.sbatch


In [10]:
# submit the ctx job to SLURM; it took about 8 minutes to run
!sbatch cute_pyScenic_ctx.sbatch

Submitted batch job 4693046


In [11]:
# Write the SLURM batch script for the pySCENIC "aucell" step, one echoed line
# at a time ('>' starts a fresh file, '>>' appends to it).
!echo "#!/usr/bin/env bash" > cute_pyScenic_aucell.sbatch
!echo "#SBATCH --job-name=pySCENIC_auc" >> cute_pyScenic_aucell.sbatch

# requested resources: 1 node, 8 tasks, time limit 0:20:00
!echo "#SBATCH --nodes=1" >> cute_pyScenic_aucell.sbatch
!echo "#SBATCH --ntasks=8" >> cute_pyScenic_aucell.sbatch
!echo "#SBATCH --time=0:20:00" >> cute_pyScenic_aucell.sbatch

# partition and quality of service
!echo "#SBATCH --partition=atesting" >> cute_pyScenic_aucell.sbatch
!echo "#SBATCH --qos=normal" >> cute_pyScenic_aucell.sbatch

# e-mail notification when the job ends, and a per-job log file (%j = job id)
!echo "#SBATCH --mail-type=END" >> cute_pyScenic_aucell.sbatch
!echo "#SBATCH --mail-user=dyammons@colostate.edu" >> cute_pyScenic_aucell.sbatch
!echo "#SBATCH --output=pySCENIC_auc-%j.log" >> cute_pyScenic_aucell.sbatch

!echo "" >> cute_pyScenic_aucell.sbatch
!echo "module purge" >> cute_pyScenic_aucell.sbatch

!echo "" >> cute_pyScenic_aucell.sbatch

# The pyscenic command itself, run inside the singularity image.
# Inputs: the expression loom and reg_main.csv from the ctx step;
# output: main_branches_hu_output.loom.
# \$SLURM_NTASKS is escaped so the variable name is written literally and
# resolved inside the job.
!echo "singularity exec -B $PWD/../ ../software/aertslab-pyscenic-scanpy-0.12.1-1.9.1.sif pyscenic aucell \\" >> cute_pyScenic_aucell.sbatch
!echo "../output/s3/main_branches_hu.loom \\" >> cute_pyScenic_aucell.sbatch
!echo "../output/scenic/reg_main.csv \\" >> cute_pyScenic_aucell.sbatch
!echo "--output ../output/scenic/main_branches_hu_output.loom \\" >> cute_pyScenic_aucell.sbatch
!echo "--num_workers \$SLURM_NTASKS" >> cute_pyScenic_aucell.sbatch


In [12]:
# submit the AUCell job to SLURM; it took about 2 minutes to run
!sbatch cute_pyScenic_aucell.sbatch

Submitted batch job 4693076


In [13]:
# collect SCENIC AUCell output
# lf = lp.connect("./output/scenic/dc_scenic_output_231023.loom", mode='r+', validate=False )
lf = lp.connect("../output/scenic/main_branches_hu_output.loom", mode='r+', validate=False )
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
lf.close()

In [14]:
# preview the AUCell matrix (rows are indexed by CellID, columns are regulons)
auc_mtx

Unnamed: 0,AR(+),ARID3A(+),ATF1(+),ATF2(+),ATF3(+),ATF5(+),ATF6B(+),BACH1(+),BACH2(+),BCL11A(+),...,ZNF7(+),ZNF711(+),ZNF740(+),ZNF76(+),ZNF770(+),ZNF782(+),ZNF784(+),ZSCAN26(+),ZSCAN29(+),ZSCAN31(+)
BM154801_AAACCCATCTAGGCAT-1,0.006621,0.027321,0.031155,0.038173,0.052802,0.0,0.004978,0.014496,0.008326,0.016392,...,0.012539,0.001163,0.0,0.000000,0.017228,0.0,0.0,0.000000,0.0,0.0
BM154801_AAACGAAAGCAACAAT-1,0.000000,0.088594,0.002606,0.026246,0.035489,0.0,0.005923,0.028538,0.005845,0.034310,...,0.000000,0.014274,0.0,0.003655,0.014911,0.0,0.0,0.010163,0.0,0.0
BM154801_AAACGAAAGGATACGC-1,0.000000,0.042573,0.000882,0.027971,0.031466,0.0,0.000000,0.008295,0.000000,0.021502,...,0.000000,0.000000,0.0,0.000000,0.002330,0.0,0.0,0.000000,0.0,0.0
BM154801_AAACGAAGTAACATCC-1,0.000000,0.023873,0.000000,0.017823,0.028736,0.0,0.000000,0.004984,0.000000,0.021650,...,0.000000,0.000000,0.0,0.000000,0.007368,0.0,0.0,0.000000,0.0,0.0
BM154801_AAACGCTGTGTCCTAA-1,0.002621,0.028912,0.028348,0.022579,0.040733,0.0,0.008593,0.010825,0.003645,0.022512,...,0.032132,0.000000,0.0,0.000000,0.020097,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C34165634_TTTGATCGTGATTGGG-1,0.000000,0.000398,0.002245,0.014792,0.035632,0.0,0.001098,0.005747,0.003743,0.039002,...,0.000000,0.003769,0.0,0.000000,0.014345,0.0,0.0,0.000000,0.0,0.0
C34165634_TTTGGAGAGGACAAGA-1,0.000000,0.043236,0.000000,0.021953,0.028161,0.0,0.001682,0.018571,0.000000,0.032599,...,0.000000,0.000000,0.0,0.011241,0.010897,0.0,0.0,0.000000,0.0,0.0
C34165634_TTTGGAGAGGAGCAAA-1,0.000000,0.000000,0.000000,0.011509,0.057112,0.0,0.000918,0.011623,0.000000,0.040209,...,0.000000,0.000000,0.0,0.000000,0.012574,0.0,0.0,0.000000,0.0,0.0
C34165634_TTTGGAGCACTCTGCT-1,0.000000,0.000000,0.002366,0.009785,0.036494,0.0,0.000000,0.007634,0.000000,0.019015,...,0.000000,0.001484,0.0,0.000000,0.015160,0.0,0.0,0.000000,0.0,0.0


In [15]:
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss
import matplotlib.pyplot as plt
# from adjustText import adjust_text
import seaborn as sns
from pyscenic.binarization import binarize

In [16]:
cellAnnot = pd.concat(
    [
        pd.DataFrame( adata.obs.index.tolist(), index=adata.obs['clusterID_integrated.harmony'].tolist() )
    ],
    axis=1
)
cellAnnot.columns = [
 'clusterID_integrated.harmony']


In [17]:
rss = regulon_specificity_scores(auc_mtx, adata.obs['clusterID_integrated.harmony'])
rss.head()

Unnamed: 0,AR(+),ARID3A(+),ATF1(+),ATF2(+),ATF3(+),ATF5(+),ATF6B(+),BACH1(+),BACH2(+),BCL11A(+),...,ZNF7(+),ZNF711(+),ZNF740(+),ZNF76(+),ZNF770(+),ZNF782(+),ZNF784(+),ZSCAN26(+),ZSCAN29(+),ZSCAN31(+)
5,0.257839,0.428414,0.42123,0.419456,0.301267,0.168672,0.346682,0.368792,0.292697,0.345173,...,0.223967,0.248321,0.175295,0.21034,0.372563,0.172208,0.167445,0.213046,0.226455,0.177055
9,0.175934,0.32622,0.234346,0.293745,0.313841,0.167965,0.262093,0.374885,0.199319,0.275929,...,0.197146,0.252547,0.16875,0.189158,0.292901,0.167445,0.167445,0.184404,0.183088,0.170953
16,0.191293,0.227096,0.21811,0.248959,0.27043,0.175061,0.259711,0.248438,0.231713,0.301974,...,0.240531,0.249848,0.169972,0.202371,0.264634,0.176072,0.167445,0.18043,0.180558,0.167445
13,0.21893,0.198797,0.229328,0.311802,0.294509,0.172197,0.285085,0.302182,0.254675,0.299471,...,0.194734,0.296216,0.176385,0.208832,0.296001,0.170217,0.167445,0.213251,0.191972,0.173867
10,0.27529,0.22157,0.262841,0.345872,0.377495,0.169682,0.317136,0.319606,0.357292,0.336701,...,0.269202,0.349812,0.200894,0.322542,0.370639,0.183552,0.170259,0.208201,0.224735,0.183183


In [41]:
# Relabel RSS rows '0', '1' and '2' as '17', '30' and '3'.
# NOTE(review): this cell carries an out-of-order execution count (In [41]) and
# the RSS index printed earlier holds other labels (5, 9, 16, 13, 10, ...), so
# the rename looks like a no-op for this data set — confirm whether it is needed.
rss = rss.rename(index={'0' : '17',
                        '1' : '30',
                        '2' : '3'})

In [18]:
pd.DataFrame.to_csv(rss, "../output/scenic/main_branches_hu_rss.csv")


```r
library(tidyverse)
library(ggrepel)
library(patchwork)

df <- read.csv("../output/scenic/main_branches_hu_rss.csv", row.names = 1)
colnames(df) <- gsub('\\.\\.\\.',"",colnames(df))
df.all <- t(df) %>% as.data.frame() %>% rownames_to_column() %>% na.omit()

labCut = 10
pi <- lapply(2:ncol(df.all), function(x){
    #prep data
    celltype <- colnames(df.all)[x]
    df <- df.all[ ,c("rowname",celltype)] 
    colnames(df) <- c("gene", "clus")
    df <- df %>% arrange(desc(clus))
    df[ ,"gene"] <- factor(df[ ,"gene"], levels = df[ ,"gene"])
    df <- df %>% mutate(colz = ifelse(row_number() < labCut+1, "#FF6961", "lightblue"),
                        labz = ifelse(row_number() < labCut+1, as.character(gene), NA))
    
    #plot the data
    p <- ggplot(df, aes(x = gene, y = clus, label = labz)) +
    geom_point(colour = df$colz, size = 2) + 
    geom_text_repel(
        force = 0.01,
        nudge_x = nrow(df)*0.5,
        direction = "y",
        seed = 42, 
        box.padding = 0.25,
        #hjust = 0,
        #segment.size = 0.2, 
        size = 4, 
        color = df$colz,
        max.iter = 100000000,
        max.overlaps = 10
    ) + 
    labs(
        title = celltype,
        x = "Regulon",
        y = "rss"
    ) + 
    theme_classic() + 
    theme(
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank(),
        panel.border = element_blank(),
        plot.margin = unit(c(10, 10, 10, 10), "pt"),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()
    ) + 
    scale_y_continuous(
        breaks = function(y) {
            seq(floor(min(y, digits = 1)), 
            ceiling(max(y, digits = 1)), 
            by = 0.1)
        }
    ) + coord_cartesian(expand = TRUE, clip = "off")

    #return/save plot
    ggsave(paste0("../output/scenic/", gsub(" ", "_", celltype), "_rss.png"), height = 5, width = 3)
    return(p)
})


p <- Reduce( `+`, pi) + plot_layout(ncol = 6)
ggsave(plot = p, "../output/scenic/main_branches_rss.png", height = 4, width = 10)

```