# pySCENIC gene regulatory network analysis of pre-processed data

[GitHub](https://github.com/aertslab/pySCENIC?tab=readme-ov-file#id16)  
[Tutorial](https://pyscenic.readthedocs.io/en/latest/tutorial.html)  
[Workflow publication](https://www.nature.com/articles/s41596-020-0336-2)  
[Database](https://resources.aertslab.org/cistarget/databases/)  

[About group comparisons](https://github.com/aertslab/pySCENIC/issues/292)

In [1]:
# Silence all Python warnings for the rest of the session
import warnings
warnings.filterwarnings('ignore')

In [2]:
from pyscenic.rss import regulon_specificity_scores

import scanpy as sc
import loompy as lp

import pandas as pd
import numpy as np

import glob

import os

In [6]:
# Work from the project root so that the relative paths used later
# ('plotting_global.R', 'data/...') resolve against it
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [7]:
# Default scanpy figure settings: 7 x 7 figure size, non-transparent background
sc.set_figure_params(figsize=(7, 7), transparent=False)

In [8]:
# Suppress all Python warnings
# NOTE(review): repeats the import and filter from the first cell of this
# notebook — looks redundant; confirm before removing.
import warnings
warnings.filterwarnings('ignore')

In [9]:
# rpy2: point R_HOME at the R installation of the r.4.1.0 conda environment.
# This is set before rpy2 is imported in the next cell — presumably so that
# rpy2 binds to this R installation; confirm.
os.environ['R_HOME'] = '/nobackup/peer/fdeckert/miniconda3/envs/r.4.1.0/lib/R'

In [10]:
# Plotting: source the shared R colour definitions and mirror them in Python
import rpy2.robjects as robjects

color_load = robjects.r.source('plotting_global.R')

# The first element of the sourced result is used as a named list whose
# entries are themselves named vectors; it becomes a two-level dict:
# color[<list name>][<entry name>] -> first value stored under that entry.
palettes = color_load[0]
color = {
    name: {key: palettes[idx].rx2(key)[0] for key in palettes[idx].names}
    for idx, name in enumerate(palettes.names)
}

In [11]:
def set_color(adata, categories):
    """Apply the shared colour palette to categorical ``adata.obs`` columns.

    For every name in ``categories`` that is a column of ``adata.obs``, the
    column is converted to a pandas categorical, its categories are put in
    the order in which they appear in the module-level ``color[name]``
    mapping, and the matching colours are stored in
    ``adata.uns[name + '_colors']`` in that same order.

    Args:
        adata: Annotated data object; ``adata.obs`` and ``adata.uns`` are
            modified in place.
        categories: Iterable of ``adata.obs`` column names. Names that are
            not columns of ``adata.obs`` are skipped silently.

    Raises:
        KeyError: If a processed column has no entry in ``color``.
        ValueError: If a column contains labels for which ``color[name]``
            defines no colour.
    """
    # Build the lookup once instead of re-listing the columns per name.
    obs_columns = set(adata.obs.columns)

    for category in categories:
        if category not in obs_columns:
            continue

        # Drop categories that no cell carries (e.g. left over from earlier
        # subsetting); otherwise reorder_categories() below would reject
        # the key list because it no longer matches the old categories.
        labels = pd.Series(adata.obs[category], dtype='category')
        labels = labels.cat.remove_unused_categories()

        palette = color[category]
        observed = set(labels.cat.categories)

        # Keep the palette order, restricted to labels present in the data.
        keys = [key for key in palette if key in observed]

        missing = observed.difference(keys)
        if missing:
            raise ValueError(
                f"No colour defined for {category!r} labels: "
                f"{sorted(map(str, missing))}"
            )

        adata.obs[category] = labels.cat.reorder_categories(keys)
        adata.uns[category + '_colors'] = np.array(
            [palette[key] for key in keys], dtype=object
        )

# Load pre-processed AnnData object

In [12]:
# Read the pre-processed dataset; the path is relative and therefore resolved
# against the current working directory
adata = sc.read_h5ad("data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.h5ad")

In [13]:
# Subset genes: keep a gene (column of adata.X) only if at least one cell has
# a value >= 3 for it; .copy() materialises the subset as its own object.
# NOTE(review): the threshold applies to whatever adata.X holds (raw counts
# vs. normalised values) — confirm this is the intended scale.
adata = adata[:, (adata.X>=3).sum(axis=0)>=1].copy()

# Export loom file

In [14]:
# Loom: absolute path of the expression loom file that is written by the
# export step and passed to the pySCENIC command-line steps
loom_file = "/research/peer/fdeckert/FD20200109SPLENO/data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.loom"

In [15]:
# The loom file is written with genes as rows and cells as columns, so the
# cells x genes matrix of the AnnData object is transposed once up front.
expr = adata.X.transpose()

# Row (gene) annotations
row_attrs = {"Gene": np.array(adata.var_names)}

# Column (cell) annotations: cell id, number of genes with a value > 0,
# and the per-cell sum over all genes
genes_per_cell = np.array(np.sum(expr > 0, axis=0)).flatten()
total_per_cell = np.array(np.sum(expr, axis=0)).flatten()
col_attrs = {
    "CellID": np.array(adata.obs_names),
    "nGene": genes_per_cell,
    "nUMI": total_per_cell,
}

lp.create(loom_file, expr, row_attrs, col_attrs)

# pySCENIC workflow 

In [16]:
# SCENIC database files (mm10)
# Transcription-factor list passed to the GRN inference step
tf_file = "/nobackup/peer/fdeckert/scenic/mm10/mm_mgi_tfs.txt"
# All *feather ranking databases, joined with spaces so that they expand to
# separate arguments when interpolated into the `pyscenic ctx` shell command.
# Sorted because glob.glob() returns matches in arbitrary order, which would
# make the generated command line differ between runs/machines.
db_file = ' '.join(sorted(glob.glob("/nobackup/peer/fdeckert/scenic/mm10/*feather")))
# Motif annotation table passed via --annotations_fname
motif_file = "/nobackup/peer/fdeckert/scenic/mm10/motifs-v10nr_clust-nr.mgi-m0.001-o0.0.tbl"

# Results: output directory and name of the final pySCENIC loom file
result_dir = "/research/peer/fdeckert/FD20200109SPLENO/result/scenic/scRNAseq/"
result_loom = "pyscenic.loom"

# Parallelism: worker count handed to every pySCENIC command
num_workers = 32

In [17]:
# Run the pipeline from the result directory so that the relative output
# names used by the following steps (adj.csv, reg.csv, the result loom and
# the exported CSV tables) are written there. The directory is created first
# so that a fresh setup does not fail with FileNotFoundError.
os.makedirs(result_dir, exist_ok=True)
os.chdir(result_dir)

In [18]:
# Step 1: run arboreto (method grnboost2) on the expression loom with the
# transcription-factor list in tf_file, using num_workers workers; the
# output is written to adj.csv in the current working directory.
!/nobackup/peer/fdeckert/miniconda3/envs/pyscenic/bin/arboreto_with_multiprocessing.py {loom_file} {tf_file} \
    --method grnboost2 \
    --output adj.csv \
    --num_workers {num_workers}

In [19]:
# Step 2: `pyscenic ctx` takes adj.csv from the previous step, the ranking
# databases ({db_file} expands to several space-separated paths), the motif
# annotation table and the expression loom; the output is written to reg.csv.
# NOTE(review): --mask_dropouts presumably excludes zero-expression cells
# when TF-target correlations are computed — confirm against the pySCENIC docs.
!/nobackup/peer/fdeckert/miniconda3/envs/pyscenic/bin/pyscenic ctx adj.csv {db_file} \
    --annotations_fname {motif_file} \
    --expression_mtx_fname {loom_file} \
    --output reg.csv \
    --mask_dropouts \
    --num_workers {num_workers}

In [20]:
# Step 3: `pyscenic aucell` takes the expression loom and reg.csv from the
# previous step and writes its output to the loom file named by result_loom.
!/nobackup/peer/fdeckert/miniconda3/envs/pyscenic/bin/pyscenic aucell \
    {loom_file} \
    reg.csv \
    --output {result_loom} \
    --num_workers {num_workers}

# Regulon score (AUC)

In [21]:
# Read the per-cell regulon AUC values (column attribute RegulonsAUC, indexed
# by CellID) from the pySCENIC result loom and export them as CSV.
# The context manager closes the loom file even if reading or writing raises,
# so no open handle is left behind.
# NOTE(review): the file is opened with mode='r+' although it is only read here.
with lp.connect(result_loom, mode='r+', validate=False) as lf:
    auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)
    auc_mtx.to_csv('auc_mtx.csv')

# Regulon specificity score (RSS)

In [35]:
# RSS for the cells whose `infection` annotation is "Baseline", using the
# `leiden_cell_type_main` annotation as cell-type label; exported as CSV.
is_baseline = adata.obs.infection == "Baseline"
baseline_cell_types = adata[is_baseline].obs['leiden_cell_type_main']
rss_mtx = regulon_specificity_scores(auc_mtx[is_baseline], baseline_cell_types)
rss_mtx.to_csv('rss_baseline_mtx.csv')

In [36]:
# RSS for the cells whose `infection` annotation is "CpG", using the
# `leiden_cell_type_main` annotation as cell-type label; exported as CSV.
# The result is bound to `rss_mtx`, replacing any value held under that name.
is_cpg = adata.obs.infection == "CpG"
cpg_cell_types = adata[is_cpg].obs['leiden_cell_type_main']
rss_mtx = regulon_specificity_scores(auc_mtx[is_cpg], cpg_cell_types)
rss_mtx.to_csv('rss_cpg_mtx.csv')