# pySCENIC gene regulatory network analysis of pre-processed data

[GitHub](https://github.com/aertslab/pySCENIC?tab=readme-ov-file#id16)  
[Tutorial](https://pyscenic.readthedocs.io/en/latest/tutorial.html)  
[Workflow publication](https://www.nature.com/articles/s41596-020-0336-2)  
[Database](https://resources.aertslab.org/cistarget/databases/)  

[About group comparisons](https://github.com/aertslab/pySCENIC/issues/292)

In [None]:
import warnings
# Silence every warning for the rest of the session. This keeps the notebook
# output clean, but it also hides deprecation notices from the libraries
# imported below.
warnings.filterwarnings('ignore')

In [None]:
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss

from arboreto.algo import grnboost2

import scanpy as sc
import loompy as lp

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import glob

import os

In [None]:
# Work from the project root so that the relative paths used below
# ('result/figures/', 'data/bulkRNAseq/...') resolve against it.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

In [None]:
# rpy2: R installation to use.
# NOTE(review): rpy2 is not imported in the visible part of this notebook;
# presumably this must run before any later rpy2 import - confirm.
os.environ['R_HOME'] = '/nobackup/peer/fdeckert/miniconda3/envs/r.4.4.1-FD20200109SPLENO/lib/R'

## Plotting 

In [None]:
# Figure defaults for scanpy plots.
# vector_friendly must be passed to set_figure_params: that function resets the
# setting to its own default (True), so assigning an attribute on sc.settings
# beforehand never takes effect.
sc.set_figure_params(
    figsize=(5, 5),
    dpi_save=1200,
    fontsize=12,
    frameon=False,
    facecolor="white",
    vector_friendly=False,
)

# Resolve the figure directory to an absolute path now (cwd is the project
# root here), so saved figures still land in <project>/result/figures/ after
# the working directory is switched to result_dir further down.
sc.settings.figdir = os.path.abspath('result/figures/')

In [None]:
# Seaborn theme: white grid, fonts scaled to 80 %.
# A preceding bare sns.set() was redundant - this call sets context, style and
# palette again and overrides everything the default call applied.
sns.set(style='whitegrid', font_scale=0.8)

In [None]:
# Matplotlib font sizes, applied on top of the theme configured above.
font_sizes = {
    'font.size': 12,         # default text
    'axes.titlesize': 12,    # axes titles
    'axes.labelsize': 12,    # x / y axis labels
    'xtick.labelsize': 10,   # x tick labels
    'ytick.labelsize': 10,   # y tick labels
    'legend.fontsize': 12,   # legend entries
}
plt.rcParams.update(font_sizes)

# Create AnnData from bulk RNAseq experiment

In [None]:
# Count table; the two gene-annotation columns are not data and are removed.
# Presumably genes are rows and samples are columns in cnt.csv - confirm.
cnt = pd.read_csv('data/bulkRNAseq/object/cnt.csv', index_col=0)
cnt = cnt.drop(columns=['mgi_symbol', 'ensembl_gene_id'])

# Per-sample annotation, used as adata.obs
meta = pd.read_csv('data/bulkRNAseq/object/meta.csv', index_col=0)

# AnnData holds observations in rows, hence the transpose of the count table
adata = sc.AnnData(cnt.transpose(), obs=meta)

# Subset 

In [None]:
# Keep only variables (columns of adata.X) whose value is >= 3 in at least
# 3 observations.
n_obs_expressed = (adata.X >= 3).sum(axis=0)
adata = adata[:, n_obs_expressed >= 3]

In [None]:
# adata was produced by slicing (an AnnData view); copy it to get an
# independent object before it is used and modified further.
adata = adata.copy()

# Export loom file

In [None]:
# The loom matrix is written as variables x observations, i.e. the transpose
# of adata.X, so row attributes describe genes and column attributes samples.
counts = adata.X

row_attrs = {"Gene": np.array(adata.var_names)}

col_attrs = {
    "CellID": np.array(adata.obs_names),
    # number of variables with a non-zero value, per observation
    "nGene": np.array(np.sum(counts > 0, axis=1)).flatten(),
    # summed values, per observation
    "nUMI": np.array(np.sum(counts, axis=1)).flatten(),
}

lp.create("data/bulkRNAseq/scenic/adata.loom", counts.transpose(), row_attrs, col_attrs)

# pySCENIC workflow 

In [None]:
# SCENIC database files (mm10)
tf_file = "/nobackup/peer/fdeckert/scenic/mm10/mm_mgi_tfs.txt"
# db_file is interpolated into the `pyscenic ctx` command line below, hence a
# single space-separated string. glob returns matches in arbitrary order, so
# sort them to keep the command reproducible between runs and machines.
db_file = ' '.join(sorted(glob.glob("/nobackup/peer/fdeckert/scenic/mm10/*feather")))
motif_file = "/nobackup/peer/fdeckert/scenic/mm10/motifs-v10nr_clust-nr.mgi-m0.001-o0.0.tbl"

# Loom file exported above (absolute path, since the working directory changes)
input_loom = "/research/peer/fdeckert/FD20200109SPLENO/data/bulkRNAseq/scenic/adata.loom"

# Result directory and the name of the loom written into it
result_dir = "/research/peer/fdeckert/FD20200109SPLENO/result/scenic/bulkRNAseq"
result_loom = "pyscenic.loom"

# Available workers; os.cpu_count() may return None, so fall back to a single
# worker instead of passing "None" on the command line
num_workers = os.cpu_count() or 1

In [None]:
# Work inside the result directory: the pyscenic steps below read and write
# adj.csv, reg.csv and result_loom by relative name.
os.chdir(result_dir)

In [None]:
# Step 1 (kept commented out; run once, output reused): `pyscenic grn` with
# GRNBoost2 on the exported loom and the TF list, writing adj.csv.
# !/nobackup/peer/fdeckert/miniconda3/envs/pyscenic.0.12.1/bin/pyscenic grn {input_loom} {tf_file} \
#     --method grnboost2 \
#     --output adj.csv \
#     --num_workers {num_workers}

In [None]:
# Step 2 (kept commented out): `pyscenic ctx` on adj.csv with the ranking
# databases in db_file and the motif annotation table, writing reg.csv.
# !/nobackup/peer/fdeckert/miniconda3/envs/pyscenic.0.12.1/bin/pyscenic ctx adj.csv {db_file} \
#     --annotations_fname {motif_file} \
#     --expression_mtx_fname {input_loom} \
#     --output reg.csv \
#     --mask_dropouts \
#     --num_workers {num_workers}

In [None]:
# Step 3 (kept commented out): `pyscenic aucell` on the input loom and
# reg.csv, writing result_loom (read back in the next cell).
# !/nobackup/peer/fdeckert/miniconda3/envs/pyscenic.0.12.1/bin/pyscenic aucell \
#     {input_loom} \
#     reg.csv \
#     --output {result_loom} \
#     --num_workers {num_workers}

In [None]:
# Load the regulon AUC matrix from the pySCENIC result loom.
# The file is only read, so open it read-only; the context manager closes the
# HDF5 handle even if building the DataFrame raises.
with lp.connect(result_loom, mode='r', validate=False) as lf:
    auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)

# Put the rows in adata's observation order before storing them; this raises a
# KeyError if the loom lacks any of adata's observations instead of attaching
# misaligned values.
auc_mtx = auc_mtx.loc[adata.obs_names]
adata.obsm['X_regulon'] = auc_mtx

In [None]:
# Neighbour graph computed on the regulon AUC matrix stored in
# adata.obsm['X_regulon'], followed by a UMAP embedding of that graph.
sc.pp.neighbors(adata, use_rep='X_regulon')
sc.tl.umap(adata)
# Colour the embedding by 'celltype' (expected to be a column of adata.obs,
# i.e. of meta.csv - confirm).
sc.pl.umap(adata, color=['celltype'], size=100)