# The detailed parameters of pySCENIC

In [18]:
# import dependencies
import os,sys
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
import anndata as ad
from MulticoreTSNE import MulticoreTSNE as TSNE
import seaborn as sns
import matplotlib.pyplot as plt

In [38]:
# Path of the unfiltered loom file. The file does not need to exist yet: it is
# written by the lp.create(...) call further down in this notebook.
f_loom_path_unfilt = "hswound_pySCENIC.loom"

In [36]:
# AnnData object produced by the trajectory step.
# The location can be overridden through the PYSCENIC_ADATA_PATH environment
# variable so the notebook also runs on machines other than the author's;
# when the variable is unset, the original absolute path is used.
f_adata_path = os.environ.get(
    "PYSCENIC_ADATA_PATH",
    '/Users/zhuliu/Desktop/scRNA_STseq/proj_10X_woundhealing/03_results/01-Seurat-PreAnalysis/02_Seurat_BatchCorrection/s2_Seurat_allSample_subclustering/pySCENIC/allNew_scanpy_subkeratins.h5ad',
)
adata = sc.read_h5ad(f_adata_path)

In [37]:
# Display the summary of the loaded AnnData object (dimensions and the names of
# its obs / uns / obsm entries).
adata

AnnData object with n_obs × n_vars = 27105 × 25778
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'Patient', 'Gender', 'Age', 'Condition', 'Doublet', 'percent.mt', 'percent.ribo', 'percent.hb', 'percent.malat1', 'S.Score', 'G2M.Score', 'Phase', 'CC.Difference', 'MainCellTypes', 'SubCellTypes', 'SCT_snn_res.0.8', 'seurat_clusters', 'SCT_snn_res.0.5', 'SCT_snn_res.1', 'CellTypes', 'mig_score1', 'monocle3_pseudotime', 'UMAP_1', 'UMAP_2'
    uns: 'CellTypes_colors', 'Condition_colors', 'seurat_clusters_colors'
    obsm: 'X_harmony', 'X_pca', 'X_umap'

In [42]:
# The loom layout is genes x cells, whereas adata.X is cells x genes, so the
# matrix is transposed once here and reused for the per-cell summaries and for
# the data written to the file.
genes_by_cells = adata.X.transpose()

# Row (gene) attributes of the loom file.
row_attrs = {"Gene": np.array(adata.var.index)}

# Column (cell) attributes: cell identifiers, the number of entries > 0 per
# cell ("nGene") and the per-cell sum of the matrix ("nUMI").
col_attrs = {
    "CellID": np.array(adata.obs.index),
    "nGene": np.array(np.sum(genes_by_cells > 0, axis=0)).flatten(),
    "nUMI": np.array(np.sum(genes_by_cells, axis=0)).flatten(),
}

# Selected metadata columns copied unchanged from adata.obs.
for obs_key in ("orig.ident", "Condition", "percent.mt", "CellTypes"):
    col_attrs[obs_key] = np.array(adata.obs[obs_key]).flatten()

lp.create(f_loom_path_unfilt, genes_by_cells, row_attrs, col_attrs)

In [43]:
# Reload the expression data from the loom file written by lp.create(...).
# The shared path variable is used instead of repeating the file name, so the
# file that is written and the file that is read cannot drift apart.
adata = sc.read_loom(f_loom_path_unfilt)

# Step 1. Phase Ia: GRN inference using the GRNBoost2 algorithm

In [2]:
# Print the command-line help of the `pyscenic grn` sub-command.
!pyscenic grn -h

OMP: Info #270: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
usage: pyscenic grn [-h] [-o OUTPUT] [-t] [-m {genie3,grnboost2}]
                    [--seed SEED] [--num_workers NUM_WORKERS]
                    [--client_or_address CLIENT_OR_ADDRESS]
                    [--cell_id_attribute CELL_ID_ATTRIBUTE]
                    [--gene_attribute GENE_ATTRIBUTE] [--sparse]
                    expression_mtx_fname tfs_fname

positional arguments:
  expression_mtx_fname  The name of the file that contains the expression
                        matrix for the single cell experiment. Two file
                        formats are supported: csv (rows=cells x
                        columns=genes) or loom (rows=genes x columns=cells).
  tfs_fname             The name of the file that contains the list of
                        transcription factors (TXT; one TF per line).

optional arguments:
  -h, --help            show this help message and exit
  -o OUTPU

In [1]:
# GRN inference with GRNBoost2 on 8 workers.
# The adjacencies are written to mice_adj.tsv, which is the file name that the
# following cells (pd.read_csv with a tab separator, and `pyscenic ctx`) read.
# NOTE(review): mice_loom_file and mice_tfs are not defined in any visible
# cell — confirm they are set before this cell runs.
!pyscenic grn {mice_loom_file} {mice_tfs} -o mice_adj.tsv -m grnboost2 --num_workers 8

In [None]:
# Load the tab-separated adjacency table (no column is used as the index) and
# preview its first rows.
mice_adjacencies = pd.read_csv(
    "mice_adj.tsv",
    sep='\t',
    index_col=False,
)
mice_adjacencies.head()

# Step 2-3. Regulon prediction aka cisTarget from CLI

In [3]:
# Print the command-line help of the `pyscenic ctx` sub-command.
!pyscenic ctx -h

OMP: Info #270: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
usage: pyscenic ctx [-h] [-o OUTPUT] [-n] [--chunk_size CHUNK_SIZE]
                    [--mode {custom_multiprocessing,dask_multiprocessing,dask_cluster}]
                    [-a] [-t] [--rank_threshold RANK_THRESHOLD]
                    [--auc_threshold AUC_THRESHOLD]
                    [--nes_threshold NES_THRESHOLD]
                    [--min_orthologous_identity MIN_ORTHOLOGOUS_IDENTITY]
                    [--max_similarity_fdr MAX_SIMILARITY_FDR]
                    --annotations_fname ANNOTATIONS_FNAME
                    [--num_workers NUM_WORKERS]
                    [--client_or_address CLIENT_OR_ADDRESS]
                    [--thresholds THRESHOLDS [THRESHOLDS ...]]
                    [--top_n_targets TOP_N_TARGETS [TOP_N_TARGETS ...]]
                    [--top_n_regulators TOP_N_REGULATORS [TOP_N_REGULATORS ...]]
                    [--min_genes MIN_GENES]
                 

In [1]:
import glob

# Ranking databases: every file in the working directory whose name matches
# the pattern below, joined into a single space-separated string so it can be
# interpolated into a shell command.
f_db_glob = "*feather"
ranking_db_files = glob.glob(f_db_glob)
f_db_names = ' '.join(ranking_db_files)

# Motif annotation table.
f_motif_path = "motifs-v9-nr.mgi-m0.001-o0.0.tbl"

In [2]:
# Echo the resolved ranking databases and the motif annotation file, one per line.
print(f_db_names, f_motif_path, sep="\n")

mm10__refseq-r80__500bp_up_and_100bp_down_tss.mc9nr.feather mm10__refseq-r80__10kb_up_and_down_tss.mc9nr.feather
motifs-v9-nr.mgi-m0.001-o0.0.tbl


In [None]:
# cisTarget step: read the adjacencies in mice_adj.tsv together with the
# ranking databases listed in f_db_names and the motif annotations in
# f_motif_path, and write the result to mice_reg.csv using 8 workers.
# NOTE(review): mice_loom_file is not defined in any visible cell — confirm it
# is set before this cell runs.
!pyscenic ctx \
mice_adj.tsv \
{f_db_names} \
--annotations_fname {f_motif_path} \
--expression_mtx_fname {mice_loom_file} \
--output mice_reg.csv \
--mask_dropouts \
--num_workers 8

# Step 4. Cellular enrichment (aka AUCell) from CLI

In [4]:
# Print the command-line help of the `pyscenic aucell` sub-command.
!pyscenic aucell -h

OMP: Info #270: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
usage: pyscenic aucell [-h] [-o OUTPUT] [-t] [-w] [--num_workers NUM_WORKERS]
                       [--seed SEED] [--rank_threshold RANK_THRESHOLD]
                       [--auc_threshold AUC_THRESHOLD]
                       [--nes_threshold NES_THRESHOLD]
                       [--cell_id_attribute CELL_ID_ATTRIBUTE]
                       [--gene_attribute GENE_ATTRIBUTE] [--sparse]
                       expression_mtx_fname signatures_fname

positional arguments:
  expression_mtx_fname  The name of the file that contains the expression
                        matrix for the single cell experiment. Two file
                        formats are supported: csv (rows=cells x
                        columns=genes) or loom (rows=genes x columns=cells).
  signatures_fname      The name of the file that contains the gene
                        signatures. Three file formats are supported: gmt

In [None]:
# AUCell step: quantify the signatures in mice_reg.csv against the expression
# matrix in mice_loom_file and write the result to mice_SCENIC_AUC.loom using
# 8 workers.
# NOTE(review): mice_loom_file is not defined in any visible cell — confirm it
# is set before this cell runs.
!pyscenic aucell \
{mice_loom_file} \
mice_reg.csv \
--output mice_SCENIC_AUC.loom \
--num_workers 8

# pySCENIC

In [6]:
# Run `pyscenic` without arguments to print its top-level usage and the list
# of available sub-commands.
!pyscenic

OMP: Info #270: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
usage: pyscenic [-h] {grn,add_cor,ctx,aucell} ...

Single-CEll regulatory Network Inference and Clustering (0.11.2)

positional arguments:
  {grn,add_cor,ctx,aucell}
                        sub-command help
    grn                 Derive co-expression modules from expression matrix.
    add_cor             [Optional] Add Pearson correlations based on TF-gene
                        expression to the network adjacencies output from the
                        GRN step, and output these to a new adjacencies file.
                        This will normally be done during the "ctx" step.
    ctx                 Find enriched motifs for a gene signature and
                        optionally prune targets from this signature based on
                        cis-regulatory cues.
    aucell              Quantify activity of gene signatures across single
                        cells.

optional arg

# pySCENIC add_cor

In [8]:
# Print the command-line help of the `pyscenic add_cor` sub-command.
!pyscenic add_cor -h

OMP: Info #270: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
usage: pyscenic add_cor [-h] [-o OUTPUT] [-t]
                        [--cell_id_attribute CELL_ID_ATTRIBUTE]
                        [--gene_attribute GENE_ATTRIBUTE] [--sparse]
                        [--thresholds THRESHOLDS [THRESHOLDS ...]]
                        [--top_n_targets TOP_N_TARGETS [TOP_N_TARGETS ...]]
                        [--top_n_regulators TOP_N_REGULATORS [TOP_N_REGULATORS ...]]
                        [--min_genes MIN_GENES]
                        [--expression_mtx_fname EXPRESSION_MTX_FNAME]
                        [--mask_dropouts]
                        adjacencies expression_mtx_fname

positional arguments:
  adjacencies           The name of the file that contains the GRN adjacencies
                        (output from the GRN step).
  expression_mtx_fname  The name of the file that contains the expression
                        matrix for the single cell 