mm10-based tutorial #25

JBreunig · 2022-08-22T18:49:41Z

Very excited to try this new methodology as a past fan of SCENIC and CisTopic.

I'm sure you have been busy getting the release and manuscript ready. When you have time might you please provide an mm10-based tutorial? (e.g. 10X mouse brain multiome or similar). Or would it be best to contact you by email to request the notebook from Fig. 5?

cbravo93 · 2022-08-22T19:50:36Z

Hi @JBreunig !

For Figure 5:

The pycisTopic analysis is already available here.
PycisTarget was run using the wrapper function:

# Load region binarized topics
import pickle
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
infile = open(outDir+'topic_binarization/binarized_topic_region_otsu.pkl', 'rb')
binarized_topic_region = pickle.load(infile)
infile.close()
# Load DARs
import pickle
infile = open(outDir+'DARs/DARs.pkl', 'rb')
DARs_dict = pickle.load(infile)
infile.close()
# Format region sets
import re
import pyranges as pr
from pycistarget.utils import *
region_sets = {}
region_sets['Topics'] = {key: pr.PyRanges(region_names_to_coordinates(binarized_topic_region[key].index.tolist())) for key in binarized_topic_region.keys()}
region_sets['DARs'] = {re.sub('[^A-Za-z0-9]+', '_', key): pr.PyRanges(region_names_to_coordinates(DARs_dict[key].index.tolist())) for key in DARs_dict.keys()}

# Run pycistarget
# run_without_promoters = True, will run the methods in all regions + the region sets without promoters
import os
os.chdir('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/scenicplus/src/')
from scenicplus.wrappers.run_pycistarget import *
run_pycistarget(region_sets,
                 ctx_db_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/ctx_db/TEW_mouse_cortex.regions_vs_motifs.rankings.feather',
                 species = 'mus_musculus',
                 save_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific/',
                 dem_db_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/ctx_db/TEW_mouse_cortex.regions_vs_motifs.scores.feather',
                 run_without_promoters = True,
                 biomart_host = 'http://nov2020.archive.ensembl.org/',
                 promoter_space = 500,
                 ctx_auc_threshold = 0.005,
                 ctx_nes_threshold = 3.0,
                 ctx_rank_threshold = 0.05,
                 dem_log2fc_thr = 0.5,
                 dem_motif_hit_thr = 3.0,
                 dem_max_bg_regions = 500,
                 path_to_motif_annotations = '/staging/leuven/stg_00002/lcb/cbravo/cluster_motif_collection_V10/snapshots/motifs-v10-nr.mgi-m0.001-o0.0_clust.tsv',
                 annotation_version = 'v10nr_clust',
                 annotation = ['Direct_annot', 'Orthology_annot'],
                 n_cpu = 1,
                 _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill')

SCENIC+ was run using the wrapper function

# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *

outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
# Load cisTopic object
import pickle
infile = open(outDir + 'cisTopicObject.pkl', 'rb')
cistopic_obj = pickle.load(infile)
infile.close()
# Load imputed accessibility
import pickle
infile = open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') #Here I am using pycisTopic gene activity matrix, but could be any :)
imputed_acc_obj = pickle.load(infile)
infile.close()
## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names
cell_data = cell_data.replace('TEW__c14e1d__Multiome_RNA_brain_10x_no_perm', '10x_no_perm')
cell_data = cell_data.replace('TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004', 'TST_NP40_004')
cell_data = cell_data.replace('TEW__75da5c__5b0f59__Multiome_brain_TST', 'TST')
cell_data = cell_data.replace('TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC', '10x_complex_UC')
cell_data = cell_data.replace('TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex', '10x_complex')
cell_data['barcode'] = [x.split('___')[0] for x in cell_data.index.tolist()]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

## Precomputed imputed data
import pickle
infile = open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific/menr_DT_nosimilarity.pkl', 'rb') 
menr = pickle.load(infile)
infile.close()

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
import pickle
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/scplus_obj.pkl', 'wb') as f:
  pickle.dump(scplus_obj, f)

# For the downstream analyses
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/'
import pickle
infile = open(outDir+'scplus_obj.pkl', 'rb')
scplus_obj = pickle.load(infile)
infile.close()

from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/',
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,   
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

And this is it! You can find the mouse precomputed databases at https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/screen/mc_v10_clust/region_based/ . Motif annotations are available at: https://resources.aertslab.org/cistarget/motif2tf/ . So basically is using the wrapper functions but changing the genome to mm10.

Cheers!

Carmn

JBreunig · 2022-08-22T20:34:35Z

Brilliant...thanks!

cbravo93 closed this as completed Aug 22, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

mm10-based tutorial #25

mm10-based tutorial #25

JBreunig commented Aug 22, 2022

cbravo93 commented Aug 22, 2022 •

edited

Loading

JBreunig commented Aug 22, 2022

mm10-based tutorial #25

mm10-based tutorial #25

Comments

JBreunig commented Aug 22, 2022

cbravo93 commented Aug 22, 2022 • edited Loading

JBreunig commented Aug 22, 2022

cbravo93 commented Aug 22, 2022 •

edited

Loading