Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mm10-based tutorial #25

Closed
JBreunig opened this issue Aug 22, 2022 · 2 comments
Closed

mm10-based tutorial #25

JBreunig opened this issue Aug 22, 2022 · 2 comments

Comments

@JBreunig
Copy link

Very excited to try this new methodology as a past fan of SCENIC and CisTopic.

I'm sure you have been busy getting the release and manuscript ready. When you have time might you please provide an mm10-based tutorial? (e.g. 10X mouse brain multiome or similar). Or would it be best to contact you by email to request the notebook from Fig. 5?

@cbravo93
Copy link
Member

cbravo93 commented Aug 22, 2022

Hi @JBreunig !

For Figure 5:

  • The pycisTopic analysis is already available here.
  • PycisTarget was run using the wrapper function:
# Load region binarized topics
import pickle
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
infile = open(outDir+'topic_binarization/binarized_topic_region_otsu.pkl', 'rb')
binarized_topic_region = pickle.load(infile)
infile.close()
# Load DARs
import pickle
infile = open(outDir+'DARs/DARs.pkl', 'rb')
DARs_dict = pickle.load(infile)
infile.close()
# Format region sets
import re
import pyranges as pr
from pycistarget.utils import *
region_sets = {}
region_sets['Topics'] = {key: pr.PyRanges(region_names_to_coordinates(binarized_topic_region[key].index.tolist())) for key in binarized_topic_region.keys()}
region_sets['DARs'] = {re.sub('[^A-Za-z0-9]+', '_', key): pr.PyRanges(region_names_to_coordinates(DARs_dict[key].index.tolist())) for key in DARs_dict.keys()}

# Run pycistarget
# run_without_promoters = True, will run the methods in all regions + the region sets without promoters
import os
os.chdir('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/scenicplus/src/')
from scenicplus.wrappers.run_pycistarget import *
run_pycistarget(region_sets,
                 ctx_db_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/ctx_db/TEW_mouse_cortex.regions_vs_motifs.rankings.feather',
                 species = 'mus_musculus',
                 save_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific/',
                 dem_db_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/ctx_db/TEW_mouse_cortex.regions_vs_motifs.scores.feather',
                 run_without_promoters = True,
                 biomart_host = 'http://nov2020.archive.ensembl.org/',
                 promoter_space = 500,
                 ctx_auc_threshold = 0.005,
                 ctx_nes_threshold = 3.0,
                 ctx_rank_threshold = 0.05,
                 dem_log2fc_thr = 0.5,
                 dem_motif_hit_thr = 3.0,
                 dem_max_bg_regions = 500,
                 path_to_motif_annotations = '/staging/leuven/stg_00002/lcb/cbravo/cluster_motif_collection_V10/snapshots/motifs-v10-nr.mgi-m0.001-o0.0_clust.tsv',
                 annotation_version = 'v10nr_clust',
                 annotation = ['Direct_annot', 'Orthology_annot'],
                 n_cpu = 1,
                 _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill')
  • SCENIC+ was run using the wrapper function
# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *

outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
# Load cisTopic object
import pickle
infile = open(outDir + 'cisTopicObject.pkl', 'rb')
cistopic_obj = pickle.load(infile)
infile.close()
# Load imputed accessibility
import pickle
infile = open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') #Here I am using pycisTopic gene activity matrix, but could be any :)
imputed_acc_obj = pickle.load(infile)
infile.close()
## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names
cell_data = cell_data.replace('TEW__c14e1d__Multiome_RNA_brain_10x_no_perm', '10x_no_perm')
cell_data = cell_data.replace('TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004', 'TST_NP40_004')
cell_data = cell_data.replace('TEW__75da5c__5b0f59__Multiome_brain_TST', 'TST')
cell_data = cell_data.replace('TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC', '10x_complex_UC')
cell_data = cell_data.replace('TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex', '10x_complex')
cell_data['barcode'] = [x.split('___')[0] for x in cell_data.index.tolist()]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

## Precomputed imputed data
import pickle
infile = open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific/menr_DT_nosimilarity.pkl', 'rb') 
menr = pickle.load(infile)
infile.close()

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
import pickle
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/scplus_obj.pkl', 'wb') as f:
  pickle.dump(scplus_obj, f)

# For the downstream analyses
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/'
import pickle
infile = open(outDir+'scplus_obj.pkl', 'rb')
scplus_obj = pickle.load(infile)
infile.close()

from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/scenicplus_v10_direct_ortho/',
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,   
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

And this is it! You can find the mouse precomputed databases at https://resources.aertslab.org/cistarget/databases/mus_musculus/mm10/screen/mc_v10_clust/region_based/ . Motif annotations are available at: https://resources.aertslab.org/cistarget/motif2tf/ . So basically is using the wrapper functions but changing the genome to mm10.

Cheers!

Carmn

@JBreunig
Copy link
Author

Brilliant...thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants