# ScoMAP

In [2]:
# Build a SCENIC+ object from the ScoMAP ATAC/RNA cell pairing, save it, and
# run the SCENIC+ wrapper on it.
# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'

# Load cisTopic object (context managers close the files even if unpickling fails)
import pickle
with open(outDir + 'cisTopicObject.pkl', 'rb') as infile:
    cistopic_obj = pickle.load(infile)
# Load imputed accessibility
# NOTE(review): the original comment on this line mentioned a "pycisTopic gene
# activity matrix", but the file loaded is Imputed_accessibility.pkl - confirm.
with open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') as infile:
    imputed_acc_obj = pickle.load(infile)

## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
# pd and np are used below; import them explicitly instead of relying on
# whatever the star imports above happen to expose.
import numpy as np
import pandas as pd
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names: replace the long sample identifiers with short ones
sample_renames = {
    'TEW__c14e1d__Multiome_RNA_brain_10x_no_perm': '10x_no_perm',
    'TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004': 'TST_NP40_004',
    'TEW__75da5c__5b0f59__Multiome_brain_TST': 'TST',
    'TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC': '10x_complex_UC',
    'TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex': '10x_complex',
}
for long_name, short_name in sample_renames.items():
    cell_data = cell_data.replace(long_name, short_name)
# Rebuild the cell index as <barcode>___<short sample id>
cell_data['barcode'] = [cell_id.split('___')[0] for cell_id in cell_data.index]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

# Pairing table: column 0 holds the ATAC cell id, column 1 the RNA cell id and
# column 2 the name given to the paired cell.
import re
pair = pd.read_csv('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/ScoMAP_pairing.tsv', sep=' ')
# ATAC cell id (without the 'ATAC_' tag) -> paired cell name
patac = {re.sub('ATAC_', '', atac_id): paired_id
         for atac_id, paired_id in zip(pair.iloc[:, 0], pair.iloc[:, 2])}
imputed_acc_obj.subset(cells=patac.keys())
cistopic_obj.subset(cells=patac.keys())
imputed_acc_obj.cell_names = [patac[x] for x in imputed_acc_obj.cell_names]
cistopic_obj.cell_names = [patac[x] for x in cistopic_obj.cell_names]
cistopic_obj.cell_data = cistopic_obj.cell_data.loc[patac.keys(),:]
cistopic_obj.cell_data.index = cistopic_obj.cell_names

# RNA cell id (without the 'RNA_' tag) -> paired cell name
prna = {re.sub('RNA_', '', rna_id): paired_id
        for rna_id, paired_id in zip(pair.iloc[:, 1], pair.iloc[:, 2])}
# Keep only the RNA cells that take part in the pairing, then rename them
rna_is_paired = rna_anndata.obs_names.isin(prna.keys())
cell_data = cell_data[rna_is_paired]
rna_anndata = rna_anndata[rna_is_paired]
rna_anndata.obs_names = [prna[x] for x in rna_anndata.obs_names]
cell_data.index = rna_anndata.obs_names

# Fix region data (bug in old pycistopic versions)
from pycisTopic.utils import region_names_to_coordinates
fragment_matrix = cistopic_obj.fragment_matrix
binary_matrix = cistopic_obj.binary_matrix
region_data = region_names_to_coordinates(cistopic_obj.region_names)
region_data['Width'] = abs(region_data.End -region_data.Start).astype(np.int32)
# Per-region totals are recomputed on the subsetted matrices (row sums)
region_data['cisTopic_nr_frag'] = np.array(
    fragment_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_frag'] = np.log10(
    region_data['cisTopic_nr_frag'])
region_data['cisTopic_nr_acc'] = np.array(
    binary_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_acc'] = np.log10(
    region_data['cisTopic_nr_acc'])
cistopic_obj.region_data = region_data

## Precomputed menr object
# NOTE(review): the original heading said "imputed data"; menr.pkl lives in a
# pycistarget directory, so this is presumably motif enrichment - confirm.
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific_only_annotated_V2/menr.pkl', 'rb') as infile:
    menr = pickle.load(infile)

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
# From here on outDir is the ScoMAP output directory; it is used for the
# pickle written below, the reload, and run_scenicplus' save_path.
outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/SCENIC/ScoMAP/'
with open(outDir + 'scplus_obj.pkl', 'wb') as f:
    pickle.dump(scplus_obj, f)

# For the downstream analyses: reload the object that was just written
with open(outDir + 'scplus_obj.pkl', 'rb') as infile:
    scplus_obj = pickle.load(infile)

from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = outDir,
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

In [None]:
#!/bin/bash
# SLURM batch script: runs ScoMAP.py inside the scenicplus Singularity image.

# Job name and log files
#SBATCH --job-name=ScoMAP
#SBATCH --output=ScoMAP.out
#SBATCH --error=ScoMAP.err

# Cluster, partition and account
#SBATCH --cluster wice
#SBATCH --partition=bigmem
#SBATCH --account lp_wice_pilot

# Resources: one task with 20 CPUs, 1000G of memory, 48 hours
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=20
#SBATCH --mem=1000G
#SBATCH --time=48:00:00

# E-mail notifications
#SBATCH --mail-type=ALL
#SBATCH --mail-user=carmen.bravogonzalezblas@kuleuven.be

# Host directories bound into the container, and the container image itself
bind_paths=/lustre1,/staging,/data,/vsc-hard-mounts,/scratch,/local_scratch
image=/data/leuven/software/biomed/singularity_images/scenicplus/scenicplus.sif

singularity exec -B "${bind_paths}" "${image}" python ScoMAP.py

# SCENIC+

In [2]:
# Build a SCENIC+ object from the SCENIC+ ATAC/RNA cell pairing, save it, and
# run the SCENIC+ wrapper on it.
# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *

outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
# Load cisTopic object (context managers close the files even if unpickling fails)
import pickle
with open(outDir + 'cisTopicObject.pkl', 'rb') as infile:
    cistopic_obj = pickle.load(infile)
# Load imputed accessibility
# NOTE(review): the original comment on this line mentioned a "pycisTopic gene
# activity matrix", but the file loaded is Imputed_accessibility.pkl - confirm.
with open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') as infile:
    imputed_acc_obj = pickle.load(infile)

## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
# pd and np are used below; import them explicitly instead of relying on
# whatever the star imports above happen to expose.
import numpy as np
import pandas as pd
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names: replace the long sample identifiers with short ones
sample_renames = {
    'TEW__c14e1d__Multiome_RNA_brain_10x_no_perm': '10x_no_perm',
    'TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004': 'TST_NP40_004',
    'TEW__75da5c__5b0f59__Multiome_brain_TST': 'TST',
    'TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC': '10x_complex_UC',
    'TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex': '10x_complex',
}
for long_name, short_name in sample_renames.items():
    cell_data = cell_data.replace(long_name, short_name)
# Rebuild the cell index as <barcode>___<short sample id>
cell_data['barcode'] = [cell_id.split('___')[0] for cell_id in cell_data.index]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

# Pairing table: column 0 holds the ATAC cell id, column 1 the RNA cell id and
# column 2 the name given to the paired cell.
import re
pair = pd.read_csv('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/SCENIC+_pairing.tsv', sep=' ')
# ATAC cell id (without the 'ATAC_' tag) -> paired cell name
patac = {re.sub('ATAC_', '', atac_id): paired_id
         for atac_id, paired_id in zip(pair.iloc[:, 0], pair.iloc[:, 2])}
imputed_acc_obj.subset(cells=patac.keys())
cistopic_obj.subset(cells=patac.keys())
imputed_acc_obj.cell_names = [patac[x] for x in imputed_acc_obj.cell_names]
cistopic_obj.cell_names = [patac[x] for x in cistopic_obj.cell_names]
cistopic_obj.cell_data = cistopic_obj.cell_data.loc[patac.keys(),:]
cistopic_obj.cell_data.index = cistopic_obj.cell_names

# RNA cell id (without the 'RNA_' tag) -> paired cell name
prna = {re.sub('RNA_', '', rna_id): paired_id
        for rna_id, paired_id in zip(pair.iloc[:, 1], pair.iloc[:, 2])}
# Keep only the RNA cells that take part in the pairing, then rename them
rna_is_paired = rna_anndata.obs_names.isin(prna.keys())
cell_data = cell_data[rna_is_paired]
rna_anndata = rna_anndata[rna_is_paired]
rna_anndata.obs_names = [prna[x] for x in rna_anndata.obs_names]
cell_data.index = rna_anndata.obs_names

# Fix region data (bug in old pycistopic versions)
from pycisTopic.utils import region_names_to_coordinates
fragment_matrix = cistopic_obj.fragment_matrix
binary_matrix = cistopic_obj.binary_matrix
region_data = region_names_to_coordinates(cistopic_obj.region_names)
region_data['Width'] = abs(region_data.End -region_data.Start).astype(np.int32)
# Per-region totals are recomputed on the subsetted matrices (row sums)
region_data['cisTopic_nr_frag'] = np.array(
    fragment_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_frag'] = np.log10(
    region_data['cisTopic_nr_frag'])
region_data['cisTopic_nr_acc'] = np.array(
    binary_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_acc'] = np.log10(
    region_data['cisTopic_nr_acc'])
cistopic_obj.region_data = region_data

## Precomputed menr object
# NOTE(review): the original heading said "imputed data"; menr.pkl lives in a
# pycistarget directory, so this is presumably motif enrichment - confirm.
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific_only_annotated_V2/menr.pkl', 'rb') as infile:
    menr = pickle.load(infile)

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
# Single definition of the output directory, shared by the pickle written
# below and run_scenicplus' save_path.
saveDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/SCENIC/SCENIC/'
with open(saveDir + 'scplus_obj.pkl', 'wb') as f:
    pickle.dump(scplus_obj, f)


from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = saveDir,
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

In [3]:
#!/bin/bash
# SLURM batch script: runs SCENIC.py inside the scenicplus Singularity image.

# Cluster, partition, account and e-mail notifications
#SBATCH --partition=bigmem
#SBATCH --cluster wice
#SBATCH --account lp_wice_pilot
#SBATCH --mail-type=ALL
#SBATCH --mail-user=carmen.bravogonzalezblas@kuleuven.be
#SBATCH --ntasks=1

# Resources: 20 CPUs, 48 hours, 1000G of memory
#SBATCH --cpus-per-task=20
#SBATCH --time=48:00:00
#SBATCH --mem=1000G

# Job name and log files are named after the script being run (SCENIC.py).
# They were previously "ScoMAP", which collides with the ScoMAP job's
# ScoMAP.out / ScoMAP.err when both are submitted from the same directory.
#SBATCH --job-name=SCENIC
#SBATCH --output=SCENIC.out
#SBATCH --error=SCENIC.err

# -B binds the listed host directories into the container
singularity exec -B /lustre1,/staging,/data,/vsc-hard-mounts,/scratch,/local_scratch /data/leuven/software/biomed/singularity_images/scenicplus/scenicplus.sif python SCENIC.py

MemoryError: 

# FigR

In [2]:
# Build a SCENIC+ object from the FigR ATAC/RNA cell pairing, save it, and
# run the SCENIC+ wrapper on it.
# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *

outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
# Load cisTopic object (context managers close the files even if unpickling fails)
import pickle
with open(outDir + 'cisTopicObject.pkl', 'rb') as infile:
    cistopic_obj = pickle.load(infile)
# Load imputed accessibility
# NOTE(review): the original comment on this line mentioned a "pycisTopic gene
# activity matrix", but the file loaded is Imputed_accessibility.pkl - confirm.
with open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') as infile:
    imputed_acc_obj = pickle.load(infile)

## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
# pd and np are used below; import them explicitly instead of relying on
# whatever the star imports above happen to expose.
import numpy as np
import pandas as pd
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names: replace the long sample identifiers with short ones
sample_renames = {
    'TEW__c14e1d__Multiome_RNA_brain_10x_no_perm': '10x_no_perm',
    'TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004': 'TST_NP40_004',
    'TEW__75da5c__5b0f59__Multiome_brain_TST': 'TST',
    'TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC': '10x_complex_UC',
    'TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex': '10x_complex',
}
for long_name, short_name in sample_renames.items():
    cell_data = cell_data.replace(long_name, short_name)
# Rebuild the cell index as <barcode>___<short sample id>
cell_data['barcode'] = [cell_id.split('___')[0] for cell_id in cell_data.index]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

# Pairing table: column 0 holds the ATAC cell id, column 1 the RNA cell id and
# column 2 the name given to the paired cell.
import re
pair = pd.read_csv('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/figr_pairing.tsv', sep=' ')
# ATAC cell id (without the 'ATAC_' tag) -> paired cell name
patac = {re.sub('ATAC_', '', atac_id): paired_id
         for atac_id, paired_id in zip(pair.iloc[:, 0], pair.iloc[:, 2])}
imputed_acc_obj.subset(cells=patac.keys())
cistopic_obj.subset(cells=patac.keys())
imputed_acc_obj.cell_names = [patac[x] for x in imputed_acc_obj.cell_names]
cistopic_obj.cell_names = [patac[x] for x in cistopic_obj.cell_names]
cistopic_obj.cell_data = cistopic_obj.cell_data.loc[patac.keys(),:]
cistopic_obj.cell_data.index = cistopic_obj.cell_names

# RNA cell id (without the 'RNA_' tag) -> paired cell name
prna = {re.sub('RNA_', '', rna_id): paired_id
        for rna_id, paired_id in zip(pair.iloc[:, 1], pair.iloc[:, 2])}
# Keep only the RNA cells that take part in the pairing, then rename them
rna_is_paired = rna_anndata.obs_names.isin(prna.keys())
cell_data = cell_data[rna_is_paired]
rna_anndata = rna_anndata[rna_is_paired]
rna_anndata.obs_names = [prna[x] for x in rna_anndata.obs_names]
cell_data.index = rna_anndata.obs_names

# Fix region data (bug in old pycistopic versions)
from pycisTopic.utils import region_names_to_coordinates
fragment_matrix = cistopic_obj.fragment_matrix
binary_matrix = cistopic_obj.binary_matrix
region_data = region_names_to_coordinates(cistopic_obj.region_names)
region_data['Width'] = abs(region_data.End -region_data.Start).astype(np.int32)
# Per-region totals are recomputed on the subsetted matrices (row sums)
region_data['cisTopic_nr_frag'] = np.array(
    fragment_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_frag'] = np.log10(
    region_data['cisTopic_nr_frag'])
region_data['cisTopic_nr_acc'] = np.array(
    binary_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_acc'] = np.log10(
    region_data['cisTopic_nr_acc'])
cistopic_obj.region_data = region_data

## Precomputed menr object
# NOTE(review): the original heading said "imputed data"; menr.pkl lives in a
# pycistarget directory, so this is presumably motif enrichment - confirm.
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific_only_annotated_V2/menr.pkl', 'rb') as infile:
    menr = pickle.load(infile)

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
# Single definition of the output directory, shared by the pickle written
# below and run_scenicplus' save_path.
saveDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/SCENIC/FigR/'
with open(saveDir + 'scplus_obj.pkl', 'wb') as f:
    pickle.dump(scplus_obj, f)


from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = saveDir,
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

In [3]:
#!/bin/bash
# SLURM batch script: runs FigR.py inside the scenicplus Singularity image.

# Job name and log files
#SBATCH --job-name=FigR
#SBATCH --output=FigR.out
#SBATCH --error=FigR.err

# Cluster, partition and account
#SBATCH --cluster wice
#SBATCH --partition=bigmem
#SBATCH --account lp_wice_pilot

# Resources: one task with 20 CPUs, 1000G of memory, 48 hours
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=20
#SBATCH --mem=1000G
#SBATCH --time=48:00:00

# E-mail notifications
#SBATCH --mail-type=ALL
#SBATCH --mail-user=carmen.bravogonzalezblas@kuleuven.be

# Host directories bound into the container, and the container image itself
bind_paths=/lustre1,/staging,/data,/vsc-hard-mounts,/scratch,/local_scratch
image=/data/leuven/software/biomed/singularity_images/scenicplus/scenicplus.sif

singularity exec -B "${bind_paths}" "${image}" python FigR.py

MemoryError: 

# Random

In [2]:
# Build a SCENIC+ object from the random ATAC/RNA cell pairing, save it, and
# run the SCENIC+ wrapper on it.
# Load functions
from scenicplus.scenicplus_class import SCENICPLUS, create_SCENICPLUS_object
from scenicplus.preprocessing.filtering import *

outDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycisTopic/'
# Load cisTopic object (context managers close the files even if unpickling fails)
import pickle
with open(outDir + 'cisTopicObject.pkl', 'rb') as infile:
    cistopic_obj = pickle.load(infile)
# Load imputed accessibility
# NOTE(review): the original comment on this line mentioned a "pycisTopic gene
# activity matrix", but the file loaded is Imputed_accessibility.pkl - confirm.
with open(outDir + 'DARs/Imputed_accessibility.pkl', 'rb') as infile:
    imputed_acc_obj = pickle.load(infile)

## RNA - Create Anndata
from loomxpy.loomxpy import SCopeLoom
from pycisTopic.loom import *
import itertools
import anndata
# pd and np are used below; import them explicitly instead of relying on
# whatever the star imports above happen to expose.
import numpy as np
import pandas as pd
projDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/'
path_to_annotated_rna_loom = projDir + 'data/MO_GEX_seurat_Cortex.loom'
loom = SCopeLoom.read_loom(path_to_annotated_rna_loom)
cell_data = get_metadata(loom)
# Fix names: replace the long sample identifiers with short ones
sample_renames = {
    'TEW__c14e1d__Multiome_RNA_brain_10x_no_perm': '10x_no_perm',
    'TEW__3cc0d9__bb22bc__Multiome_brain_TST_NP40_004': 'TST_NP40_004',
    'TEW__75da5c__5b0f59__Multiome_brain_TST': 'TST',
    'TEW__c3f7c1__1ac906__Multiome_brain_10xcomplex_UC': '10x_complex_UC',
    'TEW__d112c8__547ada__Multiome_RNA_brain_10x_complex': '10x_complex',
}
for long_name, short_name in sample_renames.items():
    cell_data = cell_data.replace(long_name, short_name)
# Rebuild the cell index as <barcode>___<short sample id>
cell_data['barcode'] = [cell_id.split('___')[0] for cell_id in cell_data.index]
cell_data.index = cell_data['barcode'] + '___' + cell_data['sample_id']
expr_mat = loom.ex_mtx
expr_mat.index = cell_data.index
rna_anndata = anndata.AnnData(X=expr_mat)
rna_anndata.obs = cell_data

# Pairing table: column 0 holds the ATAC cell id, column 1 the RNA cell id and
# column 2 the name given to the paired cell.
import re
pair = pd.read_csv('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/random_pairing.tsv', sep=' ')
# ATAC cell id (without the 'ATAC_' tag) -> paired cell name
patac = {re.sub('ATAC_', '', atac_id): paired_id
         for atac_id, paired_id in zip(pair.iloc[:, 0], pair.iloc[:, 2])}
imputed_acc_obj.subset(cells=patac.keys())
cistopic_obj.subset(cells=patac.keys())
imputed_acc_obj.cell_names = [patac[x] for x in imputed_acc_obj.cell_names]
cistopic_obj.cell_names = [patac[x] for x in cistopic_obj.cell_names]
cistopic_obj.cell_data = cistopic_obj.cell_data.loc[patac.keys(),:]
cistopic_obj.cell_data.index = cistopic_obj.cell_names

# RNA cell id (without the 'RNA_' tag) -> paired cell name
prna = {re.sub('RNA_', '', rna_id): paired_id
        for rna_id, paired_id in zip(pair.iloc[:, 1], pair.iloc[:, 2])}
# Keep only the RNA cells that take part in the pairing, then rename them
rna_is_paired = rna_anndata.obs_names.isin(prna.keys())
cell_data = cell_data[rna_is_paired]
rna_anndata = rna_anndata[rna_is_paired]
rna_anndata.obs_names = [prna[x] for x in rna_anndata.obs_names]
cell_data.index = rna_anndata.obs_names

# Fix region data (bug in old pycistopic versions)
from pycisTopic.utils import region_names_to_coordinates
fragment_matrix = cistopic_obj.fragment_matrix
binary_matrix = cistopic_obj.binary_matrix
region_data = region_names_to_coordinates(cistopic_obj.region_names)
region_data['Width'] = abs(region_data.End -region_data.Start).astype(np.int32)
# Per-region totals are recomputed on the subsetted matrices (row sums)
region_data['cisTopic_nr_frag'] = np.array(
    fragment_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_frag'] = np.log10(
    region_data['cisTopic_nr_frag'])
region_data['cisTopic_nr_acc'] = np.array(
    binary_matrix.sum(axis=1)).flatten()
region_data['cisTopic_log_nr_acc'] = np.log10(
    region_data['cisTopic_nr_acc'])
cistopic_obj.region_data = region_data

## Precomputed menr object
# NOTE(review): the original heading said "imputed data"; menr.pkl lives in a
# pycistarget directory, so this is presumably motif enrichment - confirm.
with open('/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/TEW_cortex/pycistarget_clustered_data_set_specific_only_annotated_V2/menr.pkl', 'rb') as infile:
    menr = pickle.load(infile)

scplus_obj = create_SCENICPLUS_object(
        GEX_anndata = rna_anndata,
        cisTopic_obj = cistopic_obj,
        imputed_acc_obj = imputed_acc_obj,
        menr = menr,
        ACC_prefix = 'ACC_',
        GEX_prefix = 'GEX_',
        bc_transform_func = lambda x: x,
        normalize_imputed_acc = False)

filter_genes(scplus_obj, min_pct = 0.5)
filter_regions(scplus_obj, min_pct = 0.5)

# Save
# Single definition of the output directory, shared by the pickle written
# below and run_scenicplus' save_path.
saveDir = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/10x_multiome_mouse_cortex/pair_benchmark/SCENIC/Random/'
with open(saveDir + 'scplus_obj.pkl', 'wb') as f:
    pickle.dump(scplus_obj, f)


from scenicplus.wrappers.run_scenicplus import *
run_scenicplus(scplus_obj,
    variable = ['ACC_consensus_cell_type'],
    species = 'mmusculus',
    assembly = 'mm10',
    tf_file = '/staging/leuven/stg_00002/lcb/cflerin/resources/allTFs_mm.txt',
    save_path = saveDir,
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    upstream = [1000, 150000],
    downstream = [1000, 150000],
    region_ranking = None,
    gene_ranking = None,
    calculate_TF_eGRN_correlation = False,
    calculate_DEGs_DARs = True,
    export_to_loom_file = True,
    export_to_UCSC_file = True,
    tree_structure = ('Mouse_cortex_TEW', 'SCENIC+'),
    path_bedToBigBed = '/data/leuven/software/biomed/haswell_centos7/2018a/software/Kent_tools/20190730-linux.x86_64/bin/',
    n_cpu = 20,
    _temp_dir = '/scratch/leuven/313/vsc31305/ray_spill'
    )

In [3]:
#!/bin/bash
# SLURM batch script: runs Random.py inside the scenicplus Singularity image.

# Job name and log files
#SBATCH --job-name=Random
#SBATCH --output=Random.out
#SBATCH --error=Random.err

# Cluster, partition and account
#SBATCH --cluster wice
#SBATCH --partition=bigmem
#SBATCH --account lp_wice_pilot

# Resources: one task with 20 CPUs, 1000G of memory, 48 hours
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=20
#SBATCH --mem=1000G
#SBATCH --time=48:00:00

# E-mail notifications
#SBATCH --mail-type=ALL
#SBATCH --mail-user=carmen.bravogonzalezblas@kuleuven.be

# Host directories bound into the container, and the container image itself
bind_paths=/lustre1,/staging,/data,/vsc-hard-mounts,/scratch,/local_scratch
image=/data/leuven/software/biomed/singularity_images/scenicplus/scenicplus.sif

singularity exec -B "${bind_paths}" "${image}" python Random.py

MemoryError: 