In [2]:
import os

import pandas as pd
import scanpy as sc
import scipy

from multimeasure import MultiAnnData

**Option 1:** Read the data from `.txt` files (remember to change the file paths according to your set-up).

In [11]:
# Folder containing the downloaded sci-CAR A549 text files; adjust to your set-up.
file_dir = "../../files"

# RNA inputs, in order: count matrix, per-cell table (obs), per-gene table (var).
RNA_matrix, RNA_cell, RNA_gene = (
    os.path.join(file_dir, fname)
    for fname in (
        "GSM3271040_RNA_sciCAR_A549_gene_count.txt.gz",
        "GSM3271040_RNA_sciCAR_A549_cell.txt.gz",
        "GSM3271040_RNA_sciCAR_A549_gene.txt.gz",
    )
)

# ATAC inputs, same order: count matrix, per-cell table (obs), per-peak table (var).
# They are only defined here; the ATAC modality itself is added in the next cell.
ATAC_matrix, ATAC_cell, ATAC_peak = (
    os.path.join(file_dir, fname)
    for fname in (
        "GSM3271041_ATAC_sciCAR_A549_peak_count.txt.gz",
        "GSM3271041_ATAC_sciCAR_A549_cell.txt.gz",
        "GSM3271041_ATAC_sciCAR_A549_peak.txt.gz",
    )
)

# Start from an empty container and register the RNA modality.
# NOTE(review): transpose_x=True presumably means the count file is stored as
# features x cells and has to be flipped to cells x features - confirm against
# the MultiAnnData documentation.
m = MultiAnnData()
m.add_modality(
    "RNA",
    RNA_matrix,
    file_obs=RNA_cell,
    file_var=RNA_gene,
    transpose_x=True,
)
# Show the modalities registered so far.
m.measures

{'RNA': AnnData object with n_obs × n_vars = 6093 × 113153 
     obs: 'sample', 'cell_name', 'experiment', 'treatment_time'
     var: 'gene_id', 'gene_type', 'gene_short_name'}

In [12]:
# Register the ATAC modality on the same container, using the paths defined
# in the previous cell.
m.add_modality(
    "ATAC",
    ATAC_matrix,
    file_obs=ATAC_cell,
    file_var=ATAC_peak,
    transpose_x=True,
)
# Show the modalities registered so far (RNA and ATAC at this point).
m.measures

  if self.run_code(code, result):


{'ATAC': AnnData object with n_obs × n_vars = 6085 × 189603 
     obs: 'sample', 'source', 'group', 'experiment'
     var: 'id', 'peak', 'chr', 'start', 'end',
 'RNA': AnnData object with n_obs × n_vars = 6093 × 113153 
     obs: 'sample', 'cell_name', 'experiment', 'treatment_time'
     var: 'gene_id', 'gene_type', 'gene_short_name'}

In [None]:
# If you only need the plain AnnData objects, pull them out of the container
# by modality name:
rna, atac = (m.measures[label] for label in ("RNA", "ATAC"))

**Option 2:** Read the data from `.h5ad` files (much quicker! :)

In [13]:
# Folder holding the pre-processed .h5ad files; adjust to your set-up.
# NOTE: this overwrites the `file_dir` value assigned in Option 1.
file_dir = "../.."

RNA_obj = os.path.join(file_dir, "sciCAR_mouse_rna_scanpy.h5ad")
ATAC_obj = os.path.join(file_dir, "sciCAR_mouse_atac_scanpy.h5ad")

# scanpy (`sc`) is imported with the other dependencies in the first cell.
rna = sc.read(RNA_obj)
atac = sc.read(ATAC_obj)
# Both results are AnnData objects that you can work on directly.

In [15]:
# Wrap the two AnnData objects in a single MultiAnnData container.
m = MultiAnnData(
    [rna, atac],       # one AnnData object per modality
    ["RNA", "ATAC"],   # name for each object, in the same order
)
# Show the registered modalities.
m.measures

{'ATAC': AnnData object with n_obs × n_vars = 13395 × 252741 
     obs: 'replicate', 'louvain'
     var: 'peak', 'chr', 'start', 'end'
     uns: 'louvain', 'neighbors'
     obsm: 'X_pca', 'X_umap',
 'RNA': AnnData object with n_obs × n_vars = 13893 × 12 
     obs: 'source', 'replicate', 'experiment', 'tsne_1', 'tsne_2', 'cell_name', 'louvain'
     var: 'gene_type', 'gene_short_name', 'means', 'dispersions', 'dispersions_norm'
     uns: 'cell_name_colors', 'louvain', 'louvain_sizes', 'neighbors', 'paga'
     obsm: 'X_umap'}