In [1]:
from scmmib.metrics import paired_graph_metrics,  mosaic_latent_metrics,\
    unpaired_latent_metrics
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import os

  init_styles()
  init_styles()


#### 1. We first provide a demo for evaluate unpair scRNA and scATAC diagonal integration output.


In [2]:
def test_unpair():
    # 1. load the latent files and metadata files, all demo files are deposited in the github folder
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/SHARE_RNA+ATAC_raw_metadata.csv.gz"
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')
    myfiles = ["../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-ATAC-latent.csv.gz",
               "../test/SHARE-multiome-raw-scRNA+scATAC-bindSC-RNA-latent.csv.gz"]
    latent_atac = pd.read_csv(myfiles[0], index_col=0, header=0)
    latent_rna = pd.read_csv(myfiles[1], index_col=0, header=0)
    latent_atac_reindex=latent_atac.reindex(meta.index)
    latent_rna_reindex= latent_rna.reindex(meta.index)
    # 2. generate the ann data format and calculate the metrics
    adata_unpaired = sc.AnnData(latent_rna_reindex, obs=meta, dtype='float32')
    adata_unpaired.obsm['RNA'] = latent_rna_reindex
    adata_unpaired.obsm['ATAC'] = latent_atac_reindex
    out = unpaired_latent_metrics(adata_unpaired, method = "bindSC", cluster = 'louvain', batch = None, label = 'cell_type', mods = ["RNA","ATAC"], outf=None, embed_acc=True) # outf=None,return stdout, or return the path or "outf" param, for example, outf="./test.txt"
    # embed_acc determine wheter calculate the accuracy metrics for each embed in "mods" params
    return out

In [3]:
test_unpair()

Unnamed: 0,Output,nCell,ARI-RNA,AMI-RNA,graph_cLISI-RNA,ARI-ATAC,AMI-ATAC,graph_cLISI-ATAC,FOSCTTM,nearest_cell_barcode,nearest_cell_barcode-RNA,nearest_cell_barcode-ATAC,nearest_cell_celltype,nearest_cell_celltype-RNA,nearest_cell_celltype-ATAC
bindSC,Embedding,34774.0,0.150792,0.311001,0.909428,0.175393,0.406811,0.948815,0.159328,0.000532,0.000661,0.000403,0.32507,0.281561,0.36858


#### 2. Then we provide a demo for method with graph output rather than embedding.


In [8]:
def test_graph():
    # 1. load the graph files and metadata files, all demo files are deposited in the github folde
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_p10_metadata.csv.gz"
    latent = pd.read_csv("../test/BMMC-CITE_seq-p10-CITE_seq-SeuratV4-multi-graph.csv.gz", index_col=0, header=0)
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')
    latent_reordered = latent.reindex(meta.index)
    
    # 2. generate the ann data format and calculate the metrics
    adata = sc.AnnData(latent_reordered, obs=meta, dtype='float32')
    method = "SeuratV4"
    adata.obsp[method] = latent_reordered
    out = paired_graph_metrics(adata, method = "SeuratV4", cluster = 'louvain', batch = 'batch', label = 'cell_type', outf=None)
    return out

In [9]:
test_graph()

         Falling back to preprocessing with `sc.pp.pca` and default params.


Unnamed: 0,Output,nCell,graph_connectivity,graph_connectivity.l1,graph_iLISI,ARI,ARI.l1,AMI,AMI.l1,graph_cLISI,graph_cLISI.l1
SeuratV4,Graph,9026.0,0.912629,0.959633,0.145544,0.406453,0.379547,0.680174,0.6496,0.987117,0.996


### 3. Finally we provide a demo for Seurat v5 bridge mosaic scRNA and ADT integration.


In [40]:
def test_mosaic():
    # 1. load Seurat v5 generated latent embeddings of paired, unpaired RNA and unpaired ADT, as well as metadata
    metadata = "../manuscript_figure_script_and_data/stage2_res/metadata/BMMC_RNA+ADT_s2d1_s3d6_metadata.csv.gz"

    myfiles = ["../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_adt_reduc_latent.csv.gz","../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_multi_lap_latent.csv.gz","../test/BMMC-CITE_seq-s2d1_s3d6-scRNA+ADT-SeuratV5_rna_reduc_latent.csv.gz"]
    meta = pd.read_csv(metadata, index_col='barcode', header=0, dtype='category')
    paired = "s3d6"
    unpaired = "s2d1"
    batch="batch"
    # 2. match paired and unpaired cell information in metadata, and input the latents and metadatas for metrics evaluation
    # pair_cells = meta[meta[batch] == paired].index
    # unpair_cells = meta[meta[batch] == unpaired].index
    metadatas = [meta[meta[batch] == paired],meta[meta[batch] == unpaired],meta[meta[batch] == unpaired]]
    latent_pair=pd.read_csv(myfiles[1], index_col=0, header=0)
    latent_rna = pd.read_csv(myfiles[2], index_col=0, header=0)
    latent_mod2 = pd.read_csv(myfiles[0], index_col=0, header=0)
    
    latents = [latent_pair, latent_rna, latent_mod2]
    out = mosaic_latent_metrics(latents=latents,metadatas=metadatas,paired="s2d1", unpaired="s3d6", mod2="adt", batch="batch",label="cell_type",latent_path=myfiles[0], method='SeuratV5', writef=False)
    # if writef=True, then two metrics files will be generated in same path of input embeddings.
    return out

In [42]:
mosaic_metrics_all = test_mosaic() 
# the output contains 3 metrics dataframe, including 1.pair metrics for all cells, 2. unpair metrics for unpair cells, and 3. pair metrics for CITE+RNA.

  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,


writing to ./text.txt


  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key=batch, group_key=cluster,
  Batch_ASW = scib.metrics.silhouette_batch(adata, batch_key='Site', group_key=cluster,


In [43]:
#demo unpair output.
mosaic_metrics_all[1]


Unnamed: 0,Output,nCell,FOSCTTM,nearest_cell_barcode,nearest_cell_barcode-RNA,nearest_cell_barcode-ADT,nearest_cell_celltype,nearest_cell_celltype-RNA,nearest_cell_celltype-ADT,nearest_cell_celltype.l1,nearest_cell_celltype.l1-RNA,nearest_cell_celltype.l1-ADT
SeuratV5,Embedding,11035.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
