In [3]:
# Notebook parameters. The empty values are placeholders; the pipeline that
# launches this notebook is presumably expected to overwrite them — TODO confirm
# the injection mechanism.

# Placeholder; reassigned to a copy of `adata` in the "Unintegrated" section.
adata_unintegrated = ""
# Maps tool name -> path of that tool's integration output file.
integrated_adata = {}
# Names of the integration tools whose outputs should be read and plotted.
tools = []
# Directory that contains unintegrated.h5ad.
data_dir = ""
# Key passed as batch_key to the Benchmarker and used to colour the batch plots.
batch = ""
# Key passed as label_key to the Benchmarker and used to colour the cell-type plots.
label = ""

Author: Erno Hänninen

Created: 24.01.2023

Title: integration_result.ipynb

Description:
- Notebook in which the results of the benchmarked integration methods are visualized. Computes scib-metrics and visualizes the integrated data using PAGA graphs and force-directed graph drawing plots

Procedure
- Read the integration method output and store it in adata.obsm under the tool's name
- Compute and visualize the integration metrics. For computational reasons the metrics are computed only for a subset of methods
- Plot the unintegrated data using the force-directed graph drawing algorithm
- For each integration method
    - Compute and visualize the PAGA graph
    - Compute and visualize the force-directed graph drawing, which is initialized with the PAGA

List of non-standard modules:
- matplotlib, plottable, scanpy, scib_metrics, scib, rich

Usage:
- This notebook is launched from the pipeline

In [21]:
# Packages
import shutil, os, sys
import warnings

import matplotlib.pyplot as plt
import scanpy as sc
import scib
from plottable import Table
from rich import print  # NOTE: shadows the built-in print for the whole notebook
from scib_metrics.benchmark import Benchmarker, BatchCorrection, BioConservation
# Suppress all Python warnings from here on, so they do not appear in cell output.
warnings.filterwarnings('ignore')
# Set scanpy's logging verbosity to level 0 to keep cell output quiet.
sc.settings.verbosity = 0

### Prepare data, and run the integration metrics

In [None]:
# Read the output of every benchmarked integration method and store its
# representation in adata.obsm[<tool name>], then compute the scib-metrics.
adata = sc.read(f"{data_dir}/unintegrated.h5ad")

# Tools for which nothing is loaded into adata.obsm here; bbknn is read
# separately in its own plotting cell.
tools_without_embedding = {"bbknn", "conos"}
# Seurat outputs for which the matrix in .X is used as the representation.
seurat_matrix_tools = {"seurat_cca", "seurat_rpca"}

for tool in tools:
    # "unintegrated" is appended below and has no entry in integrated_adata;
    # skipping it keeps this cell re-runnable.
    if tool == "unintegrated" or tool in tools_without_embedding:
        continue
    if tool == "fastmnn":
        adata_int = scib.pp.read_seurat(integrated_adata[tool])
        adata.obsm[tool] = adata_int.obsm["FASTMNN"]
    elif tool in seurat_matrix_tools:
        adata_int = scib.pp.read_seurat(integrated_adata[tool])
        integrated_x = adata_int.X
        # .toarray() gives a plain ndarray (todense() would give np.matrix);
        # a matrix that is already dense is stored unchanged.
        adata.obsm[tool] = integrated_x.toarray() if hasattr(integrated_x, "toarray") else integrated_x
    else:
        adata_int = sc.read(integrated_adata[tool])
        adata.obsm[tool] = adata_int.obsm[tool]

# Guarded append so that re-running the cell does not add duplicates.
if "unintegrated" not in tools:
    tools.append("unintegrated")

# NOTE(review): nothing in this notebook writes adata.obsm["unintegrated"]; it is
# presumably already present in unintegrated.h5ad. If it is missing, fall back to
# X_pca, which the "Unintegrated" section uses as the unintegrated representation.
if "unintegrated" not in adata.obsm and "X_pca" in adata.obsm:
    adata.obsm["unintegrated"] = adata.obsm["X_pca"]

# The metrics are computed only for a subset of tools, and only for those whose
# representation is actually available in adata.obsm.
benchmark_candidates = ['unintegrated', 'scvi', 'scanorama', 'harmony', 'fastmnn', 'scanvi', "scgen"]
tools_to_benchmark = [name for name in benchmark_candidates if name in adata.obsm]
skipped_tools = [name for name in benchmark_candidates if name not in adata.obsm]
if skipped_tools:
    print(f"Skipping metrics for tools without a representation in adata.obsm: {skipped_tools}")

# Compute metrics
bm = Benchmarker(
    adata,
    batch_key=batch,
    label_key=label,
    embedding_obsm_keys=tools_to_benchmark,
    n_jobs=15,
)
bm.benchmark()

In [5]:
# Metrics output lives in <parent of data_dir>/Metrics.
output_dir = os.path.dirname(data_dir)
metrics_dir = f"{output_dir}/Metrics"

# Remove an existing Metrics directory first, then recreate it together with
# the Non_scaled subdirectory.
if os.path.exists(metrics_dir):
    shutil.rmtree(metrics_dir)
os.makedirs(f"{metrics_dir}/Non_scaled")

In [None]:
# Draw the results table without min-max scaling and save it under Metrics/Non_scaled.
bm.plot_results_table(
    min_max_scale=False,
    save_dir=f"{metrics_dir}/Non_scaled",
)

In [8]:
# Save the unscaled metrics as CSV next to the metrics plots.
# (The `from rich import print` import that used to sit in this cell now lives
# in the import cell at the top of the notebook.)
df = bm.get_results(min_max_scale=False)
df.to_csv(metrics_dir + "/non_scaled_metrics.csv")


The following code blocks visualize the output of each benchmarked integration method. For the unintegrated data, a force-directed graph drawing plot is computed and plotted. For each integration method, a PAGA graph and a force-directed graph drawing plot, which is initialized with PAGA, are computed and plotted.

# Unintegrated

In [None]:
if "unintegrated" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_unintegrated = adata.copy()
    # Neighbour graph on the X_pca representation, i.e. the unintegrated embedding.
    sc.pp.neighbors(adata_unintegrated, use_rep="X_pca")
    # Force-directed layout; no PAGA initialisation for the unintegrated data.
    sc.tl.draw_graph(adata_unintegrated)

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        # Only the cell-type plot is given a `save` name.
        sc.pl.draw_graph(adata_unintegrated, color=label, save="_unintegrated_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_unintegrated, color=batch, **layout_style)

# SCVI

In [None]:
if "scvi" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_scvi = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["scvi"].
    sc.pp.neighbors(adata_scvi, use_rep="scvi")
    sc.tl.paga(adata_scvi, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_scvi, color=label, frameon=False, save="_scvi_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot
    # (per the scanpy docs, init_pos="paga" uses the plotted PAGA coordinates).
    sc.tl.draw_graph(adata_scvi, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_scvi, color=label, save="_scVI_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_scvi, color=batch, **layout_style)

# SCANVI

In [None]:
if "scanvi" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_scanvi = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["scanvi"].
    sc.pp.neighbors(adata_scanvi, use_rep="scanvi")
    sc.tl.paga(adata_scanvi, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_scanvi, color=label, frameon=False, save="_scANVI_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_scanvi, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_scanvi, color=label, save="_scANVI_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_scanvi, color=batch, **layout_style)
    

# SCGEN

In [None]:
if "scgen" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_scgen = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["scgen"].
    sc.pp.neighbors(adata_scgen, use_rep="scgen")
    sc.tl.paga(adata_scgen, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_scgen, color=label, frameon=False, save="_scgen_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_scgen, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_scgen, color=label, save="_scgen_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_scgen, color=batch, **layout_style)

# TRVAE

In [None]:
if "trvae" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_trvae = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["trvae"].
    sc.pp.neighbors(adata_trvae, use_rep="trvae")
    sc.tl.paga(adata_trvae, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_trvae, color=label, frameon=False, save="_trvae_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_trvae, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_trvae, color=label, save="_trvae_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_trvae, color=batch, **layout_style)

# Harmony

In [None]:
if "harmony" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_harmony = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["harmony"].
    sc.pp.neighbors(adata_harmony, use_rep="harmony")
    sc.tl.paga(adata_harmony, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_harmony, color=label, frameon=False, save="_harmony_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_harmony, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_harmony, color=label, save="_harmony_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_harmony, color=batch, **layout_style)

# Combat

In [None]:
if "combat" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_combat = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["combat"].
    sc.pp.neighbors(adata_combat, use_rep="combat")
    sc.tl.paga(adata_combat, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_combat, color=label, frameon=False, save="_combat_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_combat, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_combat, color=label, save="_combat_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_combat, color=batch, **layout_style)


# Scanorama

In [None]:
if "scanorama" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_scanorama = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["scanorama"].
    sc.pp.neighbors(adata_scanorama, use_rep="scanorama")
    sc.tl.paga(adata_scanorama, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_scanorama, color=label, frameon=False, save="_scanorama_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_scanorama, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_scanorama, color=label, save="_scanorama_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_scanorama, color=batch, **layout_style)

# Desc

In [None]:
if "desc" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_desc = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["desc"].
    sc.pp.neighbors(adata_desc, use_rep="desc")
    sc.tl.paga(adata_desc, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_desc, color=label, frameon=False, save="_desc_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_desc, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_desc, color=label, save="_desc_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_desc, color=batch, **layout_style)

# Liger

In [None]:
if "liger" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_liger = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["liger"].
    sc.pp.neighbors(adata_liger, use_rep="liger")
    sc.tl.paga(adata_liger, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_liger, color=label, frameon=False, save="_liger_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_liger, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_liger, color=label, save="_liger_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_liger, color=batch, **layout_style)

# Seurat cca

In [None]:
if "seurat_cca" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_cca = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["seurat_cca"].
    sc.pp.neighbors(adata_cca, use_rep="seurat_cca")
    sc.tl.paga(adata_cca, groups=label)

    # The statements below were indented one level too deep in the original cell,
    # which made the whole cell fail with an IndentationError.
    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_cca, color=label, frameon=False, save="_cca_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_cca, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_cca, color=label, save="_cca_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_cca, color=batch, **layout_style)

# Seurat rpca

In [None]:
if "seurat_rpca" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_rpca = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["seurat_rpca"].
    sc.pp.neighbors(adata_rpca, use_rep="seurat_rpca")
    sc.tl.paga(adata_rpca, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_rpca, color=label, frameon=False, save="_rpca_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_rpca, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_rpca, color=label, save="_rpca_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_rpca, color=batch, **layout_style)

# Fastmnn

In [None]:
if "fastmnn" in tools:
    # Separate copy: the graph and layout computed below are stored on it, not on `adata`.
    adata_fastmnn = adata.copy()
    # Neighbour graph on the integrated representation in adata.obsm["fastmnn"].
    sc.pp.neighbors(adata_fastmnn, use_rep="fastmnn")
    sc.tl.paga(adata_fastmnn, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_fastmnn, color=label, frameon=False, save="_fastmnn_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_fastmnn, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_fastmnn, color=label, save="_fastmnn_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_fastmnn, color=batch, **layout_style)

# BBKNN

In [None]:
if "bbknn" in tools:
    # BBKNN output is read from its own file instead of being taken from `adata`.
    # No sc.pp.neighbors call here: the loaded object presumably already carries
    # the BBKNN neighbour graph — TODO confirm.
    adata_bbknn = sc.read(integrated_adata["bbknn"])
    # NOTE(review): the UMAP is computed but not plotted in this cell.
    sc.tl.umap(adata_bbknn)
    sc.tl.paga(adata_bbknn, groups=label)

    paga_rc = {"figure.figsize": [6, 3], "figure.dpi": 350}
    with plt.rc_context(paga_rc):
        print("PAGA")
        sc.pl.paga(adata_bbknn, color=label, frameon=False, save="_bbknn_paga.png")

    # Force-directed layout initialised from PAGA; keep this after the PAGA plot.
    sc.tl.draw_graph(adata_bbknn, init_pos="paga")

    layout_rc = {"figure.figsize": [2.5, 2.5], "figure.dpi": 350}
    layout_style = dict(legend_fontsize="xx-small", frameon=False, title="")
    with plt.rc_context(layout_rc):
        print("CELL TYPES")
        sc.pl.draw_graph(adata_bbknn, color=label, save="_bbknn_benchmark.png", **layout_style)
        print("BATCHES")
        sc.pl.draw_graph(adata_bbknn, color=batch, **layout_style)