In [None]:
import os
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

import Cell_BLAST as cb
import exputils

def rgb2hex(rgb):
    """Convert an RGB triple of floats into a ``#rrggbb`` hex colour string.

    Each channel is multiplied by 255, rounded with ``np.round`` and rendered
    as two lowercase hexadecimal digits.

    Parameters
    ----------
    rgb : iterable of three numbers
        Red, green and blue intensities; values are expected to lie in
        [0, 1] for the result to be a valid colour code.

    Returns
    -------
    str
        Colour string of the form ``'#rrggbb'``.

    Raises
    ------
    TypeError
        If ``rgb`` does not contain exactly three channels (the format
        string has exactly three fields).
    """
    channels = tuple(int(np.round(item * 255)) for item in rgb)
    return '#%02x%02x%02x' % channels

In [None]:
# Restrict this process to the GPU picked by the project helper
# (judging by its name, the one with the least memory in use — TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = exputils.pick_gpu_lowest_memory()

# Cell_BLAST global settings: fixed seed and number of parallel jobs.
cb.config.RANDOM_SEED = 0
cb.config.N_JOBS = 4

# Figure text settings: keep SVG text as text instead of converting it to paths.
plt.rcParams['svg.fonttype'] = "none"
plt.rcParams['font.family'] = "Arial"

# Output directory for every file this notebook writes.
PATH = "./multilevel_batch"
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and a no-op when the directory is present.
os.makedirs(PATH, exist_ok=True)

## Read data

In [None]:
# Read the combined dataset from disk.
dataset = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Baron_human+Muraro+Enge+Segerstolpe+Xin_2016+Lawlor/data.h5"
)

# Add categorical copies of two annotation columns under display-friendly
# names; these names are what the plotting cells below refer to.
for display_name, source_name in (
    ("Cell ontology class", "cell_ontology_class"),
    ("Dataset name", "dataset_name"),
):
    dataset.obs[display_name] = pd.Categorical(dataset.obs[source_name])

In [None]:
# Both precomputed artefacts live in the same result directory.
cell_blast_result_dir = (
    "../../Results/Cell_BLAST/Baron_human+Muraro+Enge+Segerstolpe+Xin_2016+Lawlor/"
    "dim_10_rmbatch0.01/seed_0/"
)

# Attach the stored latent embedding to the dataset.
dataset.latent = cb.data.read_hybrid_path(cell_blast_result_dir + "result.h5//latent")

# Load the stored 2-D visualization and expose its two columns in obs.
tsne = cb.data.read_hybrid_path(cell_blast_result_dir + "tSNE.h5//visualization")
dataset.obs["tSNE1"], dataset.obs["tSNE2"] = tsne[:, 0], tsne[:, 1]

## Single-level

In [None]:
# Copy the donor label into a dedicated column for Baron_human cells only;
# rows from other datasets are left missing in that column.
donor_col = "Baron_human donor"
in_baron_human = dataset.obs["dataset_name"] == "Baron_human"
dataset.obs.loc[in_baron_human, donor_col] = dataset.obs["donor"]
dataset.obs[donor_col] = pd.Categorical(dataset.obs[donor_col])

# First an uncoloured (light grey) pass over the whole dataset, then the
# donor-coloured pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# The alignment score only uses cells that actually carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "simple_baron_human.pdf"), dpi=300, bbox_inches="tight")

In [None]:
# Copy the donor label into a dedicated column for Enge cells only;
# rows from other datasets are left missing in that column.
donor_col = "Enge donor"
in_enge = dataset.obs["dataset_name"] == "Enge"
dataset.obs.loc[in_enge, donor_col] = dataset.obs["donor"]
dataset.obs[donor_col] = pd.Categorical(dataset.obs[donor_col])

# First an uncoloured (light grey) pass over the whole dataset, then the
# donor-coloured pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# The alignment score only uses cells that actually carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "simple_enge.pdf"), dpi=300, bbox_inches="tight")

In [None]:
# Copy the donor label into a dedicated column for Muraro cells only;
# rows from other datasets are left missing in that column.
donor_col = "Muraro donor"
in_muraro = dataset.obs["dataset_name"] == "Muraro"
dataset.obs.loc[in_muraro, donor_col] = dataset.obs["donor"]
dataset.obs[donor_col] = pd.Categorical(dataset.obs[donor_col])

# First an uncoloured (light grey) pass over the whole dataset, then the
# donor-coloured pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# The alignment score only uses cells that actually carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "simple_muraro.pdf"), dpi=300, bbox_inches="tight")

## Multi-level

In [None]:
# Batch-effect columns passed to the model: the dataset of origin plus the
# three per-dataset donor columns created in the cells above.
batch_columns = ["dataset_name", "Baron_human donor", "Enge donor", "Muraro donor"]

# Fit DIRECTi on the gene set stored under "seurat_genes" in dataset.uns.
model = cb.directi.fit_DIRECTi(
    dataset,
    dataset.uns["seurat_genes"],
    latent_dim=10,
    cat_dim=20,
    batch_effect=batch_columns,
    rmbatch_module_kwargs={"lambda_reg": 0.005},
    epoch=200,
    patience=30,
)

In [None]:
# Overwrite the stored latent with the embedding produced by the model fitted above.
multilevel_latent = model.inference(dataset)
dataset.latent = multilevel_latent

In [None]:
# Plot the dataset coloured by cell ontology class.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent("Cell ontology class", ax=ax, scatter_kws=dict(rasterized=True))

# Mean average precision of the cell-type labels in the current latent.
# The result is deliberately NOT stored in a variable called `map`: doing so
# would shadow the builtin `map` for the rest of the kernel session.
mean_avg_precision = cb.metrics.mean_average_precision_from_latent(
    dataset.latent,
    cb.utils.encode_integer(dataset.obs["Cell ontology class"])[0],
    k=0.01
)

# Rewrite the first legend text entry so that it also reports the score.
ax.legend(
    frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left"
).get_texts()[0].set_text(f"Cell ontology class\n(MAP = {mean_avg_precision:.3f})")
fig.savefig(os.path.join(PATH, "complex_cl.pdf"), dpi=300, bbox_inches="tight")

In [None]:
# Plot the dataset coloured by dataset of origin.
batch_col = "Dataset name"
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(batch_col, ax=ax, scatter_kws={"rasterized": True})

# Alignment score across datasets, computed on all cells.
batch_codes = cb.utils.encode_integer(dataset.obs[batch_col])[0]
sas = cb.metrics.seurat_alignment_score(dataset.latent, batch_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{batch_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "complex_ds.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Baron_human donor"

# Uncoloured (light grey) pass over the whole dataset, then the donor-coloured
# pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# Restrict the alignment score to cells that carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "complex_baron_human.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Enge donor"

# Uncoloured (light grey) pass over the whole dataset, then the donor-coloured
# pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# Restrict the alignment score to cells that carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "complex_enge.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Muraro donor"

# Uncoloured (light grey) pass over the whole dataset, then the donor-coloured
# pass on the same axes.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})

# Restrict the alignment score to cells that carry a donor label.
has_donor = ~cb.utils.isnan(dataset.obs[donor_col])
sub_dataset = dataset[has_donor, :]
donor_codes = cb.utils.encode_integer(sub_dataset.obs[donor_col])[0]
sas = cb.metrics.seurat_alignment_score(sub_dataset.latent, donor_codes, n=10, k=0.01)

# Rewrite the first legend text entry so that it also reports the score.
legend = ax.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")
legend.get_texts()[0].set_text(f"{donor_col}\n(SAS = {sas:.3f})")
fig.savefig(os.path.join(PATH, "complex_muraro.pdf"), dpi=300, bbox_inches="tight")

## Other methods

### scVI

In [None]:
# Replace the tSNE1/tSNE2 columns with the visualization stored for the scVI run.
tsne = cb.data.read_hybrid_path(
    "../../Results/scVI/Baron_human+Muraro+Enge+Segerstolpe+Xin_2016+Lawlor/"
    "dim_5_rmbatchNA/seed_0/tSNE.h5//visualization"
)
for tsne_axis, tsne_column in enumerate(("tSNE1", "tSNE2")):
    dataset.obs[tsne_column] = tsne[:, tsne_axis]

In [None]:
# (Re)fill the Baron_human donor column from the generic donor column for
# Baron_human cells and make sure it is categorical.
donor_col = "Baron_human donor"
in_baron_human = dataset.obs["dataset_name"] == "Baron_human"
dataset.obs.loc[in_baron_human, donor_col] = dataset.obs["donor"]
dataset.obs[donor_col] = pd.Categorical(dataset.obs[donor_col])

# Uncoloured (light grey) pass over the whole dataset, then the donor-coloured
# pass on the same axes; no score is computed for this figure.
fig, ax = plt.subplots(figsize=(4, 4))
ax = dataset.visualize_latent(ax=ax, scatter_kws={"color": "#CCCCCC", "rasterized": True})
ax = dataset.visualize_latent(donor_col, ax=ax, scatter_kws={"rasterized": True})
fig.savefig(os.path.join(PATH, "simple_baron_human_scvi.pdf"), dpi=300, bbox_inches="tight")

### scPhere

In [None]:
# Write the current dataset to disk so that the external script can read it.
dataset.write_dataset(os.path.join(PATH, "data.h5"))

# NOTE(review): the command is executed without a shell, so the single quotes
# embedded in the "-b" values reach the script literally — confirm that
# run_scPhere.py expects (or strips) them.
scphere_cmd = [
    "python", "../../Evaluation/run_scPhere.py",
    "-i", os.path.join(PATH, "data.h5"),
    "-o", os.path.join(PATH, "scphere.h5"),
    "-g", "seurat_genes",
    "-b", "dataset_name", "'Baron_human donor'", "'Enge donor'", "'Muraro donor'",
    "-d", "2", "-s", "0"
]

# subprocess.run waits for completion and collects both streams in one call.
p = subprocess.run(scphere_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, err, exit_code = p.stdout, p.stderr, p.returncode
print(output.decode())
print(err.decode())

# Stop here on failure; otherwise the next cell would read a missing or
# stale scphere.h5 left over from an earlier run.
if exit_code != 0:
    raise subprocess.CalledProcessError(exit_code, scphere_cmd, output=output, stderr=err)

In [None]:
# Load the latent written by the scPhere run and attach it to the dataset.
scphere_latent_path = os.path.join(PATH, "scphere.h5//latent")
dataset.latent = cb.data.read_hybrid_path(scphere_latent_path)

In [None]:
# 3-D scatter of the three latent coordinates, coloured by cell type.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# Sorted unique labels are computed once and reused for both the palette
# size and the label -> colour mapping.
cell_types = np.unique(dataset.obs["cell_ontology_class"])
palette = sns.color_palette("tab20", cell_types.size)
cmap = {ct: rgb2hex(col) for ct, col in zip(cell_types, palette)}

ax.scatter(
    dataset.obs["latent_1"],
    dataset.obs["latent_2"],
    dataset.obs["latent_3"],
    c=[cmap[item] for item in dataset.obs["cell_ontology_class"]],
    s=5.0, rasterized=True
)

# The score is deliberately NOT stored in a variable called `map`: doing so
# would shadow the builtin `map` for the rest of the kernel session.
mean_avg_precision = cb.metrics.mean_average_precision_from_latent(
    dataset.latent, dataset.obs["cell_ontology_class"], k=0.01
)

# Light background for the three axis panes.
ax.xaxis.pane.set_color("#EEEEEE")
ax.yaxis.pane.set_color("#EEEEEE")
ax.zaxis.pane.set_color("#EEEEEE")

# Manual legend: one patch per cell type, with the score in the title.
plt.legend(
    handles=[
        mpatches.Patch(color=c, label=l)
        for l, c in cmap.items()
    ], frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left",
    title=f"Cell ontology class (MAP = {mean_avg_precision:.3f})"
)
fig.savefig(os.path.join(PATH, "scphere_complex_cl.pdf"), dpi=300, bbox_inches="tight")

In [None]:
# 3-D scatter of the three latent coordinates, coloured by dataset of origin.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# One palette colour per distinct dataset name.
dataset_names = np.unique(dataset.obs["dataset_name"])
palette = sns.color_palette("tab10", dataset_names.size)
cmap = {name: rgb2hex(rgb) for name, rgb in zip(dataset_names, palette)}

xs, ys, zs = (dataset.obs[f"latent_{i}"] for i in (1, 2, 3))
point_colors = [cmap[item] for item in dataset.obs["dataset_name"]]
ax.scatter(xs, ys, zs, c=point_colors, s=5.0, rasterized=True)

# Alignment score across datasets, computed on all cells.
sas = cb.metrics.seurat_alignment_score(
    dataset.latent, dataset.obs["dataset_name"], n=10, k=0.01
)

# Light background for the three axis panes.
for pane_axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    pane_axis.pane.set_color("#EEEEEE")

# Manual legend: one patch per dataset, with the score in the title.
legend_handles = [mpatches.Patch(color=c, label=l) for l, c in cmap.items()]
plt.legend(
    handles=legend_handles,
    frameon=False,
    bbox_to_anchor=(1.0, 0.5),
    loc="center left",
    title=f"Dataset name\n(SAS = {sas:.3f})",
)
fig.savefig(os.path.join(PATH, "scphere_complex_ds.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Baron_human donor"
placeholder = "Other datasets"

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# Missing donor labels are replaced by a placeholder string; the column is
# converted to object dtype first and written back into dataset.obs.
dataset.obs[donor_col] = dataset.obs[donor_col].astype(object).fillna(placeholder)
donor_labels = dataset.obs[donor_col]

# Real donors get palette colours, the placeholder gets a fixed light grey.
# valid_donors already excludes the placeholder, so no extra filter is needed.
valid_donors = np.setdiff1d(np.unique(donor_labels), placeholder)
palette = sns.color_palette("tab10", valid_donors.size)
cmap = {donor: rgb2hex(rgb) for donor, rgb in zip(valid_donors, palette)}
cmap[placeholder] = "#CCCCCC"

# Sorting the boolean mask puts placeholder cells (False) before labelled
# cells (True) in the arrays handed to scatter.
ordering = np.argsort(np.asarray(donor_labels != placeholder))
xs, ys, zs = (dataset.obs[f"latent_{i}"].iloc[ordering] for i in (1, 2, 3))
point_colors = [cmap[item] for item in donor_labels.iloc[ordering]]
ax.scatter(xs, ys, zs, c=point_colors, s=5.0, rasterized=True)

# Alignment score restricted to cells that have a real donor label.
sub_dataset = dataset[donor_labels != placeholder, :]
sas = cb.metrics.seurat_alignment_score(
    sub_dataset.latent, sub_dataset.obs[donor_col], n=10, k=0.01
)

# Light background for the three axis panes.
for pane_axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    pane_axis.pane.set_color("#EEEEEE")

# Manual legend listing real donors only, with the score in the title.
donor_handles = [
    mpatches.Patch(color=c, label=l) for l, c in cmap.items() if l != placeholder
]
plt.legend(
    handles=donor_handles,
    frameon=False,
    bbox_to_anchor=(1.0, 0.5),
    loc="center left",
    title=f"{donor_col}\n(SAS = {sas:.3f})",
)
fig.savefig(os.path.join(PATH, "scphere_complex_baron_human.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Enge donor"
placeholder = "Other datasets"

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# Missing donor labels are replaced by a placeholder string; the column is
# converted to object dtype first and written back into dataset.obs.
dataset.obs[donor_col] = dataset.obs[donor_col].astype(object).fillna(placeholder)
donor_labels = dataset.obs[donor_col]

# Real donors get palette colours, the placeholder gets a fixed light grey.
# valid_donors already excludes the placeholder, so no extra filter is needed.
valid_donors = np.setdiff1d(np.unique(donor_labels), placeholder)
palette = sns.color_palette("tab10", valid_donors.size)
cmap = {donor: rgb2hex(rgb) for donor, rgb in zip(valid_donors, palette)}
cmap[placeholder] = "#CCCCCC"

# Sorting the boolean mask puts placeholder cells (False) before labelled
# cells (True) in the arrays handed to scatter.
ordering = np.argsort(np.asarray(donor_labels != placeholder))
xs, ys, zs = (dataset.obs[f"latent_{i}"].iloc[ordering] for i in (1, 2, 3))
point_colors = [cmap[item] for item in donor_labels.iloc[ordering]]
ax.scatter(xs, ys, zs, c=point_colors, s=5.0, rasterized=True)

# Alignment score restricted to cells that have a real donor label.
sub_dataset = dataset[donor_labels != placeholder, :]
sas = cb.metrics.seurat_alignment_score(
    sub_dataset.latent, sub_dataset.obs[donor_col], n=10, k=0.01
)

# Light background for the three axis panes.
for pane_axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    pane_axis.pane.set_color("#EEEEEE")

# Manual legend listing real donors only, with the score in the title.
donor_handles = [
    mpatches.Patch(color=c, label=l) for l, c in cmap.items() if l != placeholder
]
plt.legend(
    handles=donor_handles,
    frameon=False,
    bbox_to_anchor=(1.0, 0.5),
    loc="center left",
    title=f"{donor_col}\n(SAS = {sas:.3f})",
)
fig.savefig(os.path.join(PATH, "scphere_complex_enge.pdf"), dpi=300, bbox_inches="tight")

In [None]:
donor_col = "Muraro donor"
placeholder = "Other datasets"

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')

# Missing donor labels are replaced by a placeholder string; the column is
# converted to object dtype first and written back into dataset.obs.
dataset.obs[donor_col] = dataset.obs[donor_col].astype(object).fillna(placeholder)
donor_labels = dataset.obs[donor_col]

# Real donors get palette colours, the placeholder gets a fixed light grey.
# valid_donors already excludes the placeholder, so no extra filter is needed.
valid_donors = np.setdiff1d(np.unique(donor_labels), placeholder)
palette = sns.color_palette("tab10", valid_donors.size)
cmap = {donor: rgb2hex(rgb) for donor, rgb in zip(valid_donors, palette)}
cmap[placeholder] = "#CCCCCC"

# Sorting the boolean mask puts placeholder cells (False) before labelled
# cells (True) in the arrays handed to scatter.
ordering = np.argsort(np.asarray(donor_labels != placeholder))
xs, ys, zs = (dataset.obs[f"latent_{i}"].iloc[ordering] for i in (1, 2, 3))
point_colors = [cmap[item] for item in donor_labels.iloc[ordering]]
ax.scatter(xs, ys, zs, c=point_colors, s=5.0, rasterized=True)

# Alignment score restricted to cells that have a real donor label.
sub_dataset = dataset[donor_labels != placeholder, :]
sas = cb.metrics.seurat_alignment_score(
    sub_dataset.latent, sub_dataset.obs[donor_col], n=10, k=0.01
)

# Light background for the three axis panes.
for pane_axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    pane_axis.pane.set_color("#EEEEEE")

# Manual legend listing real donors only, with the score in the title.
donor_handles = [
    mpatches.Patch(color=c, label=l) for l, c in cmap.items() if l != placeholder
]
plt.legend(
    handles=donor_handles,
    frameon=False,
    bbox_to_anchor=(1.0, 0.5),
    loc="center left",
    title=f"{donor_col}\n(SAS = {sas:.3f})",
)
fig.savefig(os.path.join(PATH, "scphere_complex_muraro.pdf"), dpi=300, bbox_inches="tight")