In [None]:
import os
import Cell_BLAST as cb
import utils
# Expose a single GPU to the process: the one returned by
# utils.pick_gpu_lowest_memory() (presumably the device with the least memory
# in use -- confirm in utils).
os.environ["CUDA_VISIBLE_DEVICES"] = utils.pick_gpu_lowest_memory()
# Global Cell BLAST seed. The extra ensemble members trained further down
# override it with random_seed=1..N_JOBS-1.
cb.config.RANDOM_SEED = 0
# Besides whatever Cell BLAST itself uses N_JOBS for, this notebook also uses
# it as the number of DIRECTi models in every BLAST ensemble (1 initial model
# + the range(1, N_JOBS) loop).
cb.config.N_JOBS = 4
# Hyper-parameters passed unchanged to every fit_DIRECTi call in this notebook;
# dataset-specific options live in the per-section opt_model_kwargs.
fixed_model_kwargs = {
    "latent_dim": 10,
    "cat_dim": 20,
    "epoch": 500,
    "patience": 20,
}

In [None]:
cb.__version__

---

# Human

## Baron_human

In [None]:
baron_human = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Baron_human/data.h5")
utils.peek(baron_human, "build/pancreas/Baron_human")
baron_human.obs.head()

In [None]:
baron_human.obs.dtypes

In [None]:
# Dataset-specific options: the "donor" column is passed as the batch effect.
# NOTE: opt_model_kwargs is a notebook-global that every section overwrites;
# the ensemble cell of this section reads it again.
opt_model_kwargs = dict(batch_effect="donor")
# Fit the first DIRECTi model on the genes listed in uns["seurat_genes"],
# combining the shared hyper-parameters with the options above.
baron_human_model = cb.directi.fit_DIRECTi(
    baron_human, baron_human.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
# Attach the model's inference output to the dataset as its latent space
# (presumably what visualize_latent plots in the following cells).
baron_human.latent = baron_human_model.inference(baron_human)

In [None]:
ax = baron_human.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_human/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = baron_human.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_human/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = baron_human.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_human/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
baron_human.write_dataset("build/pancreas/Baron_human/Baron_human.h5")

In [None]:
%%capture capio
# Build the model ensemble for BLAST. The model fitted earlier in this section
# is reused as member 0; N_JOBS - 1 further models are trained with explicit
# seeds 1..N_JOBS-1.
# NOTE: depends on opt_model_kwargs still holding this section's value -- if
# another section's fit cell ran in between, re-run this section's fit cell first.
baron_human_models = [baron_human_model]
for i in range(1, cb.config.N_JOBS):
    # Marker line so each model's training log can be found in the captured stdout.
    print("==== Model %d ====" % i)
    baron_human_models.append(cb.directi.fit_DIRECTi(
        baron_human, baron_human.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
# Wrap the models together with the reference dataset and save the result
# into the dataset's build directory.
baron_human_blast = cb.blast.BLAST(baron_human_models, baron_human)
baron_human_blast.save("build/pancreas/Baron_human")

In [None]:
with open("build/pancreas/Baron_human/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Baron_human/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(baron_human_blast, "build/pancreas/Baron_human")

In [None]:
%%writefile build/pancreas/Baron_human/predictable.txt
cell_ontology_class
cell_type1

## Muraro

In [None]:
muraro = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Muraro/data.h5")
utils.peek(muraro, "build/pancreas/Muraro")
muraro.obs.head()

In [None]:
muraro.obs.dtypes

In [None]:
opt_model_kwargs = dict(batch_effect="donor")
muraro_model = cb.directi.fit_DIRECTi(
    muraro, muraro.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
muraro.latent = muraro_model.inference(muraro)

In [None]:
ax = muraro.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Muraro/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = muraro.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Muraro/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = muraro.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Muraro/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
muraro.write_dataset("build/pancreas/Muraro/Muraro.h5")

In [None]:
%%capture capio
muraro_models = [muraro_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    muraro_models.append(cb.directi.fit_DIRECTi(
        muraro, muraro.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
muraro_blast = cb.blast.BLAST(muraro_models, muraro)
muraro_blast.save("build/pancreas/Muraro")

In [None]:
with open("build/pancreas/Muraro/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Muraro/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(muraro_blast, "build/pancreas/Muraro")

In [None]:
%%writefile build/pancreas/Muraro/predictable.txt
cell_ontology_class
cell_type1

## Enge

In [None]:
enge = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Enge/data.h5")
utils.peek(enge, "build/pancreas/Enge")
enge.obs.head()

In [None]:
enge.obs.dtypes

In [None]:
opt_model_kwargs = dict(batch_effect="donor")
enge_model = cb.directi.fit_DIRECTi(
    enge, enge.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
enge.latent = enge_model.inference(enge)

In [None]:
ax = enge.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Enge/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = enge.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Enge/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = enge.visualize_latent("lifestage", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Enge/lifestage.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = enge.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Enge/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
enge.write_dataset("build/pancreas/Enge/Enge.h5")

In [None]:
%%capture capio
enge_models = [enge_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    enge_models.append(cb.directi.fit_DIRECTi(
        enge, enge.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
enge_blast = cb.blast.BLAST(enge_models, enge)
enge_blast.save("build/pancreas/Enge")

In [None]:
with open("build/pancreas/Enge/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Enge/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(enge_blast, "build/pancreas/Enge")

In [None]:
%%writefile build/pancreas/Enge/predictable.txt
cell_ontology_class
cell_type1

## Human aligned

In [None]:
# Create the output directory for the aligned human model. exist_ok=True makes
# the cell safe to re-run and avoids the check-then-create race of
# os.path.exists() followed by os.makedirs().
os.makedirs("build/pancreas/ALIGNED_Homo_sapiens_Pancreas", exist_ok=True)
xin = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Xin_2016/data.h5")
segerstolpe = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Segerstolpe/data.h5")
lawlor = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Lawlor/data.h5")

In [None]:
# Copy each dataset's "donor" column to a dataset-specific name so that, after
# merging, donor can be listed as a separate batch effect per dataset (see the
# batch_effect list in the aligned fit cell below). Xin, Segerstolpe and Lawlor
# get no such column here.
baron_human.obs["baron_human_donor"] = baron_human.obs["donor"]
muraro.obs["muraro_donor"] = muraro.obs["donor"]
enge.obs["enge_donor"] = enge.obs["donor"]

In [None]:
# Merge the six human pancreas datasets into a single ExprDataSet.
# The dict keys are presumably what populates the "dataset_name" obs column
# used as a batch effect below -- TODO confirm against Cell BLAST docs.
# merge_uns_slots requests that uns["seurat_genes"] be merged too, so that
# human.uns["seurat_genes"] is available for model fitting.
human = cb.data.ExprDataSet.merge_datasets(dict(
    baron_human=baron_human,
    segerstolpe=segerstolpe,
    muraro=muraro,
    xin=xin,
    lawlor=lawlor,
    enge=enge
), merge_uns_slots=["seurat_genes"])

In [None]:
human.obs.dtypes

In [None]:
# Options for the aligned human model: four batch effects (dataset of origin
# plus the three per-dataset donor columns created above) and a custom
# lambda_reg for the batch-removal module.
opt_model_kwargs = dict(
    batch_effect=["dataset_name", "baron_human_donor", "muraro_donor", "enge_donor"],
    rmbatch_module_kwargs=dict(lambda_reg=0.005)
)  # Many batch effects are being aligned, decrease regularization strength accordingly
# Fit on the merged gene set; the ensemble cell further down uses the same
# gene set and the same opt_model_kwargs.
human_model = cb.directi.fit_DIRECTi(
    human, human.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
# Attach the model's inference output to the merged dataset as its latent space.
human.latent = human_model.inference(human)

In [None]:
ax = human.visualize_latent("cell_ontology_class", reuse=False, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/ALIGNED_Homo_sapiens_Pancreas/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = human.visualize_latent("dataset_name", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/ALIGNED_Homo_sapiens_Pancreas/dataset_name.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
human.write_dataset("build/pancreas/ALIGNED_Homo_sapiens_Pancreas/ALIGNED_Homo_sapiens_Pancreas.h5")

In [None]:
%%capture capio
human_models = [human_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    human_models.append(cb.directi.fit_DIRECTi(
        human, human.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
human_blast = cb.blast.BLAST(human_models, human)
human_blast.save("build/pancreas/ALIGNED_Homo_sapiens_Pancreas")

In [None]:
with open("build/pancreas/ALIGNED_Homo_sapiens_Pancreas/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/ALIGNED_Homo_sapiens_Pancreas/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(human_blast, "build/pancreas/ALIGNED_Homo_sapiens_Pancreas")

In [None]:
%%writefile build/pancreas/ALIGNED_Homo_sapiens_Pancreas/predictable.txt
cell_ontology_class

**TODO:** Should the alignment strength be decreased slightly for this aligned model?

---

# Mouse

## Baron_mouse

In [None]:
baron_mouse = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Baron_mouse/data.h5")
utils.peek(baron_mouse, "build/pancreas/Baron_mouse")
baron_mouse.obs.head()

In [None]:
baron_mouse.obs.dtypes

In [None]:
opt_model_kwargs = dict(batch_effect="donor")
baron_mouse_model = cb.directi.fit_DIRECTi(
    baron_mouse, baron_mouse.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
baron_mouse.latent = baron_mouse_model.inference(baron_mouse)

In [None]:
ax = baron_mouse.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_mouse/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = baron_mouse.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_mouse/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = baron_mouse.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Baron_mouse/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
baron_mouse.write_dataset("build/pancreas/Baron_mouse/Baron_mouse.h5")

In [None]:
%%capture capio
baron_mouse_models = [baron_mouse_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    baron_mouse_models.append(cb.directi.fit_DIRECTi(
        baron_mouse, baron_mouse.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
baron_mouse_blast = cb.blast.BLAST(baron_mouse_models, baron_mouse)
baron_mouse_blast.save("build/pancreas/Baron_mouse")

In [None]:
with open("build/pancreas/Baron_mouse/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Baron_mouse/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(baron_mouse_blast, "build/pancreas/Baron_mouse")

In [None]:
%%writefile build/pancreas/Baron_mouse/predictable.txt
cell_ontology_class
cell_type1

## Quake_Smart-seq2_Pancreas

In [None]:
quake_smart_seq2_pancreas = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Quake_Smart-seq2_Pancreas/data.h5")
utils.peek(quake_smart_seq2_pancreas, "build/pancreas/Quake_Smart-seq2_Pancreas")
quake_smart_seq2_pancreas.obs.head()

In [None]:
# Turn the numeric cluster ids into string labels of the form "cluster_<id>".
# The guard keeps this cell idempotent: once the labels carry the prefix, a
# second run would otherwise fail in astype(int).
cluster_ids = quake_smart_seq2_pancreas.obs["cluster"]
if not cluster_ids.astype(str).str.startswith("cluster_").all():
    quake_smart_seq2_pancreas.obs["cluster"] = "cluster_" + cluster_ids.astype(int).astype(str)
quake_smart_seq2_pancreas.obs.dtypes

In [None]:
opt_model_kwargs = dict(batch_effect="donor")
quake_smart_seq2_pancreas_model = cb.directi.fit_DIRECTi(
    quake_smart_seq2_pancreas, quake_smart_seq2_pancreas.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
quake_smart_seq2_pancreas.latent = quake_smart_seq2_pancreas_model.inference(quake_smart_seq2_pancreas)

In [None]:
ax = quake_smart_seq2_pancreas.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Quake_Smart-seq2_Pancreas/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_smart_seq2_pancreas.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Quake_Smart-seq2_Pancreas/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_smart_seq2_pancreas.visualize_latent("cluster", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Quake_Smart-seq2_Pancreas/cluster.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_smart_seq2_pancreas.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Quake_Smart-seq2_Pancreas/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
quake_smart_seq2_pancreas.write_dataset("build/pancreas/Quake_Smart-seq2_Pancreas/Quake_Smart-seq2_Pancreas.h5")

In [None]:
%%capture capio
quake_smart_seq2_pancreas_models = [quake_smart_seq2_pancreas_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    quake_smart_seq2_pancreas_models.append(cb.directi.fit_DIRECTi(
        quake_smart_seq2_pancreas, quake_smart_seq2_pancreas.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
quake_smart_seq2_pancreas_blast = cb.blast.BLAST(
    quake_smart_seq2_pancreas_models, quake_smart_seq2_pancreas
)
quake_smart_seq2_pancreas_blast.save("build/pancreas/Quake_Smart-seq2_Pancreas")

In [None]:
with open("build/pancreas/Quake_Smart-seq2_Pancreas/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Quake_Smart-seq2_Pancreas/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(quake_smart_seq2_pancreas_blast, "build/pancreas/Quake_Smart-seq2_Pancreas")

In [None]:
%%writefile build/pancreas/Quake_Smart-seq2_Pancreas/predictable.txt
cell_ontology_class
cell_type1
cluster

## Qiu

In [None]:
qiu = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Qiu/data.h5")
utils.peek(qiu, "build/pancreas/Qiu")
qiu.obs.head()

In [None]:
qiu.obs.dtypes

In [None]:
# Qiu is fitted with the shared hyper-parameters only: no opt_model_kwargs is
# passed, so no batch_effect is specified for this dataset (the ensemble cell
# of this section omits it as well).
qiu_model = cb.directi.fit_DIRECTi(
    qiu, qiu.uns["seurat_genes"],
    **fixed_model_kwargs
)
# Attach the model's inference output to the dataset as its latent space.
qiu.latent = qiu_model.inference(qiu)

In [None]:
ax = qiu.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Qiu/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = qiu.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Qiu/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
qiu.write_dataset("build/pancreas/Qiu/Qiu.h5")

In [None]:
%%capture capio
qiu_models = [qiu_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    qiu_models.append(cb.directi.fit_DIRECTi(
        qiu, qiu.uns["seurat_genes"],
        **fixed_model_kwargs,
        random_seed=i
    ))
qiu_blast = cb.blast.BLAST(qiu_models, qiu)
qiu_blast.save("build/pancreas/Qiu")

In [None]:
with open("build/pancreas/Qiu/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Qiu/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(qiu_blast, "build/pancreas/Qiu")

In [None]:
%%writefile "build/pancreas/Qiu/predictable.txt"
cell_ontology_class
cell_type1

## Mouse aligned

In [None]:
# Create the output directory for the aligned mouse model. exist_ok=True makes
# the cell safe to re-run and avoids the check-then-create race of
# os.path.exists() followed by os.makedirs().
os.makedirs("build/pancreas/ALIGNED_Mus_musculus_Pancreas", exist_ok=True)
# Copy each dataset's "donor" column to a dataset-specific name so that, after
# merging, donor can be listed as a separate batch effect per dataset (see the
# batch_effect list in the aligned fit cell below). Qiu gets no such column,
# matching its single-dataset fit, which uses no batch effect.
baron_mouse.obs["baron_mouse_donor"] = baron_mouse.obs["donor"]
quake_smart_seq2_pancreas.obs["quake_smart_seq2_pancreas_donor"] = quake_smart_seq2_pancreas.obs["donor"]

In [None]:
mouse = cb.data.ExprDataSet.merge_datasets(dict(
    baron_mouse=baron_mouse, quake_smart_seq2_pancreas=quake_smart_seq2_pancreas, qiu=qiu
), merge_uns_slots=["seurat_genes"])

In [None]:
mouse.obs.dtypes

In [None]:
# Options for the aligned mouse model: three batch effects -- the dataset of
# origin plus the two per-dataset donor columns created above.
opt_model_kwargs = dict(
    batch_effect=["dataset_name", "baron_mouse_donor", "quake_smart_seq2_pancreas_donor"]
)
# Fit on the merged gene set (mouse.uns["seurat_genes"]). This previously used
# baron_mouse.uns["seurat_genes"], which made the first ensemble member train
# on a different gene set than the remaining members fitted in the ensemble
# cell below (and differed from the human aligned section).
mouse_model = cb.directi.fit_DIRECTi(
    mouse, mouse.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
# Attach the model's inference output to the merged dataset as its latent space.
mouse.latent = mouse_model.inference(mouse)

In [None]:
ax = mouse.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/ALIGNED_Mus_musculus_Pancreas/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = mouse.visualize_latent("dataset_name", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/ALIGNED_Mus_musculus_Pancreas/dataset_name.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
mouse.write_dataset("build/pancreas/ALIGNED_Mus_musculus_Pancreas/ALIGNED_Mus_musculus_Pancreas.h5")

In [None]:
%%capture capio
mouse_models = [mouse_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    mouse_models.append(cb.directi.fit_DIRECTi(
        mouse, mouse.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
mouse_blast = cb.blast.BLAST(mouse_models, mouse)
mouse_blast.save("build/pancreas/ALIGNED_Mus_musculus_Pancreas")

In [None]:
with open("build/pancreas/ALIGNED_Mus_musculus_Pancreas/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/ALIGNED_Mus_musculus_Pancreas/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(mouse_blast, "build/pancreas/ALIGNED_Mus_musculus_Pancreas")

In [None]:
%%writefile build/pancreas/ALIGNED_Mus_musculus_Pancreas/predictable.txt
cell_ontology_class

---

# Zebrafish

## Singh

In [None]:
singh = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Singh/data.h5")
utils.peek(singh, "build/pancreas/Singh")
singh.obs.head()

In [None]:
singh.obs.dtypes

In [None]:
# Dataset-specific overrides: h_dim=64 and batch_size=64; no batch_effect is
# given for this dataset.
# NOTE(review): the reason for these values is not recorded in the notebook --
# presumably tuned for this dataset; confirm before changing.
opt_model_kwargs = dict(h_dim=64, batch_size=64)
# Fit the first DIRECTi model on the genes listed in uns["seurat_genes"].
singh_model = cb.directi.fit_DIRECTi(
    singh, singh.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
# Attach the model's inference output to the dataset as its latent space.
singh.latent = singh_model.inference(singh)

In [None]:
ax = singh.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Singh/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = singh.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Singh/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = singh.visualize_latent("lifestage", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/pancreas/Singh/lifestage.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
singh.write_dataset("build/pancreas/Singh/Singh.h5")

In [None]:
%%capture capio
singh_models = [singh_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    singh_models.append(cb.directi.fit_DIRECTi(
        singh, singh.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
singh_blast = cb.blast.BLAST(singh_models, singh)
singh_blast.save("build/pancreas/Singh")

In [None]:
with open("build/pancreas/Singh/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/pancreas/Singh/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(singh_blast, "build/pancreas/Singh")

In [None]:
%%writefile build/pancreas/Singh/predictable.txt
cell_ontology_class
cell_type1
lifestage