In [None]:
import os
import functools
import numpy as np
import Cell_BLAST as cb
import utils
# Restrict this process to the single GPU returned by the project helper
# (presumably the device with the least memory in use, going by its name).
os.environ["CUDA_VISIBLE_DEVICES"] = utils.pick_gpu_lowest_memory()
# Global Cell_BLAST settings. N_JOBS is also reused below as the ensemble size:
# every ensemble loop iterates over `range(1, cb.config.N_JOBS)`.
cb.config.RANDOM_SEED = 0
cb.config.N_JOBS = 4
# Hyperparameters shared by every `fit_DIRECTi` call in this notebook;
# dataset-specific options are layered on top through `opt_model_kwargs`.
fixed_model_kwargs = dict(
    latent_dim=10, cat_dim=20,
    epoch=500, patience=20
)

In [None]:
cb.__version__

---

# Mouse

## Karaiskos_mouse

In [None]:
karaiskos_mouse = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Karaiskos_mouse/data.h5")
utils.peek(karaiskos_mouse, "build/kidney/Karaiskos_mouse")
karaiskos_mouse.obs.head()

In [None]:
karaiskos_mouse.obs.dtypes

In [None]:
# Training on this dataset runs into numerical issues, so a regularization
# weight (`lambda_reg`, passed through `prob_module_kwargs`) is set explicitly.
opt_model_kwargs = dict(prob_module_kwargs=dict(lambda_reg=0.01))
karaiskos_mouse_model = cb.directi.fit_DIRECTi(
    karaiskos_mouse, karaiskos_mouse.uns["seurat_genes"],
    **fixed_model_kwargs, **opt_model_kwargs
)
# Store the embedding of every cell on the dataset for the plots below.
karaiskos_mouse.latent = karaiskos_mouse_model.inference(karaiskos_mouse)

In [None]:
ax = karaiskos_mouse.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Karaiskos_mouse/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = karaiskos_mouse.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Karaiskos_mouse/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
# Prefix the replicate labels so they read as category names in the legend.
# The guard keeps the cell idempotent: re-running it must not stack prefixes
# ("replicate_replicate_...").
if not karaiskos_mouse.obs["replicate"].str.startswith("replicate_", na=False).any():
    karaiskos_mouse.obs["replicate"] = "replicate_" + karaiskos_mouse.obs["replicate"]
ax = karaiskos_mouse.visualize_latent("replicate", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Karaiskos_mouse/replicate.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
karaiskos_mouse.write_dataset("build/kidney/Karaiskos_mouse/Karaiskos_mouse.h5")

In [None]:
%%capture capio
# BLAST ensemble: start from the model fitted above and add one model per
# extra seed (1 .. N_JOBS - 1), all trained with the same options.
karaiskos_mouse_models = [karaiskos_mouse_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        karaiskos_mouse, karaiskos_mouse.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    karaiskos_mouse_models.append(extra_model)
# Build the BLAST index over the full dataset and persist it.
karaiskos_mouse_blast = cb.blast.BLAST(karaiskos_mouse_models, karaiskos_mouse)
karaiskos_mouse_blast.save("build/kidney/Karaiskos_mouse")

In [None]:
with open("build/kidney/Karaiskos_mouse/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Karaiskos_mouse/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(karaiskos_mouse_blast, "build/kidney/Karaiskos_mouse")

In [None]:
%%writefile build/kidney/Karaiskos_mouse/predictable.txt
cell_ontology_class
cell_type1

## Adam

In [None]:
adam = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Adam/data.h5")
utils.peek(adam, "build/kidney/Adam")
adam.obs.head()

In [None]:
adam.obs.dtypes

In [None]:
# No dataset-specific options here: fit with the shared hyperparameters only,
# then embed every cell.
adam_model = cb.directi.fit_DIRECTi(adam, adam.uns["seurat_genes"], **fixed_model_kwargs)
adam.latent = adam_model.inference(adam)

In [None]:
ax = adam.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Adam/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = adam.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Adam/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
adam.write_dataset("build/kidney/Adam/Adam.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed.
adam_models = [adam_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        adam, adam.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs
    )
    adam_models.append(extra_model)
adam_blast = cb.blast.BLAST(adam_models, adam)
adam_blast.save("build/kidney/Adam")

In [None]:
with open("build/kidney/Adam/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Adam/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(adam_blast, "build/kidney/Adam")

In [None]:
%%writefile build/kidney/Adam/predictable.txt
cell_ontology_class
cell_type1

## Quake_10x_Kidney

In [None]:
quake_10x_kidney = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Quake_10x_Kidney/data.h5")
utils.peek(quake_10x_kidney, "build/kidney/Quake_10x_Kidney")
quake_10x_kidney.obs.head()

In [None]:
# Turn numeric cluster ids into string labels ("cluster_<n>").
# The guard keeps the cell idempotent: once relabelled, `astype(int)` would
# fail on "cluster_<n>" if the cell were simply run again.
if not quake_10x_kidney.obs["cluster"].astype(str).str.startswith("cluster_").any():
    quake_10x_kidney.obs["cluster"] = "cluster_" + quake_10x_kidney.obs["cluster"].astype(int).astype(str)
quake_10x_kidney.obs.dtypes

In [None]:
# Treat the donor column as the batch effect for this dataset.
opt_model_kwargs = {"batch_effect": "donor"}
quake_10x_kidney_model = cb.directi.fit_DIRECTi(
    quake_10x_kidney,
    quake_10x_kidney.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
quake_10x_kidney.latent = quake_10x_kidney_model.inference(quake_10x_kidney)

In [None]:
ax = quake_10x_kidney.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Quake_10x_Kidney/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_10x_kidney.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Quake_10x_Kidney/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_10x_kidney.visualize_latent("cluster", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Quake_10x_Kidney/cluster.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = quake_10x_kidney.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Quake_10x_Kidney/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
quake_10x_kidney.write_dataset("build/kidney/Quake_10x_Kidney/Quake_10x_Kidney.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
quake_10x_kidney_models = [quake_10x_kidney_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        quake_10x_kidney, quake_10x_kidney.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    quake_10x_kidney_models.append(extra_model)
quake_10x_kidney_blast = cb.blast.BLAST(quake_10x_kidney_models, quake_10x_kidney)
quake_10x_kidney_blast.save("build/kidney/Quake_10x_Kidney")

In [None]:
with open("build/kidney/Quake_10x_Kidney/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Quake_10x_Kidney/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(quake_10x_kidney_blast, "build/kidney/Quake_10x_Kidney")

In [None]:
%%writefile build/kidney/Quake_10x_Kidney/predictable.txt
cell_ontology_class
cell_type1
cluster

## Mouse aligned

In [None]:
# Create the output directory; `exist_ok=True` makes the cell safe to re-run.
os.makedirs("build/kidney/ALIGNED_Mus_musculus_Kidney", exist_ok=True)
# Keep a dataset-specific copy of the donor column so that it can be named
# as its own batch effect when fitting the aligned model.
quake_10x_kidney.obs["quake_10x_kidney_donor"] = quake_10x_kidney.obs["donor"]
# The Smart-seq2 dataset has no section of its own in this notebook; it is
# loaded here only to take part in the alignment.
quake_smart_seq2_kidney = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Quake_Smart-seq2_Kidney/data.h5"
)
# Cast cluster ids to strings (no "cluster_" prefix is added for this dataset).
quake_smart_seq2_kidney.obs["cluster"] = quake_smart_seq2_kidney.obs["cluster"].astype(int).astype(str)
# Merge all mouse kidney datasets, combining their "seurat_genes" selections.
mouse = cb.data.ExprDataSet.merge_datasets(dict(
    karaiskos_mouse=karaiskos_mouse, adam=adam,
    quake_smart_seq2_kidney=quake_smart_seq2_kidney,
    quake_10x_kidney=quake_10x_kidney
), merge_uns_slots=["seurat_genes"])

In [None]:
mouse.obs.dtypes

Subsample the Karaiskos_mouse dataset so that its cell type distribution is comparable to that of the other datasets.

In [None]:
np.unique(karaiskos_mouse.obs["cell_type1"], return_counts=True)

In [None]:
# Fix the global NumPy seed so the same cells are dropped on every run.
np.random.seed(0)
# One boolean mask per cell type, marking the cells of that type.
mask_dict = {}
for ct in np.unique(karaiskos_mouse.obs["cell_type1"]):
    mask_dict[ct] = (karaiskos_mouse.obs["cell_type1"] == ct).values
# Randomly DROP 9000 podocytes: the sampled positions are switched off in the
# mask. `replace=False` means this raises if fewer than 9000 are available.
mask_dict["Podocytes"][np.random.choice(
    np.where(mask_dict["Podocytes"])[0],
    size=9000, replace=False
)] = False
# Likewise drop 700 endothelial cells. The order of the two draws matters for
# reproducibility because both consume the same global random stream.
mask_dict["Endothelium"][np.random.choice(
    np.where(mask_dict["Endothelium"])[0],
    size=700, replace=False
)] = False
# Keep every cell that is still selected in any of the per-type masks.
karaiskos_mouse_downsampled = karaiskos_mouse[
    functools.reduce(np.logical_or, mask_dict.values()), :
]

In [None]:
np.unique(karaiskos_mouse_downsampled.obs["cell_type1"], return_counts=True)

In [None]:
mouse_downsampled = cb.data.ExprDataSet.merge_datasets(dict(
    karaiskos_mouse=karaiskos_mouse_downsampled, adam=adam,
    quake_smart_seq2_kidney=quake_smart_seq2_kidney,
    quake_10x_kidney=quake_10x_kidney
), merge_uns_slots=["seurat_genes"])

In [None]:
# Batch effects for the aligned model: the source dataset plus the
# dataset-specific donor column of Quake_10x_Kidney.
opt_model_kwargs = {
    "batch_effect": ["dataset_name", "quake_10x_kidney_donor"],
    "rmbatch_module_kwargs": {"lambda_reg": 0.005},
    "prob_module_kwargs": {"lambda_reg": 0.01},
}
# Train on the downsampled merge, but embed every cell of the full merge.
mouse_model = cb.directi.fit_DIRECTi(
    mouse_downsampled,
    mouse_downsampled.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
mouse.latent = mouse_model.inference(mouse)

In [None]:
ax = mouse.visualize_latent("cell_ontology_class", width=10, height=10, size=5, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/ALIGNED_Mus_musculus_Kidney/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = mouse.visualize_latent("dataset_name", width=10, height=10, size=5, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/ALIGNED_Mus_musculus_Kidney/study.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
mouse.write_dataset("build/kidney/ALIGNED_Mus_musculus_Kidney/ALIGNED_Mus_musculus_Kidney.h5")

In [None]:
%%capture capio
# BLAST ensemble for the aligned mouse data.
# `mouse_model` was trained on `mouse_downsampled`, so the additional members
# are trained on the same downsampled data (and its gene selection); otherwise
# the ensemble would mix models fitted on different cell type distributions.
mouse_models = [mouse_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    mouse_models.append(cb.directi.fit_DIRECTi(
        mouse_downsampled, mouse_downsampled.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
# The BLAST reference is still the full (non-downsampled) merged dataset.
mouse_blast = cb.blast.BLAST(mouse_models, mouse)
mouse_blast.save("build/kidney/ALIGNED_Mus_musculus_Kidney")

In [None]:
with open("build/kidney/ALIGNED_Mus_musculus_Kidney/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/ALIGNED_Mus_musculus_Kidney/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(mouse_blast, "build/kidney/ALIGNED_Mus_musculus_Kidney")

In [None]:
%%writefile build/kidney/ALIGNED_Mus_musculus_Kidney/predictable.txt
cell_ontology_class

---

# Human

## Wang_Kidney

In [None]:
wang_kidney = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Wang_Kidney/data.h5")
utils.peek(wang_kidney, "build/kidney/Wang_Kidney")
wang_kidney.obs.head()

In [None]:
wang_kidney.obs.dtypes

In [None]:
# Fit with the shared hyperparameters only, then embed every cell.
wang_kidney_model = cb.directi.fit_DIRECTi(
    wang_kidney,
    wang_kidney.uns["seurat_genes"],
    **fixed_model_kwargs,
)
wang_kidney.latent = wang_kidney_model.inference(wang_kidney)

In [None]:
ax = wang_kidney.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Wang_Kidney/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = wang_kidney.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Wang_Kidney/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = wang_kidney.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Wang_Kidney/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
wang_kidney.write_dataset("build/kidney/Wang_Kidney/Wang_Kidney.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed.
wang_kidney_models = [wang_kidney_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        wang_kidney, wang_kidney.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs
    )
    wang_kidney_models.append(extra_model)
wang_kidney_blast = cb.blast.BLAST(wang_kidney_models, wang_kidney)
wang_kidney_blast.save("build/kidney/Wang_Kidney")

In [None]:
with open("build/kidney/Wang_Kidney/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Wang_Kidney/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(wang_kidney_blast, "build/kidney/Wang_Kidney")

In [None]:
%%writefile build/kidney/Wang_Kidney/predictable.txt
cell_ontology_class
cell_type1

\>\>\> Wang_Kidney is discarded in the human alignment (it is not merged into the aligned human dataset below).

## Wu_human

In [None]:
wu_human = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Wu_human/data.h5")
utils.peek(wu_human, "build/kidney/Wu_human")
wu_human.obs.head()

In [None]:
wu_human.obs.dtypes

In [None]:
# Fit with the shared hyperparameters only, then embed every cell.
wu_human_model = cb.directi.fit_DIRECTi(wu_human, wu_human.uns["seurat_genes"], **fixed_model_kwargs)
wu_human.latent = wu_human_model.inference(wu_human)

In [None]:
ax = wu_human.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Wu_human/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = wu_human.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Wu_human/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
wu_human.write_dataset("build/kidney/Wu_human/Wu_human.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed.
wu_human_models = [wu_human_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        wu_human, wu_human.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs
    )
    wu_human_models.append(extra_model)
wu_human_blast = cb.blast.BLAST(wu_human_models, wu_human)
wu_human_blast.save("build/kidney/Wu_human")

In [None]:
with open("build/kidney/Wu_human/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Wu_human/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(wu_human_blast, "build/kidney/Wu_human")

In [None]:
%%writefile build/kidney/Wu_human/predictable.txt
cell_ontology_class
cell_type1

## Young

In [None]:
young = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Young/data.h5")
utils.peek(young, "build/kidney/Young")
young.obs.head()

In [None]:
young.obs.dtypes

In [None]:
# Treat the donor column as the batch effect for this dataset.
opt_model_kwargs = {"batch_effect": "donor"}
young_model = cb.directi.fit_DIRECTi(
    young,
    young.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
young.latent = young_model.inference(young)

In [None]:
ax = young.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("cell_type0", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/cell_type0.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("cell_type2", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/cell_type2.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("ClusterID", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/ClusterID.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("region", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/region.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = young.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Young/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
young.write_dataset("build/kidney/Young/Young.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
young_models = [young_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        young, young.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    young_models.append(extra_model)
young_blast = cb.blast.BLAST(young_models, young)
young_blast.save("build/kidney/Young")

In [None]:
with open("build/kidney/Young/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Young/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(young_blast, "build/kidney/Young")

In [None]:
%%writefile build/kidney/Young/predictable.txt
cell_ontology_class
cell_type0
cell_type1
cell_type2
ClusterID
region

## Park

In [None]:
park = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Park/data.h5")
utils.peek(park, "build/kidney/Park")
park.obs.head()

In [None]:
park.obs.dtypes

In [None]:
# Treat the donor column as the batch effect for this dataset.
opt_model_kwargs = {"batch_effect": "donor"}
park_model = cb.directi.fit_DIRECTi(
    park,
    park.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
park.latent = park_model.inference(park)

In [None]:
ax = park.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Park/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = park.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Park/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = park.visualize_latent("full_name", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Park/full_name.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = park.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Park/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
park.write_dataset("build/kidney/Park/Park.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
park_models = [park_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        park, park.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    park_models.append(extra_model)
park_blast = cb.blast.BLAST(park_models, park)
park_blast.save("build/kidney/Park")

In [None]:
with open("build/kidney/Park/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Park/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(park_blast, "build/kidney/Park")

In [None]:
%%writefile build/kidney/Park/predictable.txt
cell_ontology_class
cell_type1
full_name

## Hochane

In [None]:
hochane = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Hochane/data.h5")
utils.peek(hochane, "build/kidney/Hochane")
hochane.obs.head()

In [None]:
hochane.obs.dtypes

In [None]:
# Treat the donor column as the batch effect for this dataset.
opt_model_kwargs = {"batch_effect": "donor"}
hochane_model = cb.directi.fit_DIRECTi(
    hochane,
    hochane.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
hochane.latent = hochane_model.inference(hochane)

In [None]:
ax = hochane.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Hochane/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = hochane.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Hochane/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = hochane.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Hochane/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
hochane.write_dataset("build/kidney/Hochane/Hochane.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
hochane_models = [hochane_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        hochane, hochane.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    hochane_models.append(extra_model)
hochane_blast = cb.blast.BLAST(hochane_models, hochane)
hochane_blast.save("build/kidney/Hochane")

In [None]:
with open("build/kidney/Hochane/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Hochane/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(hochane_blast, "build/kidney/Hochane")

In [None]:
%%writefile build/kidney/Hochane/predictable.txt
cell_ontology_class
cell_type1

## Stewart_Mature

In [None]:
stewart_mature = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Stewart_Mature/data.h5")
utils.peek(stewart_mature, "build/kidney/Stewart_Mature")
stewart_mature.obs.head()

In [None]:
stewart_mature.obs.dtypes

In [None]:
stewart_mature.uns["scanpy_genes"].shape

In [None]:
# Dataset-specific options: donor is the batch effect, corrected with the
# "MNNAdversarial" module; both modules get an explicit `lambda_reg`.
opt_model_kwargs = {
    "prob_module_kwargs": {"lambda_reg": 0.01},
    "batch_effect": "donor",
    "rmbatch_module": "MNNAdversarial",
    "rmbatch_module_kwargs": {"lambda_reg": 0.001},
}
# Note: this dataset is fitted on the "scanpy_genes" selection.
stewart_mature_model = cb.directi.fit_DIRECTi(
    stewart_mature,
    stewart_mature.uns["scanpy_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
stewart_mature.latent = stewart_mature_model.inference(stewart_mature)

In [None]:
ax = stewart_mature.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Mature/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = stewart_mature.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Mature/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = stewart_mature.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Mature/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
stewart_mature.write_dataset("build/kidney/Stewart_Mature/Stewart_Mature.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed.
# All members must use the same gene selection as `stewart_mature_model`,
# i.e. "scanpy_genes". ("seurat_genes" is only aliased to it later, in the
# human alignment section, so relying on it here breaks on a fresh kernel.)
stewart_mature_models = [stewart_mature_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    stewart_mature_models.append(cb.directi.fit_DIRECTi(
        stewart_mature, stewart_mature.uns["scanpy_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
stewart_mature_blast = cb.blast.BLAST(stewart_mature_models, stewart_mature)
stewart_mature_blast.save("build/kidney/Stewart_Mature")

In [None]:
with open("build/kidney/Stewart_Mature/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Stewart_Mature/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(stewart_mature_blast, "build/kidney/Stewart_Mature")

In [None]:
%%writefile build/kidney/Stewart_Mature/predictable.txt
cell_ontology_class
cell_type1
cell_type0

## Stewart_Fetal

In [None]:
stewart_fetal = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Stewart_Fetal/data.h5")
utils.peek(stewart_fetal, "build/kidney/Stewart_Fetal")
stewart_fetal.obs.head()

In [None]:
stewart_fetal.obs.dtypes

In [None]:
stewart_fetal.uns["scanpy_genes"].shape

In [None]:
# Dataset-specific options: donor is the batch effect, corrected with the
# "MNNAdversarial" module; both modules get an explicit `lambda_reg`.
opt_model_kwargs = {
    "prob_module_kwargs": {"lambda_reg": 0.01},
    "batch_effect": "donor",
    "rmbatch_module": "MNNAdversarial",
    "rmbatch_module_kwargs": {"lambda_reg": 0.001},
}
# Note: this dataset is fitted on the "scanpy_genes" selection.
stewart_fetal_model = cb.directi.fit_DIRECTi(
    stewart_fetal,
    stewart_fetal.uns["scanpy_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
stewart_fetal.latent = stewart_fetal_model.inference(stewart_fetal)

In [None]:
ax = stewart_fetal.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Fetal/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = stewart_fetal.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Fetal/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = stewart_fetal.visualize_latent("sample", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Fetal/sample.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = stewart_fetal.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Stewart_Fetal/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
stewart_fetal.write_dataset("build/kidney/Stewart_Fetal/Stewart_Fetal.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed.
# All members must use the same gene selection as `stewart_fetal_model`,
# i.e. "scanpy_genes". ("seurat_genes" is only aliased to it later, in the
# human alignment section, so relying on it here breaks on a fresh kernel.)
stewart_fetal_models = [stewart_fetal_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    stewart_fetal_models.append(cb.directi.fit_DIRECTi(
        stewart_fetal, stewart_fetal.uns["scanpy_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
stewart_fetal_blast = cb.blast.BLAST(stewart_fetal_models, stewart_fetal)
stewart_fetal_blast.save("build/kidney/Stewart_Fetal")

In [None]:
with open("build/kidney/Stewart_Fetal/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Stewart_Fetal/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(stewart_fetal_blast, "build/kidney/Stewart_Fetal")

In [None]:
%%writefile build/kidney/Stewart_Fetal/predictable.txt
cell_ontology_class
cell_type1

## Human aligned

In [None]:
# Create the output directory; `exist_ok=True` makes the cell safe to re-run.
os.makedirs("build/kidney/ALIGNED_Homo_sapiens_Kidney", exist_ok=True)
# Keep a dataset-specific copy of each donor column so that every dataset's
# donors can be named as a separate batch effect in the aligned model.
young.obs["young_donor"] = young.obs["donor"]
hochane.obs["hochane_donor"] = hochane.obs["donor"]
stewart_mature.obs["stewart_mature_donor"] = stewart_mature.obs["donor"]
stewart_fetal.obs["stewart_fetal_donor"] = stewart_fetal.obs["donor"]
# The Stewart datasets were fitted on "scanpy_genes"; expose that selection
# under "seurat_genes" so it takes part in the merged uns slot below.
stewart_mature.uns["seurat_genes"] = stewart_mature.uns["scanpy_genes"]
stewart_fetal.uns["seurat_genes"] = stewart_fetal.uns["scanpy_genes"]
# Wang_Kidney and Park are not part of the aligned human dataset.
human = cb.data.ExprDataSet.merge_datasets(dict(
    wu_human=wu_human, young=young, hochane=hochane, stewart_mature=stewart_mature, stewart_fetal=stewart_fetal
), merge_uns_slots=["seurat_genes"])

In [None]:
human.obs.dtypes

In [None]:
# Five batch effect columns with five batch-removal modules
# (presumably paired by position; confirm against the Cell_BLAST docs).
opt_model_kwargs = {
    "batch_effect": ["dataset_name", "young_donor", "hochane_donor", "stewart_mature_donor", "stewart_fetal_donor"],
    "rmbatch_module": ["MNNAdversarial", "Adversarial", "Adversarial", "MNNAdversarial", "MNNAdversarial"],
    "rmbatch_module_kwargs": {"lambda_reg": 0.001},
    "prob_module_kwargs": {"lambda_reg": 0.01},
}
human_model = cb.directi.fit_DIRECTi(
    human,
    human.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
human.latent = human_model.inference(human)

In [None]:
ax = human.visualize_latent("cell_ontology_class", width=10, height=10, size=5, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/ALIGNED_Homo_sapiens_Kidney/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = human.visualize_latent("dataset_name", width=10, height=10, size=5, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/ALIGNED_Homo_sapiens_Kidney/study.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = human.visualize_latent("donor", width=10, height=10, size=5, scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/ALIGNED_Homo_sapiens_Kidney/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
human.write_dataset("build/kidney/ALIGNED_Homo_sapiens_Kidney/ALIGNED_Homo_sapiens_Kidney.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
human_models = [human_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        human, human.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    human_models.append(extra_model)
human_blast = cb.blast.BLAST(human_models, human)
human_blast.save("build/kidney/ALIGNED_Homo_sapiens_Kidney")

In [None]:
with open("build/kidney/ALIGNED_Homo_sapiens_Kidney/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/ALIGNED_Homo_sapiens_Kidney/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(human_blast, "build/kidney/ALIGNED_Homo_sapiens_Kidney")

In [None]:
%%writefile build/kidney/ALIGNED_Homo_sapiens_Kidney/predictable.txt
cell_ontology_class

---
# Zebrafish
## Alemany_Kidney

In [None]:
alemany_kidney = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Alemany_Kidney/data.h5")
utils.peek(alemany_kidney, "build/kidney/Alemany_Kidney")
alemany_kidney.obs.head()

In [None]:
# Turn numeric cluster ids into string labels ("cluster_<n>").
# The guard keeps the cell idempotent: once relabelled, `astype(int)` would
# fail on "cluster_<n>" if the cell were simply run again.
if not alemany_kidney.obs["ClusterID"].astype(str).str.startswith("cluster_").any():
    alemany_kidney.obs["ClusterID"] = "cluster_" + alemany_kidney.obs["ClusterID"].astype(int).astype(str)
alemany_kidney.obs.dtypes

In [None]:
# Treat the donor column as the batch effect for this dataset.
opt_model_kwargs = {"batch_effect": "donor"}
alemany_kidney_model = cb.directi.fit_DIRECTi(
    alemany_kidney,
    alemany_kidney.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs,
)
alemany_kidney.latent = alemany_kidney_model.inference(alemany_kidney)

In [None]:
ax = alemany_kidney.visualize_latent("cell_ontology_class", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Alemany_Kidney/cell_ontology_class.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = alemany_kidney.visualize_latent("cell_type1", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Alemany_Kidney/cell_type1.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
ax = alemany_kidney.visualize_latent("donor", scatter_kws=dict(rasterized=True))
ax.get_figure().savefig("build/kidney/Alemany_Kidney/donor.svg", dpi=utils.DPI, bbox_inches="tight")

In [None]:
alemany_kidney.write_dataset("build/kidney/Alemany_Kidney/Alemany_Kidney.h5")

In [None]:
%%capture capio
# BLAST ensemble: the model fitted above plus one model per extra seed,
# using the `opt_model_kwargs` set in the fit cell of this section.
alemany_kidney_models = [alemany_kidney_model]
for seed in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % seed)
    extra_model = cb.directi.fit_DIRECTi(
        alemany_kidney, alemany_kidney.uns["seurat_genes"],
        random_seed=seed, **fixed_model_kwargs, **opt_model_kwargs
    )
    alemany_kidney_models.append(extra_model)
alemany_kidney_blast = cb.blast.BLAST(alemany_kidney_models, alemany_kidney)
alemany_kidney_blast.save("build/kidney/Alemany_Kidney")

In [None]:
with open("build/kidney/Alemany_Kidney/stdout.txt", "w") as f:
    f.write(capio.stdout)
with open("build/kidney/Alemany_Kidney/stderr.txt", "w") as f:
    f.write(capio.stderr)

In [None]:
utils.self_projection(alemany_kidney_blast, "build/kidney/Alemany_Kidney")

In [None]:
%%writefile build/kidney/Alemany_Kidney/predictable.txt
cell_ontology_class
cell_type1