In [None]:
import os
import Cell_BLAST as cb
import utils
# Restrict CUDA to the device chosen by the project helper.
os.environ["CUDA_VISIBLE_DEVICES"] = utils.pick_gpu_lowest_memory()

# Global Cell_BLAST settings shared by every section of this notebook.
cb.config.RANDOM_SEED = 0
cb.config.N_JOBS = 4

# Hyper-parameters forwarded unchanged to every `fit_DIRECTi` call.
fixed_model_kwargs = {
    "latent_dim": 10,
    "cat_dim": 20,
    "epoch": 500,
    "patience": 20,
}

In [None]:
# Show the Cell_BLAST version this notebook was run with.
cb.__version__

---

# Mouse

## Wang_Lung

In [None]:
# Read the Wang_Lung dataset and hand it to `utils.peek` together with its
# build directory (presumably peek writes a summary there -- confirm in utils).
wang_lung = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Wang_Lung/data.h5"
)
utils.peek(wang_lung, "build/lung/Wang_Lung")
wang_lung.obs.head()  # preview of the per-cell metadata table

In [None]:
# List the metadata columns and their dtypes.
wang_lung.obs.dtypes

In [None]:
# Fit the first DIRECTi model on the genes listed in uns["seurat_genes"],
# then store the embedding it infers as the dataset's latent space.
wang_lung_model = cb.directi.fit_DIRECTi(
    wang_lung,
    wang_lung.uns["seurat_genes"],
    **fixed_model_kwargs
)
wang_lung.latent = wang_lung_model.inference(wang_lung)

In [None]:
# Visualise the latent space for `cell_ontology_class` and export it as SVG.
ax = wang_lung.visualize_latent(
    "cell_ontology_class",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Wang_Lung/cell_ontology_class.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cell_type1` and export it as SVG.
ax = wang_lung.visualize_latent(
    "cell_type1",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Wang_Lung/cell_type1.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `lifestage` and export it as SVG.
ax = wang_lung.visualize_latent(
    "lifestage",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Wang_Lung/lifestage.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Save the dataset into its build directory.
wang_lung.write_dataset("build/lung/Wang_Lung/Wang_Lung.h5")

In [None]:
%%capture capio
wang_lung_models = [wang_lung_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    wang_lung_models.append(cb.directi.fit_DIRECTi(
        wang_lung, wang_lung.uns["seurat_genes"],
        **fixed_model_kwargs,
        random_seed=i
    ))
wang_lung_blast = cb.blast.BLAST(wang_lung_models, wang_lung)
wang_lung_blast.save("build/lung/Wang_Lung")

In [None]:
# Write the stdout / stderr text captured in `capio` to log files.
for log_path, log_text in (
    ("build/lung/Wang_Lung/stdout.txt", capio.stdout),
    ("build/lung/Wang_Lung/stderr.txt", capio.stderr),
):
    with open(log_path, "w") as log_file:
        log_file.write(log_text)

In [None]:
# Run the project's self-projection helper on the BLAST object, passing the
# dataset's build directory (presumably where results are written -- confirm in utils).
utils.self_projection(wang_lung_blast, "build/lung/Wang_Lung")

In [None]:
%%writefile build/lung/Wang_Lung/predictable.txt
cell_ontology_class
cell_type1
lifestage

## Quake_10x_Lung

In [None]:
# Read the Quake_10x_Lung dataset and hand it to `utils.peek` together with its
# build directory (presumably peek writes a summary there -- confirm in utils).
quake_10x_lung = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Quake_10x_Lung/data.h5"
)
utils.peek(quake_10x_lung, "build/lung/Quake_10x_Lung")
quake_10x_lung.obs.head()  # preview of the per-cell metadata table

In [None]:
# Turn the numeric cluster ids into string labels of the form "cluster_<id>".
# The guard keeps the cell re-runnable: without it, a second execution would try
# to cast the already-prefixed labels back to int and raise ValueError.
cluster_labels = quake_10x_lung.obs["cluster"].astype(str)
if not cluster_labels.str.startswith("cluster_").all():
    quake_10x_lung.obs["cluster"] = (
        "cluster_" + quake_10x_lung.obs["cluster"].astype(int).astype(str)
    )
quake_10x_lung.obs.dtypes

In [None]:
# Fit the first DIRECTi model with "donor" passed as the batch effect, then
# store the embedding it infers as the dataset's latent space.
opt_model_kwargs = {"batch_effect": "donor"}
quake_10x_lung_model = cb.directi.fit_DIRECTi(
    quake_10x_lung,
    quake_10x_lung.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs
)
quake_10x_lung.latent = quake_10x_lung_model.inference(quake_10x_lung)

In [None]:
# Visualise the latent space for `cell_ontology_class` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "cell_ontology_class",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/cell_ontology_class.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cell_type1` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "cell_type1",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/cell_type1.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cluster` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "cluster",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/cluster.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `free_annotation` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "free_annotation",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/free_annotation.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `donor` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "donor",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/donor.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `gender` and export it as SVG.
ax = quake_10x_lung.visualize_latent(
    "gender",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_10x_Lung/gender.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Save the dataset into its build directory.
quake_10x_lung.write_dataset("build/lung/Quake_10x_Lung/Quake_10x_Lung.h5")

In [None]:
%%capture capio
quake_10x_lung_models = [quake_10x_lung_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    quake_10x_lung_models.append(cb.directi.fit_DIRECTi(
        quake_10x_lung, quake_10x_lung.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
quake_10x_lung_blast = cb.blast.BLAST(quake_10x_lung_models, quake_10x_lung)
quake_10x_lung_blast.save("build/lung/Quake_10x_Lung")

In [None]:
# Write the stdout / stderr text captured in `capio` to log files.
for log_path, log_text in (
    ("build/lung/Quake_10x_Lung/stdout.txt", capio.stdout),
    ("build/lung/Quake_10x_Lung/stderr.txt", capio.stderr),
):
    with open(log_path, "w") as log_file:
        log_file.write(log_text)

In [None]:
# Run the project's self-projection helper on the BLAST object, passing the
# dataset's build directory (presumably where results are written -- confirm in utils).
utils.self_projection(quake_10x_lung_blast, "build/lung/Quake_10x_Lung")

In [None]:
%%writefile build/lung/Quake_10x_Lung/predictable.txt
cell_ontology_class
cell_type1
cluster
free_annotation

## Quake_Smart-seq2_Lung

In [None]:
# Read the Quake_Smart-seq2_Lung dataset and hand it to `utils.peek` together with
# its build directory (presumably peek writes a summary there -- confirm in utils).
quake_smart_seq2_lung = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Quake_Smart-seq2_Lung/data.h5"
)
utils.peek(quake_smart_seq2_lung, "build/lung/Quake_Smart-seq2_Lung")
quake_smart_seq2_lung.obs.head()  # preview of the per-cell metadata table

In [None]:
# Turn the numeric cluster ids into string labels of the form "cluster_<id>".
# The guard keeps the cell re-runnable: without it, a second execution would try
# to cast the already-prefixed labels back to int and raise ValueError.
cluster_labels = quake_smart_seq2_lung.obs["cluster"].astype(str)
if not cluster_labels.str.startswith("cluster_").all():
    quake_smart_seq2_lung.obs["cluster"] = (
        "cluster_" + quake_smart_seq2_lung.obs["cluster"].astype(int).astype(str)
    )
quake_smart_seq2_lung.obs.dtypes

In [None]:
# Fit the first DIRECTi model with "donor" passed as the batch effect, then
# store the embedding it infers as the dataset's latent space.
opt_model_kwargs = {"batch_effect": "donor"}
quake_smart_seq2_lung_model = cb.directi.fit_DIRECTi(
    quake_smart_seq2_lung,
    quake_smart_seq2_lung.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs
)
quake_smart_seq2_lung.latent = quake_smart_seq2_lung_model.inference(quake_smart_seq2_lung)

In [None]:
# Visualise the latent space for `cell_ontology_class` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "cell_ontology_class",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/cell_ontology_class.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cell_type1` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "cell_type1",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/cell_type1.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cluster` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "cluster",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/cluster.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `free_annotation` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "free_annotation",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/free_annotation.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `region` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "region",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/region.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `donor` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "donor",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/donor.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `gender` and export it as SVG.
ax = quake_smart_seq2_lung.visualize_latent(
    "gender",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Quake_Smart-seq2_Lung/gender.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Save the dataset into its build directory.
quake_smart_seq2_lung.write_dataset("build/lung/Quake_Smart-seq2_Lung/Quake_Smart-seq2_Lung.h5")

In [None]:
%%capture capio
quake_smart_seq2_lung_models = [quake_smart_seq2_lung_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    quake_smart_seq2_lung_models.append(cb.directi.fit_DIRECTi(
        quake_smart_seq2_lung, quake_smart_seq2_lung.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
quake_smart_seq2_lung_blast = cb.blast.BLAST(quake_smart_seq2_lung_models, quake_smart_seq2_lung)
quake_smart_seq2_lung_blast.save("build/lung/Quake_Smart-seq2_Lung")

In [None]:
# Write the stdout / stderr text captured in `capio` to log files.
for log_path, log_text in (
    ("build/lung/Quake_Smart-seq2_Lung/stdout.txt", capio.stdout),
    ("build/lung/Quake_Smart-seq2_Lung/stderr.txt", capio.stderr),
):
    with open(log_path, "w") as log_file:
        log_file.write(log_text)

In [None]:
# Run the project's self-projection helper on the BLAST object, passing the
# dataset's build directory (presumably where results are written -- confirm in utils).
utils.self_projection(quake_smart_seq2_lung_blast, "build/lung/Quake_Smart-seq2_Lung")

In [None]:
%%writefile build/lung/Quake_Smart-seq2_Lung/predictable.txt
cell_ontology_class
cell_type1
cluster
free_annotation
region

## Mouse aligned

In [None]:
# Create the output directory for the aligned mouse model. `exist_ok=True`
# replaces the separate exists() check, which could race with another process
# creating the directory between the check and the makedirs call.
os.makedirs("build/lung/ALIGNED_Mus_musculus_Lung", exist_ok=True)

lee = cb.data.ExprDataSet.read_dataset("../../Datasets/data/Lee/data.h5")

# Copy each Quake dataset's donor labels into a dataset-specific column; the
# aligned model fitted below lists these columns as separate batch effects.
quake_10x_lung.obs["quake_10x_lung_donor"] = quake_10x_lung.obs["donor"]
quake_smart_seq2_lung.obs["quake_smart_seq2_lung_donor"] = quake_smart_seq2_lung.obs["donor"]

# Merge the four mouse lung datasets, keyed by dataset name, also merging the
# "seurat_genes" uns slot.
mouse = cb.data.ExprDataSet.merge_datasets(dict(
    wang_lung=wang_lung,
    lee=lee,
    quake_10x_lung=quake_10x_lung,
    quake_smart_seq2_lung=quake_smart_seq2_lung
), merge_uns_slots=["seurat_genes"])

In [None]:
# List the metadata columns of the merged dataset and their dtypes.
mouse.obs.dtypes

In [None]:
# Fit the first aligned model with three batch-effect columns, each paired
# positionally with its own `lambda_reg` setting, then store the inferred
# embedding as the merged dataset's latent space.
opt_model_kwargs = {
    "batch_effect": [
        "dataset_name",
        "quake_10x_lung_donor",
        "quake_smart_seq2_lung_donor",
    ],
    "rmbatch_module_kwargs": [
        {"lambda_reg": 0.003},
        {"lambda_reg": 0.005},
        {"lambda_reg": 0.005},
    ],
}
mouse_model = cb.directi.fit_DIRECTi(
    mouse,
    mouse.uns["seurat_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs
)
mouse.latent = mouse_model.inference(mouse)

In [None]:
# Visualise the latent space for `cell_ontology_class` and export it as SVG.
ax = mouse.visualize_latent(
    "cell_ontology_class",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/ALIGNED_Mus_musculus_Lung/cell_ontology_class.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cell_type1` and export it as SVG.
ax = mouse.visualize_latent(
    "cell_type1",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/ALIGNED_Mus_musculus_Lung/cell_type1.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `dataset_name` and export it as SVG.
ax = mouse.visualize_latent(
    "dataset_name",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/ALIGNED_Mus_musculus_Lung/dataset_name.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `gender` and export it as SVG.
ax = mouse.visualize_latent(
    "gender",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/ALIGNED_Mus_musculus_Lung/gender.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Save the merged dataset into its build directory.
mouse.write_dataset("build/lung/ALIGNED_Mus_musculus_Lung/ALIGNED_Mus_musculus_Lung.h5")

In [None]:
%%capture capio
mouse_models = [mouse_model]
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    mouse_models.append(cb.directi.fit_DIRECTi(
        mouse, mouse.uns["seurat_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
mouse_blast = cb.blast.BLAST(mouse_models, mouse)
mouse_blast.save("build/lung/ALIGNED_Mus_musculus_Lung")

In [None]:
# Write the stdout / stderr text captured in `capio` to log files.
for log_path, log_text in (
    ("build/lung/ALIGNED_Mus_musculus_Lung/stdout.txt", capio.stdout),
    ("build/lung/ALIGNED_Mus_musculus_Lung/stderr.txt", capio.stderr),
):
    with open(log_path, "w") as log_file:
        log_file.write(log_text)

In [None]:
# Run the project's self-projection helper on the BLAST object, passing the
# aligned build directory (presumably where results are written -- confirm in utils).
utils.self_projection(mouse_blast, "build/lung/ALIGNED_Mus_musculus_Lung")

In [None]:
%%writefile build/lung/ALIGNED_Mus_musculus_Lung/predictable.txt
cell_ontology_class

---

# Human

## Madissoon_Lung

In [None]:
# Read the Madissoon_Lung dataset and hand it to `utils.peek` together with its
# build directory (presumably peek writes a summary there -- confirm in utils).
madissoon_lung = cb.data.ExprDataSet.read_dataset(
    "../../Datasets/data/Madissoon_Lung/data.h5"
)
utils.peek(madissoon_lung, "build/lung/Madissoon_Lung")
madissoon_lung.obs.head()  # preview of the per-cell metadata table

In [None]:
# List the metadata columns and their dtypes.
madissoon_lung.obs.dtypes

In [None]:
# Show the shape of the "scanpy_genes" entry, which is the gene set passed to
# fit_DIRECTi for this dataset (the mouse datasets use "seurat_genes" instead).
madissoon_lung.uns["scanpy_genes"].shape

In [None]:
# Fit the first DIRECTi model on the genes in uns["scanpy_genes"], with a
# `lambda_reg` override for the prob module and "donor" passed as the batch
# effect, then store the inferred embedding as the dataset's latent space.
opt_model_kwargs = {
    "prob_module_kwargs": {"lambda_reg": 0.01},
    "batch_effect": "donor",
}
madissoon_lung_model = cb.directi.fit_DIRECTi(
    madissoon_lung,
    madissoon_lung.uns["scanpy_genes"],
    **fixed_model_kwargs,
    **opt_model_kwargs
)
madissoon_lung.latent = madissoon_lung_model.inference(madissoon_lung)

In [None]:
# Visualise the latent space for `cell_ontology_class` and export it as SVG.
ax = madissoon_lung.visualize_latent(
    "cell_ontology_class",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Madissoon_Lung/cell_ontology_class.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `cell_type1` and export it as SVG.
ax = madissoon_lung.visualize_latent(
    "cell_type1",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Madissoon_Lung/cell_type1.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `donor` and export it as SVG.
ax = madissoon_lung.visualize_latent(
    "donor",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Madissoon_Lung/donor.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Visualise the latent space for `sample` and export it as SVG.
ax = madissoon_lung.visualize_latent(
    "sample",
    scatter_kws={"rasterized": True},
)
ax.get_figure().savefig(
    "build/lung/Madissoon_Lung/sample.svg",
    dpi=utils.DPI,
    bbox_inches="tight",
)

In [None]:
# Save the dataset into its build directory.
madissoon_lung.write_dataset("build/lung/Madissoon_Lung/Madissoon_Lung.h5")

In [None]:
%%capture capio
madissoon_lung_models = [madissoon_lung_model]
opt_model_kwargs = dict(prob_module_kwargs=dict(lambda_reg=0.01))
for i in range(1, cb.config.N_JOBS):
    print("==== Model %d ====" % i)
    madissoon_lung_models.append(cb.directi.fit_DIRECTi(
        madissoon_lung, madissoon_lung.uns["scanpy_genes"],
        **fixed_model_kwargs, **opt_model_kwargs,
        random_seed=i
    ))
madissoon_lung_blast = cb.blast.BLAST(
    madissoon_lung_models, madissoon_lung, 
)
madissoon_lung_blast.save("build/lung/Madissoon_Lung")

In [None]:
# Write the stdout / stderr text captured in `capio` to log files.
for log_path, log_text in (
    ("build/lung/Madissoon_Lung/stdout.txt", capio.stdout),
    ("build/lung/Madissoon_Lung/stderr.txt", capio.stderr),
):
    with open(log_path, "w") as log_file:
        log_file.write(log_text)

In [None]:
# Run the project's self-projection helper on the BLAST object, passing the
# dataset's build directory (presumably where results are written -- confirm in utils).
utils.self_projection(madissoon_lung_blast, "build/lung/Madissoon_Lung")

In [None]:
%%writefile "build/lung/Madissoon_Lung/predictable.txt"
cell_ontology_class
cell_type1