In [1]:
import conducto as co
import commands

In [2]:
RAW = "/conducto/data/user/examples/genomics"
DIR = "/conducto/data/pipeline"

GET_DATA_CMD = f"""
set -ex

DIR={RAW}
if [[ ! -d "$DIR/genes.tsv" ]]; then 
    cd /tmp
    wget http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz -O data.tar.gz
    tar -xzvf data.tar.gz
    mkdir -p $DIR
    cp -prv filtered_gene_bc_matrices/hg19/* $DIR
else
    echo "Data already downloaded"
    find $DIR
fi
"""

img = co.Image(
    "python:3.8-slim",
    copy_dir=".",
    install_packages=["wget"],
    install_pip=["conducto", "numpy", "pandas", "scanpy", "ipdb", "leidenalg", "tabulate"]
)

In [3]:
%%html
<style>
.jp-OutputArea-output pre {
    white-space: pre;
}
</style>

In [8]:
root = co.Serial(title="PBMC3K pipeline", image=img, env={"PYTHONBREAKPOINT":"ipdb.set_trace"})
root

[1;34m/[0m

In [9]:
root["Download"] = co.Exec(GET_DATA_CMD)
root["Preprocess"] = co.Exec(commands.preprocess, RAW, f"{DIR}/processed.h5ad")
root["PCA"] = co.Exec(commands.pca, f"{DIR}/processed.h5ad", f"{DIR}/pca.h5ad")
root["Neighborhood"] = co.Exec(commands.neighborhood, f"{DIR}/pca.h5ad", f"{DIR}/neighborhood.h5ad")
root

[1;34m/[0m
├─0 [1;36mDownload[0m   set -ex\n\nDIR=/conducto/data/user/examples/genomics\nif [[ ! -d "$DIR/genes.tsv" ]]; then \n    cd /tmp\n    wget http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz -O data.tar.gz\n    tar -xzvf data.tar.gz\n    mkdir -p $DIR\n    cp -prv filtered_gene_bc_matrices/hg19/* $DIR\nelse\n    echo "Data already downloaded"\n    find $DIR\nfi
├─1 [1;36mPreprocess[0m   conducto commands.py preprocess --in_path=/conducto/data/user/examples/genomics --out_path=/conducto/data/pipeline/processed.h5ad
├─2 [1;36mPCA[0m   conducto commands.py pca --in_path=/conducto/data/pipeline/processed.h5ad --out_path=/conducto/data/pipeline/pca.h5ad
└─3 [1;36mNeighborhood[0m   conducto commands.py neighborhood --in_path=/conducto/data/pipeline/pca.h5ad --out_path=/conducto/data/pipeline/neighborhood.h5ad

In [10]:
markers = co.Parallel()
for method in "t-test", "wilcoxon", "logreg":
    markers[method] = co.Exec(commands.marker, method, f"{DIR}/neighborhood.h5ad", f"{DIR}/result-{method}.h5ad")
root["Markers"] = markers
root

[1;34m/[0m
├─0 [1;36mDownload[0m   set -ex\n\nDIR=/conducto/data/user/examples/genomics\nif [[ ! -d "$DIR/genes.tsv" ]]; then \n    cd /tmp\n    wget http://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz -O data.tar.gz\n    tar -xzvf data.tar.gz\n    mkdir -p $DIR\n    cp -prv filtered_gene_bc_matrices/hg19/* $DIR\nelse\n    echo "Data already downloaded"\n    find $DIR\nfi
├─1 [1;36mPreprocess[0m   conducto commands.py preprocess --in_path=/conducto/data/user/examples/genomics --out_path=/conducto/data/pipeline/processed.h5ad
├─2 [1;36mPCA[0m   conducto commands.py pca --in_path=/conducto/data/pipeline/processed.h5ad --out_path=/conducto/data/pipeline/pca.h5ad
├─3 [1;36mNeighborhood[0m   conducto commands.py neighborhood --in_path=/conducto/data/pipeline/pca.h5ad --out_path=/conducto/data/pipeline/neighborhood.h5ad
└─4 [1;34mMarkers[0m
  ├─ [1;36mt-test[0m   conducto commands.py marker --method=t-test --in_path=/conducto/data/pip

In [7]:
#root.launch()