## Preparing data for monocle3

In [1]:
import os
import h5py
import scanpy as sc
from scipy import sparse

In [2]:
# All paths are resolved against the current working directory.
ROOT_DIR = os.path.abspath(os.curdir)

# Raw input and processed output both live in the "data" sub-directory.
RAW_PATH, DATA_PATH = (
    os.path.join(ROOT_DIR, "data", file_name)
    for file_name in ("raw_data.h5", "processed_data.h5")
)

In [3]:
# Load scanpy's `pbmc68k_reduced` example dataset.
adata = sc.datasets.pbmc68k_reduced()

# Re-encode the expression matrix as CSR: the export step below writes the
# matrix through its `data` / `indices` / `indptr` buffers.
expression_csr = sparse.csr_matrix(adata.X)
adata.X = expression_csr

#### Writing data

In [4]:
# Create the output directory up front: opening the HDF5 file for writing
# fails when the parent directory does not exist.
os.makedirs(os.path.dirname(DATA_PATH), exist_ok=True)

# Barcodes and feature names come out of pandas as NumPy *object* arrays of
# Python strings. h5py has no native HDF5 mapping for a plain object dtype,
# so the variable-length UTF-8 string dtype must be requested explicitly.
str_dtype = h5py.string_dtype(encoding="utf-8")

with h5py.File(DATA_PATH, "w") as f:
    # Expression matrix, stored as the three CSR buffers plus its axis
    # labels. Rows follow `barcodes` and columns follow `features`, so the
    # matrix shape can be recovered from the lengths of those two datasets.
    exp_matrix = f.create_group("expression_matrix")
    exp_matrix.create_dataset(
        "barcodes",
        data=adata.obs_names.to_numpy(dtype=object),
        dtype=str_dtype,
    )
    exp_matrix.create_dataset(
        "features",
        data=adata.var_names.to_numpy(dtype=object),
        dtype=str_dtype,
    )
    exp_matrix.create_dataset("data", data=adata.X.data)
    exp_matrix.create_dataset("indices", data=adata.X.indices)
    exp_matrix.create_dataset("indptr", data=adata.X.indptr)

    # UMAP embedding and Louvain cluster labels (labels converted to ints).
    f.create_dataset("UMAP", data=adata.obsm["X_umap"])
    f.create_dataset("Louvain", data=adata.obs["louvain"].to_numpy(dtype=int))