In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

#### Convert to snRNAseq samples
##### Load the raw dataset from the local data directory and write an unprocessed copy to an h5ad file

In [None]:
fpath = "./data/"  # directory prefix of the input file
file_accession = "sample"  # base name of the input; also used to name the output file

# Read the input and immediately save a copy, before any processing, as
# "<file_accession>_pre.h5ad" in the current working directory.
# NOTE(review): the input path carries no file extension; scanpy documents that
# extension-less names are treated as keys resolved under sc.settings.writedir --
# confirm this resolves to the intended file.
raw_data = sc.read(f"{fpath}{file_accession}")
sc.write(f"./{file_accession}_pre.h5ad", raw_data)

#### Prepare workflow
##### Same steps as the `cellxgene prepare` command

In [None]:
## Step 1: Calculate QC metrics and store in the anndata object
# inplace=True asks scanpy to attach the metrics to `raw_data` itself instead of
# returning them, so this cell mutates `raw_data` and has no output value.
sc.pp.calculate_qc_metrics(raw_data, inplace=True)

In [None]:
## Step 2: Normalize with a very vanilla recipe
# copy=True requests a new object, so the result is bound to `normalized_data`
# and every later step works on it rather than on `raw_data`.
# NOTE(review): the Seurat recipe presumably expects raw (non-logarithmized)
# counts as input -- confirm against the data being loaded.
normalized_data = sc.pp.recipe_seurat(raw_data, copy=True)

In [None]:
## Step 3: Do some basic preprocessing to run PCA and compute the neighbor graph
# Both calls use scanpy's default parameters and update `normalized_data` in place.
# PCA runs first; the neighbor graph is built afterwards on the same object.
sc.pp.pca(normalized_data)
sc.pp.neighbors(normalized_data)

In [None]:
## Step 4: Infer clusters with the Louvain algorithm
# Runs with default parameters on `normalized_data`, after the neighbor graph
# has been computed in the previous step.
# NOTE(review): cluster labels are presumably stored under obs["louvain"]
# (scanpy's default key) -- confirm for the installed version.
sc.tl.louvain(normalized_data)

In [None]:
## Step 5: Compute tsne and umap embeddings
# The step description promises both layouts, but previously only UMAP was
# computed. t-SNE is now computed as well so the saved file carries both
# embeddings. Both calls use scanpy's defaults and update `normalized_data`
# in place; UMAP relies on the neighbor graph computed in Step 3.
sc.tl.tsne(normalized_data)
sc.tl.umap(normalized_data)

In [None]:
## Save the processed object
# The output lands in the current working directory as "<file_accession>.h5ad".
processed_path = f"./{file_accession}.h5ad"
sc.write(processed_path, normalized_data)