In [None]:
import pegasus as pg

In [None]:
# Load the dataset passed in as the first Snakemake input.
data = pg.read_input(snakemake.input[0])

# Identify robust genes (filters out lowly expressed genes), then log-normalize.
pg.identify_robust_genes(data, percent_cells=0.05)
pg.log_norm(data)

# Select highly variable features and plot the selection.
pg.highly_variable_features(data)
pg.hvfplot(data, dpi=200)

# Compute a gene module score for each signature set.
for signature_name in (
    "cell_cycle_human",
    "apoptosis_human",
    "gender_human",
):
    pg.calc_signature_score(data, signature_name)

In [None]:
# PCA, visualized before and after regressing out nuisance signals.
# The attributes below must already exist in data.obs.
# NOTE(review): "percent_mito" and "n_counts" are presumably produced by an
# upstream QC step, and "G1/S" / "G2/M" by the cell-cycle signature score —
# confirm against the pipeline input.
sigs = ["percent_mito", "n_counts", "G1/S", "G2/M"]

pg.pca(data)
fig = pg.scatter(data, attrs=['Channel', *sigs], basis='pca', dpi=200, return_fig=True)
# pg.scatter returns a matplotlib Figure, which has no `title` property
# (fig.set(title=...) raises); a figure-level title is set with suptitle.
fig.suptitle("PCA before regressing out nuisance signals")

# pg.regress_out takes `attrs` (not scanpy's `keys`). It regresses the signals
# out of the PCA embedding and returns the key of the new embedding in
# data.obsm, so that key is what must be plotted; re-running pg.pca would only
# recompute the unregressed embedding.
pca_regressed_key = pg.regress_out(data, attrs=sigs)
fig = pg.scatter(data, attrs=['Channel', *sigs], basis=pca_regressed_key, dpi=200, return_fig=True)
fig.suptitle("PCA after regressing out nuisance signals")

# NOTE(review): steps that rely on the default rep='pca' do not see the
# regressed embedding; pass rep=pca_regressed_key where it should be used.
pg.elbowplot(data, dpi=200)

In [None]:
# Build the KNN graph on the default (PCA) embedding.
pg.neighbors(data)

# UMAP on the uncorrected embedding, for comparison.
pg.umap(data, n_jobs=snakemake.threads)
fig = pg.scatter(data, attrs=['Channel'], basis='umap', return_fig=True, dpi=200)
# pg.scatter returns a matplotlib Figure, which has no `title` property
# (fig.set(title=...) raises); a figure-level title is set with suptitle.
fig.suptitle("UMAP before batch correction")

# Run Scanorama to correct batch effects. It stores the corrected embedding in
# data.obsm and returns its key; without passing that key on, the steps below
# would silently fall back to the uncorrected rep='pca'.
scanorama_key = pg.run_scanorama(data)

# Rebuild the KNN graph on the batch-corrected embedding.
pg.neighbors(data, rep=scanorama_key)

# Rerun UMAP on the batch-corrected embedding (overwrites the 'umap' basis,
# so the saved file carries the corrected layout).
pg.umap(data, rep=scanorama_key, n_jobs=snakemake.threads)
fig = pg.scatter(data, attrs=['Channel'], basis='umap', return_fig=True, dpi=200)
fig.suptitle("UMAP after batch correction")

# Persist the processed dataset to the Snakemake-declared output.
pg.write_output(data, snakemake.output['h5ad'])