In [None]:
from IPython.display import Markdown as md
import pegasus as pg

In [None]:
# Render the report heading as Markdown; the title carries the soloFeatures
# wildcard of the Snakemake job that executes this notebook.
report_title = f"# Batch Correction report for {snakemake.wildcards.soloFeatures}"
md(report_title)

## 1. Read in the data

In [None]:
# The first input file declared by the Snakemake rule is the dataset to load.
input_path = snakemake.input[0]
data = pg.read_input(input_path)

## 2. Remove lowly expressed genes

In [None]:
# Run pegasus' robust-gene identification on the loaded data.
# NOTE(review): percent_cells=0.05 is presumably a percentage of cells
# (i.e. 0.05 %, not 5 %) -- confirm against the pegasus documentation.
pg.identify_robust_genes(data, percent_cells=0.05)

## 3. Normalize to counts per 1e5 and log transform

In [None]:
# Normalize and log-transform with pegasus' default settings; the return
# value is not captured, so `data` is presumably updated in place.
# NOTE(review): the section heading states counts per 1e5 -- confirm this
# matches the default of pg.log_norm in the installed pegasus version.
pg.log_norm(data)

## 4. Select highly variable genes

In [None]:
# Select highly variable features with pegasus' default settings, then plot
# the selection. The plot call stays last so its result is the cell output.
pg.highly_variable_features(data)
pg.hvfplot(data, dpi=100)

## 5. Run PCA

In [None]:
# Compute PCA with pegasus' default settings, then draw the elbow plot.
# The plot call stays last so its result is the cell output.
pg.pca(data)
pg.elbowplot(data, dpi=100)

## 6. Run Scanorama to integrate the data

In [None]:
# Integrate the data with Scanorama using pegasus' default arguments.
# The cells below read the result under the representation name "scanorama".
# NOTE(review): the batch attribute is left at the pegasus default
# (presumably "Channel") -- confirm this is the intended batch key.
pg.run_scanorama(data)

## 7. Investigate Scanorama embedding for nuisance variables

In [None]:
# All settings for this step live under the "preprocess" section of the
# Snakemake config.
preprocess_cfg = snakemake.config["preprocess"]

# Gene module scores are optional. Treat a missing key and an empty
# `signatures:` entry (YAML null) the same way: nothing to score.
signatures = preprocess_cfg.get("signatures") or []
if isinstance(signatures, str):
    # A single signature given as a plain string must not be iterated
    # character by character.
    signatures = [signatures]
for sig in signatures:
    pg.calc_signature_score(data, sig)

# Variables to regress out of each embedding; also used by the comparison
# plots further down, so the name `regress_vars` must stay defined here.
regress_vars = preprocess_cfg["regress_out"]
for rep in ["pca", "scanorama"]:
    # The code below expects the regressed embedding under "<rep>_regressed".
    pg.regress_out(data, attrs=regress_vars, rep=rep)
    for r in [rep, f"{rep}_regressed"]:
        # Build a kNN graph and a UMAP for both the raw and the regressed
        # embedding; each UMAP gets its own basis name so none is overwritten.
        pg.neighbors(data, rep=r, n_jobs=snakemake.threads)
        pg.umap(data, rep=r, n_jobs=snakemake.threads, out_basis=f"umap_{r}")

## 8. Compare the integrated data to the original data

Each metric is displayed on eight plots in the following format:

|      | uncorrected | regressed | batch correction | batch correction + regression |
|------|-------------|-----------|------------------|-------------------------------|
| PCA  | p           | p         | p                | p                             |
| UMAP | p           | p         | p                | p                             |

In [None]:
# The four embeddings that form the top row of each figure; the bottom row
# shows the UMAP computed from each of them, in the same column order.
embeddings = ["pca", "pca_regressed", "scanorama", "scanorama_regressed"]
panel_bases = embeddings + [f"umap_{name}" for name in embeddings]

# One 2x4 figure per attribute: the batch label first, then every variable
# that was regressed out.
for attr in ["Channel", *regress_vars]:
    pg.scatter(
        data,
        attrs=attr,
        basis=list(panel_bases),  # fresh list per call, as in a literal
        dpi=100,
        nrows=2,
        ncols=4,
    )

## 9. Save the integrated data

In [None]:
# Write the same dataset once per named output of the Snakemake rule,
# h5ad first and zarr second.
for output_key in ("h5ad", "zarr"):
    pg.write_output(data, snakemake.output[output_key])