# Compute ATAC QC metrics

In [None]:
# Load libraries
import snapatac2 as snap
import pandas as pd
from snakemake.script import snakemake

In [None]:
# Pull everything this notebook needs from the Snakemake job object:
# thread count first, then the output path, then the inputs.
n_jobs = snakemake.threads
output_file = snakemake.output[0]
fragment_file = snakemake.input["fragments"]
barcodes_file = snakemake.input["barcodes"]
# The blacklist is looked up with a None fallback, so it may be absent from params.
blacklist_file = snakemake.params.get("blacklist", None)

In [None]:
# Build the barcode whitelist from the first column of the header-less barcodes file
barcode_table = pd.read_csv(barcodes_file, header=None)
whitelist = barcode_table[0].tolist()
whitelist

In [None]:
# Import the fragments file into `adata`, restricted to the barcode whitelist;
# the Snakemake output path is passed as `file`.
adata = snap.pp.import_fragments(
    fragment_file=fragment_file,
    whitelist=whitelist,
    chrom_sizes=snap.genome.hg38,
    sorted_by_barcode=False,
    file=output_file,
    n_jobs=n_jobs,
)
adata

In [None]:
# Draw the fragment size distribution as a non-interactive figure
snap.pl.frag_size_distr(
    adata,
    interactive=False,
)

In [None]:
# Score every cell for TSS enrichment (TSSe) against the hg38 annotation
snap.metrics.tsse(
    adata,
    snap.genome.hg38,
    n_jobs=n_jobs,
)
adata

In [None]:
# Per-cell TSSe score versus number of unique fragments, as a non-interactive figure
snap.pl.tsse(
    adata,
    interactive=False,
)

In [None]:
# Add the cell-by-bin (tile) count matrix to the object
snap.pp.add_tile_matrix(
    adata,
    n_jobs=n_jobs,
)
adata

In [None]:
# Pick the variable features that the scrublet step requires,
# passing along the blacklist from params (None when not provided)
snap.pp.select_features(
    adata,
    blacklist=blacklist_file,
    n_jobs=n_jobs,
)
adata

In [None]:
# Run scrublet to detect doublets
snap.pp.scrublet(
    adata,
    n_jobs=n_jobs,
)
adata

In [None]:
# Closing the object is what saves it to disk; the trailing expression
# then displays the object's representation after the close.
adata.close()
adata