# ATAC-seq analysis

In [None]:
import scanpy as sc
import snapatac2 as snap
import celltypist
from snakemake.script import snakemake

In [None]:
# Pull this notebook's configuration from the Snakemake rule that runs it.
# CellTypist model identifier, taken from the rule's params.
celltypist_model = snakemake.params.celltypist_model
# First declared output: the path the processed dataset is written to.
output_file = snakemake.output[0]
# First declared input: the path of the dataset to analyze.
input_file = snakemake.input[0]

In [None]:
# Load the data
# Open the input read-only, then copy it to the output path. `adata` is bound
# to the object returned by copy(), so the later cells operate on the copy
# and the input file is only ever opened in read mode.
# NOTE(review): the read-only source handle is never closed explicitly; it is
# presumably released when the temporary is garbage-collected — confirm.
adata = snap.read(input_file, backed="r").copy(output_file)
# Bare expression: shown as the cell output in the notebook.
adata

In [None]:
# Dimensionality reduction
# Spectral embedding with SnapATAC2's default settings. The return value is
# not used, so the result is presumably written into `adata` in place
# (TODO confirm the storage key for the installed SnapATAC2 version).
snap.tl.spectral(adata)
# Bare expression: shown as the cell output in the notebook.
adata

In [None]:
# UMAP
# Compute a UMAP embedding with default settings, then plot it.
# interactive=False asks for a non-interactive figure.
snap.tl.umap(adata)
snap.pl.umap(adata, interactive=False)

In [None]:
# Clustering
# Build the k-nearest-neighbor graph first, then run Leiden clustering
# (both with default settings).
snap.pp.knn(adata)
snap.tl.leiden(adata)
# The plot colors cells by "leiden", i.e. it expects the clustering step to
# have stored its labels under that name on `adata`.
snap.pl.umap(adata, color="leiden", interactive=False)
# Bare expression: shown as the cell output in the notebook.
adata

In [None]:
# Calculate gene activity scores
# Build a gene-level matrix from `adata` using the hg38 genome definition
# shipped with SnapATAC2. The result is a separate object (`gene_matrix`);
# `adata` is not reassigned here.
gene_matrix = snap.pp.make_gene_matrix(adata, snap.genome.hg38)
# Bare expression: shown as the cell output in the notebook.
gene_matrix

In [None]:
# Preprocess the gene matrix
# Scale each cell to a total of 10,000, then apply log1p. Both return values
# are ignored, so the transformations are applied to `gene_matrix` itself.
# NOTE(review): this presumably matches the input format CellTypist expects
# (log1p of counts normalized to 10,000 per cell) — confirm in its docs.
sc.pp.normalize_total(gene_matrix, target_sum=1e4)
sc.pp.log1p(gene_matrix)
# Snapshot into `.raw`. This runs after normalization and log1p, so `.raw`
# holds the transformed values, not the original scores.
gene_matrix.raw = gene_matrix
# Bare expression: shown as the cell output in the notebook.
gene_matrix

In [None]:
# Celltypist for cell type annotation
# Use the model configured in the Snakemake rule (`celltypist_model`, read
# from snakemake.params above) rather than a hard-coded model file, so the
# workflow configuration actually controls which reference is applied.
predictions = celltypist.annotate(
    gene_matrix,
    model=celltypist_model,
    majority_voting=True,
    # Majority voting is done within the existing "leiden" clusters instead
    # of letting CellTypist compute its own over-clustering.
    over_clustering="leiden",
)
# Compare the Leiden clusters against the per-cell predictions and against
# the majority-voted labels, one dot plot each.
for label_column in ("predicted_labels", "majority_voting"):
    celltypist.dotplot(
        predictions,
        use_as_reference="leiden",
        use_as_prediction=label_column,
    )


In [None]:
# Transfer the CellTypist labels onto the ATAC object and show them on the UMAP.
# Both label columns are copied under the same names they have in the
# prediction table.
label_table = predictions.predicted_labels
for column in ("predicted_labels", "majority_voting"):
    adata.obs[column] = label_table[column]
# Color the embedding by the majority-voted cell type.
snap.pl.umap(adata, color="majority_voting", interactive=False)

In [None]:
# Close object (saves to disk)
# `adata` was created as a copy at the output path, so closing it finalizes
# the notebook's output file. Do not run further analysis on `adata` after
# this point.
adata.close()
# Displayed last so the cell output shows the object's state after closing.
adata