# Single-cell RNA-seq analysis

In [None]:
# Load libraries
import scanpy as sc
import celltypist
from snakemake.script import snakemake

In [None]:
# Resolve everything the workflow hands to this notebook up front:
# the first declared input and output of the Snakemake rule, plus the
# `celltypist_model` rule parameter consumed by the annotation step.
input_file, output_file = snakemake.input[0], snakemake.output[0]
celltypist_model = snakemake.params.celltypist_model

In [None]:
# Load the data from the Snakemake-provided input path into `adata`.
adata = sc.read(input_file)
# Bare expression so the notebook shows the object's summary as cell output.
adata

In [None]:
# Log-normalize the data: normalize per-cell totals to a target sum of
# 10,000 (`target_sum=1e4`), then apply log1p. Both calls modify `adata`.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Keep the log-normalized state under `.raw`; this assignment runs before
# the later scaling step modifies `adata`.
adata.raw = adata
# Bare expression so the notebook shows the object's summary as cell output.
adata

In [None]:
# Feature selection: identify highly variable genes with scanpy's default
# settings, then plot the result of that selection.
sc.pp.highly_variable_genes(adata)
sc.pl.highly_variable_genes(adata)

In [None]:
# Scale log-normalized counts to zero mean and unit variance.
# `max_value=10` clips the scaled values at 10.
sc.pp.scale(adata, max_value=10)

In [None]:
# PCA using the ARPACK SVD solver, followed by a plot of the variance
# ratio of the principal components on a log scale (`log=True`).
sc.tl.pca(adata, svd_solver="arpack")
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Neighbors: compute the neighborhood graph with scanpy's defaults.
# NOTE(review): presumably built on the PCA representation computed
# earlier in this notebook — confirm if a specific `use_rep` is intended.
sc.pp.neighbors(adata)

In [None]:
# Nonlinear dimensionality reduction: compute the UMAP embedding, then
# draw one panel per key listed below.
# These keys are not created by the cells visible in this notebook, so
# they are expected to already exist in the loaded object.
sc.tl.umap(adata)
qc_color_keys = ["pct_counts_mt", "doublet_score"]
sc.pl.umap(adata, color=qc_color_keys)

In [None]:
# Clustering: run Leiden with scanpy's defaults, then plot the clusters on
# the UMAP with labels drawn on the data. The "leiden" key is reused by
# the CellTypist annotation step later in this notebook.
# NOTE(review): recent scanpy releases warn that the default Leiden
# backend is going to change; consider pinning `flavor` explicitly if
# results must be reproducible across versions — confirm against the
# installed scanpy version.
sc.tl.leiden(adata)
sc.pl.umap(adata, color="leiden", legend_loc="on data")

In [None]:
# Cell type annotation with CellTypist.
# Majority voting is switched on and is given the existing "leiden"
# clustering as its over-clustering.
predictions = celltypist.annotate(
    adata,
    model=celltypist_model,
    over_clustering="leiden",
    majority_voting=True,
)
# One dot plot per label set, each compared against the Leiden clusters:
# first the per-cell predictions, then the majority-voted labels.
for label_key in ("predicted_labels", "majority_voting"):
    celltypist.dotplot(
        predictions,
        use_as_reference="leiden",
        use_as_prediction=label_key,
    )

In [None]:
# Visualize cell types on UMAP.
# Copy both CellTypist label columns into `adata.obs`, then plot the
# majority-voted labels directly on the embedding.
celltypist_labels = predictions.predicted_labels
for label_key in ("predicted_labels", "majority_voting"):
    adata.obs[label_key] = celltypist_labels[label_key]
sc.pl.umap(adata, color="majority_voting", legend_loc="on data")

In [None]:
# Save the analyzed data: write `adata`, including the label columns added
# to `adata.obs`, to the Snakemake-provided output path.
adata.write(output_file)