# PBMC Scanpy Workflow
This notebook demonstrates how to run the `annotate_anndata` helper with batching, caching, and guardrails for a small PBMC dataset.

In [None]:
from pathlib import Path
import scanpy as sc
from gpt_cell_annotator import BatchOptions, DiskAnnotationCache, GuardrailConfig, annotate_anndata

# Load the demo dataset and rank 5 genes per "leiden" group with the Wilcoxon method.
adata = sc.read_h5ad("data/demo/pbmc_demo.h5ad")
sc.tl.rank_genes_groups(
    adata,
    groupby="leiden",
    n_genes=5,
    method="wilcoxon",
)

# Cache location under the user's home directory ("~" is expanded by pathlib).
cache_dir = Path('~/.cache/gca/notebooks').expanduser()
cache = DiskAnnotationCache(cache_dir)

# Batching and guardrail settings, named here so they are easy to tweak.
batching = BatchOptions(size=24, concurrency=2)
guardrail_config = GuardrailConfig(min_marker_overlap=1)

result = annotate_anndata(
    adata,
    cluster_key="leiden",
    species="Homo sapiens",
    tissue="Peripheral blood",
    batch_options=batching,
    guardrails=guardrail_config,
    annotation_cache=cache,
)

# The last expression is rendered as the cell output.
result.report.summary

## Inspect annotations
The `ScanpyAnnotationResult` exposes the updated AnnData object and validation warnings.

In [None]:
# Show the first rows of `obs` for the cluster key and the two `gptca_*` columns
# (presumably added by `annotate_anndata`; confirm against the library docs).
annotation_columns = ["leiden", "gptca_label", "gptca_status"]
result.adata.obs[annotation_columns].head()