In [None]:
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "anndata",
#     "dask-expr",
#     "datashader",
#     # Pinned below 2.0: this notebook uses the decoupler 1.x API
#     # (dc.get_resource, dc.run_ora, dc.get_acts, dc.rank_sources_groups).
#     "decoupler<2",
#     "holoviews",
#     "hvplot",
#     "shapely",
#     "spatialpandas",
#     "scanpy",
#     "omnipath",
# ]
# ///

# Cell type annotation from marker genes

Reproducing the decoupler [vignette for annotation](https://decoupler-py.readthedocs.io/en/latest/notebooks/cell_annotation.html).

In [None]:
import scanpy as sc
import decoupler as dc
import numpy as np

In [None]:
import anndata as ad

# Load the input AnnData object from the working directory.
# Later cells read obs['leiden'] and draw a UMAP, so the file is expected to
# already contain a leiden clustering and a UMAP embedding.
adata = ad.read_h5ad(filename='adata.h5ad')

In [None]:
# Display the loaded AnnData object to check what it contains before annotating
adata

In [None]:
# Fetch the PanglaoDB marker-gene resource through decoupler.
# The following cells rely on its 'human', 'canonical_marker', 'human_sensitivity',
# 'cell_type' and 'genesymbol' columns.
markers = dc.get_resource('PanglaoDB')
markers

In [None]:
# Keep only canonical human markers whose human sensitivity exceeds 0.5
is_human = markers['human']
is_canonical = markers['canonical_marker']
is_sensitive = markers['human_sensitivity'] > 0.5
markers = markers[is_human & is_canonical & is_sensitive]

# Drop repeated (cell type, gene) pairs, keeping the first occurrence of each
markers = markers.drop_duplicates(subset=['cell_type', 'genesymbol'])
markers

In [None]:
# Run decoupler's over-representation analysis (ORA) of the marker sets on every cell.
# The marker table is passed as the network: 'cell_type' is the source column and
# 'genesymbol' the target column. The return value is not captured; the next cell
# reads the scores from adata.obsm['ora_estimate'].
# NOTE(review): per the decoupler docs, min_n is the minimum number of target genes a
# source needs in order to be tested, and use_raw=False uses adata.X rather than adata.raw.
dc.run_ora(
    mat=adata,
    net=markers,
    source='cell_type',
    target='genesymbol',
    min_n=3,
    verbose=True,
    use_raw=False
)

In [None]:
# Inspect the ORA scores stored on the AnnData object by the previous cell
adata.obsm['ora_estimate']

In [None]:
# Pull the ORA scores out of adata.obsm into their own object for plotting and ranking
acts = dc.get_acts(adata, obsm_key='ora_estimate')

# Scores are infinite where the p-value is 0; overwrite every non-finite entry
# with the largest finite score observed anywhere in the matrix.
finite_mask = np.isfinite(acts.X)
acts.X[~finite_mask] = np.nanmax(acts.X[finite_mask])

acts

In [None]:
# Sanity check on one cell type: show the 'NK cells' score on the UMAP next to the
# leiden clusters, then its per-cluster distribution as violins.
sc.pl.umap(acts, color=['NK cells', 'leiden'], cmap='RdBu_r')
sc.pl.violin(acts, keys=['NK cells'], groupby='leiden')

In [None]:
# Rank the cell-type scores for each leiden cluster, testing every cluster against
# the rest of the cells with the 't-test_overestim_var' method.
# The result is a table with (at least) 'group' and 'names' columns, used below.
df = dc.rank_sources_groups(acts, groupby='leiden', reference='rest', method='t-test_overestim_var')
df

In [None]:
# Number of candidate cell types to keep per cluster
n_ctypes = 3

# Map each cluster to the names in its first n_ctypes rows of the ranking table.
# NOTE(review): assumes df is already ordered best-first within each group - confirm.
ctypes_dict = (
    df.groupby('group')
    .head(n_ctypes)
    .groupby('group')['names']
    .apply(list)
    .to_dict()
)
ctypes_dict

In [None]:
# Heatmap of the candidate cell types (grouped by the cluster they were selected for)
# across all leiden clusters, with clusters ordered by a dendrogram.
# standard_scale='var' rescales each cell type to the [0, 1] range (subtract the
# minimum, divide by the maximum). That is min-max scaling, not a z-score, so the
# colorbar is labelled accordingly.
sc.pl.matrixplot(acts, ctypes_dict, 'leiden', dendrogram=True, standard_scale='var',
                 colorbar_title='Min-max scaled scores', cmap='RdBu_r')

In [None]:
# Compare the per-cluster score distributions of a hand-picked set of cell types
sc.pl.violin(acts, keys=['Gamma delta T cells', 'B cells', 'Platelets', 'Enterocytes', 'NK cells'], groupby='leiden')

In [None]:
# Build a cluster -> cell type lookup from the first row of each cluster in the
# ranking table.
# NOTE(review): relies on df being ordered best-first within each group - confirm.
annotation_dict = (
    df.groupby('group')
    .head(1)
    .set_index('group')['names']
    .to_dict()
)
annotation_dict

In [None]:
# Label every cell with the cell type chosen for its leiden cluster.
# A cluster missing from annotation_dict raises KeyError rather than passing silently.
adata.obs['cell_type'] = [
    annotation_dict[cluster_id] for cluster_id in adata.obs['leiden']
]

# Show the resulting annotation on the UMAP
sc.pl.umap(adata, color='cell_type')

In [None]:
# Save the annotated object (now carrying obs['cell_type']) next to the input file
adata.write(filename='adata-annotated.h5ad')