# Refine and Visualize Annotations

Refine cell type annotations based on Leiden clustering, label transfer results, and validation with spatial localization within the tissue using customized visualization functions.

**Pinned Environment:** [`envs/sc-spatial.yaml`](../../envs/sc-spatial.yaml)  

In [None]:
import os
from pathlib import Path
import sys
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
import session_info

In [None]:
# Default figure geometry for the plots produced in this notebook.
plt.rcParams.update({"figure.figsize": (8, 8), "figure.dpi": 150})

## Paths

In [None]:
# Make the directory two levels above the current working directory importable
# so the `config` package can be resolved (presumably the repository root —
# this depends on where the notebook is launched from).
sys.path.append(str(Path.cwd().resolve().parents[1]))

# Must run after the sys.path change above.
from config.paths import BASE_DIR

# Read from export_04, write to export_07.
input_dir = BASE_DIR / "data/h5ad/export_04"
output_dir = BASE_DIR / "data/h5ad/export_07"
# Create the export directory (and missing parents); no error if it exists.
output_dir.mkdir(parents=True, exist_ok=True)

# Note: export_05 and export_06 are skipped — both were superseded by the finalized clustering & cell labeling workflow

## Import data

In [None]:
# Load the object that holds the `scanvi_labels_xenium` annotations.
scanvi_h5ad = input_dir / "adata-scanvi-labels.h5ad"
adata = sc.read_h5ad(scanvi_h5ad)

In [None]:
# Experimental group of every sample, keyed by `sample_id`.
control_samples = ["TIS09473_Control", "TIS09471_Control", "TIS09472_Control"]
trpv1_samples = ["TIS09475_Trpv1+", "TIS09474_Trpv1+", "TIS09476_Trpv1+"]
sample_to_group = {
    **dict.fromkeys(control_samples, "Control"),
    **dict.fromkeys(trpv1_samples, "Trpv1-cre"),
}

# Any sample_id missing from the mapping ends up as NaN in `group`.
adata.obs["group"] = adata.obs["sample_id"].map(sample_to_group)

# Number of cells per group (displayed as the cell output).
adata.obs["group"].value_counts()

## Visualize groupings

In [None]:
# UMAP colored by Leiden cluster, with cluster ids drawn on the embedding.
sc.pl.umap(adata, color="leiden", legend_loc="on data", title="", frameon=False)

In [None]:
# UMAP colored by the `scanvi_labels_xenium` column.
sc.pl.umap(
    adata,
    color="scanvi_labels_xenium",
)

In [None]:
# UMAP colored by experimental group (Control vs. Trpv1-cre).
sc.pl.umap(
    adata,
    color="group",
    title="",
    frameon=False,
)

## Create `adata.obs['cell_type']` annotation

In [None]:
# Store the label columns as categoricals.
# NOTE(review): this expects `cell_type` to already exist in the loaded
# object; it is re-initialized from `scanvi_labels_xenium` further down.
for column in ("cell_type", "scanvi_labels_xenium", "leiden_scVI_1.2"):
    adata.obs[column] = adata.obs[column].astype("category")

## Replace entries in adata.obs

In [None]:
# Harmonize label spellings in one pass. None of the new names is also an old
# name, so this is equivalent to applying the renames one after another.
scanvi_renames = {
    "Tuft": "Tuft_cell",
    "Goblet": "Mature_goblet",
    "Early_Enterocyte": "Early_enterocyte",
    "Neuron": "Neural_2",
}
adata.obs["scanvi_labels_xenium"] = adata.obs["scanvi_labels_xenium"].replace(
    scanvi_renames
)
# rename neural_1 cluster at end of script with leiden cluster

Apply subcluster mappings for ISC/Paneth and SMC1/SMC2 clusters:

In [None]:
# The scanvi label annotations are the starting point for `cell_type`;
# copy them so later edits leave `scanvi_labels_xenium` untouched.
scanvi_labels = adata.obs["scanvi_labels_xenium"]
adata.obs["cell_type"] = scanvi_labels.copy()

In [None]:
# Record which cells carry a real subcluster label *before* casting to str:
# the cast turns missing values into the literal string "nan", which would
# otherwise pass the `!= "Other"` test and overwrite valid cell types.
has_subcluster = adata.obs["subcluster_mapping"].notna()

# Plain strings allow labels that are not yet categories to be assigned.
adata.obs["subcluster_mapping"] = adata.obs["subcluster_mapping"].astype(str)
adata.obs["cell_type"] = adata.obs["cell_type"].astype(str)

# Every present subcluster label other than the "Other" placeholder wins over
# the current cell type.
mask = has_subcluster & (adata.obs["subcluster_mapping"] != "Other")
adata.obs.loc[mask, "cell_type"] = adata.obs.loc[mask, "subcluster_mapping"]

adata.obs["cell_type"] = adata.obs["cell_type"].astype("category")

In [None]:
# UMAP colored by the current `cell_type` annotation.
sc.pl.umap(
    adata,
    color="cell_type",
    frameon=False,
)

## Update annotations based on Leiden clustering results

This block of code updates cell type annotations in adata.obs["cell_type"] based on Leiden clustering results:

In [None]:
# Work on plain strings so arbitrary new labels can be written.
for column in ("leiden", "cell_type"):
    adata.obs[column] = adata.obs[column].astype(str)

# Leiden cluster id -> cell type that replaces the current annotation.
leiden_to_update = {
    "0": "Enterocyte_1",
    "1": "Early_enterocyte",
    "2": "Transit_Amplifying",
    "3": "Enterocyte_2",
    "13": "Immature_goblet",
    "10": "Mature_goblet",
    "15": "Tuft_cell",
    "21": "Neural_1",
    "22": "Enteroendocrine",
}

# Only cells in the listed clusters are overwritten; all others keep their label.
in_listed_cluster = adata.obs["leiden"].isin(list(leiden_to_update))
cluster_labels = adata.obs["leiden"].map(leiden_to_update)
adata.obs.loc[in_listed_cluster, "cell_type"] = cluster_labels

# Quick visual check of the three annotation columns.
print(adata.obs[["scanvi_labels_xenium", "leiden", "cell_type"]].head())

## Additional label refinement

Note: Cell type refinements were performed iteratively across analysis steps, based on marker expression, clustering, and spatial localization.

In [None]:
# Correct Pdgfrb+ Fibroblast: every cell in Leiden cluster 17 gets this label.
adata.obs["cell_type"] = adata.obs["cell_type"].astype(str)
condition = adata.obs["leiden"].eq("17")
adata.obs.loc[condition, "cell_type"] = "Fibroblast_Pdgfrb+"

In [None]:
# Relabel Leiden cluster 11 (the vascular endothelial cluster) as
# "Vascular Endothelial", leaving Pdgfrb+ fibroblasts untouched.
# (Originally noted as a correction of ILC labels.)
in_cluster_11 = adata.obs["leiden"] == "11"
is_pdgfrb_fibroblast = adata.obs["cell_type"] == "Fibroblast_Pdgfrb+"
condition = in_cluster_11 & ~is_pdgfrb_fibroblast
adata.obs.loc[condition, "cell_type"] = "Vascular Endothelial"

In [None]:
# leiden number 12 is a Fibroblast population
condition = adata.obs["leiden"] == "12"

# Register the new category only when it is missing: `add_categories` raises
# ValueError for a category that already exists, which would break this cell
# whenever it is re-run or the label is already present upstream.
cell_type_cat = adata.obs["cell_type"].astype("category")
if "Fibroblast_2" not in cell_type_cat.cat.categories:
    cell_type_cat = cell_type_cat.cat.add_categories(["Fibroblast_2"])
adata.obs["cell_type"] = cell_type_cat

adata.obs.loc[condition, "cell_type"] = "Fibroblast_2"

Consolidate the final `cell_type` labels, then update the `adata.obs['Class']` assignments to match:

In [None]:
# Final label consolidation. No new name is also an old name, so a single
# mapping gives the same result as applying the renames one at a time.
final_renames = {
    "Enterocyte_3": "Enterocyte_1",
    "Neural_2": "Fibroblast",
    "Neural_1": "Neural",
}
adata.obs["cell_type"] = adata.obs["cell_type"].replace(final_renames)

In [None]:
# Keep `cell_type` categorical and drop categories that no cell uses anymore.
adata.obs["cell_type"] = (
    adata.obs["cell_type"].astype("category").cat.remove_unused_categories()
)

In [None]:
# Cell type -> broad class lookup used to fill `adata.obs["Class"]`.
# Labels without an entry here come out as NaN when the mapping is applied.
subtype_to_class = {
    # Epithelial
    "Enterocyte_1": "Epithelial",
    "Early_enterocyte": "Epithelial",
    "Transit_Amplifying": "Epithelial",
    "Enterocyte_2": "Epithelial",
    "Enterocyte_3": "Epithelial",
    "Mature_goblet": "Epithelial",
    "Immature_goblet": "Epithelial",
    "Paneth": "Epithelial",
    "Enteroendocrine": "Epithelial",
    "Tuft_cell": "Epithelial",
    "ISC": "Epithelial",
    # Stromal
    "Myofibroblast": "Stromal",
    "Fibroblast": "Stromal",
    "Fibroblast_2": "Stromal",
    "Resting Fibroblast": "Stromal",
    "Fibroblast_Pdgfrb+": "Stromal",
    "Fibroblast_Pdgfra+": "Stromal",
    "Fibroblast_Ncam1": "Stromal",
    "Complement_Fibroblast": "Stromal",
    "Vascular Endothelial": "Stromal",
    "Lymphatic": "Stromal",
    "SMC_1": "Stromal",
    "SMC_2": "Stromal",
    # Immune
    "ILC": "Immune",
    "Macrophage": "Immune",
    "Monocyte": "Immune",
    "B-Cell": "Immune",
    "Cd4_T-Cell": "Immune",
    "Cd8_T-Cell_aa+": "Immune",
    "Cd8_T-Cell_ab+": "Immune",
    "T-Cell": "Immune",
    "T-Cell gd": "Immune",
    "MAIT": "Immune",
    "NK-Cell": "Immune",
    "cDC1": "Immune",
    "DC2": "Immune",
    "Eosinophil": "Immune",
    # Neural
    # "Neural" is the label cells actually carry at this point (Neural_1 is
    # renamed to Neural earlier in the notebook); without this key those cells
    # would get a missing Class. The older names are kept for compatibility.
    "Neural": "Neural",
    "Neural_1": "Neural",
    "Neural_2": "Neural",
}

# Derive the broad class of every cell from its cell type; cell types that are
# not keys of `subtype_to_class` yield a missing value.
cell_classes = adata.obs["cell_type"].map(subtype_to_class)
adata.obs["Class"] = cell_classes

## Custom visualization functions

In [None]:
def plot_umap_highlight(adata, label_key, groups, size=2, unlabeled="lightgray"):
    """Plot a UMAP with the selected groups highlighted and all others muted.

    Parameters
    ----------
    adata
        Object handed to ``sc.pl.umap``; ``adata.obs[label_key]`` holds the labels.
    label_key
        Column of ``adata.obs`` used to color the points.
    groups
        Labels to highlight. Colors are taken in order from the module-level
        ``palette`` and wrap around when there are more groups than colors.
    size
        Point size forwarded to ``sc.pl.umap``.
    unlabeled
        Color given to every label that is not listed in ``groups``.
    """
    # Index the palette modulo its length: pairing with zip() would silently
    # drop groups beyond the palette size and leave them without a color.
    color_dict = {
        group: palette[i % len(palette)] for i, group in enumerate(groups)
    }
    # Every remaining label present in the data is drawn in the muted color.
    for label in adata.obs[label_key].astype(str).unique():
        color_dict.setdefault(label, unlabeled)

    sc.pl.umap(
        adata, color=label_key, palette=color_dict, frameon=False, title="", size=size
    )
    plt.close()

In [None]:
def plot_spatial_highlight_zoom(
    adata, basis, label_key, groups, fov=None, size=50, unlabeled="lightgray"
):
    """Plot an embedding with selected groups highlighted, optionally zoomed.

    Parameters
    ----------
    adata
        Object handed to ``sc.pl.embedding``; ``adata.obs[label_key]`` holds
        the labels.
    basis
        Name of the embedding to draw, forwarded to ``sc.pl.embedding``.
    label_key
        Column of ``adata.obs`` used to color the points.
    groups
        Labels to highlight. Colors are taken in order from the module-level
        ``palette`` and wrap around when there are more groups than colors.
    fov
        Optional ``(xmin, xmax, ymin, ymax)`` axis limits; when omitted (or
        empty) the full extent is shown.
    size
        Point size forwarded to ``sc.pl.embedding``.
    unlabeled
        Color given to every label that is not listed in ``groups``.
    """
    # Index the palette modulo its length: pairing with zip() would silently
    # drop groups beyond the palette size and leave them without a color.
    color_dict = {
        group: palette[i % len(palette)] for i, group in enumerate(groups)
    }
    # Every remaining label present in the data is drawn in the muted color.
    for label in adata.obs[label_key].astype(str).unique():
        color_dict.setdefault(label, unlabeled)

    fig, ax = plt.subplots(figsize=(8, 8))
    sc.pl.embedding(
        adata,
        basis=basis,
        color=label_key,
        palette=color_dict,
        ax=ax,
        show=False,  # keep the figure open so the axis limits can be set below
        size=size,
        frameon=False,
        title="",
    )

    if fov:
        xmin, xmax, ymin, ymax = fov
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    plt.show()

In [None]:
# High-contrast colors, handed out in order to the highlighted groups by the
# plotting helpers in this notebook.
palette = [
    "red", "blue", "green", "orange", "cyan", "magenta", "blueviolet",
    "darkturquoise", "chartreuse", "crimson", "black", "sienna", "navy", "tomato",
]

## Visualize cluster-spatial embedding mappings

This section highlights selected cell types on the UMAP of the full AnnData object (`adata`) and then plots the same color-coded cell types on the spatial embedding of a single control sample (`bdata`) for visual validation of spatially resolved populations.

In [None]:
# Independent copy of a single control sample, used for the spatial plots.
is_validation_sample = adata.obs["sample_id"] == "TIS09472_Control"
bdata = adata[is_validation_sample].copy()

In [None]:
# Epithelial cell types to highlight; list order determines which palette
# color each one receives.
epithelial = [
    "Enterocyte_1", "Enterocyte_2", "Early_enterocyte",
    "ISC",
    "Mature_goblet", "Immature_goblet",
    "Paneth",
    "Transit_Amplifying",
    "Tuft_cell",
    "Enteroendocrine",
]

plot_umap_highlight(adata, label_key="cell_type", groups=epithelial, size=1)

In [None]:
# Same epithelial groups on the spatial embedding of the control sample,
# zoomed to the window given as (xmin, xmax, ymin, ymax).
plot_spatial_highlight_zoom(
    bdata, "spatial", "cell_type", epithelial, fov=(5000, 6000, 4000, 5000), size=175
)

In [None]:
# Create lists with groups of cells to create spatial embeddings for

# "Neural" is the label present in `cell_type` at this point: Neural_1 is
# renamed to Neural earlier in the notebook, so listing "Neural_1" here would
# leave the neural cells drawn in the muted, unhighlighted color.
neuromuscular = ["SMC_1", "SMC_2", "Neural"]
fibro = [
    "Fibroblast",
    "Fibroblast_2",
    "Fibroblast_Ncam1",
    "Fibroblast_Pdgfra+",
    "Fibroblast_Pdgfrb+",
    "Complement_Fibroblast",
    "Resting Fibroblast",
    "Myofibroblast",
]
vascular = ["Lymphatic", "Vascular Endothelial"]

In [None]:
# Highlight the neuromuscular groups on the UMAP of all samples.
plot_umap_highlight(adata, label_key="cell_type", groups=neuromuscular)

In [None]:
# Neuromuscular groups on the spatial embedding of the control sample,
# zoomed to the window given as (xmin, xmax, ymin, ymax).
plot_spatial_highlight_zoom(
    bdata, "spatial", "cell_type", neuromuscular, fov=(5000, 6000, 4000, 5000), size=200
)

## Export

Prepare data:

In [None]:
def assign_cell_type_colors(adata, key="cell_type"):
    """Store one HUSL color per category of ``adata.obs[key]``.

    The column is cast to categorical in place, and the hex colors are written
    to ``adata.uns[f"{key}_colors"]``. A short summary is printed; nothing is
    returned.
    """
    as_categorical = adata.obs[key].astype("category")
    adata.obs[key] = as_categorical
    n_labels = len(as_categorical.cat.categories)

    # One evenly spaced HUSL color per category, converted to hex strings.
    husl_colors = sns.color_palette("husl", n_colors=n_labels)
    adata.uns[f"{key}_colors"] = list(map(mcolors.to_hex, husl_colors))

    print(f"Assigned {n_labels} HUSL colors for `{key}`.")

# Give the final `cell_type` categories their export colors.
assign_cell_type_colors(adata, "cell_type")

In [None]:
# Final overview of the refined annotation.
plt.rcParams.update({"figure.figsize": (8, 8)})
sc.pl.umap(
    adata,
    color="cell_type",
    size=0.75,
    frameon=False,
)

Export:

In [None]:
# Destination of the refined object inside the export directory (kept as str).
filename = str(output_dir / "adata-scanvi-labels-refined.h5ad")

In [None]:
# Write the refined object to disk with gzip compression.
adata.write_h5ad(
    filename,
    compression="gzip",
)