# Prepare data for downstream analyses

Processes an `AnnData` object containing the initial crypt-villus axis (CVA) predictions, preparing it for downstream analysis and figure generation. Min-max scaling is applied to normalize the axis scores.

**Pinned Environment:** [`envs/sc-spatial.yaml`](../../envs/sc-spatial.yaml)  

In [None]:
import os
from pathlib import Path
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as clr
from scipy.spatial import KDTree
import matplotlib.cm as cm
from matplotlib.colors import Normalize
import sys

In [None]:
# Put the directory two levels above the working directory on the import path
# so that the `config` package can be found (presumably the repository root).
project_root = Path.cwd().resolve().parents[1]
sys.path.append(str(project_root))

from config.paths import BASE_DIR

# The input file and the prepped output file share one export directory;
# create it (and any missing parents) if it does not exist yet.
h5ad_dir = BASE_DIR / "data/h5ad/export_10"
h5ad_dir.mkdir(parents=True, exist_ok=True)

input_file = h5ad_dir / "resolvi-corrected.h5ad"
output_file = h5ad_dir / "resolvi-corrected-prepped.h5ad"

# Note: ResolVI-adjusted data were tested but not used in the final manuscript.

In [None]:
# Load the dataset from `input_file` (defined in the setup cell above).
adata = sc.read_h5ad(input_file)

### Custom plotting function

In [None]:
def plot_spatial_highlight_zoom(
    adata, basis, label_key, fov=None, size=50, palette="viridis", vmin=-0.5, vmax=0.5
):
    """Scatter-plot observations at their 2-D coordinates, coloured by a score.

    The figure is shown with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    adata
        Object exposing ``obsm[basis]`` (column 0 is used as x, column 1 as y)
        and ``obs[label_key]``.
    basis
        Key into ``adata.obsm`` holding the coordinates.
    label_key
        Column of ``adata.obs`` with the continuous values to colour by; also
        used as the colourbar label.
    fov
        Optional ``(xmin, xmax, ymin, ymax)`` axis limits to zoom to. ``None``
        or an empty sequence leaves the limits automatic.
    size
        Marker size passed to ``Axes.scatter``.
    palette
        ``"zissou"`` selects the module-level ``zissou_colormap`` (looked up
        at call time, so it must be defined before the call); any other value
        is resolved with ``plt.get_cmap``.
    vmin, vmax
        Range mapped onto the colormap; values outside it are clipped to it.
    """
    # Extract spatial coordinates and continuous values
    coords = adata.obsm[basis]
    x_coords = coords[:, 0]
    y_coords = coords[:, 1]
    continuous_values = adata.obs[label_key].values

    # Clip to [vmin, vmax] so out-of-range values take the end colours
    continuous_values_clipped = np.clip(continuous_values, vmin, vmax)

    # Normalize values to fit colormap range
    norm = Normalize(vmin=vmin, vmax=vmax)

    # `cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
    # `plt.get_cmap` is the lookup that works on both old and new versions.
    if palette == "zissou":
        cmap = zissou_colormap
    else:
        cmap = plt.get_cmap(palette)

    # Convert normalized values to RGBA colors
    colors = cmap(norm(continuous_values_clipped))

    fig, ax = plt.subplots(figsize=(6, 2))
    ax.scatter(x_coords, y_coords, c=colors, s=size, edgecolors="none", alpha=0.85)

    # The scatter was given explicit RGBA colours, so the colourbar needs its
    # own mappable carrying the same colormap and normalization.
    sm = cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=ax, fraction=0.02, pad=0.04)
    cbar.set_label(label_key, fontsize=12)

    # Apply zooming if a field of view is provided. The explicit checks avoid
    # the ambiguous truth value of a NumPy array passed as `fov`.
    if fov is not None and len(fov) > 0:
        xmin, xmax, ymin, ymax = fov
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    # Strip ticks, axis labels, title and all spines for a bare image
    ax.set(xticks=[], yticks=[], xlabel="", ylabel="", title="")
    for spine in ax.spines.values():
        spine.set_visible(False)

    plt.show()

In [None]:
# Eleven hex colour stops, ordered from blue (#3A9AB2) through yellow to
# red (#F11B00); interpolated below into a continuous colormap.
zissou = [
    "#3A9AB2", "#6FB2C1", "#91BAB6", "#A5C2A3",
    "#BDC881", "#DCCB4E", "#E3B710", "#E79805",
    "#EC7A05", "#EF5703", "#F11B00",
]

# Used by `plot_spatial_highlight_zoom` when called with palette="zissou".
zissou_colormap = clr.LinearSegmentedColormap.from_list(name="Zissou", colors=zissou)


Prepare data for plotting to visually validate the crypt-villus (CV) axis scores and the effects of scaling:

In [None]:
# Pull out a single sample for the visual checks; the copy makes `cdata`
# independent of later changes to `adata`.
sample_mask = adata.obs["sample_id"] == "TIS09472_Control"
cdata = adata[sample_mask].copy()

## Crypt-villus axis scaling

Before scaling:

In [None]:
# Plot the unscaled axis scores for the selected sample.
# `fov` is (xmin, xmax, ymin, ymax); colours span the range 0-1.
plot_spatial_highlight_zoom(
    cdata,
    basis="spatial",
    label_key="crypt_villus_axis",
    fov=(2916, 4321, 5200, 5700),
    size=10,
    palette="zissou",
    vmin=0,
    vmax=1,
)

Perform scaling:

In [None]:
# This performs min-max scaling per entire dataset
adata.obs["crypt_villus_axis_scaled"] = (
    adata.obs["crypt_villus_axis"] - adata.obs["crypt_villus_axis"].min()
) / (adata.obs["crypt_villus_axis"].max() - adata.obs["crypt_villus_axis"].min())

After scaling:

In [None]:
# Re-subset the sample: the earlier `cdata` copy was taken before
# `crypt_villus_axis_scaled` was added to `adata.obs`.
cdata = adata[adata.obs["sample_id"] == "TIS09472_Control"].copy()

# NOTE(review): ymin is 5000 here, whereas the "before scaling" plot uses
# 5200 — confirm the differing field of view is intended for the comparison.
plot_spatial_highlight_zoom(
    cdata,
    basis="spatial",
    label_key="crypt_villus_axis_scaled",
    fov=(2916, 4321, 5000, 5700),
    size=10,
    palette="zissou",
    vmin=0,
    vmax=1,
)

In [None]:
# Keep only observations whose `Class` is "Epithelial". Taken after scaling,
# so the subset carries the `crypt_villus_axis_scaled` column as well.
epithelial_mask = adata.obs["Class"] == "Epithelial"
iec = adata[epithelial_mask].copy()

## Export data

In [None]:
# Save the full dataset, now including `crypt_villus_axis_scaled`, to
# `output_file` with gzip compression.
adata.write_h5ad(output_file, compression="gzip")

In [None]:
# Save the epithelial subset next to the full prepped dataset. The notebook
# never defines `output_dir`, so the export directory `h5ad_dir` is used.
iec.write_h5ad(h5ad_dir / "iec-subset-resolvi-v2.h5ad", compression="gzip")