# Figure 3 Analysis Notebook

This notebook reconstructs Figure 3 (3A, 3C, 3E) entirely from the raw Xenium outputs in `./data/`.

## 0. Setup & Imports

In [None]:
import os
from pathlib import Path
import math
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import matplotlib.pyplot as plt
import cv2

from skimage.transform import rescale
from skimage.morphology import remove_small_objects, remove_small_holes, opening, closing, disk
from scipy.ndimage import distance_transform_edt
from spatialdata import SpatialData
from spatialdata_io.readers.xenium import xenium, xenium_aligned_image

# Destination for every exported figure panel. Missing parent directories are
# created as needed and an already-existing directory is left untouched.
output_folder = Path("./results") / "figure3"
os.makedirs(output_folder, exist_ok=True)

## 1. Load SpatialData Without Zarr

In [None]:
def load_xenium_sample(sample_name: str, data_dir: Path) -> SpatialData:
    """Resolve the per-sample input files and hand them to the ``xenium`` reader.

    Args:
        sample_name: Suffix embedded in every input file name
            (``morphology_<sample_name>.ome.tif``, ``cells_<sample_name>.parquet``, ...).
        data_dir: Directory that contains the five per-sample files.

    Returns:
        The object returned by ``xenium`` for the resolved paths.
    """
    # NOTE(review): spatialdata-io documents `xenium` as taking the path of a
    # Xenium output directory plus boolean switches; confirm that the installed
    # version really accepts these per-file keyword arguments.
    reader_inputs = {
        "image": data_dir / f"morphology_{sample_name}.ome.tif",
        "matrix": data_dir / f"transcripts_{sample_name}.parquet",
        "cell_boundaries": data_dir / f"cell_boundaries_{sample_name}.parquet",
        "cells": data_dir / f"cells_{sample_name}.parquet",
        "nucleus_boundaries": data_dir / f"nucleus_boundaries_{sample_name}.parquet",
    }
    return xenium(**reader_inputs)

# Example load for PE-Aged
# The sample name is defined once so the tables and the image below can never
# be taken from two different samples after a partial edit.
data_dir = Path("./data")
sample_name = "PE-Aged"
sdata = load_xenium_sample(sample_name, data_dir)
print("Tables:", list(sdata.tables.keys()))

# Add aligned H&E image
# NOTE(review): the second parameter of `xenium_aligned_image` is named
# `alignment_file` in spatialdata-io, and the first argument here is the
# morphology OME-TIFF; confirm that the transcripts parquet and the morphology
# image are the intended inputs for an aligned H&E image.
he_image = xenium_aligned_image(
    data_dir / f"morphology_{sample_name}.ome.tif",
    data_dir / f"transcripts_{sample_name}.parquet",
)
sdata.images["he_image_aligned"] = he_image
print("Images:", list(sdata.images.keys()))

## 2. Figure 3A: H&E Transformation & Overlay

In [None]:
# 2.1 Preprocess H&E image
# Move the leading axis to the end so the array matches the `channel_axis=-1`
# convention used by `rescale` below.
img = np.transpose(sdata.images["he_image_aligned"].values, (1, 2, 0))
# A single counter-clockwise quarter turn over the first two axes; this is
# index-for-index the same result as mirroring left-right, rotating by 90
# degrees and then mirroring top-bottom.
img = np.rot90(img, k=1)
# Shrink both spatial axes by a factor of 2.5; the last axis is left unscaled.
scale_factor = 1 / 2.5
img_rescaled = rescale(
    img,
    scale_factor,
    anti_aliasing=True,
    channel_axis=-1,
)

# 2.2 Segment pale-blue fibers
def segment_pale_blue(image, lower=(90,30,40), upper=(130,255,255), min_size=500):
    """Return a boolean mask of the pixels whose HSV colour lies in a given range.

    Args:
        image: RGB image, either ``uint8`` or floating point scaled to [0, 1].
        lower: Inclusive lower HSV bound handed to ``cv2.inRange``.
        upper: Inclusive upper HSV bound handed to ``cv2.inRange``.
        min_size: Size threshold used twice: connected foreground regions
            smaller than this are removed, and holes smaller than this are
            filled.

    Returns:
        Boolean mask with the same height and width as ``image``.
    """
    image = np.asarray(image)
    if image.dtype == np.uint8:
        # Already 8-bit: multiplying by 255 would overflow, so use it as is.
        rgb8 = np.ascontiguousarray(image)
    else:
        # Clip before the cast; values outside [0, 1] would otherwise wrap
        # around modulo 256 and turn into arbitrary colours.
        rgb8 = (255 * np.clip(image, 0.0, 1.0)).astype(np.uint8)

    hsv = cv2.cvtColor(rgb8, cv2.COLOR_RGB2HSV)
    in_range = cv2.inRange(hsv, np.array(lower), np.array(upper))

    # Morphological opening drops thin speckles before the size-based cleanup.
    opened = opening(in_range, disk(3)).astype(bool)
    without_specks = remove_small_objects(opened, min_size)
    mask = remove_small_holes(without_specks, min_size)
    return mask

# Segment the downscaled image with the default colour bounds.
fibers_mask = segment_pale_blue(img_rescaled)

# 2.3 Plot overlay
fig, ax = plt.subplots(figsize=(8, 8))
# Base layer: the rescaled image, drawn with the first row at the bottom.
ax.imshow(img_rescaled, origin="lower")
# Overlay: the whole boolean mask is drawn semi-transparently, so both the
# True and the False pixels receive a tint from the colormap.
ax.imshow(fibers_mask, origin="lower", cmap="jet", alpha=0.4)
ax.set_axis_off()
ax.set_title("Figure 3A: Segmented Fibers Overlay", fontsize=16)
fig.savefig(
    output_folder / "Figure3A_overlay.png",
    dpi=300,
    bbox_inches="tight",
)
plt.show()

## 3. Figure 3C: Cell Niche Distribution (100% Stacked Bar)

In [None]:
# Assume the cells table carries obs['cell_niche'] after xenium->AnnData conversion.
# Work on an independent copy of the table so that later edits to `adata`
# cannot leak back into `sdata`.
adata = sdata.tables["cells"].copy()
# Niche labels are a per-cell annotation and therefore live in `.obs`.
# Indexing the AnnData object itself with a string selects observations by
# name instead of returning the column, so the lookup must go through `.obs`.
adata.obs["cell_niche"] = sdata.tables["cells"].obs["cell_niche"]

# Compute percent per niche
niches = ["Fibrotic Niche","Ultraproximal","Proximal","Intermediate","Remote"]
# Column whose categories form the stacked segments of every bar.
# NOTE(review): with "cell_niche" each per-niche bar is trivially 100% of its
# own niche, because the tallied column is the same one used for filtering.
# The per-niche bars were presumably meant to show another per-cell category
# (e.g. a cell-type column) - confirm and set it here.
composition_col = "cell_niche"

df = {}
for niche in niches:
    in_niche = adata.obs["cell_niche"] == niche
    df[niche] = adata.obs.loc[in_niche, composition_col].value_counts(normalize=True)
# Reference bar: composition over all cells, regardless of niche.
df["All"] = adata.obs[composition_col].value_counts(normalize=True)

# One row per bar, one column per category; categories absent from a bar get 0.
df_percent = pd.DataFrame(df).T.fillna(0)

fig, ax = plt.subplots(figsize=(10, 6))
df_percent.plot(kind='bar', stacked=True, colormap='tab10', ax=ax)
ax.set_ylabel("Proportion")
ax.legend(bbox_to_anchor=(1.02,1), loc='upper left')
ax.set_title("Figure 3C: Cell Niche Distribution", fontsize=16)
fig.tight_layout()
# bbox_inches='tight' keeps the legend, which sits outside the axes, inside
# the saved image and matches how the other panels are exported.
fig.savefig(output_folder / "Figure3C_stacked_bar.png", dpi=300, bbox_inches='tight')
plt.show()

## 4. Figure 3E: Subtype Dotplot

In [None]:
# Subdivision logic here (assumes adata.obs['cell_type2'] is populated)
# Marker genes keyed by the cell type they are used to identify. The mapping
# is passed to the dotplot as `var_names`.
marker_genes = {
    "Cardiomyocyte": ["Ryr2","Myl2","Nppb","Myh7"],
    "Fibroblast":    ["Fstl1","Col1a1","Postn"],
    "Endothelial":   ["Cd36","Pecam1","Vwf","Emcn"],
    "Epicardial":    ["Gpx3"],
    "Smooth muscle": ["Myh11","Acta2"],
    "Macrophage":    ["C1qa","Cd68"],
    "Lymphocyte":    ["Ptprc"],
    "Pericyte":      ["Rgs5"],
}

# Dotplot by cell_type2
# show=False keeps the figure open so it can be titled and saved below.
sc.pl.dotplot(
    adata,
    var_names=marker_genes,
    groupby='cell_type2',
    standard_scale='var',
    figsize=(10,6),
    show=False
)
# The dotplot figure is made of several Axes (main plot plus legends), and
# `plt.title` would only label whichever of them is current. A figure-level
# title is used instead so the heading sits above the whole panel.
plt.suptitle("Figure 3E: Subtype Dotplot", fontsize=16)
plt.savefig(output_folder / "Figure3E_dotplot.png", dpi=300, bbox_inches='tight')
plt.show()