# Figure 5 Analysis Notebook (PE-Aged)

This notebook loads the PE-Aged `.h5ad` dataset, computes pseudotime with Palantir, and generates Figures 5A–5F.

## 1. Setup & Imports

In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import palantir
import matplotlib.pyplot as plt
from matplotlib import rcParams
from scipy.sparse import issparse
from scipy.ndimage import gaussian_filter1d
from matplotlib.colors import Normalize
from matplotlib.patches import Patch

# Input/output locations. The results folder is created here (no error if it
# already exists) so the figure-saving calls have somewhere to write.
DATA_DIR = "./data"
RESULTS_DIR = "./results/figure5_PE_Aged"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Global matplotlib font sizes, applied in one update instead of key by key.
rcParams.update({
    'font.size': 24,
    'axes.titlesize': 28,
    'axes.labelsize': 24,
    'xtick.labelsize': 22,
    'ytick.labelsize': 22,
    'legend.fontsize': 22,
    'legend.title_fontsize': 24,
})

## 2. Load Data & Compute Pseudotime

In [None]:
# Read the PE-Aged dataset from the data directory
adata = sc.read_h5ad(f"{DATA_DIR}/PE-Aged.h5ad")

# Root cell: the first cell whose `cell_niche` annotation equals 'Remote'.
# Abort early if the dataset contains no such cell.
is_remote = adata.obs["cell_niche"] == "Remote"
remote_candidates = adata.obs_names[is_remote]
if remote_candidates.empty:
    raise ValueError("No 'Remote' cells found for root selection.")
root_candidate = remote_candidates[0]

# Positional index of the root cell, recorded in `uns["iroot"]`
# (presumably for tools that read that key; nothing in this section uses it)
root_idx = int(adata.obs_names.get_loc(root_candidate))
adata.uns["iroot"] = root_idx

# Diffusion maps are computed from the stored PCA coordinates, labelled by cell name
pca_df = pd.DataFrame(adata.obsm["X_pca"], index=adata.obs_names)
dm_res = palantir.utils.run_diffusion_maps(pca_df, knn=30, n_components=30)

# Multiscale space derived from the diffusion-map result
ms_data = palantir.utils.determine_multiscale_space(dm_res)

# Palantir run started from the chosen root cell (passed by cell name)
pr_res = palantir.core.run_palantir(ms_data, root_candidate)

# Keep the per-cell pseudotime alongside the other cell annotations
adata.obs["pseudotime"] = pr_res.pseudotime
print("Pseudotime computed.")

## 3. Figure 5A: UMAP Colored by Pseudotime

In [None]:
# Figure 5A: UMAP embedding with every cell coloured by its pseudotime value
fig, ax = plt.subplots(figsize=(8,6))
umap_style = dict(color="pseudotime", size=25, alpha=0.9, show=False)
sc.pl.umap(adata, ax=ax, **umap_style)

# The colorbar is reached through the first collection drawn on the axes;
# enlarge its tick labels and give it an axis label.
cb = ax.collections[0].colorbar
cb.ax.tick_params(labelsize=24)
cb.ax.set_ylabel("Pseudotime", size=26)

# Title only; the UMAP axes themselves are hidden
ax.set_title("Figure 5A: Pseudotime Trajectory", fontsize=28)
ax.axis("off")

# Write the figure to the results directory, then display it
fig.tight_layout()
fig.savefig(f"{RESULTS_DIR}/Figure5A_umap_pseudotime.png", dpi=300, bbox_inches="tight")
plt.show()

## 4. Figure 5B: UMAP Pseudotime & Spatial Niche

In [None]:
# Figure 5B: two UMAP panels side by side, pseudotime (left) and spatial
# niche (right), with a shared niche legend placed to the right of the figure.

# Colour assigned to each spatial niche
niche_palette = {
    "Fibrotic Niche": "#8A2BE2",
    "Ultraproximal":  "#D62728",
    "Proximal":       "#FFBF00",
    "Intermediate":   "#1F77B4",
    "Remote":         "#2ca02c",
    "Others":         "gray"
}

fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16,6))

# Left panel: UMAP coloured by pseudotime
sc.pl.umap(adata, color="pseudotime", ax=ax1, size=25, alpha=0.9, show=False)
cb = ax1.collections[0].colorbar
cb.ax.tick_params(labelsize=24)
cb.ax.set_ylabel("Pseudotime", size=26)
ax1.set_title("Pseudotime Trajectory", fontsize=28)
ax1.axis("off")

# Right panel: UMAP coloured by cell_niche, one scatter call per niche.
# Masks are converted to NumPy arrays before indexing the embedding, and
# niches without any cells are skipped so they do not create empty legend
# entries.
for niche, color in niche_palette.items():
    mask = (adata.obs["cell_niche"] == niche).to_numpy()
    if not mask.any():
        continue
    coords = adata.obsm["X_umap"][mask]
    ax2.scatter(coords[:,0], coords[:,1],
                s=25, c=color, label=niche,
                alpha=0.9 if niche!="Others" else 0.5)

# Outline the fibrotic-niche cells with hollow markers drawn on top (zorder=10).
# This scatter has no label, so it does not appear in the legend.
mask_fib = (adata.obs["cell_niche"]=="Fibrotic Niche").to_numpy()
coords_fib = adata.obsm["X_umap"][mask_fib]
ax2.scatter(coords_fib[:,0], coords_fib[:,1],
            facecolor='none', edgecolor='#8A2BE2',
            s=35, linewidth=1, alpha=0.9, zorder=10)
ax2.set_title("Spatial Niche", fontsize=28)
ax2.axis("off")

# Legend in a fixed niche order. Labels are filtered together with the
# handles so each handle is always paired with its own niche name, even when
# some niche was not plotted.
order = ["Fibrotic Niche","Ultraproximal","Proximal","Intermediate","Remote","Others"]
handles, labels = ax2.get_legend_handles_labels()
hmap = dict(zip(labels, handles))
sorted_labels = [k for k in order if k in hmap]
sorted_handles = [hmap[k] for k in sorted_labels]
fig.legend(sorted_handles, sorted_labels,
           loc='center left', bbox_to_anchor=(0.85,0.5),
           title="Spatial Niche", fontsize=22, title_fontsize=24)

# Leave the right-hand 20% of the figure free for the legend
plt.tight_layout(rect=[0,0,0.8,1])
plt.savefig(f"{RESULTS_DIR}/Figure5B_combined_umap.png", dpi=300, bbox_inches="tight")
plt.show()

## 5. Figure 5C: Niche Composition Across Pseudotime Bins

In [None]:
# Figure 5C: stacked bar chart of niche proportions per pseudotime bin.

# Split the observed pseudotime range into `num_bins` equal-width bins and
# label each cell with its bin number (0 .. num_bins-1). `include_lowest`
# keeps the cell(s) at the minimum pseudotime inside the first bin.
num_bins = 50
bins = np.linspace(adata.obs["pseudotime"].min(),
                   adata.obs["pseudotime"].max(),
                   num_bins+1)
adata.obs["pseudotime_bin"] = pd.cut(adata.obs["pseudotime"],
                                     bins=bins,
                                     labels=range(num_bins),
                                     include_lowest=True)

# Count cells per (bin, niche). `observed=False` is passed explicitly so that
# bins containing no cells are kept as all-zero rows: the x ticks below assume
# exactly `num_bins` bars, and relying on the implicit default for categorical
# group keys is deprecated in recent pandas.
count_table = (adata.obs
               .groupby(["pseudotime_bin","cell_niche"], observed=False)
               .size()
               .unstack(fill_value=0))
# Row-normalise to proportions; empty bins give NaN here and are zeroed below
prop_table = count_table.div(count_table.sum(axis=1), axis=0)

# Keep the five named niches in plotting order; a niche column that is absent
# becomes all zeros.
# NOTE(review): any other niche label (e.g. "Others") is dropped after
# normalisation, so bars can sum to less than 1 - confirm this is intended.
niche_order = ["Remote","Intermediate","Proximal","Ultraproximal","Fibrotic Niche"]
prop_table = prop_table.reindex(columns=niche_order).fillna(0)
# One colour per entry of `niche_order`, in the same order
colors = ["#2ca02c","#1f77b4","#ffbf00","#d62728","#8a2be2"]

# Stacked bars, one per pseudotime bin
fig, ax = plt.subplots(figsize=(18,7))
prop_table.plot(kind="bar", stacked=True, color=colors, ax=ax, width=0.9)
ax.set_title("Figure 5C: Niche Composition Across Pseudotime Bins", fontsize=24, pad=20)
ax.set_xlabel("Pseudotime Bin", fontsize=20)
ax.set_ylabel("Proportion of Cells", fontsize=20)
ax.set_xticks(range(num_bins))
ax.set_xticklabels(range(num_bins), rotation=45, ha="right", fontsize=18)
ax.tick_params(axis="y", labelsize=18)
# Legend sits outside the axes, anchored at the upper-right corner
ax.legend(title="Cell Niche", bbox_to_anchor=(1.02,1), loc="upper left",
          fontsize=24, title_fontsize=26, frameon=False)
plt.tight_layout()
plt.savefig(f"{RESULTS_DIR}/Figure5C_niche_bar.png", dpi=300, bbox_inches="tight")
plt.show()

## 6. Figure 5D–5F: Gene Expression Dynamics

In [None]:
# Figures 5D-5F: for each gene, a smoothed expression-vs-pseudotime curve per
# niche (activated fibroblasts only) next to a bar showing which cell types
# express the gene.

# Genes plotted, niches compared, and the sorted cell_type2 labels used for
# the composition bar
genes = ["Col1a1","Fn1","Postn"]
niche_list = ["Fibrotic Niche","Ultraproximal","Proximal","Intermediate","Remote"]
celltype_cats = sorted(adata.obs["cell_type2"].unique())

# Dense cells x genes expression table. `.toarray()` is used instead of the
# `.A` shorthand, which recent SciPy releases no longer provide on sparse
# matrices.
expr_mat = pd.DataFrame(adata.X.toarray() if issparse(adata.X) else adata.X,
                        index=adata.obs_names, columns=adata.var_names)
# np.asarray accepts both a plain ndarray and a DataFrame stored in obsm
# (an ndarray has no `.values`). Not used further in this section.
spatial_coords = np.asarray(adata.obsm["spatial"])

for gene in genes:
    # The two panels show different quantities (expression vs. a single
    # horizontal bar), so they must not share a y axis: with a shared axis,
    # hiding the bar panel's y ticks would also hide the curve panel's ticks
    # and both panels would be forced onto the same y limits.
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16,6), sharey=False)

    # Left panel: pseudotime curve per niche
    for niche in niche_list:
        mask = (adata.obs["cell_type2"]=="Fibroblast (activated)") & (adata.obs["cell_niche"]==niche)
        sub = expr_mat.loc[mask, gene]
        pts = adata.obs.loc[mask, "pseudotime"]
        if sub.size == 0:
            continue
        if pts.min() == pts.max():
            # A single pseudotime value yields duplicate bin edges, which
            # pd.cut rejects; there is no curve to draw for such a niche.
            print(f"Skipping {niche} for {gene}: pseudotime range is empty.")
            continue
        # NOTE(review): bin edges span this niche's own pseudotime range, so
        # bin i covers a different pseudotime interval for each niche -
        # confirm that per-niche scaling of the x axis is intended.
        bins = np.linspace(pts.min(), pts.max(), num_bins+1)
        idx = pd.cut(pts, bins=bins, labels=range(num_bins), include_lowest=True).astype(int)
        # Mean expression per bin; bins without cells are set to 0 before
        # smoothing (NOTE(review): this pulls the smoothed curve toward 0
        # around empty bins).
        df_mean = sub.groupby(idx).mean().reindex(range(num_bins), fill_value=0)
        smoothed = gaussian_filter1d(df_mean.values, sigma=2)
        ax1.plot(range(num_bins), smoothed, label=niche)
    ax1.set_title(f"{gene}: Pseudotime Curve", fontsize=24)
    ax1.set_xlabel("Bin", fontsize=20)
    ax1.set_ylabel("Avg Expression", fontsize=20)
    ax1.legend(fontsize=14)
    ax1.tick_params(labelsize=16)

    # Right panel: among cells with non-zero expression of the gene, the
    # fraction belonging to each cell_type2, drawn as one stacked horizontal bar
    mask_expr = expr_mat[gene] > 0
    ct_counts = adata.obs.loc[mask_expr, "cell_type2"].value_counts(normalize=True).reindex(celltype_cats).fillna(0)
    left = 0
    for ct in celltype_cats:
        w = ct_counts[ct]
        ax2.barh(0, w, left=left, height=0.5, label=ct)
        left += w
    ax2.set_xlim(0,1)
    ax2.set_yticks([])
    ax2.set_title(f"{gene}: CellType2 %", fontsize=24)
    ax2.legend(bbox_to_anchor=(1.02,0.5), loc="center left", fontsize=16)
    ax2.set_xlabel("Proportion", fontsize=20)
    ax2.tick_params(labelsize=16)

    plt.tight_layout()
    plt.savefig(f"{RESULTS_DIR}/Figure5_{gene}_dynamics.png", dpi=300, bbox_inches="tight")
    plt.show()