In [38]:
import anndata
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
from mcDETECT.utils import *
from scipy.spatial import cKDTree
from sklearn.neighbors import NearestNeighbors

import warnings
warnings.filterwarnings("ignore")
sc.settings.verbosity = 0

In [39]:
# Color
# Continuous colormap (named "magma") interpolated over six hex stops, 256 levels
color_cts = clr.LinearSegmentedColormap.from_list(
    "magma",
    ["#000003", "#3B0F6F", "#8C2980", "#F66E5B", "#FD9F6C", "#FBFCBF"],
    N=256,
)

In [40]:
# Specify data, setting, and paths
# Per-sample x/y shifts, keyed by sample name.
# NOTE(review): presumably the offsets used to tile the samples into the shared
# global_x / global_y coordinate frame -- confirm against the merging notebook.
settings = {"Xenium_5K_BC": {"x_shift": 0, "y_shift": 7000},
            "Xenium_5K_OC": {"x_shift": 12000, "y_shift": 10000},
            "Xenium_5K_CC": {"x_shift": 26000, "y_shift": 8000},
            "Xenium_5K_LC": {"x_shift": 12000, "y_shift": 0},
            "Xenium_5K_Prostate": {"x_shift": 26000, "y_shift": 1000},
            "Xenium_5K_Skin": {"x_shift": 0, "y_shift": 1000}}

# Directory (with trailing slash) that every input/output path below is built from.
# Plain literal: the previous f-string had no placeholders.
output_dir = "../../output/merged_data/"

In [41]:
# Read data
# Load the AnnData file "adata_all_raw.h5ad" from output_dir; the cells below read
# global_x / global_y and cell_type_merged from its .obs table.
adata = sc.read_h5ad(output_dir + "adata_all_raw.h5ad")

In [42]:
# Determine plot size
# Scale the spatial extent so that the shorter edge maps to 10 figure units;
# int() truncates both dimensions to whole numbers.
x_coords, y_coords = adata.obs["global_x"], adata.obs["global_y"]
x_range = x_coords.max() - x_coords.min()
y_range = y_coords.max() - y_coords.min()
short_edge = min(x_range, y_range)

scale = 10 / short_edge
plot_figsize = tuple(int(extent * scale) for extent in (x_range, y_range))
print(f"Plot size: {plot_figsize}")

Plot size: (16, 10)


In [43]:
# Select tumor cells
# Boolean mask over all cells, then an independent copy so later in-place
# processing of the tumor subset does not touch `adata`.
is_malignant = adata.obs["cell_type_merged"] == "Malignant cell"
adata_tumor = adata[is_malignant].copy()
adata_tumor

AnnData object with n_obs × n_vars = 672964 × 5001
    obs: 'cell_id', 'global_x', 'global_y', 'transcript_counts', 'control_probe_counts', 'genomic_control_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'nucleus_count', 'segmentation_method', 'cell_type_merged', 'batch'
    var: 'gene_ids', 'feature_types', 'genome', 'gene'
    uns: 'batch_colors', 'cell_type_merged_colors'

In [44]:
# Normalize and log1p
# Neither call's return value is used: both transform adata_tumor in place.
# Re-running this cell without re-creating adata_tumor (previous cell) would
# therefore normalize / log-transform the already transformed data again.
sc.pp.normalize_total(adata_tumor, target_sum = 1e4)
sc.pp.log1p(adata_tumor)

### 1. Hypoxia and heat shock

In [45]:
def zscore_series(x):
    """Standardize `x` to zero mean / unit standard deviation, ignoring NaNs.

    Mean and standard deviation come from the NaN-aware numpy reductions, so
    NaN entries do not affect the statistics (they stay NaN in the result).
    A small constant (1e-8) is added to the standard deviation so that a
    constant input yields zeros rather than a division by zero.
    """
    centered = x - np.nanmean(x)
    return centered / (np.nanstd(x) + 1e-8)


def compute_expression_score(adata, key, target_genes, top_k = 2, binary_subtyping = True):
    """Score each cell by the expression of its `top_k`-th highest target gene.

    Parameters
    ----------
    adata
        AnnData-like object; `adata.var_names`, `adata[:, genes].X` and
        `adata.obs` are used. It is modified in place.
    key
        Name of the `adata.obs` column that receives the score.
    target_genes
        Candidate genes; those missing from `adata.var_names` are ignored.
    top_k
        1-based rank of the gene (sorted per cell by decreasing expression)
        whose expression is used as the score.
    binary_subtyping
        If True, also write `adata.obs[f"{key}_subtype"]`, an ordered
        categorical that is "Low" where the score is <= the median score and
        "High" otherwise.

    Returns
    -------
    The same `adata` object that was passed in.

    Raises
    ------
    ValueError
        If `top_k` is not between 1 and the number of target genes that are
        present in `adata.var_names`.
    """

    # filter target genes
    target_genes = [i for i in target_genes if i in adata.var_names]

    # A rank of 0 or below would wrap around (index -1 = lowest-expressed gene)
    # and a rank above the number of genes cannot be satisfied, so reject both.
    n_genes = len(target_genes)
    if not 1 <= top_k <= n_genes:
        raise ValueError(f"top_k must be between 1 and the number of target genes present in adata.var_names ({n_genes}), got top_k = {top_k}")

    # extract expression (densify sparse matrices)
    X = adata[:, target_genes].X
    if not isinstance(X, np.ndarray):
        X = X.toarray()

    # calculate the expression of the k-th top gene (per-cell descending sort)
    idx_sorted = np.argsort(-X, axis=1)
    kth_gene_index = idx_sorted[:, top_k - 1].reshape(-1, 1)
    scores = np.take_along_axis(X, kth_gene_index, axis=1).flatten()
    adata.obs[key] = scores

    # binary subtyping: median split, ties at the median go to "Low"
    if binary_subtyping:
        thr = adata.obs[key].median()
        adata.obs[f"{key}_subtype"] = pd.Categorical(np.where(adata.obs[key] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return adata

In [46]:
# Gene programs: one list of marker genes per stress program
gene_programs = {"hypoxia": ["HIF1A", "EPAS1", "NFE2L2", "CREB1", "RELA", "RELB", "NFKB1", "NFKB2"],
                 "heat_shock": ["HSP90AA1", "HSP90AB1", "HSPA1A", "HSPA1B", "HSPA6", "HSPA8", "HSPH1", "DNAJB1", "HSPB1", "HSPD1", "HSPE1"]}


def _save_expression_scatter(adata, color, out_path):
    """Save a bare spatial scatter (no ticks, labels, title or frame) of a continuous value.

    `color` is a gene name or an `adata.obs` column. The figure size and the
    colormap are read from the notebook-level `plot_figsize` and `color_cts`.
    """
    sc.set_figure_params(figsize = plot_figsize)
    ax = sc.pl.scatter(adata, x="global_x", y="global_y", color=color, size=1, color_map=color_cts, show=False)
    # shrink the colorbar to 15% of its default width
    cbar = ax.collections[0].colorbar
    pos = cbar.ax.get_position()
    cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_title("")
    for spine in ax.spines.values():
        spine.set_visible(False)
    plt.savefig(out_path, dpi = 300, bbox_inches = "tight")
    # close the figure so the loops below do not accumulate open figures
    plt.close()


for key, geneset in gene_programs.items():

    # only genes that are present in the data
    geneset = [gene for gene in geneset if gene in adata_tumor.var_names]

    # one expression map per gene
    for gene in geneset:
        _save_expression_scatter(adata_tumor, gene, output_dir + f"{key}_{gene}.jpeg")

    # one program score per rank: expression of the top_k-th highest gene of the program
    for top_k in range(1, len(geneset) + 1):
        score_key = f"{key}_{top_k}_genes"
        adata_tumor = compute_expression_score(adata_tumor, key = score_key, target_genes = geneset, top_k = top_k)
        _save_expression_scatter(adata_tumor, score_key, output_dir + f"{score_key}.jpeg")

### 2. Mechanical score

In [47]:
def compute_mechanical_crowding(adata_tumor, key, k = 10, binary_subtyping = True):
    """Score local crowding of each cell as the z-scored inverse mean kNN distance.

    Neighbors are searched among the cells of `adata_tumor` itself (its
    `global_x` / `global_y` columns); a cell is never counted as its own
    neighbor.

    Parameters
    ----------
    adata_tumor
        AnnData-like object with `obs["global_x"]` and `obs["global_y"]`.
        It is modified in place.
    key
        Name of the `obs` column that receives the z-scored crowding value.
    k
        Number of nearest neighbors to average over. If fewer than `k` other
        cells exist, all other cells are used.
    binary_subtyping
        If True, also write `obs[f"{key}_subtype"]`: "Low" where the score is
        <= the median score, "High" otherwise.

    Returns
    -------
    (clip_val, adata_tumor)
        `clip_val` is min(|min score|, |max score|), the symmetric bound used
        for `obs[f"{key}_clipped"]`; `adata_tumor` is the object passed in.

    Raises
    ------
    ValueError
        If fewer than two cells are given (no neighbor exists).
    AssertionError
        If the z-scores do not span both signs (e.g. all cells equally crowded).
    """

    # coordinates
    XY_tum = np.c_[adata_tumor.obs["global_x"].values, adata_tumor.obs["global_y"].values]
    n_cells = XY_tum.shape[0]
    if n_cells < 2:
        raise ValueError(f"Need at least 2 cells to compute crowding, got {n_cells}")

    # kNN among the tumor cells themselves
    n_neighbors = min(k, n_cells - 1)
    nbrs = NearestNeighbors(n_neighbors = n_neighbors, algorithm = "kd_tree").fit(XY_tum)

    # Query without explicit points: sklearn then returns, for every fitted
    # point, its neighbors excluding the point itself. Querying with XY_tum
    # would return each cell as its own nearest neighbor at distance 0 and
    # bias the mean distance towards zero.
    dist, _ = nbrs.kneighbors(return_distance = True)

    # mean distance to the nearest neighbors, floored to avoid division by zero
    mean_dist = np.maximum(dist.mean(axis = 1), 1e-6)

    # inverse distance, then z-score normalization
    crowding_raw = 1.0 / mean_dist
    adata_tumor.obs[key] = zscore_series(crowding_raw)

    # symmetric clipping: clip to the smaller of |min| and |max|
    vals = adata_tumor.obs[key].to_numpy()
    vmin, vmax = np.nanmin(vals), np.nanmax(vals)
    assert vmin < 0 and vmax > 0, f"Expected vmin < 0 and vmax > 0, got vmin = {vmin}, vmax = {vmax}"
    clip_val = np.min(np.abs((vmin, vmax)))
    adata_tumor.obs[f"{key}_clipped"] = adata_tumor.obs[key].clip(-clip_val, clip_val)

    # binary subtyping: median split, ties at the median go to "Low"
    if binary_subtyping:
        thr = adata_tumor.obs[key].median()
        adata_tumor.obs[f"{key}_subtype"] = pd.Categorical(np.where(adata_tumor.obs[key] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return clip_val, adata_tumor

In [48]:
key = "mechanical"

# compute score
clip_val, adata_tumor = compute_mechanical_crowding(adata_tumor, key = key, k = 10, binary_subtyping = True)

# Two panels, saved as "<column>.jpeg":
#   - the continuous score, drawn with the custom colormap and a narrowed colorbar
#   - the Low/High subtype, drawn with the default categorical colors
panels = [(f"{key}", {"color_map": color_cts}, True),
          (f"{key}_subtype", {}, False)]

for column, extra_kwargs, has_colorbar in panels:
    sc.set_figure_params(figsize = plot_figsize)
    ax = sc.pl.scatter(adata_tumor, x = "global_x", y = "global_y", color = column, size = 1, title = " ", show = False, **extra_kwargs)
    if has_colorbar:
        # shrink the colorbar to 15% of its default width
        cbar = ax.collections[0].colorbar
        pos = cbar.ax.get_position()
        cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])
    # strip grid, ticks, axis labels and frame
    ax.grid(False)
    ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
    for spine in ax.spines.values():
        spine.set_visible(False)
    plt.savefig(output_dir + f"{column}.jpeg", dpi = 300, bbox_inches = "tight")
    plt.close()

### 3. Immune attack

In [49]:
def compute_spatial_score(adata_all, adata_tumor, key, source_col = "cell_type_merged", source_values = ("T cell",), k = 20, radius = 50.0, sigma = None, binary_subtyping = True, neighbor_weight_col = None):
    """Score tumor cells by their spatial proximity to a set of source cells.

    Parameters
    ----------
    adata_all
        AnnData-like object holding all cells; `obs[source_col]`,
        `obs["global_x"]`, `obs["global_y"]` are read, and `.X` is read when
        `neighbor_weight_col` is given.
    adata_tumor
        AnnData-like object with `obs["global_x"]` / `obs["global_y"]`.
        Its `obs` is modified in place.
    key
        Prefix for every `obs` column written by this function.
    source_col
        `adata_all.obs` column used to pick the source cells.
    source_values
        A single label, or a list / tuple / set / array of labels, that marks
        a cell of `adata_all` as a source cell.
    k
        Number of nearest source cells considered for the weighted score.
    radius
        Neighborhood radius, in the units of `global_x` / `global_y`.
    sigma
        Gaussian kernel width for the weighted score; defaults to `radius / 2`.
    binary_subtyping
        If True, write the categorical subtype columns described below.
    neighbor_weight_col
        Gene name(s) in `adata_all`. If given, each source cell is weighted by
        its mean expression over these genes and the weighted score is computed.

    Columns written to `adata_tumor.obs`
    ------------------------------------
    f"{key}_neighbor_counts"
        Number of source cells within `radius`.
    f"log_{key}_neighbor_counts"
        log1p of the above.
    f"{key}_weighted" (only with `neighbor_weight_col`)
        z-score of: sum over the k nearest source cells within `radius` of
        (Gaussian kernel * source weight), divided by the square root of the
        number of those cells (at least 1).
    f"{key}_subtype" (only with `binary_subtyping`)
        "Away" when the neighbor count is 0, otherwise "Close".
    f"{key}_weighted_subtype" (only with both options)
        "Low" where the weighted score is <= its median, otherwise "High".

    Returns
    -------
    The same `adata_tumor` object that was passed in.
    """

    # select source cells (collections of labels -> isin, single label -> equality)
    if isinstance(source_values, (list, tuple, set, frozenset, np.ndarray, pd.Index, pd.Series)):
        src_mask = adata_all.obs[source_col].isin(list(source_values))
    else:
        src_mask = adata_all.obs[source_col] == source_values
    src_idx = np.where(src_mask.values)[0]

    # coordinates
    XY_src = np.c_[adata_all.obs["global_x"].values[src_idx],  adata_all.obs["global_y"].values[src_idx]]
    XY_tum = np.c_[adata_tumor.obs["global_x"].values, adata_tumor.obs["global_y"].values]
    n_src = XY_src.shape[0]
    n_tum = XY_tum.shape[0]

    # number of neighbors within radius (no source cells -> nothing is nearby)
    if n_src > 0:
        tree = cKDTree(XY_src)
        neighbor_counts = tree.query_ball_point(XY_tum, r = radius, return_length = True)
    else:
        neighbor_counts = np.zeros(n_tum, dtype = np.intp)
    adata_tumor.obs[f"{key}_neighbor_counts"] = neighbor_counts
    adata_tumor.obs[f"log_{key}_neighbor_counts"] = np.log1p(neighbor_counts)

    if neighbor_weight_col is not None:

        if n_src > 0:

            # retrieve weights for source cells: mean expression over the weight genes
            expr = adata_all[src_idx, neighbor_weight_col].X
            if not isinstance(expr, np.ndarray):
                expr = expr.toarray()
            W = expr.mean(axis = 1).ravel()
            W = np.where(np.isfinite(W), W, 0.0)

            # kNN search for distances and indices
            n_neighbors = min(k, n_src)
            nbrs = NearestNeighbors(n_neighbors = n_neighbors, algorithm = "kd_tree").fit(XY_src)
            dist, nn_ind = nbrs.kneighbors(XY_tum, return_distance = True)

            # apply radius mask
            within = dist <= radius

            # Gaussian kernel, zeroed outside the radius
            if sigma is None:
                sigma = radius / 2.0
            K = np.exp(-(dist ** 2) / (2.0 * (sigma ** 2)))
            K = K * within

            # apply weights
            W_neighbors = W[nn_ind]  # shape: (n_tumor, n_neighbors)
            contrib = K * W_neighbors

            # aggregate: sum of contributions divided by count ** alpha.
            # alpha = 0.5 sits between a plain sum (alpha = 0) and a mean (alpha = 1).
            alpha = 0.5
            denom = np.maximum(within.sum(axis = 1, keepdims = True), 1) ** alpha
            raw_score = (contrib.sum(axis = 1, keepdims = True) / denom).ravel()

        else:
            # no source cells at all: nothing contributes to any tumor cell
            raw_score = np.zeros(n_tum, dtype = float)

        # z-score normalization
        adata_tumor.obs[f"{key}_weighted"] = zscore_series(raw_score)

    # binary subtyping
    if binary_subtyping:
        counts = np.array(neighbor_counts)
        adata_tumor.obs[f"{key}_subtype"] = pd.Categorical(np.where(counts == 0, "Away", "Close"), categories = ["Away", "Close"], ordered = True)
        if neighbor_weight_col is not None:
            thr = adata_tumor.obs[f"{key}_weighted"].median()
            adata_tumor.obs[f"{key}_weighted_subtype"] = pd.Categorical(np.where(adata_tumor.obs[f"{key}_weighted"] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return adata_tumor

In [50]:
# Define proximity stress programs: program name -> source cell types
spatial_programs = {"immune_cell_proximity": ["CD4+ T cell", "CD8+ T cell", "T cell", "B cell", "Dendritic cell", "Myeloid cell", "Mast cell"],
                    "tcell_proximity": ["CD4+ T cell", "CD8+ T cell", "T cell"],
                    "tcell_attack": ["CD4+ T cell", "CD8+ T cell", "T cell"]}


def _save_proximity_scatter(adata, color, out_path, continuous):
    """Save a bare spatial scatter (no ticks, labels or frame) of an `adata.obs` column.

    With `continuous = True` the notebook-level colormap `color_cts` is used and
    the colorbar is narrowed; otherwise the default (categorical) colors are used.
    The figure size is read from the notebook-level `plot_figsize`.
    """
    sc.set_figure_params(figsize = plot_figsize)
    scatter_kwargs = {"color_map": color_cts} if continuous else {}
    ax = sc.pl.scatter(adata, x = "global_x", y = "global_y", color = color, size = 1, title = " ", show = False, **scatter_kwargs)
    if continuous:
        # shrink the colorbar to 15% of its default width
        cbar = ax.collections[0].colorbar
        pos = cbar.ax.get_position()
        cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("")
    ax.set_ylabel("")
    for spine in ax.spines.values():
        spine.set_visible(False)
    plt.savefig(out_path, dpi = 300, bbox_inches = "tight")
    # close the figure so the loop below does not accumulate open figures
    plt.close()


for key, src_vals in spatial_programs.items():

    if key == "tcell_attack":

        # compute score: T cells within a radius of 100, weighted by granzyme expression
        adata_tumor = compute_spatial_score(adata, adata_tumor, key = key, source_col = "cell_type_merged", source_values = src_vals, k = 20, radius = 100, binary_subtyping = True, neighbor_weight_col = ["GZMB", "GZMK", "GZMA"])

        # plot weighted score and its Low/High subtypes
        _save_proximity_scatter(adata_tumor, f"{key}_weighted", output_dir + f"{key}_weighted_score.jpeg", continuous = True)
        _save_proximity_scatter(adata_tumor, f"{key}_weighted_subtype", output_dir + f"{key}_weighted_subtype.jpeg", continuous = False)

    else:

        # compute score: unweighted neighbor counts within a radius of 50
        adata_tumor = compute_spatial_score(adata, adata_tumor, key = key, source_col = "cell_type_merged", source_values = src_vals, k = 20, radius = 50, binary_subtyping = True)

        # plot score (log1p number of neighbors) and the Away/Close subtypes
        _save_proximity_scatter(adata_tumor, f"log_{key}_neighbor_counts", output_dir + f"{key}_neighbor_counts.jpeg", continuous = True)
        _save_proximity_scatter(adata_tumor, f"{key}_subtype", output_dir + f"{key}_subtype.jpeg", continuous = False)

In [51]:
# Plot key genes
# One bare spatial expression map per marker gene, saved as "<gene>.jpeg"
for marker in ["EPCAM", "KRT20"]:
    sc.set_figure_params(figsize = plot_figsize)
    ax = sc.pl.scatter(adata_tumor, x="global_x", y="global_y", color=marker, color_map=color_cts, size=1, show=False)

    # shrink the colorbar to 15% of its default width
    cbar = ax.collections[0].colorbar
    pos = cbar.ax.get_position()
    cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])

    # strip grid, ticks, axis labels, title and frame
    ax.grid(False)
    ax.set(xticks=[], yticks=[], xlabel="", ylabel="", title="")
    for frame_edge in ax.spines.values():
        frame_edge.set_visible(False)

    plt.savefig(output_dir + f"{marker}.jpeg", dpi = 300, bbox_inches = "tight")
    plt.close()

In [52]:
# Promote the rank-2 score and subtype of each gene program to their final column names
final_names = {"hypoxia_2_genes": "hypoxia_score",
               "hypoxia_2_genes_subtype": "hypoxia_subtype",
               "heat_shock_2_genes": "heat_shock_score",
               "heat_shock_2_genes_subtype": "heat_shock_subtype"}
adata_tumor.obs = adata_tumor.obs.rename(columns = final_names)

# Drop every other hypoxia_* / heat_shock_* column, keeping only the four renamed ones
kept_cols = set(final_names.values())
cols_to_drop = [c for c in adata_tumor.obs.columns if c.startswith(("hypoxia_", "heat_shock_")) and c not in kept_cols]
adata_tumor.obs = adata_tumor.obs.drop(columns = cols_to_drop)

In [53]:
# Display the per-cell annotation columns currently stored on the tumor object
adata_tumor.obs.columns

Index(['cell_id', 'global_x', 'global_y', 'transcript_counts',
       'control_probe_counts', 'genomic_control_counts',
       'control_codeword_counts', 'unassigned_codeword_counts',
       'deprecated_codeword_counts', 'total_counts', 'cell_area',
       'nucleus_area', 'nucleus_count', 'segmentation_method',
       'cell_type_merged', 'batch', 'hypoxia_score', 'hypoxia_subtype',
       'heat_shock_score', 'heat_shock_subtype', 'mechanical',
       'mechanical_clipped', 'mechanical_subtype',
       'immune_cell_proximity_neighbor_counts',
       'log_immune_cell_proximity_neighbor_counts',
       'immune_cell_proximity_subtype', 'tcell_proximity_neighbor_counts',
       'log_tcell_proximity_neighbor_counts', 'tcell_proximity_subtype',
       'tcell_attack_neighbor_counts', 'log_tcell_attack_neighbor_counts',
       'tcell_attack_weighted', 'tcell_attack_subtype',
       'tcell_attack_weighted_subtype'],
      dtype='object')

In [54]:
# Save the scored tumor cells next to the input data.
# NOTE(review): "adata_atumor_scored" looks like a typo for "adata_tumor_scored";
# left unchanged because other notebooks may read this exact file name -- confirm.
adata_tumor.write_h5ad(output_dir + "adata_atumor_scored.h5ad")