In [2]:
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
from mcDETECT.utils import *
from scipy.stats import spearmanr
from sklearn.neighbors import NearestNeighbors

import warnings
warnings.filterwarnings("ignore")
sc.settings.verbosity = 0

In [3]:
# Continuous colormap: six anchor colors (dark to light) interpolated to 256 levels
magma_anchors = ["#000003", "#3B0F6F", "#8C2980", "#F66E5B", "#FD9F6C", "#FBFCBF"]
color_cts = clr.LinearSegmentedColormap.from_list("magma", magma_anchors, N=256)

In [4]:
# Specify data, setting, and paths
# One (sample name, x_shift, y_shift) triple per sample. The loop below iterates
# over the sample names only; the shift values are not read in this notebook
# (presumably per-sample offsets on the merged canvas -- TODO confirm).
sample_shifts = [
    ("Xenium_5K_BC", 0, 7000),
    ("Xenium_5K_OC", 12000, 10000),
    ("Xenium_5K_CC", 26000, 8000),
    ("Xenium_5K_LC", 12000, 0),
    ("Xenium_5K_Prostate", 26000, 1000),
    ("Xenium_5K_Skin", 0, 1000),
]
settings = {name: {"x_shift": x_shift, "y_shift": y_shift} for name, x_shift, y_shift in sample_shifts}

# Input / output folders for the merged dataset (relative to the notebook location)
data_dir = "../../data/merged_data/"
output_dir = "../../output/merged_data/"

In [5]:
# Load the merged AnnData object (all samples) from the merged-data folder
adata = sc.read_h5ad(f"{data_dir}adata_all_raw.h5ad")

In [6]:
# Determine plot size: scale the bounding box of all cells so that its shorter
# edge maps to 10 figure units, then truncate both edges to whole numbers.
global_x, global_y = adata.obs["global_x"], adata.obs["global_y"]
x_range = global_x.max() - global_x.min()
y_range = global_y.max() - global_y.min()
short_edge = min(x_range, y_range)
scale = 10 / short_edge

# int() truncates toward zero, so the aspect ratio is only approximately preserved
plot_figsize = tuple(int(extent * scale) for extent in (x_range, y_range))
print(f"Plot size: {plot_figsize}")

Plot size: (16, 10)


In [7]:
# Keep only cells annotated as malignant; copy so later edits do not touch `adata`
is_malignant = adata.obs["cell_type_merged"] == "Malignant cell"
adata_tumor = adata[is_malignant].copy()
adata_tumor

AnnData object with n_obs × n_vars = 672964 × 5001
    obs: 'cell_id', 'global_x', 'global_y', 'transcript_counts', 'control_probe_counts', 'genomic_control_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'nucleus_count', 'segmentation_method', 'x_pixel', 'y_pixel', 'cell_type_merged', 'batch'
    var: 'gene_ids', 'feature_types', 'genome', 'gene'
    uns: 'batch_colors', 'cell_type_merged_colors'

In [8]:
# Normalize and log1p
# Both calls are used for their in-place effect on adata_tumor (return values are discarded).
# Order matters: library-size normalization to a total of 1e4 first, log1p second.
sc.pp.normalize_total(adata_tumor, target_sum = 1e4)
sc.pp.log1p(adata_tumor)

### 1. Granule data and pathway enrichment scores

In [9]:
# Initialization
# Per-cell granule counts and ssGSEA scores are stored per sample; this loop copies
# them into the merged tumor object, one batch (sample) at a time.
adata_tumor.obs["granule_count"] = 0
adata_tumor.obs["log_granule_count"] = 0.0
all_cols_nuc, all_cols_cyto, all_cols_sg = None, None, None
initialized = False

for data in settings.keys():

    print(f"========== Processing {data}... ==========")

    # batch mask
    batch_mask = adata_tumor.obs["batch"] == data
    n_batch_cells = int(batch_mask.sum())

    # paths
    data_dir_tmp = f"../../data/{data}/"
    utils_dir = "../../data/_utils/"

    # read granule data
    adata_tmp = sc.read_h5ad(data_dir_tmp + "intermediate_data/adata.h5ad")
    adata_tumor_tmp = adata_tmp[adata_tmp.obs["cell_type_merged"] == "Malignant cell"].copy()
    granule_adata = sc.read_h5ad(data_dir_tmp + "processed_data/granule_adata_no_mcDETECT.h5ad")
    print(f"{granule_adata.n_obs} granules detected in {adata_tumor_tmp.n_obs} malignant cells.")

    # All assignments below are positional (.values / .to_numpy()), so the per-sample
    # malignant cells must match the merged batch subset one-to-one. Check the size
    # BEFORE writing anything so a mismatch cannot leave a half-updated table.
    # NOTE(review): equal length does not prove equal ordering -- this still assumes the
    # merged object keeps each sample's cells in the per-sample file order; confirm.
    if n_batch_cells != adata_tumor_tmp.n_obs:
        raise ValueError(f"Batch '{data}': merged adata_tumor has {n_batch_cells} cells for this batch, "
                         f"but the per-sample file has {adata_tumor_tmp.n_obs} malignant cells. "
                         "Positional assignment would be misaligned.")

    # granule count per cell (cells without any granule get 0)
    granule_counts = granule_adata.obs.groupby("cell_id").size()
    adata_tumor_tmp.obs["granule_count"] = adata_tumor_tmp.obs["cell_id"].map(granule_counts).fillna(0).astype(int)
    adata_tumor_tmp.obs["log_granule_count"] = np.log1p(adata_tumor_tmp.obs["granule_count"])

    # map granule count to merged adata
    adata_tumor.obs.loc[batch_mask, "granule_count"] = adata_tumor_tmp.obs["granule_count"].values
    adata_tumor.obs.loc[batch_mask, "log_granule_count"] = adata_tumor_tmp.obs["log_granule_count"].values

    # read ssGSEA scores (one table per compartment; prefix keeps column names distinct)
    scores_nuc = pd.read_parquet(data_dir_tmp + "processed_data/ssgsea_hallmark_nuclear.parquet")
    scores_nuc = scores_nuc.add_prefix(prefix = "nuclear_")

    scores_cyto = pd.read_parquet(data_dir_tmp + "processed_data/ssgsea_hallmark_cytoplasmic.parquet")
    scores_cyto = scores_cyto.add_prefix(prefix = "cytoplasmic_")

    scores_sg = pd.read_parquet(data_dir_tmp + "processed_data/ssgsea_hallmark_sg.parquet")
    scores_sg = scores_sg.add_prefix(prefix = "sg_")

    # initialize column names once (taken from the first batch processed)
    if not initialized:
        all_cols_nuc = list(scores_nuc.columns)
        all_cols_cyto = list(scores_cyto.columns)
        all_cols_sg = list(scores_sg.columns)
        for col in (all_cols_nuc + all_cols_cyto + all_cols_sg):
            if col not in adata_tumor.obs.columns:
                adata_tumor.obs[col] = np.nan
        initialized = True

    # every score table needs exactly one row per malignant cell of this batch
    for label, scores in (("nuclear", scores_nuc), ("cytoplasmic", scores_cyto), ("sg", scores_sg)):
        if scores.shape[0] != n_batch_cells:
            raise ValueError(f"Batch '{data}': merged adata_tumor has {n_batch_cells} cells for this batch, "
                             f"but the {label} ssGSEA table has {scores.shape[0]} rows. "
                             "Positional assignment would be misaligned.")

    # assign into merged data; columns are selected BY NAME in the first batch's order,
    # so a later batch with reordered columns is still written to the right place
    # (and a batch with missing columns fails loudly with a KeyError)
    adata_tumor.obs.loc[batch_mask, all_cols_nuc] = scores_nuc[all_cols_nuc].to_numpy()
    adata_tumor.obs.loc[batch_mask, all_cols_cyto] = scores_cyto[all_cols_cyto].to_numpy()
    adata_tumor.obs.loc[batch_mask, all_cols_sg] = scores_sg[all_cols_sg].to_numpy()

184531 granules detected in 102180 malignant cells.
421705 granules detected in 160250 malignant cells.
258680 granules detected in 221355 malignant cells.
317757 granules detected in 44624 malignant cells.
210224 granules detected in 95429 malignant cells.
246190 granules detected in 49126 malignant cells.


In [10]:
# Plot granule count per cell (log scale, clipped)
# Clip to the 0.1% / 99.9% quantiles so a few extreme cells do not stretch the color scale
q_low, q_high = 0.001, 0.999
vals = adata_tumor.obs["log_granule_count"].values
vmin, vmax = np.quantile(vals, [q_low, q_high])
adata_tumor.obs["log_granule_count_clip"] = np.clip(vals, vmin, vmax)

sc.set_figure_params(figsize = plot_figsize)
ax = sc.pl.scatter(
    adata_tumor,
    x = "global_x",
    y = "global_y",
    color = "log_granule_count_clip",
    size = 1,
    color_map = color_cts,
    show = False,
)

# shrink the colorbar to 15% of its default width
cbar = ax.collections[0].colorbar
pos = cbar.ax.get_position()
cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])

# strip grid, ticks, labels, title and frame so only the tissue map remains
ax.grid(False)
ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "", title = "")
for side in ax.spines:
    ax.spines[side].set_visible(False)

plt.savefig(output_dir + "granule_count_per_cell.png", dpi = 300, bbox_inches = "tight")
plt.close()

### 2. Stress scores

In [12]:
# ==================== Functions for computing stress scores ==================== #

# z-score normalization
def zscore_series(x):
    """Standardize ``x`` to zero mean and unit standard deviation, ignoring NaNs.

    The mean and standard deviation are computed with NaN-aware reductions, and
    1e-8 is added to the standard deviation so constant input yields zeros
    instead of a division by zero. NaN entries stay NaN in the result.
    """
    centered = x - np.nanmean(x)
    spread = np.nanstd(x) + 1e-8
    return centered / spread

# expression-based scores
def compute_expression_score(adata, key, target_genes, top_k = 2, binary_subtyping = True):
    """Score each cell by the expression of its ``top_k``-th highest target gene.

    Parameters
    ----------
    adata : AnnData-like object; modified in place (``adata.obs`` gains new columns).
    key : name of the ``obs`` column that receives the score.
    target_genes : candidate genes; genes absent from ``adata.var_names`` are ignored.
    top_k : 1-based rank of the gene whose expression is used as the score
        (1 = highest-expressed target gene in that cell, 2 = second highest, ...).
    binary_subtyping : if True, also write ``f"{key}_subtype"``, an ordered
        categorical that is "Low" where the score is <= the median and "High" otherwise.

    Returns
    -------
    The same ``adata`` object that was passed in.

    Raises
    ------
    ValueError : if ``top_k`` is smaller than 1.
    IndexError : if fewer than ``top_k`` target genes are present in ``adata``.
    """

    # A rank below 1 would turn into a negative index and silently return the
    # LOWEST-expressed genes instead of failing.
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    # filter target genes
    target_genes = [gene for gene in target_genes if gene in adata.var_names]
    if top_k > len(target_genes):
        raise IndexError(f"top_k = {top_k} exceeds the number of target genes present in adata "
                         f"({len(target_genes)})")

    # extract expression (densify if the matrix is sparse)
    X = adata[:, target_genes].X
    if not isinstance(X, np.ndarray):
        X = X.toarray()

    # Sorting -X ascending orders every row from highest to lowest expression;
    # column top_k - 1 is then the top_k-th highest value, negated back.
    scores = -np.sort(-X, axis = 1)[:, top_k - 1]
    adata.obs[key] = scores

    # binary subtyping: median split, ties at the median count as "Low"
    if binary_subtyping:
        thr = adata.obs[key].median()
        adata.obs[f"{key}_subtype"] = pd.Categorical(np.where(adata.obs[key] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return adata

# mechanical crowding score
def compute_mechanical_crowding(adata_tumor, key, k = 10, binary_subtyping = True):
    """Score local crowding of each tumor cell from its distance to other tumor cells.

    The raw score is the inverse of the mean distance to the ``k`` nearest OTHER
    tumor cells (the mean distance is floored at 1e-6), z-scored across all cells.

    Parameters
    ----------
    adata_tumor : AnnData-like object with ``obs["global_x"]`` and ``obs["global_y"]``;
        modified in place.
    key : name of the ``obs`` column that receives the z-scored crowding score.
    k : number of nearest neighbours averaged per cell; must be smaller than the
        number of cells because a cell is not counted as its own neighbour.
    binary_subtyping : if True, also write ``f"{key}_subtype"`` ("Low" where the
        score is <= the median, "High" otherwise).

    Returns
    -------
    (clip_val, adata_tumor) : ``clip_val`` is the symmetric bound used for
        ``f"{key}_clipped"``, i.e. min(|min score|, |max score|).
    """

    # coordinates (tumor cells only -- non-tumor cells are not part of this score)
    XY_tum = np.c_[adata_tumor.obs["global_x"].values, adata_tumor.obs["global_y"].values]

    # kNN among tumor cells
    nbrs = NearestNeighbors(n_neighbors = k, algorithm = "kd_tree").fit(XY_tum)

    # Query WITHOUT passing the points again: sklearn then returns the neighbours of
    # each fitted point and does not count the point as its own neighbour. Passing
    # XY_tum explicitly makes every cell its own nearest neighbour at distance 0,
    # which shrinks the mean and effectively averages only k - 1 real neighbours.
    dist, _ = nbrs.kneighbors(return_distance = True)

    # mean distance to k nearest neighbors, floored to avoid division by zero
    mean_dist = np.maximum(dist.mean(axis = 1), 1e-6)

    # inverse distance, then z-score normalization
    crowding_raw = 1.0 / mean_dist
    adata_tumor.obs[key] = zscore_series(crowding_raw)

    # symmetric clipping: use the smaller of |min| and |max| as the bound on both sides
    vals = adata_tumor.obs[key].to_numpy()
    vmin, vmax = np.nanmin(vals), np.nanmax(vals)
    assert vmin < 0 and vmax > 0, f"Expected vmin < 0 and vmax > 0, got vmin = {vmin}, vmax = {vmax}"
    clip_val = np.min(np.abs((vmin, vmax)))
    adata_tumor.obs[f"{key}_clipped"] = adata_tumor.obs[key].clip(-clip_val, clip_val)

    # binary subtyping: median split on the unclipped score
    if binary_subtyping:
        thr = adata_tumor.obs[key].median()
        adata_tumor.obs[f"{key}_subtype"] = pd.Categorical(np.where(adata_tumor.obs[key] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return clip_val, adata_tumor

# immune attack scores
def compute_spatial_score(adata_all, adata_tumor, key, source_col = "cell_type_merged", source_values = ("T cell",), k = 20, radius = 50.0, sigma = None, binary_subtyping = True, neighbor_weight_col = None):
    """Score each tumor cell by its spatial proximity to a set of source cells.

    Always written to ``adata_tumor.obs``:
      * ``f"{key}_neighbor_counts"``     -- number of source cells within ``radius``
      * ``f"log_{key}_neighbor_counts"`` -- log1p of that count

    If ``neighbor_weight_col`` is given, additionally ``f"{key}_weighted"``: for the
    (at most) ``k`` nearest source cells that lie within ``radius``, a Gaussian
    distance kernel is multiplied by the source cell's mean expression over
    ``neighbor_weight_col``, summed, divided by the square root of the number of
    contributing neighbours, and z-scored across tumor cells.

    Parameters
    ----------
    adata_all : AnnData-like object holding all cells (source cells are taken from it).
    adata_tumor : AnnData-like object holding the tumor cells; modified in place.
    key : prefix for the new ``obs`` columns.
    source_col : ``obs`` column of ``adata_all`` used to pick source cells.
    source_values : one label, or a list/tuple/set of labels, selecting source cells.
    k : maximum number of nearest source cells used by the weighted score.
    radius : neighbourhood radius, in the units of ``global_x`` / ``global_y``.
    sigma : Gaussian kernel width; defaults to ``radius / 2``.
    binary_subtyping : if True, write ``f"{key}_subtype"`` ("Away" when no source cell
        is within ``radius``, else "Close") and, for the weighted score,
        ``f"{key}_weighted_subtype"`` (median split, "Low" / "High").
    neighbor_weight_col : variable name(s) in ``adata_all`` used as source-cell weights.

    Returns
    -------
    The same ``adata_tumor`` object that was passed in.

    Raises
    ------
    ValueError : if the weighted score is requested but no source cell matches.
    """

    # cKDTree is not imported explicitly anywhere in this notebook; it would only
    # resolve if the wildcard import from mcDETECT.utils happens to re-export it.
    from scipy.spatial import cKDTree

    # select source cells
    if isinstance(source_values, (list, tuple, set)):
        src_mask = adata_all.obs[source_col].isin(source_values)
    else:
        src_mask = adata_all.obs[source_col] == source_values
    src_idx = np.where(src_mask.values)[0]

    # coordinates
    XY_src = np.c_[adata_all.obs["global_x"].values[src_idx],  adata_all.obs["global_y"].values[src_idx]]
    XY_tum = np.c_[adata_tumor.obs["global_x"].values, adata_tumor.obs["global_y"].values]

    # number of source cells within radius of each tumor cell
    tree = cKDTree(XY_src)
    neighbor_counts = tree.query_ball_point(XY_tum, r = radius, return_length = True)
    adata_tumor.obs[f"{key}_neighbor_counts"] = neighbor_counts
    adata_tumor.obs[f"log_{key}_neighbor_counts"] = np.log1p(neighbor_counts)

    if neighbor_weight_col is not None:

        # fail with a clear message instead of an opaque "n_neighbors" error from sklearn
        if src_idx.size == 0:
            raise ValueError(f"No source cells found for {source_col} in {source_values!r}; "
                             f"cannot compute the weighted '{key}' score.")

        # retrieve weights for source cells: mean expression over the weight genes
        expr = adata_all[src_idx, neighbor_weight_col].X
        if not isinstance(expr, np.ndarray):
            expr = expr.toarray()
        W = expr.mean(axis = 1).ravel()
        W = np.where(np.isfinite(W), W, 0.0)

        # kNN search for distances and indices (never ask for more neighbours than exist)
        n_neighbors = min(k, XY_src.shape[0])
        nbrs = NearestNeighbors(n_neighbors = n_neighbors, algorithm = "kd_tree").fit(XY_src)
        dist, nn_ind = nbrs.kneighbors(XY_tum, return_distance = True)

        # apply radius mask
        within = dist <= radius

        # Gaussian kernel, zeroed for neighbours outside the radius
        if sigma is None:
            sigma = radius / 2.0
        K = np.exp(-(dist ** 2) / (2.0 * (sigma ** 2)))
        K = K * within

        # apply weights
        W_neighbors = W[nn_ind]  # shape: (n_tumor, n_neighbors)
        contrib = K * W_neighbors

        # aggregate: sum of contributions divided by count ** alpha. With alpha = 0.5
        # this is a square-root normalization, i.e. between a plain sum (alpha = 0)
        # and a mean over neighbours (alpha = 1); the count is floored at 1.
        alpha = 0.5
        denom = np.maximum(within.sum(axis = 1, keepdims = True), 1) ** alpha
        raw_score = (contrib.sum(axis = 1, keepdims = True) / denom).ravel()

        # z-score normalization
        adata_tumor.obs[f"{key}_weighted"] = zscore_series(raw_score)

    # binary subtyping
    if binary_subtyping:
        counts = np.array(neighbor_counts)
        adata_tumor.obs[f"{key}_subtype"] = pd.Categorical(np.where(counts == 0, "Away", "Close"), categories = ["Away", "Close"], ordered = True)
        if neighbor_weight_col is not None:
            thr = adata_tumor.obs[f"{key}_weighted"].median()
            adata_tumor.obs[f"{key}_weighted_subtype"] = pd.Categorical(np.where(adata_tumor.obs[f"{key}_weighted"] <= thr, "Low", "High"), categories = ["Low", "High"], ordered = True)

    return adata_tumor

#### 2.1. Hypoxia and heat shock

In [13]:
# Stress gene programs scored from expression
gene_programs = {"hypoxia": ["HIF1A", "EPAS1", "NFE2L2", "CREB1", "RELA", "RELB", "NFKB1", "NFKB2"],
                 "heat_shock": ["HSP90AA1", "HSP90AB1", "HSPA1A", "HSPA1B", "HSPA6", "HSPA8", "HSPH1", "DNAJB1", "HSPB1", "HSPD1", "HSPE1"]}

for key, geneset in gene_programs.items():

    # keep only genes present in the data
    geneset = [gene for gene in geneset if gene in adata_tumor.var_names]

    # Maps to draw, in order: one per gene, then one per top-k score for k = 1..len(geneset).
    # Each job is (column/gene to color by, output file stem, top_k or None for a plain gene map).
    plot_jobs = [(gene, f"{key}_{gene}", None) for gene in geneset]
    plot_jobs += [(f"{key}_{rank}_genes", f"{key}_{rank}_genes", rank) for rank in range(1, len(geneset) + 1)]

    for color_col, file_stem, top_k in plot_jobs:

        # score columns are computed right before they are plotted
        if top_k is not None:
            adata_tumor = compute_expression_score(adata_tumor, key = color_col, target_genes = geneset, top_k = top_k)

        sc.set_figure_params(figsize = plot_figsize)
        ax = sc.pl.scatter(adata_tumor, x = "global_x", y = "global_y", color = color_col, size = 1, color_map = color_cts, show = False)

        # shrink the colorbar to 15% of its default width
        cbar = ax.collections[0].colorbar
        pos = cbar.ax.get_position()
        cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])

        # bare tissue map: no grid, ticks, labels, title or frame
        ax.grid(False)
        ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "", title = "")
        for side in ax.spines:
            ax.spines[side].set_visible(False)

        plt.savefig(output_dir + f"{file_stem}.jpeg", dpi = 300, bbox_inches = "tight")
        plt.close()

#### 2.2. Mechanical score

In [14]:
key = "mechanical"

# compute score
clip_val, adata_tumor = compute_mechanical_crowding(adata_tumor, key = key, k = 10, binary_subtyping = True)

# Two maps: the continuous score (custom colormap + slim colorbar), then the Low/High subtype
for color_col, is_continuous in ((key, True), (f"{key}_subtype", False)):

    sc.set_figure_params(figsize = plot_figsize)
    cmap_kwargs = {"color_map": color_cts} if is_continuous else {}
    ax = sc.pl.scatter(adata_tumor, x = "global_x", y = "global_y", color = color_col, size = 1, title = " ", show = False, **cmap_kwargs)

    if is_continuous:
        # shrink the colorbar to 15% of its default width
        cbar = ax.collections[0].colorbar
        pos = cbar.ax.get_position()
        cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])

    # bare tissue map: no grid, ticks, labels or frame
    ax.grid(False)
    ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
    for side in ax.spines:
        ax.spines[side].set_visible(False)

    plt.savefig(output_dir + f"{color_col}.jpeg", dpi = 300, bbox_inches = "tight")
    plt.close()

#### 2.3. Immune attack

In [15]:
# Define proximity stress programs: program name -> source cell types
spatial_programs = {"immune_cell_proximity": ["CD4+ T cell", "CD8+ T cell", "T cell", "B cell", "Dendritic cell", "Myeloid cell", "Mast cell"],
                    "tcell_proximity": ["CD4+ T cell", "CD8+ T cell", "T cell"],
                    "tcell_attack": ["CD4+ T cell", "CD8+ T cell", "T cell"]}

for key, src_vals in spatial_programs.items():

    # Each program yields two maps, listed as (column to color by, output file stem, continuous?).
    if key == "tcell_attack":
        # granzyme-weighted score, larger radius
        adata_tumor = compute_spatial_score(adata, adata_tumor, key = key, source_col = "cell_type_merged", source_values = src_vals, k = 20, radius = 100, binary_subtyping = True, neighbor_weight_col = ["GZMB", "GZMK", "GZMA"])
        panels = [(f"{key}_weighted", f"{key}_weighted_score", True),
                  (f"{key}_weighted_subtype", f"{key}_weighted_subtype", False)]
    else:
        # plain neighbor counts within the radius
        adata_tumor = compute_spatial_score(adata, adata_tumor, key = key, source_col = "cell_type_merged", source_values = src_vals, k = 20, radius = 50, binary_subtyping = True)
        panels = [(f"log_{key}_neighbor_counts", f"{key}_neighbor_counts", True),
                  (f"{key}_subtype", f"{key}_subtype", False)]

    for color_col, file_stem, is_continuous in panels:

        sc.set_figure_params(figsize = plot_figsize)
        cmap_kwargs = {"color_map": color_cts} if is_continuous else {}
        ax = sc.pl.scatter(adata_tumor, x = "global_x", y = "global_y", color = color_col, size = 1, title = " ", show = False, **cmap_kwargs)

        if is_continuous:
            # shrink the colorbar to 15% of its default width
            cbar = ax.collections[0].colorbar
            pos = cbar.ax.get_position()
            cbar.ax.set_position([pos.x0, pos.y0, pos.width * 0.15, pos.height])

        # bare tissue map: no grid, ticks, labels or frame
        ax.grid(False)
        ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
        for side in ax.spines:
            ax.spines[side].set_visible(False)

        plt.savefig(output_dir + f"{file_stem}.jpeg", dpi = 300, bbox_inches = "tight")
        plt.close()

In [16]:
# Promote the top-2-gene scores to the final score / subtype columns of each program
score_programs = ("hypoxia", "heat_shock")
rename_map = {}
for program in score_programs:
    rename_map[f"{program}_2_genes"] = f"{program}_score"
    rename_map[f"{program}_2_genes_subtype"] = f"{program}_subtype"
adata_tumor.obs = adata_tumor.obs.rename(columns = rename_map)

# Drop every other column of these programs (the remaining top-k variants)
final_cols = set(rename_map.values())
program_prefixes = tuple(f"{program}_" for program in score_programs)
cols_to_drop = [c for c in adata_tumor.obs.columns if c.startswith(program_prefixes) and c not in final_cols]
adata_tumor.obs = adata_tumor.obs.drop(columns = cols_to_drop)

### 3. Correlations

#### 3.1. Correlation between SG count and stress scores

In [17]:
# Spearman correlation between SG count per cell and the stress scores
vars_of_interest = ["log_granule_count", "hypoxia_score", "heat_shock_score", "immune_cell_proximity_neighbor_counts", "mechanical"]
corr_method = "spearman"

# treat infinities as missing and use complete rows only
df = adata_tumor.obs[vars_of_interest].replace([np.inf, -np.inf], np.nan).dropna()
corr_mat = df.corr(method = corr_method)

# plot labels (renaming only; row/column order still follows vars_of_interest)
plot_labels = {"log_granule_count": "SG per cell",
               "hypoxia_score": "Hypoxia",
               "heat_shock_score": "Heat shock",
               "mechanical": "Mechanical stress",
               "immune_cell_proximity_neighbor_counts": "Immune proximity"}
corr_mat_pretty = corr_mat.rename(index = plot_labels, columns = plot_labels)

# Correlation heatmap
fig, ax = plt.subplots(figsize = (7, 6))
sns.heatmap(corr_mat_pretty, cmap = "vlag", vmin = -1, vmax = 1, center = 0, annot = True, fmt = ".2f", annot_kws = {"size": 12}, square = True, linewidths = 0.5, ax = ax)
ax.grid(False)
plt.savefig(output_dir + "stress_score_correlation_heatmap.png", dpi = 300, bbox_inches = "tight")
plt.close()

#### 3.2. Correlation among ssGSEA scores

In [18]:
# 1. nuclear vs cytoplasmic
nuclear_cols = [c for c in adata_tumor.obs.columns if c.startswith("nuclear_")]
cyto_cols = [c for c in adata_tumor.obs.columns if c.startswith("cytoplasmic_")]

# treat infinities as missing and use complete rows only
df = adata_tumor.obs[nuclear_cols + cyto_cols].replace([np.inf, -np.inf], np.nan).dropna()

# Spearman correlation for every (nuclear, cytoplasmic) pair; rows = nuclear, columns = cytoplasmic
pairwise_r = [[spearmanr(df[nuc], df[cyt]).correlation for cyt in cyto_cols] for nuc in nuclear_cols]
corr_mat = pd.DataFrame(pairwise_r,
                        index = [name.replace("nuclear_", "") for name in nuclear_cols],
                        columns = [name.replace("cytoplasmic_", "") for name in cyto_cols],
                        dtype = float)

# correlation heatmap
# tick labels: drop the first two "_"-separated tokens of the column name and capitalize the rest
nuclear_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in nuclear_cols]
cyto_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in cyto_cols]

fig, ax = plt.subplots(figsize = (12, 10))
sns.heatmap(corr_mat, cmap = "vlag", vmin = -1, vmax = 1, center = 0, annot = True, fmt = ".2f", annot_kws = {"size": 6}, linewidths = 0.3, ax = ax)
ax.grid(False)
ax.set_xticklabels(cyto_labels, rotation = 45, ha = "right", fontsize = 9)
ax.set_yticklabels(nuclear_labels, rotation = 0, fontsize = 9)
ax.set(xlabel = "", ylabel = "", title = "")
plt.savefig(output_dir + "ssGSEA_nuclear_cytoplasmic_correlation_heatmap.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [19]:
# granule count vs ssGSEA scores
# (uses nuclear_cols / cyto_cols defined in the previous cell)
df_g = adata_tumor.obs[["log_granule_count"] + nuclear_cols + cyto_cols].replace([np.inf, -np.inf], np.nan).dropna()

# one (compartment, column, Spearman r) record per pathway score
compartments = (("Nuclear", nuclear_cols), ("Cytoplasmic", cyto_cols))
records = [(label, col, spearmanr(df_g["log_granule_count"], df_g[col]).correlation)
           for label, cols in compartments
           for col in cols]
corr_granule = pd.DataFrame(records, columns = ["compartment", "raw_name", "spearman_r"])

# readable pathway name: drop the first two "_"-separated tokens and capitalize the rest
corr_granule["pathway"] = corr_granule["raw_name"].apply(lambda x: " ".join(s.capitalize() for s in x.split("_")[2:]))
max_abs = corr_granule["spearman_r"].abs().max()

# one horizontal bar chart per compartment, bars sorted by correlation, shared x axis
fig, axes = plt.subplots(1, 2, figsize = (14, 8), sharex = True, gridspec_kw = {"wspace": 0.4})
panel_styles = (("Nuclear", "#4C72B0"), ("Cytoplasmic", "#DD8452"))
for panel_ax, (compartment, bar_color) in zip(axes, panel_styles):
    panel_data = corr_granule[corr_granule["compartment"] == compartment].sort_values("spearman_r")
    sns.barplot(data = panel_data, x = "spearman_r", y = "pathway", ax = panel_ax, color = bar_color)
    panel_ax.axvline(0, color = "black", lw = 0.8)
    panel_ax.set_title(compartment)
    panel_ax.set_xlabel("Spearman r")
    panel_ax.set_ylabel("")
    panel_ax.set_yticklabels(panel_ax.get_yticklabels(), rotation = 0, fontsize = 8)
plt.savefig(output_dir + "granule_count_ssGSEA_correlation_nuclear_cytoplasmic.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [20]:
# 2. nuclear vs SG (within all cells)
nuclear_cols = [c for c in adata_tumor.obs.columns if c.startswith("nuclear_")]
sg_cols = [c for c in adata_tumor.obs.columns if c.startswith("sg_")]

# treat infinities as missing and use complete rows only
df = adata_tumor.obs[nuclear_cols + sg_cols].replace([np.inf, -np.inf], np.nan).dropna()

# Spearman correlation for every (nuclear, SG) pair; rows = nuclear, columns = SG
pairwise_r = [[spearmanr(df[nuc], df[sg]).correlation for sg in sg_cols] for nuc in nuclear_cols]
corr_mat = pd.DataFrame(pairwise_r,
                        index = [name.replace("nuclear_", "") for name in nuclear_cols],
                        columns = [name.replace("sg_", "") for name in sg_cols],
                        dtype = float)

# correlation heatmap
# tick labels: drop the first two "_"-separated tokens of the column name and capitalize the rest
nuclear_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in nuclear_cols]
sg_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in sg_cols]

fig, ax = plt.subplots(figsize = (12, 10))
sns.heatmap(corr_mat, cmap = "vlag", vmin = -1, vmax = 1, center = 0, annot = True, fmt = ".2f", annot_kws = {"size": 6}, linewidths = 0.3, ax = ax)
ax.grid(False)
ax.set_xticklabels(sg_labels, rotation = 45, ha = "right", fontsize = 9)
ax.set_yticklabels(nuclear_labels, rotation = 0, fontsize = 9)
ax.set(xlabel = "", ylabel = "", title = "")
plt.savefig(output_dir + "ssGSEA_nuclear_sg_correlation_heatmap.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [21]:
# 3. nuclear vs SG (only within SG-positive cells)
nuclear_cols = [c for c in adata_tumor.obs.columns if c.startswith("nuclear_")]
sg_cols = [c for c in adata_tumor.obs.columns if c.startswith("sg_")]

# treat infinities as missing and use complete rows only
df_all = adata_tumor.obs[nuclear_cols + sg_cols].replace([np.inf, -np.inf], np.nan).dropna()

# "SG-positive" here means: at least one SG score column is non-zero for the cell
sg_nonzero_mask = df_all[sg_cols].ne(0).any(axis = 1)
df = df_all[sg_nonzero_mask].dropna()

# Spearman correlation for every (nuclear, SG) pair; rows = nuclear, columns = SG
pairwise_r = [[spearmanr(df[nuc], df[sg]).correlation for sg in sg_cols] for nuc in nuclear_cols]
corr_mat = pd.DataFrame(pairwise_r,
                        index = [name.replace("nuclear_", "") for name in nuclear_cols],
                        columns = [name.replace("sg_", "") for name in sg_cols],
                        dtype = float)

# correlation heatmap
# tick labels: drop the first two "_"-separated tokens of the column name and capitalize the rest
nuclear_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in nuclear_cols]
sg_labels = [" ".join(tok.capitalize() for tok in name.split("_")[2:]) for name in sg_cols]

fig, ax = plt.subplots(figsize = (12, 10))
sns.heatmap(corr_mat, cmap = "vlag", vmin = -1, vmax = 1, center = 0, annot = True, fmt = ".2f", annot_kws = {"size": 6}, linewidths = 0.3, ax = ax)
ax.grid(False)
ax.set_xticklabels(sg_labels, rotation = 45, ha = "right", fontsize = 9)
ax.set_yticklabels(nuclear_labels, rotation = 0, fontsize = 9)
ax.set(xlabel = "", ylabel = "", title = "")
plt.savefig(output_dir + "ssGSEA_nuclear_sg_correlation_heatmap_sg_positive_cells.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [22]:
# Persist the tumor AnnData with all score columns next to the merged input data
adata_tumor.write_h5ad(f"{data_dir}adata_tumor_scored.h5ad")