#### This particular notebook includes an FFPE block age QC comparison within Dataset 1 Xenium (Rep1 only).

#### Required input files:

* Filtered transcript file (for each dataset/rep/slide)
* Annotated cell-based data object (for each dataset)

Note: the `_r` suffix (e.g. `Xenium_transcripts_r`) denotes a filtered transcript file ("r" = removal by filtering).

Environment: Please create and activate the conda environment provided in default_env.yaml before running this notebook.

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import squidpy as sq

import gzip
import anndata

import os

import matplotlib.colors as mcolors

#### Load in data

In [None]:
# Filtered ("_r") transcript table, read from CSV.
Xenium_transcripts_r = pd.read_csv(
    '/path/Xenium_transcripts_r.csv',
)

# Annotated cell-level AnnData object, read from .h5ad.
XeniumData = sc.read_h5ad(
    '/path/25_11_22_Xenium_Dataset1_290_IntReps1and2_Annotated.h5ad',
)

In [None]:
# 1. Keep only the cells whose batch is 'Sept' (rep1).
XeniumData = XeniumData[XeniumData.obs['batch'] == 'Sept'].copy()

# 2. Expose the cell identifiers as a regular 'cell_id' column so they can be
#    used as a merge key. The obs index itself is left in place: resetting it
#    in place would replace AnnData's string obs_names with an integer
#    RangeIndex. The guard makes the cell safe to re-run (a second
#    reset_index/rename would create a duplicate 'cell_id' column).
if 'cell_id' not in XeniumData.obs.columns:
    XeniumData.obs.insert(0, 'cell_id', XeniumData.obs_names.to_numpy())
    # Drop the index name so it can never collide with the new column label
    # when the table is used in a merge on 'cell_id'.
    XeniumData.obs.index.name = None

# 3. Display results
display(XeniumData)
display(XeniumData.obs)

# Plotting DESeq Genes of Interest

#### Format data accordingly

In [None]:
# Attach each transcript's core label by joining on the shared 'cell_id' key.
# The inner join keeps only transcripts whose cell_id is present in XeniumData.obs.
Xenium_transcripts_merged = pd.merge(
    left=Xenium_transcripts_r,
    right=XeniumData.obs[['cell_id', 'Patient_ID_cores_combined']],
    how='inner',
    on='cell_id',
)

# Show the merged table as the cell output
Xenium_transcripts_merged

#### Plot based on fov

In [None]:
def plot_spatial_scatter_with_overlay_Xenium_fov(Xenium_transcripts_merged, gene_populations, condition, fov, colors, fig_width=5, fig_height=6, save_plot=False):
    """Scatter-plot the transcripts of selected genes within one field of view.

    Parameters
    ----------
    Xenium_transcripts_merged : pandas.DataFrame
        Transcript table. The columns 'fov_name', 'feature_name',
        'x_location' and 'y_location' are read.
    gene_populations : list of str
        Gene names (values of 'feature_name') to draw.
    condition : str
        Label that is only used in the saved file name.
    fov : str
        Value matched against the 'fov_name' column.
    colors : list
        One matplotlib colour per gene, paired with `gene_populations` by
        position. Extra colours are ignored.
    fig_width, fig_height : float
        Figure size in inches.
    save_plot : bool
        If True, write the figure as a PDF below the hard-coded output folder.

    Raises
    ------
    ValueError
        If `gene_populations` is empty or has more entries than `colors`.
    """
    import warnings  # local import keeps this cell self-contained

    gene_populations = list(gene_populations)
    colors = list(colors)
    if not gene_populations:
        raise ValueError("gene_populations must contain at least one gene.")
    if len(colors) < len(gene_populations):
        raise ValueError(
            f"Need one color per gene: got {len(colors)} colors "
            f"for {len(gene_populations)} genes."
        )

    # Define the path where the plot will be saved
    save_path = '/path/DESeqGenesOfInterestPlots/Fovs/'

    # Ensure that directory exists if saving the plot
    if save_plot:
        os.makedirs(save_path, exist_ok=True)

    # Restrict to the requested field of view
    specified_fov = Xenium_transcripts_merged[Xenium_transcripts_merged['fov_name'] == fov]

    # One frame per gene, each tagged with that gene's colour
    per_gene = [
        specified_fov[specified_fov['feature_name'] == gene].assign(color=color)
        for gene, color in zip(gene_populations, colors)
    ]

    # A gene without any transcript would otherwise vanish silently from the
    # plot while still appearing in the legend (e.g. after a misspelled name).
    missing = [str(gene) for gene, frame in zip(gene_populations, per_gene) if frame.empty]
    if missing:
        warnings.warn(
            f"No transcripts found in fov '{fov}' for: {', '.join(missing)}. "
            "Check the gene-name spelling and the fov name.",
            stacklevel=2,
        )

    # Shuffle (fixed seed) so that no gene is systematically drawn on top
    combined_data = (
        pd.concat(per_gene)
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )

    # Create a figure
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))

    # Plot; the legend is built from explicit handles below, so no label here
    ax.scatter(
        combined_data['x_location'],
        combined_data['y_location'],
        s=40,  # Adjust the size of the scatter plot dots
        alpha=0.5,  # Adjust alpha for better visualization
        c=combined_data['color'],
    )

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('spatial1')
    ax.set_ylabel('spatial2')
    ax.set_aspect('equal')

    # One legend entry per requested gene, placed below the axes
    legend_elements = [
        plt.Line2D([0], [0], marker='o', color=color, markersize=5, label=gene, linestyle='None')
        for gene, color in zip(gene_populations, colors)
    ]
    ax.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, -0.05), fontsize=10)

    plt.tight_layout()  # Adjust layout to make room for the legend

    # Save plot to file if save_plot is True
    if save_plot:
        gene_population_str = "_".join(gene_populations)
        file_name = f"SpatialScatterPlot_Xenium_{condition}_fov{fov}_genes{gene_population_str}.pdf"
        full_path = os.path.join(save_path, file_name)
        # bbox_inches='tight' keeps the legend below the axes inside the PDF
        plt.savefig(full_path, format='pdf', bbox_inches='tight')

    plt.show()

In [None]:
# Genes AQP8 / EPCAM / PIGR in FOV "S2" (condition label "HC")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['AQP8', 'EPCAM', 'PIGR'],
    condition="HC",
    fov="S2",  # HC
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes BANK1 / IGHD / MS4A1 / SELL in FOV "S2" (condition label "HC")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    # 'MS4A1' was misspelled 'MSA41', which matches no transcript
    gene_populations=['BANK1', 'IGHD', 'MS4A1', 'SELL'],
    condition="HC",
    fov="S2",  # HC
    colors=['deeppink', 'darkorange', 'royalblue', 'mediumseagreen'],
    save_plot=False,
)

In [None]:
# Genes S100A8 / S100A9 / TIMP1 in FOV "S2" (condition label "HC")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['S100A8', 'S100A9', 'TIMP1'],
    condition="HC",
    fov="S2",  # HC
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes AQP8 / EPCAM / PIGR in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['AQP8', 'EPCAM', 'PIGR'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes BANK1 / IGHD / MS4A1 / SELL in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    # 'MS4A1' was misspelled 'MSA41', which matches no transcript
    gene_populations=['BANK1', 'IGHD', 'MS4A1', 'SELL'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue', 'mediumseagreen'],
    save_plot=False,
)

In [None]:
# Genes S100A8 / S100A9 / TIMP1 in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['S100A8', 'S100A9', 'TIMP1'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes CD1C / MMP1 / MMP3 in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['CD1C', 'MMP1', 'MMP3'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes BANK1 / CD19 / MS4A1 / SELL in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['BANK1', 'CD19', 'MS4A1', 'SELL'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue', 'mediumseagreen'],
    save_plot=False,
)

In [None]:
# Genes AGR2 / PIGR / SPINK4 in FOV "AB13" (condition label "PREVDZ_R")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['AGR2', 'PIGR', 'SPINK4'],
    condition="PREVDZ_R",
    fov="AB13",  # PREVDZ_R
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes AQP8 / EPCAM / PIGR in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['AQP8', 'EPCAM', 'PIGR'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes BANK1 / IGHD / MS4A1 / SELL in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    # 'MS4A1' was misspelled 'MSA41', which matches no transcript
    gene_populations=['BANK1', 'IGHD', 'MS4A1', 'SELL'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue', 'mediumseagreen'],
    save_plot=False,
)

In [None]:
# Genes S100A8 / S100A9 / TIMP1 in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['S100A8', 'S100A9', 'TIMP1'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes CD1C / MMP1 / MMP3 in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['CD1C', 'MMP1', 'MMP3'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

In [None]:
# Genes BANK1 / CD19 / MS4A1 / SELL in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['BANK1', 'CD19', 'MS4A1', 'SELL'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue', 'mediumseagreen'],
    save_plot=False,
)

In [None]:
# Genes AGR2 / PIGR / SPINK4 in FOV "Q12" (condition label "PREVDZ_NR")
plot_spatial_scatter_with_overlay_Xenium_fov(
    Xenium_transcripts_merged,
    gene_populations=['AGR2', 'PIGR', 'SPINK4'],
    condition="PREVDZ_NR",
    fov="Q12",  # PREVDZ_NR
    colors=['deeppink', 'darkorange', 'royalblue'],
    save_plot=False,
)

# Plotting Fine Annotation Clusters of Interest

In [None]:
# UMAP coloured by the fine annotation column
sc.pl.umap(XeniumData, color=["24_05_29_Fine_annotations_Xenium_combined"], wspace=0.4)

## IEC_CT_colonocyte, MNP_monocyte, and Fibroblast_IAF clusters

In [None]:
def plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations,
    core_id,
    save_path=None,
    fig_width=5,
    fig_height=6
):
    """Plot selected cell populations of one core, coloured with the stored palette.

    Parameters
    ----------
    XeniumData : AnnData
        Reads .obs['24_05_29_Fine_annotations_Xenium_combined'],
        .obs['Patient_ID_cores_combined'], .obsm['spatial'] and
        .uns['24_05_29_Fine_annotations_Xenium_combined_colors'].
        The object is not modified.
    cell_populations : list of str
        Annotation labels to draw, layered in the order given.
    core_id : str
        Value matched against 'Patient_ID_cores_combined'.
    save_path : str or None
        If given, the figure is written to this path (dpi=300).
    fig_width, fig_height : float
        Figure size in inches.

    Raises
    ------
    KeyError
        If the colour palette is missing from .uns.
    ValueError
        If the palette holds fewer colours than there are annotation categories.
    """
    annotation_key = '24_05_29_Fine_annotations_Xenium_combined'
    core_key = 'Patient_ID_cores_combined'
    colors_key = f'{annotation_key}_colors'

    if colors_key not in XeniumData.uns:
        raise KeyError(
            f"Missing '{colors_key}' in .uns; the palette must be set "
            "before calling this function."
        )

    annotations = XeniumData.obs[annotation_key]

    # Category order pairs with the palette order. astype('category') is a
    # no-op for categorical columns and leaves the caller's .obs untouched.
    categories = annotations.astype('category').cat.categories
    palette = list(XeniumData.uns[colors_key])
    if len(palette) < len(categories):
        raise ValueError(
            f"'{colors_key}' has {len(palette)} colors for "
            f"{len(categories)} annotation categories."
        )
    cluster_colors = dict(zip(categories, palette))

    # Boolean masks on the full object avoid copying the AnnData twice per call
    in_core = (XeniumData.obs[core_key] == core_id).to_numpy()
    coords = XeniumData.obsm['spatial']

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))

    # Draw populations in the order provided so later ones sit on top
    for pop in cell_populations:
        selected = in_core & (annotations == pop).to_numpy()
        if not selected.any():
            continue

        ax.scatter(
            coords[selected, 0],
            coords[selected, 1],
            color=cluster_colors[pop],
            s=90,
            alpha=1.0 if pop != 'IEC_CT_colonocyte' else 0.8,  # mimic original layering
            label=pop
        )

    # Axis formatting
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('spatial1')
    ax.set_ylabel('spatial2')
    ax.set_aspect('equal')

    # Legend only for the requested populations that have a palette colour
    legend_elements = [
        plt.Line2D(
            [0], [0],
            marker='o',
            color='w',
            markerfacecolor=cluster_colors[pop],
            markersize=9,
            label=pop
        )
        for pop in cell_populations
        if pop in cluster_colors
    ]
    ax.legend(
        handles=legend_elements,
        loc='upper center',
        bbox_to_anchor=(0.5, -0.05),
        fontsize=10
    )

    plt.tight_layout()

    # Optional save
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
# Colonocyte / IAF fibroblast / monocyte overlay for core HS37_HC_5
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=['IEC_CT_colonocyte', 'Fibroblast_IAF', 'MNP_monocyte'],
    core_id="HS37_HC_5",
)

In [None]:
# Colonocyte / IAF fibroblast / monocyte overlay for core HS34 (responder)
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=['IEC_CT_colonocyte', 'Fibroblast_IAF', 'MNP_monocyte'],
    core_id="HS34_VDZ_C_PRE_1_Mayo2_responder",
)

In [None]:
# Colonocyte / IAF fibroblast / monocyte overlay for core HS50 (non-responder)
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=['IEC_CT_colonocyte', 'Fibroblast_IAF', 'MNP_monocyte'],
    core_id="HS50_VDZ_RS_PRE_1_Mayo2_non-responder",
)

## Fibroblast subsets + more

In [None]:
def plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations,
    core_id,
    save_path=None,
    fig_width=5,
    fig_height=6
):
    """Plot selected cell populations of one core using a fixed custom palette.

    NOTE: this definition has the same name as the palette-based plotting
    function defined earlier in the notebook and replaces it once this cell
    has run.

    Parameters
    ----------
    XeniumData : AnnData
        Reads .obs['24_05_29_Fine_annotations_Xenium_combined'],
        .obs['Patient_ID_cores_combined'] and .obsm['spatial'].
    cell_populations : list of str
        Annotation labels to draw. Only labels that have an entry in the
        custom palette below can be drawn; others trigger a warning.
    core_id : str
        Value matched against 'Patient_ID_cores_combined'.
    save_path : str or None
        If given, the figure is written to this path (dpi=300).
    fig_width, fig_height : float
        Figure size in inches.
    """
    import warnings  # local import keeps this cell self-contained

    # Custom colours; the dict order is also the drawing (layering) order
    custom_colors = {
        'B': 'gold',
        'Fibroblast_submucosal_S3': 'forestgreen',
        'Fibroblast_FRC_S4': 'lime',
        'IEC_CT_colonocyte': 'crimson',
        'IEC_ISC': 'lightpink',
        'Fibroblast_CTF_S2': 'blue',
        'Fibroblast_cCF_S1': 'deepskyblue',
    }

    annot_col = '24_05_29_Fine_annotations_Xenium_combined'
    core_col = 'Patient_ID_cores_combined'

    # Requested populations without a custom colour cannot be drawn; say so
    # instead of dropping them silently.
    uncolored = [str(pop) for pop in cell_populations if pop not in custom_colors]
    if uncolored:
        warnings.warn(
            "No custom color defined for: " + ", ".join(uncolored)
            + "; these populations are not plotted.",
            stacklevel=2,
        )

    # Populations to draw, in palette order
    drawn_populations = [pop for pop in custom_colors if pop in cell_populations]

    annotations = XeniumData.obs[annot_col]
    in_core = (XeniumData.obs[core_col] == core_id).to_numpy()
    coords = XeniumData.obsm['spatial']

    # Create a figure
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))

    # One scatter layer per population (replaces seven copy-pasted stanzas)
    for pop in drawn_populations:
        selected = in_core & (annotations == pop).to_numpy()
        if not selected.any():
            continue

        ax.scatter(
            coords[selected, 0],
            coords[selected, 1],
            color=custom_colors[pop],
            s=55,
            alpha=0.7,
            label=pop
        )

    # Formatting
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('spatial1')
    ax.set_ylabel('spatial2')
    ax.set_aspect('equal')

    # Legend from custom colors, only for the requested populations
    legend_elements = [
        plt.Line2D(
            [0], [0],
            marker='o',
            color='w',
            markerfacecolor=custom_colors[pop],
            markersize=9,
            label=pop
        )
        for pop in drawn_populations
    ]
    ax.legend(
        handles=legend_elements,
        loc='upper center',
        bbox_to_anchor=(0.5, -0.05),
        fontsize=10
    )

    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.show()

HC plots

In [None]:
# Epithelial, fibroblast-subset and B-cell overlay for core HS39_HC_1
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=[
        'IEC_CT_colonocyte', 'IEC_ISC',
        'Fibroblast_cCF_S1', 'Fibroblast_CTF_S2',
        'Fibroblast_submucosal_S3', 'Fibroblast_FRC_S4',
        'B',
    ],
    core_id="HS39_HC_1",
)

In [None]:
# Epithelial, fibroblast-subset and B-cell overlay for core HS42_HC_1
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=[
        'IEC_CT_colonocyte', 'IEC_ISC',
        'Fibroblast_cCF_S1', 'Fibroblast_CTF_S2',
        'Fibroblast_submucosal_S3', 'Fibroblast_FRC_S4',
        'B',
    ],
    core_id="HS42_HC_1",
)

UC plots

In [None]:
# Epithelial, fibroblast-subset and B-cell overlay for core HS44 (PRE, responder)
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=[
        'IEC_CT_colonocyte', 'IEC_ISC',
        'Fibroblast_cCF_S1', 'Fibroblast_CTF_S2',
        'Fibroblast_submucosal_S3', 'Fibroblast_FRC_S4',
        'B',
    ],
    core_id="HS44_VDZ_C_PRE_1_Mayo0_responder",
)

In [None]:
# Epithelial, fibroblast-subset and B-cell overlay for core HS32 (POST, responder)
plot_spatial_scatter_with_overlay(
    XeniumData,
    cell_populations=[
        'IEC_CT_colonocyte', 'IEC_ISC',
        'Fibroblast_cCF_S1', 'Fibroblast_CTF_S2',
        'Fibroblast_submucosal_S3', 'Fibroblast_FRC_S4',
        'B',
    ],
    core_id="HS32_VDZ_RC_POST_1_Mayo0_responder",
)

## Gene signatures distinguishing UC PRE Responders and Non-responders

#### Calculate mean expression of gene signature lists per cell on the log normalized counts

For each gene set:
* Take one cell (a row of your AnnData).
* Pull out the expression values for just the genes in that set (from the log_normalized_counts layer).
* Average those values → that gives you a single number summarizing that cell’s mean expression across the set.
* Repeat for every cell → you get a new metadata column in .obs (e.g. MeanExp_R_IEC).
So each column in .obs now holds the per-cell average expression for one of your gene sets.

In [None]:
### Calculate mean expression of gene signature lists per cell on the log normalized counts

# `adata` is a second name for the same AnnData object (no copy is made), so
# columns added to adata.obs below are also visible through XeniumData.obs.
adata = XeniumData  # shorthand
# Name of the layer read when averaging expression per cell.
layer_to_use = "log_normalized_counts"

def get_matrix(A, genes, layer=None):
    """Return the expression matrix of `A` restricted to the columns `genes`.

    The matrix is taken from ``.layers[layer]`` when `layer` is given and
    from ``.X`` otherwise.
    """
    gene_view = A[:, genes]
    if layer is None:
        return gene_view.X
    return gene_view.layers[layer]

def row_means(M):
    """Return the mean of every row of `M`.

    Objects exposing ``tocsc`` (treated as sparse matrices) have their
    result flattened to a 1-D array; anything else returns ``M.mean(axis=1)``
    unchanged.
    """
    per_row = M.mean(axis=1)
    if not hasattr(M, "tocsc"):
        return per_row
    # sparse means come back two-dimensional, so flatten them
    return np.asarray(per_row).ravel()

# ---- Gene signatures ----
# Each block lists one gene symbol per line; str.split() turns it into a list
# and ignores the surrounding blank lines.
R_IEC = """
AQP8
CA4
FABP1
GUCA2A
LGALS4
ITLN1
GPR15
SLC26A2
SPINK4
WFDC2
MUC2
C15orf48
LEFTY1
SPINK1
EPCAM
AGR2
PIGR
FABP2
ADAMDEC1
DDC
ITM2C
S100A16
KRT8
MUC1
CEACAM6
NOS2
SELENOP
TMEM176B
TMEM176A
NUPR1
""".split()

NR_IAF = """
IL1R1
TIMP1
CD44
IL13RA2
MMP1
MMP3
OSMR
NFKBIA
TNFAIP3
TNFRSF11B
S100A8
S100A9
FCGR3B
CSF3R
BASP1
OSM
VCAN
LYZ
CD14
HLA-DRA
HLA-DRB1
TREM1
""".split()

NR_GALT = """
CD19
MS4A1
BANK1
SELL
CD79A
CCR7
LTB
C3
TNFSF13B
IRF8
PTGDS
CD1C
HLA-DRA
CD86
ITGAX
FCER1A
CLEC10A
CD1D
CLEC9A
XCR1
FLT3
HLA-DRB1
CD40
CD209
LAMP3
""".split()

# New signatures
LeadingEdge_R = """
LGALS4
ITLN1
C15orf48
SPINK1
EPCAM
AGR2
PIGR
DDC
ITM2C
S100A16
MUC1
""".split()

LeadingEdge_NR = """
OSM
TREM1
S100A8
BASP1
S100A9
CSF3R
FCGR3B
NFKBIA
SELL
LTB
ITGAX
""".split()

# Output .obs column name -> gene list averaged into that column
gene_sets = dict(
    MeanExp_R_IEC=R_IEC,
    MeanExp_NR_IAF=NR_IAF,
    MeanExp_NR_GALT=NR_GALT,
    MeanExp_R_LeadingEdge=LeadingEdge_R,
    MeanExp_NR_LeadingEdge=LeadingEdge_NR,
)

# ---- Per-cell mean expression of every gene set, stored as .obs columns ----
varnames = set(adata.var_names.astype(str))

for col_name, genes in gene_sets.items():
    # Split the signature into genes that exist in the data and genes that do not
    present, missing = [], []
    for g in genes:
        (present if g in varnames else missing).append(g)

    if present:
        M = get_matrix(adata, present, layer=layer_to_use)
        adata.obs[col_name] = row_means(M)

        print(f"[{col_name}] Using {len(present)}/{len(genes)} genes. "
              f"Missing: {', '.join(missing) if missing else 'none'}")
    else:
        # Nothing to average: keep the column but mark it as undefined
        adata.obs[col_name] = np.nan
        print(f"[{col_name}] 0/{len(genes)} genes found in adata.var_names. Column filled with NaN.")

# Quick check: first rows of the five new columns
adata.obs[list(gene_sets)].head()


In [None]:
# List the distinct condition labels; the subsets built in the next cell
# filter on 'HC', 'PRE_VDZ_R' and 'PRE_VDZ_NR'.
adata.obs['24_01_17_Condition'].unique()

In [None]:
### Group by core

# Make sure the observation names are strings
adata.obs_names = adata.obs_names.astype(str)

# --- One independent copy per condition ---
adata_HC, adata_UCPRE_R, adata_UCPRE_NR = (
    adata[adata.obs['24_01_17_Condition'] == condition_label].copy()
    for condition_label in ('HC', 'PRE_VDZ_R', 'PRE_VDZ_NR')
)

# --- Grouping column and the gene-set score columns to summarise ---
group_col = 'Patient_ID_cores_combined'
geneset_cols = [
    'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT',
    'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge',
]

def core_means_df(A, subset_name):
    """Return a long-format DataFrame of core-level mean expression per gene set.

    Parameters
    ----------
    A : AnnData
        Object whose .obs holds the grouping column `group_col` and the
        score columns listed in `geneset_cols` (both module-level names).
    subset_name : str
        Label written to the 'Subset' column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns: `group_col`, 'Subset', 'GeneSet', 'MeanExpression'; one row
        per (core, gene set) pair.
    """
    df = A.obs[[group_col] + geneset_cols].dropna(subset=[group_col]).copy()
    core_mean = (
        # observed=True: when the grouping column is categorical, only cores
        # that actually occur in this subset get a row (no all-NaN rows for
        # unused categories, and no reliance on the deprecated default).
        df.groupby(group_col, as_index=False, observed=True)[geneset_cols]
          .mean(numeric_only=True)
    )
    core_mean['Subset'] = subset_name
    return core_mean.melt(
        id_vars=[group_col, 'Subset'],
        value_vars=geneset_cols,
        var_name='GeneSet',
        value_name='MeanExpression'
    )

# --- Stack the core-level means of the three condition subsets ---
df_long = pd.concat(
    [
        core_means_df(subset_adata, subset_label)
        for subset_adata, subset_label in (
            (adata_HC, 'HC'),
            (adata_UCPRE_R, 'UCPRE_R'),
            (adata_UCPRE_NR, 'UCPRE_NR'),
        )
    ],
    ignore_index=True,
)

# Ordered categoricals fix the order in which groups appear in plots
df_long['Subset'] = pd.Categorical(
    df_long['Subset'],
    categories=['HC', 'UCPRE_R', 'UCPRE_NR'],
    ordered=True,
)
df_long['GeneSet'] = pd.Categorical(
    df_long['GeneSet'],
    categories=[
        'MeanExp_R_IEC',
        'MeanExp_NR_IAF',
        'MeanExp_NR_GALT',
        'MeanExp_R_LeadingEdge',
        'MeanExp_NR_LeadingEdge',
    ],
    ordered=True,
)

In [None]:

# --- Violins per gene set and subset (5 gene sets x 3 subsets); each dot = one core ---
fig, ax = plt.subplots(figsize=(12, 6))

# Distribution of the core-level means
sns.violinplot(
    data=df_long, x='GeneSet', y='MeanExpression', hue='Subset',
    inner='box', dodge=True, cut=0, linewidth=1,
    ax=ax,
)

# Individual cores drawn on top of the violins
sns.stripplot(
    data=df_long, x='GeneSet', y='MeanExpression', hue='Subset',
    dodge=True, size=7, alpha=0.6, edgecolor='none',
    ax=ax,
)

# Both layers add legend entries; keep only the first three
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:3], labels[:3], title='Condition', frameon=False, loc='best')

ax.set(
    title='Core-level mean expression by gene set and condition',
    xlabel='Gene set',
    ylabel='Core mean expression (log_normalized_counts)',
)
plt.tight_layout()
plt.show()

# --- Optional wide-format table for inspection/export ---
# observed=True: 'Subset' and 'GeneSet' are categorical, so only the
# (core, subset) combinations that actually occur are aggregated instead of
# the full cartesian product; this also avoids relying on the deprecated
# default of `observed`.
core_means_wide = (
    df_long
    .pivot_table(
        index=[group_col, 'Subset'],
        columns='GeneSet',
        values='MeanExpression',
        observed=True,
    )
    .reset_index()
)
display(core_means_wide)


In [None]:
# Number of rows in the wide core-level table
display(len(core_means_wide))

# Core identifiers present in the non-responder ("NR") and responder ("R") subsets
for _label, _subset in (("NR", adata_UCPRE_NR), ("R", adata_UCPRE_R)):
    display(_label)
    display(_subset.obs['Patient_ID_cores_combined'].unique())

In [None]:
## Spatial scatter plot function

#import numpy as np
#import matplotlib.pyplot as plt

def plot_spatial_core_scalar(
    A,
    obs_key,
    core_id,
    save_plot=False,
    fig_width=5,
    fig_height=5,
    point_size=2000,
    cmap='viridis',
    robust=True,
    vmin=None,
    vmax=None,
    flip_y=False
):
    """
    Plot a spatial scatter for a single core colored by a scalar obs column.

    Parameters
    ----------
    A : AnnData
        Must provide .obs['Patient_ID_cores_combined'], .obs[obs_key] and
        .obsm['spatial'] (at least two coordinate columns).
    obs_key : str
        Numeric .obs column used to colour the cells.
    core_id : str
        Value matched against 'Patient_ID_cores_combined'.
    save_plot : bool
        If True, write the figure as a PDF below the hard-coded output folder.
    fig_width, fig_height : float
        Figure size in inches.
    point_size : float
        Marker size passed to the scatter call.
    cmap : str
        Matplotlib colormap name.
    robust : bool
        Controls how a missing colour limit is derived: 1st/99th percentile
        if True, min/max otherwise (NaN values are ignored).
    vmin, vmax : float or None
        Colour-scale limits. Each limit that is given is used as is; only a
        limit left as None is derived from the data.
    flip_y : bool
        If True, invert the y axis.

    Raises
    ------
    KeyError
        If a required .obs column or .obsm['spatial'] is missing.
    ValueError
        If no cell belongs to `core_id` or the coordinates are malformed.
    """
    # --- Basic checks ---
    if 'Patient_ID_cores_combined' not in A.obs.columns:
        raise KeyError("Missing 'Patient_ID_cores_combined' in .obs")
    if obs_key not in A.obs.columns:
        raise KeyError(f"Missing '{obs_key}' in .obs")
    if 'spatial' not in A.obsm:
        raise KeyError("Missing 'spatial' in .obsm")

    # Define the path where the plot will be saved
    save_path = '/path/Cores_NoPlotTitle/'

    # Ensure that directory exists if saving the plot
    if save_plot:
        os.makedirs(save_path, exist_ok=True)

    # --- Subset to the requested core ---
    core_view = A[A.obs['Patient_ID_cores_combined'] == core_id].copy()
    if core_view.n_obs == 0:
        raise ValueError(f"No cells found for core_id '{core_id}'.")

    coords = core_view.obsm['spatial']
    if coords.shape[0] != core_view.n_obs or coords.shape[1] < 2:
        raise ValueError("Invalid 'spatial' coordinates in .obsm['spatial'].")

    values = core_view.obs[obs_key].astype(float).to_numpy()

    # --- Color scale ---
    # Derive limits from the data only for the bound(s) the caller left open,
    # so that passing just vmin (or just vmax) is honoured.
    if vmin is None or vmax is None:
        if robust:
            auto_lo, auto_hi = np.nanpercentile(values, [1, 99])
        else:
            auto_lo, auto_hi = np.nanmin(values), np.nanmax(values)
        vmin = auto_lo if vmin is None else vmin
        vmax = auto_hi if vmax is None else vmax
    vmin, vmax = float(vmin), float(vmax)

    # --- Plot ---
    fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=300)
    # named `points` (not `sc`) so the scanpy alias is not shadowed
    points = ax.scatter(
        coords[:, 0], coords[:, 1],
        c=values,
        s=point_size,
        cmap=cmap,
        vmin=vmin, vmax=vmax,
        edgecolor='none',
        alpha=0.9,
    )
    cbar = plt.colorbar(points, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label(obs_key, rotation=270, labelpad=20, fontsize=14)
    cbar.ax.tick_params(labelsize=12)

    # No title is drawn (the output names carry a '_NoPlotTitle' suffix)
    ax.set_xlabel("spatial1")
    ax.set_ylabel("spatial2")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect('equal', adjustable='box')

    if flip_y:
        ax.invert_yaxis()

    plt.tight_layout()

    # Save plot to file if save_plot is True
    if save_plot:
        file_name = f"SpatialScatterPlot_XeniumIBD290Sept_core{core_id}_genes{obs_key}_NoPlotTitle.pdf"
        full_path = os.path.join(save_path, file_name)
        plt.savefig(full_path, format='pdf', bbox_inches='tight', dpi=300)

    plt.show()


HS46_VDZ_LC_PRE_1_Mayo3_non-responder

In [None]:
# MeanExp_NR_GALT on UCPRE_NR core HS46, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_NR_GALT',
    core_id='HS46_VDZ_LC_PRE_1_Mayo3_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_NR_IAF on UCPRE_NR core HS46, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_NR_IAF',
    core_id='HS46_VDZ_LC_PRE_1_Mayo3_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_R_IEC on UCPRE_NR core HS46, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_R_IEC',
    core_id='HS46_VDZ_LC_PRE_1_Mayo3_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

HS50_VDZ_RS_PRE_1_Mayo2_non-responder

In [None]:
# MeanExp_NR_GALT on UCPRE_NR core HS50, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_NR_GALT',
    core_id='HS50_VDZ_RS_PRE_1_Mayo2_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_NR_IAF on UCPRE_NR core HS50, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_NR_IAF',
    core_id='HS50_VDZ_RS_PRE_1_Mayo2_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_R_IEC on UCPRE_NR core HS50, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_NR,
    obs_key='MeanExp_R_IEC',
    core_id='HS50_VDZ_RS_PRE_1_Mayo2_non-responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

HS34_VDZ_C_PRE_1_Mayo2_responder

In [None]:
# MeanExp_NR_GALT on UCPRE_R core HS34, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_R,
    obs_key='MeanExp_NR_GALT',
    core_id='HS34_VDZ_C_PRE_1_Mayo2_responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_NR_IAF on UCPRE_R core HS34, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_R,
    obs_key='MeanExp_NR_IAF',
    core_id='HS34_VDZ_C_PRE_1_Mayo2_responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'

In [None]:
# MeanExp_R_IEC on UCPRE_R core HS34, fixed 0-0.7 colour scale
plot_spatial_core_scalar(
    adata_UCPRE_R,
    obs_key='MeanExp_R_IEC',
    core_id='HS34_VDZ_C_PRE_1_Mayo2_responder',
    save_plot=False,  # switch to True to export the PDF
    point_size=4,
    cmap='viridis',
    robust=True,  # has no effect here: vmin and vmax are both given
    vmin=0,
    vmax=0.7,
)



# Options: 'MeanExp_R_IEC', 'MeanExp_NR_IAF', 'MeanExp_NR_GALT', 'MeanExp_R_LeadingEdge', 'MeanExp_NR_LeadingEdge'