#### This particular notebook includes 2 different neighborhood enrichment analyses:
* All cores combined
* Split by core

This notebook uses Xenium Dataset 1 (combined reps 1 and 2).

#### Required input files:
* Annotated cell-based data object

Environment: Please create and activate the conda environment provided in `default_env.yaml` before running this notebook.

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import squidpy as sq

import gzip
import anndata

import os

# All cores combined analysis

In [None]:
# Location of the annotated cell-based data object listed under "Required input files"
xenium_h5ad_path = '/path/25_11_22_Xenium_Dataset1_290_IntReps1and2_Annotated.h5ad'

# Read the object from disk
Int_XeniumData = sc.read_h5ad(xenium_h5ad_path)

# Display the object summary
Int_XeniumData

In [None]:
# Display the per-cell metadata table (.obs) of the loaded object
Int_XeniumData.obs

In [None]:
# UMAP of all cells, colored by the fine annotation labels
sc.pl.umap(Int_XeniumData, color=["24_05_29_Fine_annotations_Xenium_combined"], wspace=0.4)

### Create dendrogram for fine annotations

In [None]:
# Overwrite the stored log1p base with None before the marker-gene ranking below.
# NOTE(review): presumably a workaround for scanpy looking up uns['log1p']['base']
# on an object reloaded from .h5ad -- confirm against the scanpy version in use.
Int_XeniumData.uns['log1p']["base"] = None

In [None]:
# Fine annotations: rank genes per annotation group with the Wilcoxon method
# on the 'log_normalized_counts' layer, then plot the top 25 genes per group
sc.tl.rank_genes_groups(
    Int_XeniumData,
    groupby='24_05_29_Fine_annotations_Xenium_combined',
    layer='log_normalized_counts',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(Int_XeniumData, sharey=False, n_genes=25)

In [None]:
## Compute dendrogram
# Compute hierarchical clustering using PCs (several distance metrics and
# linkage methods are available)

# Fine annotations: compute the dendrogram over the annotation groups ...
sc.tl.dendrogram(Int_XeniumData, groupby='24_05_29_Fine_annotations_Xenium_combined')

# ... and draw it
ax = sc.pl.dendrogram(Int_XeniumData, groupby='24_05_29_Fine_annotations_Xenium_combined')

In [None]:
# Check that the dendrogram and the annotation colors were saved on the object:
# display the object summary and look for the corresponding .uns entries

Int_XeniumData

### Spatial Statistics for Fine Annotations

#### Xenium HC

In [None]:
## HC data

# Boolean mask for the cells whose '24_01_17_Condition' is 'HC'
condition_mask_HC = Int_XeniumData.obs['24_01_17_Condition'] == 'HC'

# Filter the anndata object using the mask.
# Take an explicit copy rather than a view: the spatial-neighbors and
# neighborhood-enrichment steps in the following cells store their results on
# this object, and writing into a view makes anndata copy it implicitly (with
# a warning). The per-core loop later in the notebook copies its subsets the
# same way.
XeniumHCdata = Int_XeniumData[condition_mask_HC, :].copy()

# View
XeniumHCdata

# 163,895 obs

In [None]:
## Build the spatial neighbors graph for the HC cells

# Chosen approach: Delaunay triangulation (delaunay=True) on generic coordinates
sq.gr.spatial_neighbors(XeniumHCdata, coord_type="generic", delaunay=True)

In [None]:
# Neighborhood enrichment between the fine annotation clusters (fixed seed)
sq.gr.nhood_enrichment(
    XeniumHCdata,
    cluster_key="24_05_29_Fine_annotations_Xenium_combined",
    seed=4,
)

# Plot the enrichment result
sq.pl.nhood_enrichment(
    XeniumHCdata,
    cluster_key="24_05_29_Fine_annotations_Xenium_combined",
    figsize=(5, 5),
    cmap="inferno",
)

In [None]:
# View the HC object summary after the spatial graph and enrichment steps
XeniumHCdata

In [None]:
# Annotation labels, in categorical order, used to label both axes of the matrix
factor_column = XeniumHCdata.obs["24_05_29_Fine_annotations_Xenium_combined"].astype('category')
levels = row_names = col_names = factor_column.cat.categories

# Wrap the stored z-score matrix in a labelled DataFrame
hc_zscore_matrix = XeniumHCdata.uns["24_05_29_Fine_annotations_Xenium_combined_nhood_enrichment"]["zscore"]
XeniumHCdata_nhoodenrich_zscore_df = pd.DataFrame(
    hc_zscore_matrix,
    index=row_names,
    columns=col_names,
)

XeniumHCdata_nhoodenrich_zscore_df

In [None]:
# Display the raw neighborhood-enrichment entry stored in .uns for the HC object
XeniumHCdata.uns["24_05_29_Fine_annotations_Xenium_combined_nhood_enrichment"]

#### Xenium PREVDZ

In [None]:
## PRE_VDZ data

# Boolean mask for the cells whose '24_01_17_Condition' is either 'PRE_VDZ_R' or 'PRE_VDZ_NR'
condition_mask_PREVDZ = Int_XeniumData.obs['24_01_17_Condition'].isin(['PRE_VDZ_R', 'PRE_VDZ_NR'])

# Filter the anndata object using the mask.
# Take an explicit copy rather than a view: the spatial-neighbors and
# neighborhood-enrichment steps in the following cells store their results on
# this object, and writing into a view makes anndata copy it implicitly (with
# a warning). The per-core loop later in the notebook copies its subsets the
# same way.
XeniumPREVDZdata = Int_XeniumData[condition_mask_PREVDZ, :].copy()

# View
XeniumPREVDZdata

# 237,423

In [None]:
## Build the spatial neighbors graph for the PREVDZ cells

# Chosen approach: Delaunay triangulation (delaunay=True) on generic coordinates
sq.gr.spatial_neighbors(XeniumPREVDZdata, coord_type="generic", delaunay=True)

In [None]:
# Neighborhood enrichment between the fine annotation clusters (fixed seed)
sq.gr.nhood_enrichment(
    XeniumPREVDZdata,
    cluster_key="24_05_29_Fine_annotations_Xenium_combined",
    seed=4,
)

# Plot the enrichment result
sq.pl.nhood_enrichment(
    XeniumPREVDZdata,
    cluster_key="24_05_29_Fine_annotations_Xenium_combined",
    figsize=(5, 5),
    cmap="inferno",
)

In [None]:
# View the PREVDZ object summary after the spatial graph and enrichment steps
XeniumPREVDZdata

In [None]:
# Annotation labels, in categorical order, used to label both axes of the matrix
factor_column = XeniumPREVDZdata.obs["24_05_29_Fine_annotations_Xenium_combined"].astype('category')
levels = row_names = col_names = factor_column.cat.categories

# Wrap the stored z-score matrix in a labelled DataFrame
prevdz_zscore_matrix = XeniumPREVDZdata.uns["24_05_29_Fine_annotations_Xenium_combined_nhood_enrichment"]["zscore"]
XeniumPREVDZdata_nhoodenrich_zscore_df = pd.DataFrame(
    prevdz_zscore_matrix,
    index=row_names,
    columns=col_names,
)

XeniumPREVDZdata_nhoodenrich_zscore_df

### Calculations to inform max/min axis values

In [None]:
# Count how many z-scores exceed 200 in each matrix

# Boolean frames marking the entries above the threshold
hc_above_threshold = XeniumHCdata_nhoodenrich_zscore_df > 200
prevdz_above_threshold = XeniumPREVDZdata_nhoodenrich_zscore_df > 200

# Sum over columns, then over the resulting per-column counts
HC_count = hc_above_threshold.sum().sum()
PREVDZ_count = prevdz_above_threshold.sum().sum()

print(HC_count)

print(PREVDZ_count)

In [None]:
## HC
# Boolean frame marking the z-scores greater than 200
HC_greater_than_200 = XeniumHCdata_nhoodenrich_zscore_df > 200

# Walk the frame row by row and report every flagged (row, column) pair
for index in HC_greater_than_200.index:
    for column in HC_greater_than_200.columns:
        if not HC_greater_than_200.at[index, column]:
            continue
        print(f"Row: {index}, Column: {column}, Value: {XeniumHCdata_nhoodenrich_zscore_df.at[index,column]}")

In [None]:
## PREVDZ
# Boolean frame marking the z-scores greater than 200
PREVDZ_greater_than_200 = XeniumPREVDZdata_nhoodenrich_zscore_df > 200

# Walk the frame row by row and report every flagged (row, column) pair
for index in PREVDZ_greater_than_200.index:
    for column in PREVDZ_greater_than_200.columns:
        if not PREVDZ_greater_than_200.at[index, column]:
            continue
        print(f"Row: {index}, Column: {column}, Value: {XeniumPREVDZdata_nhoodenrich_zscore_df.at[index,column]}")

### Output heatmaps

In [None]:
## XeniumHC

# Dendrogram entry stored in .uns for the fine annotations
hc_dendrogram = XeniumHCdata.uns['dendrogram_24_05_29_Fine_annotations_Xenium_combined']

# Positional leaves order (still defined because the PREVDZ heatmap cell reads it)
Xeniumdata_leavesorder = hc_dendrogram['categories_idx_ordered']

# Order rows/columns by dendrogram *labels* instead of positions: the positions
# index the category list the dendrogram was computed on, so they would select
# the wrong rows (or raise) if this subset's z-score table has fewer categories.
# Labels missing from the table are skipped.
hc_ordered_labels = [
    label for label in hc_dendrogram['categories_ordered']
    if label in XeniumHCdata_nhoodenrich_zscore_df.index
]
XeniumHCdata_nhoodenrich_zscore_df_ordered = XeniumHCdata_nhoodenrich_zscore_df.loc[hc_ordered_labels, hc_ordered_labels]

# Mask the strict upper triangle (k=1 keeps the diagonal visible)
mask = np.triu(np.ones_like(XeniumHCdata_nhoodenrich_zscore_df_ordered, dtype=bool), k=1)

# Color scale limits and colorbar ticks, defined once and reused below
hc_vmin, hc_vmax = -20, 50
hc_colorbar_ticks = [-20, -10, 0, 10, 20, 30, 40, 50]

# Create heatmap
plt.figure(figsize=(10, 8))
ax = sns.heatmap(
    XeniumHCdata_nhoodenrich_zscore_df_ordered,
    mask=mask,
    annot=False,
    cmap='coolwarm',
    vmin=hc_vmin,
    vmax=hc_vmax,
    cbar_kws={'shrink': 0.5, 'label': 'Neighborhood enrichment z-score', 'ticks': hc_colorbar_ticks},
    # fmt=".1f" # Show 1 decimal place
)

plt.title('Xenium HC Data: Neighborhood Enrichment Z-Score', fontsize=13)

# Customize colorbar
cbar = ax.collections[0].colorbar
cbar.set_ticks(hc_colorbar_ticks)  # Ensure these ticks are set explicitly
cbar.set_ticklabels([str(tick) for tick in hc_colorbar_ticks])
cbar.ax.tick_params(labelsize=12)
cbar.set_label('Neighborhood enrichment z-score', rotation=270, labelpad=15, fontsize=12)

# Save figure
# plt.savefig('/path/NhoodEnrichmentZHeatmap_IntXeniumHC_FineAnnotations_20-50.pdf', bbox_inches='tight')

plt.show()

In [None]:
## XeniumPREVDZ

# Dendrogram entry stored in .uns for the fine annotations (read from this
# object so the cell does not depend on variables set in the HC heatmap cell)
prevdz_dendrogram = XeniumPREVDZdata.uns['dendrogram_24_05_29_Fine_annotations_Xenium_combined']

# Order rows/columns by dendrogram *labels* instead of positions: the positions
# index the category list the dendrogram was computed on, so they would select
# the wrong rows (or raise) if this subset's z-score table has fewer categories.
# Labels missing from the table are skipped.
prevdz_ordered_labels = [
    label for label in prevdz_dendrogram['categories_ordered']
    if label in XeniumPREVDZdata_nhoodenrich_zscore_df.index
]
XeniumPREVDZdata_nhoodenrich_zscore_df_ordered = XeniumPREVDZdata_nhoodenrich_zscore_df.loc[prevdz_ordered_labels, prevdz_ordered_labels]

# Mask the strict upper triangle (k=1 keeps the diagonal visible)
mask = np.triu(np.ones_like(XeniumPREVDZdata_nhoodenrich_zscore_df_ordered, dtype=bool), k=1)

# Color scale limits and colorbar ticks, defined once and reused below
prevdz_vmin, prevdz_vmax = -20, 50
prevdz_colorbar_ticks = [-20, -10, 0, 10, 20, 30, 40, 50]

# Create heatmap
plt.figure(figsize=(10, 8))
ax = sns.heatmap(
    XeniumPREVDZdata_nhoodenrich_zscore_df_ordered,
    mask=mask,
    annot=False,
    cmap='coolwarm',
    vmin=prevdz_vmin,
    vmax=prevdz_vmax,
    cbar_kws={'shrink': 0.5, 'label': 'Neighborhood enrichment z-score', 'ticks': prevdz_colorbar_ticks},
    # fmt=".1f" # Show 1 decimal place
)

# Same title font size as the HC heatmap so the two figures match
plt.title('Xenium PREVDZ Data: Neighborhood Enrichment Z-Score', fontsize=13)

# Customize colorbar
cbar = ax.collections[0].colorbar
cbar.set_ticks(prevdz_colorbar_ticks)  # Ensure these ticks are set explicitly
cbar.set_ticklabels([str(tick) for tick in prevdz_colorbar_ticks])
cbar.ax.tick_params(labelsize=12)
cbar.set_label('Neighborhood enrichment z-score', rotation=270, labelpad=15, fontsize=12)

# Save figure
# plt.savefig('/path/NhoodEnrichmentZHeatmap_IntXeniumPREVDZ_FineAnnotations_20-50.pdf', bbox_inches='tight')

plt.show()

### Create Z-Score Quantification Plots

In [None]:
def plot_enrichment_scores(XeniumHCdata, enrichment_scores_df, cell_type, condition_label='HC'):
    """Bar-plot the enrichment z-scores of one cell type against every other cell type.

    Parameters
    ----------
    XeniumHCdata
        Object exposing ``.obs['24_05_29_Fine_annotations_Xenium_combined']``
        and ``.uns['24_05_29_Fine_annotations_Xenium_combined_colors']``.
    enrichment_scores_df : pandas.DataFrame
        Z-score table; the row of ``cell_type`` is selected by position, so
        rows/columns are expected to follow the annotation label order
        described below.
    cell_type : str
        Annotation label whose row is plotted. Its own (self-comparison)
        entry is left out of the plot.
    condition_label : str, default 'HC'
        Text inserted into the plot title.

    Raises
    ------
    ValueError
        If ``cell_type`` is not one of the annotation labels.
    """
    annotation_key = '24_05_29_Fine_annotations_Xenium_combined'
    annotations = XeniumHCdata.obs[annotation_key]

    # Use the categorical order when available so the labels line up with the
    # z-score table built from `.cat.categories` and with the stored colors;
    # fall back to the sorted unique labels for non-categorical columns.
    categorical_accessor = getattr(annotations, 'cat', None)
    if categorical_accessor is not None:
        cluster_labels = list(categorical_accessor.categories)
    else:
        cluster_labels = sorted(annotations.unique())

    if cell_type not in cluster_labels:
        raise ValueError(f"{cell_type!r} is not one of the annotation labels: {cluster_labels}")
    cell_index = cluster_labels.index(cell_type)

    # Isolate the z-scores of the requested cell type
    cell_scores = enrichment_scores_df.iloc[cell_index, :]
    cluster_colors = XeniumHCdata.uns[annotation_key + '_colors']

    # Pair scores with labels and colors, dropping the self-comparison of the
    # *requested* cell type (not a hard-coded label)
    neighbor_entries = [
        (score, label, color)
        for score, label, color in zip(cell_scores, cluster_labels, cluster_colors)
        if label != cell_type
    ]

    # Highest z-score first
    neighbor_entries.sort(key=lambda entry: entry[0], reverse=True)
    sorted_scores, sorted_labels, sorted_colors = zip(*neighbor_entries)

    # Plot the data
    bar_positions = np.arange(len(sorted_scores))
    plt.figure(figsize=(10, 6))
    plt.bar(bar_positions, sorted_scores, color=sorted_colors, align='center')
    plt.xticks(bar_positions, sorted_labels, rotation='vertical')
    plt.xlabel('Clusters')
    plt.ylabel('z-score')
    plt.ylim(-20, 50)
    plt.title(f'Xenium {condition_label}: Enrichment of {cell_type} cells to other cells (Fine Annotations)')
    plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels
  #  plt.savefig('/path/BarPlot_Xenium_HC_Fine_FibIAFEnrichScoreVsOtherPlot_5020.pdf', bbox_inches='tight')
    plt.show()

# Pass the data object together with the z-score DataFrame computed from that same object.
plot_enrichment_scores(XeniumHCdata, XeniumHCdata_nhoodenrich_zscore_df, 'Fibroblast_IAF')


In [None]:
def plot_enrichment_scores(XeniumPREVDZdata, enrichment_scores_df, cell_type, condition_label='PREVDZ'):
    """Bar-plot the enrichment z-scores of one cell type against every other cell type.

    Parameters
    ----------
    XeniumPREVDZdata
        Object exposing ``.obs['24_05_29_Fine_annotations_Xenium_combined']``
        and ``.uns['24_05_29_Fine_annotations_Xenium_combined_colors']``.
    enrichment_scores_df : pandas.DataFrame
        Z-score table; the row of ``cell_type`` is selected by position, so
        rows/columns are expected to follow the annotation label order
        described below.
    cell_type : str
        Annotation label whose row is plotted. Its own (self-comparison)
        entry is left out of the plot.
    condition_label : str, default 'PREVDZ'
        Text inserted into the plot title.

    Raises
    ------
    ValueError
        If ``cell_type`` is not one of the annotation labels.
    """
    annotation_key = '24_05_29_Fine_annotations_Xenium_combined'
    annotations = XeniumPREVDZdata.obs[annotation_key]

    # Use the categorical order when available so the labels line up with the
    # z-score table built from `.cat.categories` and with the stored colors;
    # fall back to the sorted unique labels for non-categorical columns.
    categorical_accessor = getattr(annotations, 'cat', None)
    if categorical_accessor is not None:
        cluster_labels = list(categorical_accessor.categories)
    else:
        cluster_labels = sorted(annotations.unique())

    if cell_type not in cluster_labels:
        raise ValueError(f"{cell_type!r} is not one of the annotation labels: {cluster_labels}")
    cell_index = cluster_labels.index(cell_type)

    # Isolate the z-scores of the requested cell type
    cell_scores = enrichment_scores_df.iloc[cell_index, :]
    cluster_colors = XeniumPREVDZdata.uns[annotation_key + '_colors']

    # Pair scores with labels and colors, dropping the self-comparison of the
    # *requested* cell type (not a hard-coded label)
    neighbor_entries = [
        (score, label, color)
        for score, label, color in zip(cell_scores, cluster_labels, cluster_colors)
        if label != cell_type
    ]

    # Highest z-score first
    neighbor_entries.sort(key=lambda entry: entry[0], reverse=True)
    sorted_scores, sorted_labels, sorted_colors = zip(*neighbor_entries)

    # Plot the data
    bar_positions = np.arange(len(sorted_scores))
    plt.figure(figsize=(10, 6))
    plt.bar(bar_positions, sorted_scores, color=sorted_colors, align='center')
    plt.xticks(bar_positions, sorted_labels, rotation='vertical')
    plt.xlabel('Clusters')
    plt.ylabel('z-score')
    plt.ylim(-20, 50)
    plt.title(f'Xenium {condition_label}: Enrichment of {cell_type} cells to other cells (Fine Annotations)')
    plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels
  #  plt.savefig('/path/BarPlot_Xenium_PREVDZ_Fine_FibIAFEnrichScoreVsOtherPlot_5020.pdf', bbox_inches='tight')
    plt.show()

# Pass the data object together with the z-score DataFrame computed from that same object.
plot_enrichment_scores(XeniumPREVDZdata, XeniumPREVDZdata_nhoodenrich_zscore_df, 'Fibroblast_IAF')


# Split by core analysis

In [None]:
# Load Int_XeniumData here if it is not already in memory
# (it is read from the .h5ad file at the top of the notebook)

# View the object summary
Int_XeniumData

In [None]:
# Get unique values from "24_01_17_HS" column
unique_24_01_17_HS = Int_XeniumData.obs['24_01_17_HS'].unique()

print("Xenium")

# Iterate over unique values and print corresponding unique core values
for value in unique_24_01_17_HS:
    # Filter df for rows where "24_01_17_HS" equals the current unique value
    filtered_df = Int_XeniumData.obs[Int_XeniumData.obs['24_01_17_HS'] == value]
    # Get unique values from "Patient_ID_cores_combined" column for the filtered DataFrame
    unique_cores = filtered_df['Patient_ID_cores_combined'].unique()
    # Format the cores for printing: join them with ', '.
    # Convert each core ID to str first -- str.join raises TypeError on
    # non-string items, and the mapping cell below also wraps cores in str().
    formatted_cores = ', '.join(map(str, unique_cores))
    # Print the result in a more readable format
    print(f"HS: {value}, Corresponding cores: {formatted_cores}")

In [None]:
# Create the Xenium_mapping -- Will be used later to average z-scores from cores to corresponding HS

# Walk the per-cell (core, HS) pairs; cells whose HS is 'unassigned' are skipped.
# Keys are the core IDs as strings; a later cell of the same core overwrites
# the earlier entry.
Xenium_mapping = {}
core_hs_pairs = zip(Int_XeniumData.obs['Patient_ID_cores_combined'], Int_XeniumData.obs['24_01_17_HS'])
for core_id, hs_label in core_hs_pairs:
    if hs_label != 'unassigned':
        Xenium_mapping[str(core_id)] = hs_label

### Calculate neighborhood enrichment for Xenium Fibroblast_IAF vs MNP_monocyte

In [None]:
## HC and PRE_VDZ data

# Conditions kept for the split-by-core analysis
hc_and_prevdz_conditions = ['HC', 'PRE_VDZ_R', 'PRE_VDZ_NR']

# Boolean mask for the cells whose '24_01_17_Condition' is one of those values
condition_mask_HCPREVDZ = Int_XeniumData.obs['24_01_17_Condition'].isin(hc_and_prevdz_conditions)

# Filter the anndata object using the mask
Xenium_HCandPREVDZdata = Int_XeniumData[condition_mask_HCPREVDZ, :]

# View
Xenium_HCandPREVDZdata

# 401,318

In [None]:
### Xenium Fibroblast_IAF and MNP_monocyte

## Set up cores list

core_key = 'Patient_ID_cores_combined'
annotation_key = '24_05_29_Fine_annotations_Xenium_combined'
condition_key = '24_01_17_Condition'

# Both of these cell types must be present in a core for it to qualify
required_celltypes = {'Fibroblast_IAF', 'MNP_monocyte'}

## Identify cores that contain values from both cell types of interest
# Step 1: Group by core and collect the *set* of annotation labels found in it.
# Exact set membership is used instead of a substring search in a joined
# string, which would also match any label that merely contains one of the
# names and builds one very long string per core.
cores_aggregated = (
    Xenium_HCandPREVDZdata.obs
    .groupby(core_key, observed=True)[annotation_key]
    .apply(set)
    .reset_index()
)

# Step 2: Identify cores containing both "Fibroblast_IAF" and "MNP_monocyte"
has_required_celltypes = cores_aggregated[annotation_key].apply(required_celltypes.issubset).astype(bool)
qualifying_cores = cores_aggregated[has_required_celltypes]

# Step 3: Extract the list of qualifying cores
list_of_qualifying_cores = sorted(set(qualifying_cores[core_key]))

# Print list of qualifying cores
print(list_of_qualifying_cores)

# Calculate number of qualifying cores
num_qualifying_cores = len(list_of_qualifying_cores)

# Calculate number of total cores
num_total_cores = len(Xenium_HCandPREVDZdata.obs[core_key].unique())

# Print
print("")
print(f"{num_qualifying_cores} out of {num_total_cores} cores")


## Quantify the number of cells included in the analysis (aka in all of the qualifying cores)

# Keep only the cells that belong to one of the qualifying cores
qualifying_cores_filtered_cells = Xenium_HCandPREVDZdata.obs[Xenium_HCandPREVDZdata.obs[core_key].isin(list_of_qualifying_cores)]

# Now, further filter for condition == 'HC'
cells_in_HC = qualifying_cores_filtered_cells[qualifying_cores_filtered_cells[condition_key] == 'HC']

# And, filter for condition starting with 'PRE'
cells_in_PRE = qualifying_cores_filtered_cells[qualifying_cores_filtered_cells[condition_key].str.startswith('PRE')]

# Calculate the number of cells for total and each condition
num_cells_total = len(qualifying_cores_filtered_cells)
num_cells_in_HC = len(cells_in_HC)
num_cells_in_PRE = len(cells_in_PRE)

# Print the results
print("")
print(f"Number of cells in qualifying cores (total): {num_cells_total}")
print(f"Number of cells in qualifying cores (HC condition): {num_cells_in_HC}")
print(f"Number of cells in qualifying cores (PREVDZ condition): {num_cells_in_PRE}")


In [None]:
## Set up inputs
technology = 'Xenium'
celltype_1 = 'Fibroblast_IAF'
celltype_2 = 'MNP_monocyte'
per_core_cluster_key = '24_05_29_Fine_annotations_Xenium_combined'

# Only including cores with values for both cell subsets

# z-score of the (celltype_2, celltype_1) pair per core, keyed by column name.
# Collected in a dict and turned into a DataFrame once after the loop rather
# than inserting columns into a DataFrame one at a time.
zscore_by_core_column = {}

## Run loop
for core in list_of_qualifying_cores:
    try:
        # Split the object by core (explicit copy: squidpy writes into it below)
        fdata = Xenium_HCandPREVDZdata[Xenium_HCandPREVDZdata.obs['Patient_ID_cores_combined'] == core].copy()

        # Find the condition for the individual core. unique() keeps
        # first-occurrence order, so the pick is reproducible (a set is not).
        condition_list = list(fdata.obs['24_01_17_Condition'].unique())
        if not condition_list:
            print("No condition found for core:", core)
            continue
        if len(condition_list) > 1:
            print(f"Warning: core {core} has more than one condition {condition_list}; using {condition_list[0]}")
        condition = condition_list[0]
        col_name = technology + condition + '_core_' + core + '_comparing_' + celltype_1 + '_vs_' + celltype_2
        print(col_name)

        # Calculate the neighborhood graph and the enrichment z-scores
        sq.gr.spatial_neighbors(fdata, coord_type='generic', delaunay=True)
        sq.gr.nhood_enrichment(fdata, cluster_key=per_core_cluster_key, seed=4)
        nbhd_enrichment = fdata.uns[per_core_cluster_key + '_nhood_enrichment']['zscore']
        # NOTE(review): if 'spatial_distances' is stored sparse, .min() presumably
        # also counts the implicit zeros -- confirm before interpreting the minimum.
        print(f"Spatial distances min and max: {fdata.obsp['spatial_distances'].min()}, {fdata.obsp['spatial_distances'].max()}\n")

        # Build a labelled data frame. Use the categorical order of the
        # annotation column (as done for the combined-core z-score tables)
        # rather than a re-sorted set of labels, which could mislabel rows if
        # the category order is not alphabetical.
        cluster_labels = fdata.obs[per_core_cluster_key].astype('category').cat.categories
        df = pd.DataFrame(nbhd_enrichment, index=cluster_labels, columns=cluster_labels)

        # Pull out value of interest (intersection of the cell type pair of interest)
        value_of_interest = df.loc[celltype_2, celltype_1]

        # Record the value for this core
        zscore_by_core_column[col_name] = value_of_interest

    except KeyError as e:
        print(f"KeyError encountered for core {core}: {e}")
        continue
    except Exception as e:
        # Best-effort loop: report the failing core and move on to the next one
        print(f"Unexpected error for core {core}: {e}")
        continue

# One row ('NhoodEnrichZScore'), one column per successfully processed core
Xenium_nhoodenrich_Fibroblast_IAFMNP_monocyte_df_total = pd.DataFrame(zscore_by_core_column, index=['NhoodEnrichZScore'])

In [None]:
# Sort by column name (so that they will be alphabetical and the HC and PREVDZ values will cluster).
# Column names start with the technology followed by the condition, so sorting
# along axis=1 groups the cores by condition.
Xenium_nhoodenrich_Fibroblast_IAFMNP_monocyte_df_total = Xenium_nhoodenrich_Fibroblast_IAFMNP_monocyte_df_total.sort_index(axis=1)

# View the sorted table
Xenium_nhoodenrich_Fibroblast_IAFMNP_monocyte_df_total

In [None]:
# Save as csv file

#Xenium_nhoodenrich_Fibroblast_IAFMNP_monocyte_df_total.to_csv('/path/XeniumInt_NeighborhoodEnrichScores_Fibroblast_IAF_MNP_monocyte_HCandPREVDZ.csv')