In [None]:
import sys
import os
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns; sns.set(color_codes=True)
import scimap.plotting as sm
from skimage import io
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D
import matplotlib.colors as mcolors
from matplotlib.colors import ListedColormap
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests
import anndata
# Location of the final annotated AnnData object.
# This is an absolute path on the original analysis machine; edit it here to run elsewhere.
ADATA_PATH = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Adata/Final_polished_NI/adata_final.h5ad"

# Load the AnnData object used by every figure cell below
adata = anndata.read_h5ad(ADATA_PATH)

# Display the per-cell annotation table (columns used below include 'imageid' and 'Lvl1')
adata.obs

In [None]:
#Figure 1D (IBEX healthy section)
# Render the Lvl1 annotation of sample H3 as a colour-coded segmentation mask.

# Sample to plot
sample_id = "H3"

# Load the segmentation mask for sample H3
# (`path_to_segmentation_path_H3` is a placeholder that must be defined by the user)
segmentation_mask = io.imread(path_to_segmentation_path_H3)

# Specify the crop coordinates (same as ROI of Extended Figure 1A)
x_min, x_max = 1054, 3556   # X range (columns)
y_min, y_max = 418, 2308   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Fallback colour for clusters without an explicit entry above
default_cluster_color = "#E5E5E5"  # Light grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl1'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl1']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(13, 17), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Save the plot as an SVG file. The name is sample-specific so that this cell can
# neither fail on an undefined `output_path` nor overwrite another figure; adjust as needed.
output_path = f"Figure1D_IBEX_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 1D (IBEX periodontitis section)
# Render the Lvl1 annotation of sample P1a as a colour-coded segmentation mask.

# Sample to plot
sample_id = "P1a"

# Load the segmentation mask for sample P1a
# (`path_to_segmentation_path_P1a` is a placeholder that must be defined by the user)
segmentation_mask = io.imread(path_to_segmentation_path_P1a)

# Specify the crop coordinates
x_min, x_max = 72, 1962   # X range (columns)
y_min, y_max = 288, 2790   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Fallback colour for clusters without an explicit entry above
default_cluster_color = "#E5E5E5"  # Light grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl1'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl1']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(13, 17), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Save the plot as an SVG file. The name is sample-specific so that this cell can
# neither fail on an undefined `output_path` nor overwrite another figure; adjust as needed.
output_path = f"Figure1D_IBEX_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 1D, IBEX dotplot - Lvl1 clusters
# Markers to display, in plotting order
correct_order = ['CK5', 'PanCK', 'CD138', 'Vimentin', 'CD45', 'CD3', 'aSMA', 'CD31']

# Lvl1 clusters retained for the dot plot
clusters_of_interest = ['Epithelial', 'Fibroblast', 'Immune', 'Vascular']

# Independent copy of the cells belonging to the retained clusters
adata_subset = adata[adata.obs['lvl1_spatial_cluster'].isin(clusters_of_interest)].copy()

# Build the dot plot in one chain: construct, swap the axes, then restyle.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='lvl1_spatial_cluster',
        standard_scale='var',  # scale per marker
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)  # markers and clusters exchange axes
    .style(
        cmap="Greys",           # greyscale colour map
        dot_edge_color=None,    # together with a zero edge width: no dot outlines
        dot_edge_lw=0,
        grid=False,
        dot_min=0.2,
        dot_max=0.8,
    )
)

# Render the figure in the notebook
dotplot.show()

# Write the figure to `path_to_output` (placeholder to be defined by the user)
# with a transparent background
dotplot.savefig(path_to_output, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
#Extended Figure 1B
# Stacked bar plot of Lvl1 composition (percent of cells) per ConditionID.

# List of SampleIDs to exclude based on the criteria presented in Table S1
excluded_sample_ids = ['H1', 'P4', 'P2b']

# Create a subset of the AnnData object excluding the specific SampleIDs
adata_subset = adata[~adata.obs['imageid'].isin(excluded_sample_ids)].copy()

# Define the specific order of the Lvl1 categories
anticipated_order = ['Other', 'Epithelial', 'Immune', 'Fibroblast', 'Vascular']

# Make Lvl1 categorical with the categories in the anticipated order
adata_subset.obs['Lvl1'] = (
    adata_subset.obs['Lvl1'].astype('category').cat.reorder_categories(anticipated_order)
)

# Sort the cells by Lvl1 by reordering the whole AnnData object. Assigning a sorted
# DataFrame back to `.obs` would shuffle the annotations while leaving X in its
# original row order, so obs and X would no longer describe the same cells.
sort_positions = np.argsort(adata_subset.obs['Lvl1'].cat.codes.to_numpy(), kind='stable')
adata_subset = adata_subset[sort_positions].copy()

# Define custom colors for each Lvl1 category
colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Plot the stacked bar plot with specified colors.
# No plt.figure() call beforehand: it would only leave an extra empty figure behind,
# assuming scimap plots through pandas (which opens its own figure) — confirm for your scimap version.
plot = sm.stacked_barplot(adata_subset, x_axis='ConditionID', y_axis='Lvl1',
                          order_yaxis=anticipated_order, method='percent', plot_tool='matplotlib', color=colors, figsize=(2, 6))

# Axes that the bar plot was drawn on
ax = plt.gca()

# Set the axes background color to white
ax.set_facecolor('white')

# Format y-axis ticks as percentages
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

# Get the x-axis limits
x_min, x_max = plt.xlim()

# Plot horizontal dashed lines at y = 0, 0.25, 0.5, 0.75 and 1
plt.hlines(y=[0, 0.25, 0.5, 0.75, 1], xmin=x_min, xmax=x_max, linestyle='--', colors='grey')

# Set y-axis tick labels to match the percentages
plt.yticks([0, 0.25, 0.5, 0.75, 1], ['0%', '25%', '50%', '75%', '100%'])

# Modify the borders
ax.spines['top'].set_visible(False)       # Make top border invisible
ax.spines['right'].set_visible(False)     # Make right border invisible
ax.spines['bottom'].set_color('black')    # Set bottom border color to black
ax.spines['left'].set_color('black')      # Set left border color to black

# Extend the lower y-limit slightly so the bars start a little above the bottom border
y_min, y_max = plt.ylim()
plt.ylim(y_min - 0.03, y_max)

# Remove the x-axis title
plt.xlabel('')

# Enlarge the tick labels and rotate the x-axis (ConditionID) labels diagonally
plt.xticks(fontsize=20, rotation=45, ha='right')
plt.yticks(fontsize=20)

# Custom legend handles as circles, sorted by anticipated_order in reverse
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[category], markersize=10)
                         for category in reversed(anticipated_order)]
# A list (not a one-shot iterator) so the labels survive being read more than once
sorted_legend_labels = list(reversed(anticipated_order))

# Add legend with sorted custom handles and labels outside the plot
legend = plt.legend(sorted_legend_handles, sorted_legend_labels, loc='upper left', bbox_to_anchor=(1, 1), markerscale=1, frameon=False, borderpad=2)

# Save the plot (disabled; `path_to_output_dir` is a placeholder)
#plt.savefig(path_to_output_dir)

# Display the plot
plt.show()

In [None]:
#Extended Figure 1C
# Stacked bar plot of cells_per_mm2 per patient, split by annotation.

# Load the CSV file into a pandas DataFrame
# (`path_to_total_cells_per_area_per_patient` is a placeholder to be defined by the user)
df1 = pd.read_csv(path_to_total_cells_per_area_per_patient)

# Define the specific order of the annotation categories and corresponding colors
anticipated_order = ['Other', 'Epithelial', 'Immune', 'Fibroblast', 'Vascular']
colors = {
    'Other': '#e5e5e5',
    'Epithelial': '#FFEA00',
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Vascular': '#990F0FFF',
}

# Manually specify the order of patients
manual_order = ['H2','H3','H4','H5','H6','H7','H10','H11',
                'P1', 'P2', 'P3', 'P5', 'P6', 'P7', 'P8', 'P9']

# One row per patient, one column per annotation, values = summed cells_per_mm2.
# The pivot must read from `df1` (the frame loaded above); the name `df` is not
# defined in this cell.
df_pivot = df1.pivot_table(index='patient', columns='annotation', values='cells_per_mm2', aggfunc='sum')

# Reorder the columns (clusters) and rows (patients)
df_pivot = df_pivot[anticipated_order].loc[manual_order]

# Plot the stacked bar plot with adjusted bar width. pandas opens its own figure
# (sized by `figsize`), so no separate plt.figure() call is made.
ax = df_pivot.plot(kind='bar', stacked=True, figsize=(20, 10),
                   color=[colors[cluster] for cluster in anticipated_order], width=0.9)

# Format the plot
ax.set_facecolor('white')
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{int(x)}'))  # Show y ticks as integers
ax.spines['top'].set_visible(False)  # Remove top border
ax.spines['right'].set_visible(False)  # Remove right border
ax.spines['bottom'].set_color('black')  # Set bottom border to black
ax.spines['left'].set_color('black')  # Set left border to black

plt.xlabel('Patient ID')
# NOTE(review): the plotted values come from the 'cells_per_mm2' column — confirm this label
plt.ylabel('Total Cells')
plt.xticks(fontsize=20, rotation=45, ha='right')  # Adjust font size and rotation
plt.yticks(fontsize=20)

# Custom legend handles and labels
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[label], markersize=10)
                         for label in anticipated_order]
plt.legend(sorted_legend_handles, anticipated_order, loc='upper left', bbox_to_anchor=(1, 1), frameon=False)

# Adjust layout and save the plot
plt.tight_layout()
plt.savefig('path_to_output_dir.pdf', format='pdf')
plt.show()

In [None]:
#Figure 2A - Healthy Oral Epithelium ROI
# Render the Lvl2.5 annotation of one ROI of sample H2a as a colour-coded segmentation mask.

# Sample to plot
sample_id = "H2a"

# Load the segmentation mask ('path_to_segmentation_mask' is a placeholder path)
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 936, 1552  # X range (columns)
y_min, y_max = 716, 1332  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Ep.or.k': "#A7C7E7",
    'Ep.or.sp': "#5D3FD3",
    'Ep.or.b-pb': "#0096FF",
    'mAPC': "#FFBF00"
}

# Colour for every cluster that is not highlighted above
default_cluster_color = "#36454F"  # Dark grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl2.5']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(12, 12), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Output location (saving is disabled).
# NOTE: the path depends only on the sample ID, so every ROI of the same sample
# would write to the same file; make it ROI-specific before enabling the save.
output_path = f"/path_to_output_dir_{sample_id}.svg"
#fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Healthy Tooth-Associated Epithelium ROI
# Render the Lvl2.5 annotation of one ROI of sample H2a as a colour-coded segmentation mask.

# Sample to plot
sample_id = "H2a"

# Load the segmentation mask ('path_to_segmentation_mask' is a placeholder path)
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 2112, 2728  # X range (columns)
y_min, y_max = 220, 836  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Ep.TA': "#702963",
    'Neut': "#32CD32",
    'mAPC': "#FFBF00"
}

# Colour for every cluster that is not highlighted above
default_cluster_color = "#36454F"  # Dark grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl2.5']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(12, 12), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Output location (saving is disabled).
# NOTE: the path depends only on the sample ID, so every ROI of the same sample
# would write to the same file; make it ROI-specific before enabling the save.
output_path = f"/path_to_output_dir_{sample_id}.svg"
#fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Healthy Lymphoid aggregates ROI
# Render the Lvl2.5 annotation of one ROI of sample H2a as a colour-coded segmentation mask.

# Sample to plot
sample_id = "H2a"

# Load the segmentation mask ('path_to_segmentation_mask' is a placeholder path)
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 2780, 3396  # X range (columns)
y_min, y_max = 1252, 1868  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'T': "#6495ED",
    'B': "#966919",
    'mAPC': "#FFBF00",
    'Plasma' : '#E5E4E2',
    'Lymph.mix' : '#7DF9FF'
}

# Colour for every cluster that is not highlighted above
default_cluster_color = "#36454F"  # Dark grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl2.5']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(12, 12), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Output location (saving is disabled).
# NOTE: the path depends only on the sample ID, so every ROI of the same sample
# would write to the same file; make it ROI-specific before enabling the save.
output_path = f"/path_to_output_dir_{sample_id}.svg"
#fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Healthy Deep CT ROI
# Render the Lvl2.5 annotation of one ROI of sample H2a as a colour-coded segmentation mask.

# Sample to plot
sample_id = "H2a"

# Load the segmentation mask ('path_to_segmentation_mask' is a placeholder path)
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 1860, 2476  # X range (columns)
y_min, y_max = 1396, 2012  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Label IDs present in the crop (np.unique returns them sorted), background (0) excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Take a real copy of this sample's cells: adding a column to an AnnData *view*
# triggers an implicit copy and a modification warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# NOTE(review): assumes the i-th cell of this sample corresponds to mask label i — confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Fib': "#8B0000",
    'SMC': "#FF00FF",
    'mAPC': "#FFBF00",
    'Plasma' : '#E5E4E2',
}

# Colour for every cluster that is not highlighted above
default_cluster_color = "#36454F"  # Dark grey

# Restrict the palette to the clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Cluster name -> integer colour index (position in the palette)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Lookup table: mask label -> colour index. NaN marks background and labels without
# an annotated cell, which imshow leaves undrawn. One vectorised indexing step replaces
# the per-cell DataFrame filter + full-mask comparison.
max_label = int(unique_cell_ids[-1]) if unique_cell_ids.size else 0
label_to_color_idx = np.full(max_label + 1, np.nan)
label_to_color_idx[adata_mask_subset.obs['cell_index'].to_numpy()] = [
    cluster_label_to_numeric.get(label, np.nan) for label in adata_mask_subset.obs['Lvl2.5']
]
colored_mask = label_to_color_idx[cropped_segmentation_mask.astype(np.intp, copy=False)]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
fig, ax = plt.subplots(figsize=(12, 12), facecolor='black')

# Plot the colored mask; vmin/vmax pin colour index i to the i-th palette entry
# independently of which indices happen to be present in the data.
ax.imshow(colored_mask, cmap=colormap, interpolation='nearest',
          vmin=-0.5, vmax=len(filtered_cluster_colors) - 0.5)

# Outline every cell (one contour per label so that touching cells stay separated)
for cell_id in unique_cell_ids_no_background:
    ax.contour(
        cropped_segmentation_mask == cell_id,
        levels=[0.5],  # Contour level at 0.5 traces the edge of the binary mask
        colors='grey',
        linewidths=1.5,
    )

# Hide the axes for better visual presentation
ax.axis('off')

# Output location (saving is disabled).
# NOTE: the path depends only on the sample ID, so every ROI of the same sample
# would write to the same file; make it ROI-specific before enabling the save.
output_path = f"/path_to_output_dir_{sample_id}.svg"
#fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Periodontitis Oral Epithelium ROI
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 938, 1554  # X range (columns)
y_min, y_max = 1588, 2204  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the clusters highlighted in this ROI
specific_cluster_colors = {
    'Ep.or.k': "#A7C7E7",
    'Ep.or.sp': "#5D3FD3",
    'Ep.or.b-pb': "#0096FF",
    'mAPC': "#FFBF00"
}

# Fallback color for every other cluster
default_cluster_color = "#36454F"  # Charcoal (dark grey)

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()

# Restrict the color table to the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the clusters that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every cluster present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its cluster using a label -> code lookup table.
# Entry k holds the cluster code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['Lvl2.5'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(12, 12), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_output_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Periodontitis Tooth-Associated Epithelium ROI
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 2648, 3264  # X range (columns)
y_min, y_max = 1282, 1898  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the clusters highlighted in this ROI
specific_cluster_colors = {
    'Ep.TA': "#702963",
    'Neut': "#32CD32",
    'mAPC': "#FFBF00"
}

# Fallback color for every other cluster
default_cluster_color = "#36454F"  # Charcoal (dark grey)

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()

# Restrict the color table to the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the clusters that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every cluster present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its cluster using a label -> code lookup table.
# Entry k holds the cluster code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['Lvl2.5'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(12, 12), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_output_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Periodontitis Lymphoid aggregates ROI
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 2264, 2880  # X range (columns)
y_min, y_max = 3160, 3776  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the clusters highlighted in this ROI
specific_cluster_colors = {
    'T': "#6495ED",
    'B': "#966919",
    'mAPC': "#FFBF00",
    'Plasma' : '#E5E4E2',
    'Lymph.mix' : '#7DF9FF'
}

# Fallback color for every other cluster
default_cluster_color = "#36454F"  # Charcoal (dark grey)

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()

# Restrict the color table to the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the clusters that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every cluster present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its cluster using a label -> code lookup table.
# Entry k holds the cluster code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['Lvl2.5'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(12, 12), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_output_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2A - Periodontitis Deep CT ROI
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 1346, 1962  # X range (columns)
y_min, y_max = 2938, 3554  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the clusters highlighted in this ROI
specific_cluster_colors = {
    'Fib': "#8B0000",
    'SMC': "#FF00FF",
    'mAPC': "#FFBF00",
    'Plasma' : '#E5E4E2',
}

# Fallback color for every other cluster
default_cluster_color = "#36454F"  # Charcoal (dark grey)

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl2.5'].unique()

# Restrict the color table to the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the clusters that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every cluster present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its cluster using a label -> code lookup table.
# Entry k holds the cluster code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['Lvl2.5'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(12, 12), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_output_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2B
# Font settings: Arial, with TrueType (type 42) embedding so that text in the
# exported PDF/PS stays editable in vector-graphics software
plt.rcParams.update({
    'font.family': 'Arial',
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Clusters to show, in display order
clusters_of_interest = [
    'Ep.or.k', 'Ep.or.sp', 'Ep.or.b-pb', 'Ep.TA', 'Neut', 'mAPC', 'Mast', 'Fib',
    'VEC', 'SMC', 'Plasma', 'B', 'T', 'Lymph.mix',
]

# Markers to show, in display order
correct_order = [
    'Hoechst', 'CK5', 'PanCK', 'CD138', 'Ki67', 'S100a8-9', 'CK19',
    'CD45', 'Vimentin', 'MPO', 'HLA-DR', 'MCT',
    'Thy-1', 'aSMA', 'CD31', 'CD20', 'CD3', 'CD4', 'CD8a',
]

# Keep only the cells that belong to one of the selected clusters
adata_subset = adata[adata.obs['Lvl2.5'].isin(clusters_of_interest)].copy()

# Turn the cluster column into an ordered categorical so groups follow the list above
adata_subset.obs['Lvl2.5'] = adata_subset.obs['Lvl2.5'].astype(
    pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
)

# Build the dot plot, swap its axes, then restyle it: greyscale colormap,
# no dot outlines, no grid
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl2.5',
        standard_scale='var',  # scale each marker across groups
        vmin=0.2,
        vmax=0.8,
        figsize=(6, 7),  # (width, height)
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Save the plot as a PDF with a transparent background
output_path = 'path_to_output_dir.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
#Figure 2C - Healthy section
# Load the segmentation mask
segmentation_mask = io.imread('/path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 471, 4386  # X range (columns)
y_min, y_max = 408, 2535  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "H2a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the niches
specific_cluster_colors = {
    'TAE': "#800080",
    'FibCT': "red",
    'OE': "#5D3FD3",
    'Epi-CT' : '#00FFFF',
    'NeutCT' : '#097969' ,
    'Plasma' : 'magenta',
    'T-B-APC': 'yellow',
    'BV': 'orange',
    'Im-Str.1' : '#CF9FFF',
    'Im-Str.2' : '#E1E1E1'

}

# Fallback color for any niche not listed above
default_cluster_color = "#E5E5E5"  # Light grey

# Get the unique niches present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Restrict the color table to the niches present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the niches that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every niche present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its niche using a label -> code lookup table.
# Entry k holds the niche code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['niche_merged'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered niche colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (niches). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_outpu_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2C - Periodontitis section
# Load the segmentation mask
segmentation_mask = io.imread('/path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 232, 3472  # X range (columns)
y_min, y_max = 208, 4064  # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9a"

# Take an independent copy of this sample's cells: writing to `.obs` of an
# AnnData view would otherwise trigger an implicit copy and a warning.
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..n in row order.
# NOTE(review): this assumes the row order of the sample matches the label IDs of
# the segmentation mask (row k <-> label k) — confirm that no cells were dropped
# from adata after segmentation.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose label appears inside the cropped ROI
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Colors for the niches
specific_cluster_colors = {
    'TAE': "#800080",
    'FibCT': "red",
    'OE': "#5D3FD3",
    'Epi-CT' : '#00FFFF',
    'NeutCT' : '#097969' ,
    'Plasma' : 'magenta',
    'T-B-APC': 'yellow',
    'BV': 'orange',
    'Im-Str.1' : '#CF9FFF',
    'Im-Str.2' : '#E1E1E1'

}

# Fallback color for any niche not listed above
default_cluster_color = "#E5E5E5"  # Light grey

# Get the unique niches present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Restrict the color table to the niches present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the niches that will be drawn
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Map every niche present in the ROI to an integer code (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Paint each cell with the code of its niche using a label -> code lookup table.
# Entry k holds the niche code of the cell labelled k; NaN marks background and
# labels without a matching row in `adata_mask_subset`, so those pixels stay empty.
# One pass over the image replaces a full-image comparison per cell.
roi_obs = adata_mask_subset.obs
label_image = cropped_segmentation_mask.astype(np.intp)
code_lookup = np.full(int(label_image.max()) + 1, np.nan)
code_lookup[roi_obs['cell_index'].to_numpy()] = (
    roi_obs['niche_merged'].astype(object).map(cluster_label_to_numeric).to_numpy(dtype=float)
)
colored_mask = code_lookup[label_image]

# Create a colormap from the filtered niche colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (niches). vmin/vmax pin code k to colormap entry k so
# the colors do not depend on which codes happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries
for cell_id in np.unique(cropped_segmentation_mask):
    if cell_id == 0:
        continue  # Skip background
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Output location for the figure (uncomment the savefig line to write the SVG)
output_path = f"/path_to_outpu_dir_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 2D
# Work on a copy of the cell metadata so the categorical cast below can never
# write back into adata.obs
df = adata.obs.copy()
df['niche_merged'] = df['niche_merged'].astype('category')

# Count cells per (niche, cell type) pair. observed=False keeps every category
# combination (as zero counts) and states the grouping behaviour explicitly.
niche_groups = (
    df.groupby(['niche_merged', 'Lvl2.5'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# Transpose: cell types become rows, niches become columns
niche_groups = niche_groups.T

# Custom niche order
custom_order = ['OE', 'Epi-CT', 'TAE', 'NeutCT',
                'Im-Str.1', 'Im-Str.2', 'FibCT',  'BV', 'Plasma', 'T-B-APC']
niche_groups = niche_groups.reindex(columns=custom_order)

# Cell types to show, in display order
y_axis_clusters = ['Ep.or.k', 'Ep.or.sp', 'Ep.or.b-pb', 'Ep.TA', 'Neut', 'mAPC', 'Mast', 'Fib',
                   'VEC', 'SMC', 'Plasma', 'B',  'T', 'Lymph.mix']
niche_groups_filtered = niche_groups.loc[y_axis_clusters]

# Transpose back so niches are rows (y-axis) and cell types are columns (x-axis)
niche_groups_flipped = niche_groups_filtered.T

# log(1 + count), computed on the whole frame at once
niche_groups_log = np.log1p(niche_groups_flipped)

# Plot heatmap; the color scale is clipped to counts between 500 and 12,000
plt.figure(figsize=(7, 5))
ax = sns.heatmap(
    niche_groups_log,
    vmax=np.log1p(12000),
    vmin=np.log1p(500),
    cmap='plasma',
    cbar_kws={'label': 'log(Count + 1)'},
    linewidths=0.05,
    linecolor='black'
)

plt.title('Cell Type Distribution in Niches')
# Columns of the plotted frame are cell types and rows are niches
plt.xlabel('Cell Type')
plt.ylabel('Niche')
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)

# Label the log-scaled colorbar with the raw counts it corresponds to
cbar = ax.collections[0].colorbar
log_ticks = [np.log1p(500), np.log1p(1000), np.log1p(10000)]
cbar.set_ticks(log_ticks)
cbar.ax.set_yticklabels(['500', '1,000', '10,000'])

plt.tight_layout()
output_path = "/path_to_output_dir.pdf"
plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
# Figure 2E
# List of SampleIDs to exclude
excluded_sample_ids = ['H1', 'P4', 'P2b']

# Create a subset of the AnnData object excluding the specific SampleIDs
adata_subset = adata[~adata.obs['imageid'].isin(excluded_sample_ids)].copy()

# Create a new figure with custom dimensions
# NOTE(review): stacked_barplot is given its own figsize below and presumably
# draws on a new figure, which would leave this one empty — confirm and remove.
plt.figure(figsize=(50, 6))  # Adjust width and height as needed

# Define the specific order of the niche categories
anticipated_order = ['OE', 'FibCT', 'Im-Str.1', 'Epi-CT', 'BV', 'Im-Str.2', 'T-B-APC',   'TAE', 'NeutCT', 'Plasma',
 ]

# Reorder the niche categories in the copied AnnData object
adata_subset.obs['niche_merged'] = adata_subset.obs['niche_merged'].astype('category')
adata_subset.obs['niche_merged'] = adata_subset.obs['niche_merged'].cat.reorder_categories(anticipated_order)

# Sort the cells by niche. The whole AnnData is reordered by row position:
# assigning a sorted table to `.obs` alone would leave the data matrix in its
# old order, so each cell's metadata would no longer describe its own row.
sorted_positions = (
    adata_subset.obs.reset_index(drop=True).sort_values('niche_merged').index.to_numpy()
)
adata_subset = adata_subset[sorted_positions].copy()

# Define custom colors for each niche category
colors = {
    'TAE': "#800080",
    'FibCT': "red",
    'OE': "#5D3FD3",
    'Epi-CT' : '#00FFFF',
    'NeutCT' : '#097969' ,
    'Plasma' : 'magenta',
    'T-B-APC': 'yellow',
    'BV': 'orange',
    'Im-Str.1' : '#CF9FFF',
    'Im-Str.2' : '#E1E1E1'

}

# Plot the stacked bar plot with specified colors
plot = sm.stacked_barplot(adata_subset, x_axis='ConditionID', y_axis='niche_merged',
                          order_yaxis=anticipated_order, method='percent', plot_tool='matplotlib', color=colors, figsize=(2, 6))

# Get the current axes and set its background color to white
plt.gca().set_facecolor('white')

# Format y-axis ticks as percentages
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

# Get the x-axis limits
x_min, x_max = plt.xlim()

# Plot horizontal dashed guide lines at 0, 25, 50, 75 and 100 %
plt.hlines(y=[0, 0.25, 0.5, 0.75, 1], xmin=x_min, xmax=x_max, linestyle='--', colors='grey')

# Set y-axis tick labels to match the percentages
plt.yticks([0, 0.25, 0.5, 0.75, 1], ['0%', '25%', '50%', '75%', '100%'])

# Modify the borders
plt.gca().spines['top'].set_visible(False)       # Make top border invisible
plt.gca().spines['right'].set_visible(False)     # Make right border invisible
plt.gca().spines['bottom'].set_color('black')    # Set bottom border color to black
plt.gca().spines['left'].set_color('black')      # Set left border color to black

# Extend the y-axis slightly below its current lower limit
y_min, y_max = plt.ylim()
plt.ylim(y_min - 0.03, y_max)

# Remove the x-axis title
plt.xlabel('')

# Increase font size of the tick labels on both axes
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)  # Adjust font size as needed

# Rotate x-axis labels diagonally
plt.xticks(rotation=45, ha='right')

# Custom legend handles as circles, sorted by anticipated_order in reverse.
# The labels are materialised as a list so they can be read more than once.
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[category], markersize=10)
                         for category in reversed(anticipated_order)]
sorted_legend_labels = list(reversed(anticipated_order))

# Add legend with sorted custom handles and labels outside the plot
legend = plt.legend(sorted_legend_handles, sorted_legend_labels, loc='upper left', bbox_to_anchor=(1, 1), markerscale=1, frameon=False, borderpad=2)

# Save the plot as a PDF file (uncomment to write it)
#plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure3/niche_stacked_barplot.pdf', format='pdf')

# Display the plot
plt.show()

In [None]:
#Extended Figure 2A - Healthy section
def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path, crop_coords=None):
    """Scatter-plot the cell centroids of each sample, coloured by cluster.

    One figure is produced (and shown) per entry of ``sample_names``.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'imageid'``,
        ``'X_centroid'``, ``'Y_centroid'`` and ``cluster_key`` and that
        supports boolean-mask row indexing.
    sample_names : iterable of str
        Values of ``obs['imageid']`` to plot.
    cluster_key : str
        ``obs`` column holding the cluster label of each cell.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour. Labels that are missing from
        the mapping are drawn in light grey (``"#E5E5E5"``).
    output_path : str
        Destination for the export. The ``savefig`` call is currently
        commented out, so this argument is not used.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``. When given, only cells whose
        centroid lies inside this window (bounds inclusive) are plotted and a
        black outline of the window is added to the axes.

    Returns
    -------
    None
    """
    fallback_color = "#E5E5E5"

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['imageid'] == sample_name]

        x_coords = subset_adata.obs['X_centroid'].values
        y_coords = subset_adata.obs['Y_centroid'].values

        # Crop if coordinates are provided
        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['X_centroid'].values
            y_coords = subset_adata.obs['Y_centroid'].values

        # Nothing to draw (unknown sample or empty crop window): min()/max()
        # below would raise on an empty array, so skip this sample instead.
        if len(x_coords) == 0:
            print(f"No cells to plot for sample '{sample_name}'; skipping.")
            continue

        # Figure size follows the data's aspect ratio, shorter side fixed at 6 inches.
        # A zero extent on either axis would give an infinite/NaN size, so fall
        # back to a square figure in that case.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        # Generate plot on a white background
        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Draw the rectangle first (so it's behind the points)
        if crop_coords:
            print(f"Drawing rectangle at: x_min={x_min}, x_max={x_max}, y_min={y_min}, y_max={y_max}")

            rect = plt.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor='black', facecolor='none', zorder=1
            )
            ax.add_patch(rect)

        # Draw points on top. The mapped colours are cast to object dtype before
        # fillna: if the label column is categorical and the mapping is
        # one-to-one, map() returns a categorical and fillna with a colour that
        # is not one of its categories would raise.
        colors = (
            subset_adata.obs[cluster_key]
            .map(cluster_colors)
            .astype(object)
            .fillna(fallback_color)
            .values
        )
        ax.scatter(x_coords, y_coords, c=colors, s=15, zorder=2)

        # Bare canvas: no grid, ticks or spines; limits hug the plotted cells
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        #plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)



# Inputs for the healthy-section panels
crop_coords = (386, 4386, 208, 3448)  # (x_min, x_max, y_min, y_max)
sample_names = ['H2a']
output_path = 'path_to_output_dir_{}.pdf'  # '{}' is filled with the panel name

# Muted background palette shared by all panels: every cluster gets the pale
# colour of its compartment. Each panel below overrides only the clusters it
# highlights.
_muted_cluster_colors = {
    **dict.fromkeys(['Ep.or.k', 'Ep.or.sp', 'Ep.or.b-pb', 'Ep.TA'], "#e0cbb7"),
    'Fib': "#d2e0b7",
    **dict.fromkeys(['VEC', 'SMC'], "#e0b7b7"),
    **dict.fromkeys(['Lymph.mix', 'T', 'Plasma', 'B', 'Neut', 'mAPC', 'Mast'], '#b7d2e0'),
    'Other': "#E5E5E5E5",
}

# Epithelial clusters highlighted
Epi_cluster_colors = {
    **_muted_cluster_colors,
    'Ep.or.k': "#097969",
    'Ep.or.sp': "#BB5566FF",
    'Ep.or.b-pb': "#004488FF",
    'Ep.TA': "#FD7901FF",
}

# Stromal clusters highlighted
Strom_cluster_colors = {
    **_muted_cluster_colors,
    'Fib': "#BB5566FF",
    'VEC': "#004488FF",
    'SMC': "#DDAA33FF",
}

# Lymphoid clusters highlighted
Lymphoid_cluster_colors = {
    **_muted_cluster_colors,
    'Lymph.mix': '#BF40BF',
    'T': '#DE3163',
    'Plasma': '#191970',
    'B': '#70BE48',
}

# Myeloid clusters highlighted
Myeloid_cluster_colors = {
    **_muted_cluster_colors,
    'Neut': '#AA336A',
    'mAPC': '#008000',
    'Mast': '#E4D00A',
}

# Generate one plot per highlighted compartment, in the order Epi, Strom, Lymphoid, Myeloid
for panel_name, panel_colors in (
    ("Epi", Epi_cluster_colors),
    ("Strom", Strom_cluster_colors),
    ("Lymphoid", Lymphoid_cluster_colors),
    ("Myeloid", Myeloid_cluster_colors),
):
    generate_spatial_plot(adata, sample_names, 'Lvl2.5', panel_colors, output_path.format(panel_name), crop_coords)

In [None]:
#Extended Figure 2A - Periodontitis section
def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path, crop_coords=None):
    """Scatter-plot the cell centroids of each sample, coloured by cluster.

    One figure is produced (and shown) per entry of ``sample_names``.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'imageid'``,
        ``'X_centroid'``, ``'Y_centroid'`` and ``cluster_key`` and that
        supports boolean-mask row indexing.
    sample_names : iterable of str
        Values of ``obs['imageid']`` to plot.
    cluster_key : str
        ``obs`` column holding the cluster label of each cell.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour. Labels that are missing from
        the mapping are drawn in light grey (``"#E5E5E5"``).
    output_path : str
        Destination for the export. The ``savefig`` call is currently
        commented out, so this argument is not used.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``. When given, only cells whose
        centroid lies inside this window (bounds inclusive) are plotted and a
        black outline of the window is added to the axes.

    Returns
    -------
    None
    """
    fallback_color = "#E5E5E5"

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['imageid'] == sample_name]

        x_coords = subset_adata.obs['X_centroid'].values
        y_coords = subset_adata.obs['Y_centroid'].values

        # Crop if coordinates are provided
        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['X_centroid'].values
            y_coords = subset_adata.obs['Y_centroid'].values

        # Nothing to draw (unknown sample or empty crop window): min()/max()
        # below would raise on an empty array, so skip this sample instead.
        if len(x_coords) == 0:
            print(f"No cells to plot for sample '{sample_name}'; skipping.")
            continue

        # Figure size follows the data's aspect ratio, shorter side fixed at 6 inches.
        # A zero extent on either axis would give an infinite/NaN size, so fall
        # back to a square figure in that case.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        # Generate plot on a white background
        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Draw the rectangle first (so it's behind the points)
        if crop_coords:
            print(f"Drawing rectangle at: x_min={x_min}, x_max={x_max}, y_min={y_min}, y_max={y_max}")

            rect = plt.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor='black', facecolor='none', zorder=1
            )
            ax.add_patch(rect)

        # Draw points on top. The mapped colours are cast to object dtype before
        # fillna: if the label column is categorical and the mapping is
        # one-to-one, map() returns a categorical and fillna with a colour that
        # is not one of its categories would raise.
        colors = (
            subset_adata.obs[cluster_key]
            .map(cluster_colors)
            .astype(object)
            .fillna(fallback_color)
            .values
        )
        ax.scatter(x_coords, y_coords, c=colors, s=10, zorder=2)

        # Bare canvas: no grid, ticks or spines; limits hug the plotted cells
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        #plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)



# Inputs for the periodontitis-section panels
crop_coords = (232, 3472, 208, 4208)  # (x_min, x_max, y_min, y_max)
sample_names = ['P9a']
output_path = 'path_to_output_dir_{}.pdf'  # '{}' is filled with the panel name

# Muted background palette shared by all panels: every cluster gets the pale
# colour of its compartment. Each panel below overrides only the clusters it
# highlights.
_muted_cluster_colors = {
    **dict.fromkeys(['Ep.or.k', 'Ep.or.sp', 'Ep.or.b-pb', 'Ep.TA'], "#e0cbb7"),
    'Fib': "#d2e0b7",
    **dict.fromkeys(['VEC', 'SMC'], "#e0b7b7"),
    **dict.fromkeys(['Lymph.mix', 'T', 'Plasma', 'B', 'Neut', 'mAPC', 'Mast'], '#b7d2e0'),
    'Other': "#E5E5E5E5",
}

# Epithelial clusters highlighted
Epi_cluster_colors = {
    **_muted_cluster_colors,
    'Ep.or.k': "#097969",
    'Ep.or.sp': "#BB5566FF",
    'Ep.or.b-pb': "#004488FF",
    'Ep.TA': "#FD7901FF",
}

# Stromal clusters highlighted
Strom_cluster_colors = {
    **_muted_cluster_colors,
    'Fib': "#BB5566FF",
    'VEC': "#004488FF",
    'SMC': "#DDAA33FF",
}

# Lymphoid clusters highlighted
Lymphoid_cluster_colors = {
    **_muted_cluster_colors,
    'Lymph.mix': '#BF40BF',
    'T': '#DE3163',
    'Plasma': '#191970',
    'B': '#70BE48',
}

# Myeloid clusters highlighted
Myeloid_cluster_colors = {
    **_muted_cluster_colors,
    'Neut': '#AA336A',
    'mAPC': '#008000',
    'Mast': '#E4D00A',
}

# Generate one plot per highlighted compartment, in the order Epi, Strom, Lymphoid, Myeloid
for panel_name, panel_colors in (
    ("Epi", Epi_cluster_colors),
    ("Strom", Strom_cluster_colors),
    ("Lymphoid", Lymphoid_cluster_colors),
    ("Myeloid", Myeloid_cluster_colors),
):
    generate_spatial_plot(adata, sample_names, 'Lvl2.5', panel_colors, output_path.format(panel_name), crop_coords)

In [None]:
#Extended Figure 2B
# Load the per-patient cell-density table
df = pd.read_csv('path_to_total_Lvl2.5_cells_per_area_per_patient.csv')

# Keep only the relevant columns ('patient' is retained but not used below)
df = df[['annotation', 'status', 'cells_per_mm2', 'patient']]

# One subplot per annotation
annotations = df['annotation'].unique()

# Fixed number of rows; the number of columns is ceil(len(annotations) / n_rows)
n_rows = 5
n_cols = (len(annotations) + n_rows - 1) // n_rows

# Set up the figure with subplots
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.5, n_rows * 2))
axes = axes.flatten()  # Flatten the axes array to easily index

# Loop-invariant plot settings
order = ["Healthy", "Periodontitis"]
box_palette = {"Healthy": "#006A8E", "Periodontitis": "#B1283A"}
point_palette = {"Healthy": "black", "Periodontitis": "black"}

# Raw p-values, one per annotation, collected for multiple-testing correction
p_values = []

for idx, annotation in enumerate(annotations):
    # Filter data for the current annotation
    df_annotation = df[df['annotation'] == annotation]

    # Welch's t-test (unequal variances) of Healthy vs Periodontitis densities
    healthy_data = df_annotation[df_annotation['status'] == 'Healthy']['cells_per_mm2']
    perio_data = df_annotation[df_annotation['status'] == 'Periodontitis']['cells_per_mm2']
    t_stat, p_val = stats.ttest_ind(healthy_data, perio_data, equal_var=False)
    p_values.append(p_val)

    # Box plot with outlier markers hidden (fliersize=0) ...
    sns.boxplot(data=df_annotation, x="status", y="cells_per_mm2",
                palette=box_palette, width=0.7,
                fliersize=0, dodge=True, order=order, ax=axes[idx])

    # ... overlaid with the individual data points (jittered for visibility)
    sns.stripplot(data=df_annotation, x="status", y="cells_per_mm2",
                  palette=point_palette, dodge=True,
                  jitter=True, alpha=0.7, marker="o", edgecolor="black",
                  linewidth=0.1, order=order, ax=axes[idx])

    # Set aesthetics for the subplot. No legend is requested: the plots above
    # have no labelled artists, so a legend() call would only emit a warning.
    axes[idx].set_xlabel("")
    axes[idx].set_ylabel("Cells per mm²")
    axes[idx].set_title(f"{annotation}")
    axes[idx].grid(axis="y", linestyle="--", alpha=0.7)

# Hide the grid cells that have no annotation to show
for unused_ax in axes[len(annotations):]:
    unused_ax.set_visible(False)

# Correct p-values for multiple comparisons with the Benjamini-Hochberg FDR
# procedure (method='fdr_bh'). Only finite p-values are corrected: a single NaN
# (e.g. from a group with too few observations) would otherwise turn every
# adjusted value into NaN. Annotations without a valid test keep NaN.
raw_p_values = np.asarray(p_values, dtype=float)
corrected_p_values = np.full(raw_p_values.shape, np.nan)
testable = np.isfinite(raw_p_values)
if testable.any():
    corrected_p_values[testable] = multipletests(raw_p_values[testable], method='fdr_bh')[1]

# Display the adjusted p-values at the top centre of each subplot
for idx, annotation in enumerate(annotations):
    p_val = corrected_p_values[idx]
    axes[idx].text(0.5, 0.95, f'P = {p_val:.3f}', transform=axes[idx].transAxes, ha='center', va='top', fontsize=10, color='black')

# Adjust layout to ensure labels and titles fit
plt.tight_layout()
#plt.savefig(output_dir.pdf', format='pdf')
# Show the figure
plt.show()

In [None]:
#Extended Figure 3A - Healthy section
def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Plot cell centroids per sample coloured by fine cluster, then draw a legend figure.

    Clusters (values of ``obs[lvl4_key]``) found in cells whose
    ``obs[lvl2_key]`` is one of 'Epithelial', 'Fibroblast', 'Vascular' or
    'Immune' get colours sampled from the "turbo" colormap, ordered by that
    category list and alphabetically within a category. Clusters found in
    cells of any other category are drawn grey (``"#A0A0A0"``).

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'imageid'``,
        ``'X_centroid'``, ``'Y_centroid'``, ``lvl2_key`` and ``lvl4_key`` and
        that supports boolean-mask row indexing.
    sample_names : iterable of str
        Values of ``obs['imageid']`` to plot; one figure per sample.
    lvl2_key : str
        ``obs`` column holding the broad category used for grouping.
        NOTE: despite the parameter name, the caller in this notebook passes
        ``'Lvl1'``.
    lvl4_key : str
        ``obs`` column holding the fine cluster label used for colouring.
    output_path : str
        Format template with one ``{}`` placeholder. The main figure is saved
        to ``output_path.format("Main")`` when a single sample is plotted and
        to ``output_path.format("Main_<sample>")`` when several are, so that
        samples do not overwrite each other. ``output_path.format("Legend")``
        is passed on to ``generate_legend_plot``.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies in
        this window (bounds inclusive) are plotted.

    Returns
    -------
    None
    """
    # Preferred order of the broad categories
    category_order = ['Epithelial', 'Fibroblast', 'Vascular', 'Immune']

    # Fine clusters grouped by the ordered broad categories
    sorted_clusters = []
    for category in category_order:
        clusters = sorted(adata.obs.loc[adata.obs[lvl2_key] == category, lvl4_key].unique())
        sorted_clusters.extend(clusters)

    other_clusters = sorted(adata.obs.loc[~adata.obs[lvl2_key].isin(category_order), lvl4_key].unique())

    cmap = plt.colormaps.get_cmap("turbo")
    cluster_colors = {cluster: cmap(i / len(sorted_clusters)) for i, cluster in enumerate(sorted_clusters)}

    # Assign all non-selected clusters a grey color
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # Materialise once so the samples can be counted for the file-name rule
    sample_names = list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['imageid'] == sample_name]

        x_coords = subset_adata.obs['X_centroid'].values
        y_coords = subset_adata.obs['Y_centroid'].values

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['X_centroid'].values
            y_coords = subset_adata.obs['Y_centroid'].values

        # Nothing to draw (unknown sample or empty crop window): min()/max()
        # below would raise on an empty array, so skip this sample instead.
        if len(x_coords) == 0:
            print(f"No cells to plot for sample '{sample_name}'; skipping.")
            continue

        # Figure size follows the data's aspect ratio, shorter side fixed at 6 inches;
        # fall back to a square figure when either extent is zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        # Generate plot on a white background
        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour by fine cluster; labels are cast to str before the lookup and
        # anything without an entry falls back to grey
        colors = subset_adata.obs[lvl4_key].astype(str).map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=10.5, zorder=2)

        # Bare canvas: no grid, ticks or spines; limits hug the plotted cells
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # A single sample keeps the plain "Main" name; with several samples the
        # sample name is appended so each figure gets its own file.
        main_tag = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(main_tag), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw a standalone legend figure: one colour swatch per selected cluster plus 'Other'.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster label -> colour. Entries whose colour equals
        ``other_color`` are left out and represented by the single 'Other'
        swatch.
    other_color : str
        Colour of the 'Other' swatch.
    legend_output_path : str
        Destination for the export. The ``savefig`` call is currently
        commented out, so the figure is only shown, not written.

    Swatches are stacked upward from y = 0 in the iteration order of
    ``cluster_colors``; the 'Other' swatch is placed above the last one.

    Returns
    -------
    None
    """
    selected_clusters = {k: v for k, v in cluster_colors.items() if v != other_color}
    n_selected = len(selected_clusters)

    fig, ax = plt.subplots(figsize=(1, n_selected * 0.3 + 0.6))  # Height grows with the number of entries
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_selected + 1)  # Extra row for 'Other'

    # One swatch per selected cluster. facecolor/edgecolor are set separately:
    # passing `color=` together with `ec=` makes matplotlib override the edge
    # colour with `color` (and warn), so the black outline would be lost.
    for i, (cluster, color) in enumerate(selected_clusters.items()):
        ax.add_patch(mpatches.Rectangle((0, i), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, i + 0.4, cluster, va="center", fontsize=8)

    # Add "Other" category in grey
    ax.add_patch(mpatches.Rectangle((0, n_selected), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_selected + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    #plt.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Inputs for the healthy section
sample_names = ['H2a']
crop_coords = (386, 4386, 208, 3448)  # (x_min, x_max, y_min, y_max)
output_path = 'path_to_output_dir{}.pdf'  # '{}' is filled in by generate_spatial_plot

# Broad categories come from 'Lvl1', the plotted clusters from 'Lvl4'
generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)

In [None]:
#Extended Figure 3A - Periodontitis section

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Plot cell centroids per sample coloured by fine cluster, then draw a legend figure.

    Clusters (values of ``obs[lvl4_key]``) found in cells whose
    ``obs[lvl2_key]`` is one of 'Epithelial', 'Fibroblast', 'Vascular' or
    'Immune' get colours sampled from the "turbo" colormap, ordered by that
    category list and alphabetically within a category. Clusters found in
    cells of any other category are drawn grey (``"#A0A0A0"``).

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'imageid'``,
        ``'X_centroid'``, ``'Y_centroid'``, ``lvl2_key`` and ``lvl4_key`` and
        that supports boolean-mask row indexing.
    sample_names : iterable of str
        Values of ``obs['imageid']`` to plot; one figure per sample.
    lvl2_key : str
        ``obs`` column holding the broad category used for grouping.
        NOTE: despite the parameter name, the caller in this notebook passes
        ``'Lvl1'``.
    lvl4_key : str
        ``obs`` column holding the fine cluster label used for colouring.
    output_path : str
        Format template with one ``{}`` placeholder. The main figure is saved
        to ``output_path.format("Main")`` when a single sample is plotted and
        to ``output_path.format("Main_<sample>")`` when several are, so that
        samples do not overwrite each other. ``output_path.format("Legend")``
        is passed on to ``generate_legend_plot``.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies in
        this window (bounds inclusive) are plotted.

    Returns
    -------
    None
    """
    # Preferred order of the broad categories
    category_order = ['Epithelial', 'Fibroblast', 'Vascular', 'Immune']

    # Fine clusters grouped by the ordered broad categories
    sorted_clusters = []
    for category in category_order:
        clusters = sorted(adata.obs.loc[adata.obs[lvl2_key] == category, lvl4_key].unique())
        sorted_clusters.extend(clusters)

    other_clusters = sorted(adata.obs.loc[~adata.obs[lvl2_key].isin(category_order), lvl4_key].unique())

    cmap = plt.colormaps.get_cmap("turbo")
    cluster_colors = {cluster: cmap(i / len(sorted_clusters)) for i, cluster in enumerate(sorted_clusters)}

    # Assign all non-selected clusters a grey color
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # Materialise once so the samples can be counted for the file-name rule
    sample_names = list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['imageid'] == sample_name]

        x_coords = subset_adata.obs['X_centroid'].values
        y_coords = subset_adata.obs['Y_centroid'].values

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['X_centroid'].values
            y_coords = subset_adata.obs['Y_centroid'].values

        # Nothing to draw (unknown sample or empty crop window): min()/max()
        # below would raise on an empty array, so skip this sample instead.
        if len(x_coords) == 0:
            print(f"No cells to plot for sample '{sample_name}'; skipping.")
            continue

        # Figure size follows the data's aspect ratio, shorter side fixed at 6 inches;
        # fall back to a square figure when either extent is zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        # Generate plot on a white background
        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour by fine cluster; labels are cast to str before the lookup and
        # anything without an entry falls back to grey
        colors = subset_adata.obs[lvl4_key].astype(str).map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=10.5, zorder=2)

        # Bare canvas: no grid, ticks or spines; limits hug the plotted cells
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # A single sample keeps the plain "Main" name; with several samples the
        # sample name is appended so each figure gets its own file.
        main_tag = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(main_tag), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw a standalone legend figure: one colour swatch per selected cluster plus 'Other'.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster label -> colour. Entries whose colour equals
        ``other_color`` are left out and represented by the single 'Other'
        swatch.
    other_color : str
        Colour of the 'Other' swatch.
    legend_output_path : str
        Destination for the export. The ``savefig`` call is currently
        commented out, so the figure is only shown, not written.

    Swatches are stacked upward from y = 0 in the iteration order of
    ``cluster_colors``; the 'Other' swatch is placed above the last one.

    Returns
    -------
    None
    """
    selected_clusters = {k: v for k, v in cluster_colors.items() if v != other_color}
    n_selected = len(selected_clusters)

    fig, ax = plt.subplots(figsize=(1, n_selected * 0.3 + 0.6))  # Height grows with the number of entries
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_selected + 1)  # Extra row for 'Other'

    # One swatch per selected cluster. facecolor/edgecolor are set separately:
    # passing `color=` together with `ec=` makes matplotlib override the edge
    # colour with `color` (and warn), so the black outline would be lost.
    for i, (cluster, color) in enumerate(selected_clusters.items()):
        ax.add_patch(mpatches.Rectangle((0, i), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, i + 0.4, cluster, va="center", fontsize=8)

    # Add "Other" category in grey
    ax.add_patch(mpatches.Rectangle((0, n_selected), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_selected + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    #plt.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Inputs for the periodontitis section
sample_names = ['P9a']
crop_coords = (232, 3472, 208, 4208)  # (x_min, x_max, y_min, y_max)
output_path = 'path_to_output_dir{}.pdf'  # '{}' is filled in by generate_spatial_plot

# Broad categories come from 'Lvl1', the plotted clusters from 'Lvl4'
generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)

In [None]:
#Extended Figure 3B
# Global matplotlib font settings (they stay in effect for every later cell).
# Font type 42 keeps text as text in PDF/PS output rather than outlines.
plt.rcParams.update({
    'font.family': 'Arial',
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Lvl4 clusters to include, in display order
clusters_of_interest = [
    'Ep.or.k', 'Ep.or.sp', 'Ep.or.b-pb', 'Ep.prol', 'Lang', 'T.IE', 'Ep.TA', 'Neut.IE',
    'Neut.CT', 'Neut.BV', 'Neut.S100a8-9', 'Tcyt.B.mix', 'T.cyt', 'Th.B.mix',
    'T.mAPC.mix', 'T.h', 'mAPC', 'mAPC.BV', 'Thy1.CD45.cell', 'En.Im.mix',
    'Mast', 'Fib', 'VEC', 'SMC',
    'PB', 'Plasma', 'Pl.T.mix', 'B',
]

# Markers to plot, in display order
correct_order = [
    'Hoechst', 'CK5', 'PanCK', 'CD138', 'S100a8-9', 'Ki67', 'CK19',
    'CD45', 'MPO', 'CD3', 'CD4', 'CD8a', 'CD20', 'HLA-DR', 'MCT',
    'Thy-1', 'aSMA', 'CD31', 'Vimentin',
]

# Keep only cells of the selected clusters (independent copy of the data)
adata_subset = adata[adata.obs['Lvl4'].isin(clusters_of_interest)].copy()

# Re-type 'Lvl4' as an ordered categorical so groups follow clusters_of_interest
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(
    pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
)

# Build the dot plot in one chain: create it, swap the axes relative to the
# default layout, then apply a greyscale style without dot outlines.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        use_raw=False,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',      # scale each variable across groups
        vmin=0.2,
        vmax=0.8,
        figsize=(14, 6),           # (width, height)
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",              # greyscale colormap
        dot_edge_color=None,
        dot_edge_lw=0,             # zero edge line width
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Output location for the export (the save call below is currently disabled)
output_path = 'path_to_output_dir.pdf'
#dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
#Figure 4A - Healthy Oral epithelium
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 1499, 2172   # X range (columns)
y_min, y_max = 870, 1543   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Labels present in the crop; label 0 is treated as background and excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Specify the sample ID
sample_id = "H5"

# Restrict adata to the selected sample. The explicit .copy() turns the view
# into a real object, so adding an obs column below does not rely on anndata
# implicitly converting the view (which emits a warning).
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the sample's cells 1..N in row order.
# NOTE(review): this assumes row k of the sample corresponds to mask label k -
# confirm against how the mask and adata were generated.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of this sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Ep.prol': "#D2042D",
    'Ep.or.sp': "#1434A4",
    'Ep.or.b-pb': "#1434A4",
    'Ep.or.k' : '#1434A4',
    'Lang' : '#C9CC3F',
    'T.IE' : '#00FFFF' ,
    'mAPC': '#93C572',
    'mAPC.BV' : '#93C572'
}

# Define the default grey color for other clusters
default_cluster_color = "#a6a6a6"  # Light grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl4'].unique()

# Colour for every cluster present in the crop (grey unless listed above)
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Float image of the crop, NaN wherever no cluster index has been painted
colored_mask = np.full_like(cropped_segmentation_mask, fill_value=np.nan, dtype=float)

# Cluster name -> integer index; the index doubles as position in the colormap
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# cell_index -> cluster, built once instead of scanning the obs table per cell
cluster_by_cell_index = {
    int(cell_index): cluster
    for cell_index, cluster in zip(adata_mask_subset.obs['cell_index'], adata_mask_subset.obs['Lvl4'])
}

# Paint every labelled cell of the crop with the index of its cluster;
# labels without a matching cell stay NaN
for cell_id in unique_cell_ids_no_background:
    cluster_label = cluster_by_cell_index.get(int(cell_id))
    if cluster_label in filtered_cluster_colors:
        colored_mask[cropped_segmentation_mask == cell_id] = cluster_label_to_numeric[cluster_label]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters)
plt.imshow(colored_mask, cmap=colormap, interpolation='nearest')

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file.
# NOTE(review): the file name depends only on sample_id, and the other
# Figure 4A cell for sample H5 builds the same name - use distinct names when
# filling in the real path, otherwise the cell run last overwrites the other.
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 4A - Healthy Tooth associated epithelium
# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 5507, 6124    # X range (columns)
y_min, y_max = 865, 1482   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Labels present in the crop; label 0 is treated as background and excluded
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]

# Specify the sample ID
sample_id = "H5"

# Restrict adata to the selected sample. The explicit .copy() turns the view
# into a real object, so adding an obs column below does not rely on anndata
# implicitly converting the view (which emits a warning).
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the sample's cells 1..N in row order.
# NOTE(review): this assumes row k of the sample corresponds to mask label k -
# confirm against how the mask and adata were generated.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of this sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Neut.IE': "#BF40BF",
    'Neut.CT': "#CF9FFF",
    'Neut.BV': "#6495ED",
    'Neut.S100a8-9' : '#DE3163',
    'Ep.TA' : '#702963',
}

# Define the default grey color for other clusters
default_cluster_color = "#a6a6a6"  # Light grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl4'].unique()

# Colour for every cluster present in the crop (grey unless listed above)
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Float image of the crop, NaN wherever no cluster index has been painted
colored_mask = np.full_like(cropped_segmentation_mask, fill_value=np.nan, dtype=float)

# Cluster name -> integer index; the index doubles as position in the colormap
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# cell_index -> cluster, built once instead of scanning the obs table per cell
cluster_by_cell_index = {
    int(cell_index): cluster
    for cell_index, cluster in zip(adata_mask_subset.obs['cell_index'], adata_mask_subset.obs['Lvl4'])
}

# Paint every labelled cell of the crop with the index of its cluster;
# labels without a matching cell stay NaN
for cell_id in unique_cell_ids_no_background:
    cluster_label = cluster_by_cell_index.get(int(cell_id))
    if cluster_label in filtered_cluster_colors:
        colored_mask[cropped_segmentation_mask == cell_id] = cluster_label_to_numeric[cluster_label]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters)
plt.imshow(colored_mask, cmap=colormap, interpolation='nearest')

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file.
# NOTE(review): the file name depends only on sample_id, and the other
# Figure 4A cell for sample H5 builds the same name - use distinct names when
# filling in the real path, otherwise the cell run last overwrites the other.
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 4A - Periodontitis Oral epithelium
# Draws the cropped segmentation mask of sample P9c with every cell filled by the
# colour of its 'Lvl4' cluster and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 2837, 3510   # X range (columns)
y_min, y_max = 3432, 4105   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9c"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Ep.prol': "#D2042D",
    'Ep.or.sp': "#1434A4",
    'Ep.or.b-pb': "#1434A4",
    'Ep.or.k' : '#1434A4',
    'Lang' : '#C9CC3F',
    'T.IE' : '#00FFFF' ,
    'mAPC': '#93C572',
    'mAPC.BV' : '#93C572'
}

# Define the default grey color for other clusters
default_cluster_color = "#a6a6a6"  # Light grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl4'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['Lvl4'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's cluster
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path; the Extended Figure 4A cell for P9c uses the same
# template, so give each panel its own file name to avoid overwriting
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Figure 4A - Periodontitis Tooth associated epithelium
# Draws the cropped segmentation mask of sample P9b with every cell filled by the
# colour of its 'Lvl4' cluster and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 1716, 2481   # X range (columns)
y_min, y_max = 3864, 4629   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9b"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'Neut.IE': "#BF40BF",
    'Neut.CT': "#CF9FFF",
    'Neut.BV': "#6495ED",
    'Neut.S100a8-9' : '#DE3163',
    'Ep.TA' : '#702963',
}

# Define the default grey color for other clusters
default_cluster_color = "#a6a6a6"  # Light grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['Lvl4'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['Lvl4'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's cluster
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path; the Extended Figure 4A cell for P9b uses the same
# template, so give each panel its own file name to avoid overwriting
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Extended Figure 4A - Oral Epithelial Niche - Health
# Draws the cropped segmentation mask of sample H5 with every cell filled by the
# colour of its 'niche_merged' niche and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 438, 3441   # X range (columns)
y_min, y_max = 306, 1692   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "H5"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'OE': 'magenta',
    'Epi-CT': '#C0C0C0',
}

# Define the default grey color for other clusters
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['niche_merged'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's niche
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path - replace with a panel-specific output location
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Extended Figure 4A - TA Epithelial Niche - Health
# Draws the cropped segmentation mask of sample H5 with every cell filled by the
# colour of its 'niche_merged' niche and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 4206, 6495   # X range (columns)
y_min, y_max = 636, 1836   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "H5"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'TAE': 'purple',
    'NeutCT': 'yellow',
}

# Define the default grey color for other clusters
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['niche_merged'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's niche
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): absolute, machine-specific path; the file name says "OE" and the
# folder "Figure5" although this cell is the TA niche panel of Extended Figure 4A -
# looks like a copy-paste leftover, confirm the intended name before renaming
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Niche_composition_OE_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Extended Figure 4A - Oral Epithelial Niche - Periodontitis
# Draws the cropped segmentation mask of sample P9c with every cell filled by the
# colour of its 'niche_merged' niche and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 1446, 4638   # X range (columns)
y_min, y_max = 3204, 4602   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9c"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'OE': 'magenta',
    'Epi-CT': '#C0C0C0',
}

# Define the default grey color for other clusters
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['niche_merged'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's niche
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path; the Figure 4A cell for P9c uses the same template,
# so give each panel its own file name to avoid overwriting
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Extended Figure 4A - TA Epithelial Niche - Periodontitis
# Draws the cropped segmentation mask of sample P9b with every cell filled by the
# colour of its 'niche_merged' niche and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 1080, 2850   # X range (columns)
y_min, y_max = 3382, 6315   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P9b"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'TAE': 'purple',
    'NeutCT': 'yellow',
}

# Define the default grey color for other clusters
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['niche_merged'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's niche
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path; the Figure 4A cell for P9b uses the same template,
# so give each panel its own file name to avoid overwriting
output_path = f"path_to_output_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
#Extended Figure 4B
# Heatmap of how each selected cell cluster ('Lvl4') is distributed over the selected
# niches ('niche_merged'), expressed as a fraction of the cluster's cells in ALL niches.

# Work on a copy of the obs table so the dtype change below cannot leak into adata.obs
# (pd.DataFrame(obj) does not copy DataFrame input by default)
df = adata.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Aggregate counts for all clusters and niches.
# observed=False is spelled out so unobserved categories stay in the table as zero
# counts regardless of the pandas default
niche_groups = df.groupby(['Lvl4', 'niche_merged'], observed=False).size().unstack(fill_value=0)

# Define cluster (y-axis) and niche (x-axis) orders
cluster_order = ['Ep.or.k', 'Ep.or.sp',  'Ep.or.b-pb', 'Ep.prol', 'T.IE', 'Lang',  'Ep.TA', 'Neut.IE', 'Neut.CT', 'Neut.BV',  'Neut.S100a8-9']
niche_order = ['OE', 'Epi-CT', 'TAE', 'NeutCT']  # Niches to plot

# **Normalize using all niches, not just the selected ones**
niche_groups_total = niche_groups.sum(axis=1)  # Sum across ALL niches per cluster
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)  # Normalize (clusters with no cells become NaN)

# **Filter only the selected niches for plotting**
# Plain column selection raises a KeyError if a niche name is missing from the table
niche_groups_relative_filtered = niche_groups_relative[niche_order]

# Reindex to ensure correct order (clusters absent from the table become NaN rows)
niche_groups_relative_filtered = niche_groups_relative_filtered.reindex(index=cluster_order, columns=niche_order)

# **Plot heatmap: Niches (x-axis) and Clusters (y-axis)**
plt.figure(figsize=(4, 5))
sns.heatmap(
    niche_groups_relative_filtered,  # Pre-filtered for plotting
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05, linecolor='black',
    vmax=0.7,  # colour scale is clipped to [0.05, 0.7]
    vmin=0.05,
    xticklabels=niche_order,  # explicit lists force every tick label to be drawn
    yticklabels=cluster_order
)
plt.title('Relative Abundance of Cell Clusters Across Niches')
plt.xlabel('Niche')
plt.ylabel('Cell Cluster')
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Save output
# NOTE(review): absolute, machine-specific path; saving is currently disabled
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/IBEX_clusters_Epi_niches.pdf'
fig = plt.gcf()
#plt.savefig(output_path, dpi=300, format='pdf', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()


In [None]:
#Figure 5A
# Draws the cropped segmentation mask of sample H3 with every cell filled by the
# colour of its 'niche_merged' niche and outlined in grey, then saves the figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 1500, 2900   # X range (columns)
y_min, y_max = 300, 2500   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Sorted unique labels in the crop, plus for every pixel the position of its label
# in that list; the positions let all cells be coloured in one indexing step below
unique_cell_ids, label_position = np.unique(cropped_segmentation_mask, return_inverse=True)
# The inverse is flat in some NumPy versions and input-shaped in others
label_position = label_position.reshape(cropped_segmentation_mask.shape)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "H3"

# Keep only the cells of this sample; copy so the new obs column is written to an
# independent AnnData instead of a view of `adata`
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()
# Number the sample's cells 1..N in row order
# NOTE(review): this presumes the row order matches the label IDs of the mask - confirm
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Cells of the sample whose index occurs as a label inside the crop
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'TAE': '#D3D3D3',
    'NeutCT': 'magenta',
    'T-B-APC': 'cyan',
    'Plasma': 'red',
}

# Define the default grey color for other clusters
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Create a mapping from cluster names to their numeric label (= colormap position)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors.keys())}

# Colormap position of every annotated cell, keyed by its mask label
color_idx_by_cell = {
    cell_id: cluster_label_to_numeric[cluster_label]
    for cell_id, cluster_label in zip(adata_mask_subset.obs['cell_index'].tolist(),
                                      adata_mask_subset.obs['niche_merged'].tolist())
    if cluster_label in cluster_label_to_numeric
}

# One value per unique label: NaN for background and for labels without an
# annotated cell, otherwise the colormap position of the cell's niche
color_idx_per_label = np.array(
    [np.nan if cell_id == 0 else color_idx_by_cell.get(cell_id, np.nan)
     for cell_id in unique_cell_ids.tolist()],
    dtype=float,
)

# Paint all pixels at once (replaces one full-image comparison per cell)
colored_mask = color_idx_per_label[label_position]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters); vmin/vmax pin index k to colour k instead of
# relying on autoscaling to the values that happen to be present
plt.imshow(
    colored_mask,
    cmap=colormap,
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
    interpolation='nearest',
)

# Add contours to highlight cell boundaries
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path (spelled "outpu", unlike the template used by the
# other cells) - replace with a panel-specific output location
output_path = f"path_to_outpu_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
# Figure 5D
# Render a cropped region of the sample's segmentation mask with every cell filled
# by the colour of its `niche_merged` cluster and outlined in grey, then save the
# figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask.tiff')

# Specify the crop coordinates
x_min, x_max = 800, 3000   # X range (columns)
y_min, y_max = 1900, 3300   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P1a"

# Keep only the cells of this sample. The explicit copy avoids writing a new obs
# column into an AnnData view (which forces an implicit copy and emits a warning).
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..N in row order.
# NOTE(review): this assumes the sample's rows are stored in the same order as the
# label IDs of the segmentation mask and that no cell was dropped — confirm.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose index occurs in the cropped mask
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'TAE': '#D3D3D3',
    'NeutCT': 'magenta',
    'T-B-APC': 'cyan',
    'Plasma': 'red',
}

# Colour used for every cluster that has no dedicated colour above
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Give every cluster name an integer index (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors)}

# Lookup table indexed by mask label: the colour index of that cell, or NaN for
# the background (label 0) and for labels that have no matching cell.
mask_labels = cropped_segmentation_mask.astype(np.intp)
label_to_color_idx = np.full(int(mask_labels.max(initial=0)) + 1, np.nan)
cell_indices = adata_mask_subset.obs['cell_index'].to_numpy()
cell_clusters = adata_mask_subset.obs['niche_merged'].values
for position, cell_index in enumerate(cell_indices):
    cluster_label = cell_clusters[position]
    if cluster_label in cluster_label_to_numeric:
        label_to_color_idx[cell_index] = cluster_label_to_numeric[cluster_label]

# Paint all pixels in one indexing pass instead of one full-image comparison per cell
colored_mask = label_to_color_idx[mask_labels]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin index k to colormap entry k, so
# the colours do not depend on which indices happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries (one contour call per cell)
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=1.5,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file (placeholder path; the file name embeds the sample ID)
output_path = f"path_to_outpu_dir_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
# Figure 5B and E (subsequently split into two plots in Illustrator, one for health and
# one for periodontitis, and recolored according to Fig. 5A & D)
# For every (Sample Type, Target Niche) group, plot the mean unique-cell count against
# the mean distance, with standard-error bars on both axes.

# Load csv file with distances based on the distance from TAE tool
df_results = pd.read_csv("path_to_your_file.csv")

# NOTE(review): `df_results_filtered` is not created in this cell, so on a fresh kernel
# the next statement raises NameError unless an earlier cell defines it. The filtering
# step that derives it from `df_results` appears to be missing — restore it here.

# Calculate means, standard deviations, and sample counts for each group.
# Named aggregation yields the flat column names directly, in this order:
# '<column> Mean', '<column> Std', '<column> N' for each value column.
value_columns = ['Unique Cells Count', 'Average Distance']
summary_stats = (
    df_results_filtered
    .groupby(['Sample Type', 'Target Niche'], as_index=False)
    .agg(**{
        f'{column} {suffix}': (column, func)
        for column in value_columns
        for suffix, func in (('Mean', 'mean'), ('Std', 'std'), ('N', 'count'))
    })
)

# Calculate standard errors (std / sqrt(n))
summary_stats['Unique Cells Count SE'] = summary_stats['Unique Cells Count Std'] / np.sqrt(summary_stats['Unique Cells Count N'])
summary_stats['Average Distance SE'] = summary_stats['Average Distance Std'] / np.sqrt(summary_stats['Average Distance N'])

# Define marker styles (per sample type) and colors (per niche)
marker_styles = {'Periodontitis': 's', 'Health': 'o'}
target_niches = summary_stats['Target Niche'].unique()
palette = sns.color_palette('Set2', n_colors=len(target_niches))
# One fixed colour per niche, shared by the data points and the legend. Choosing the
# colour by row position would mismatch the legend whenever a sample type lacks a niche.
niche_colors = dict(zip(target_niches, palette))

# Plot the means with error bars for Perio and Health samples (swapped axes)
plt.figure(figsize=(10, 6))

for _, row in summary_stats.iterrows():
    plt.errorbar(
        x=row['Average Distance Mean'],
        y=row['Unique Cells Count Mean'],
        xerr=row['Average Distance SE'],  # Use standard error for x
        yerr=row['Unique Cells Count SE'],  # Use standard error for y
        fmt=marker_styles[row['Sample Type']],
        color=niche_colors[row['Target Niche']],
        markersize=8
    )

# Create separate legend entries for Sample Type (marker shape) and Target Niche (colour)
sample_type_handles = [Line2D([0], [0], marker=marker, color='w',
                              markerfacecolor='gray', markersize=10, label=sample_type)
                       for sample_type, marker in marker_styles.items()]

niche_handles = [Line2D([0], [0], marker='o', color='w',
                        markerfacecolor=color, markersize=10,
                        label=niche)
                 for niche, color in niche_colors.items()]

# Display legends
plt.legend(handles=niche_handles + sample_type_handles,
           title='Target Niche & Sample Type', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title('Average Distance from Crevicular Epithelium vs Cell Counts (with Error Bars)')
plt.xlabel('Average Distance from Crevicular Epithelium (pixels)')
plt.ylabel('Average Cell Counts')
plt.grid(True)
plt.tight_layout()

# Save the plot as a PDF file (placeholder path). The figure keeps its own background:
# forcing a black face colour here would hide the dark title, axis and tick labels.
output_path = "path_to_output_dir.pdf"
plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
# Extended Figure 4D - Healthy section
# Render a cropped region of the sample's segmentation mask with every cell filled
# by the colour of its `niche_merged` cluster and outlined in grey, then save the
# figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 1910, 3254   # X range (columns)
y_min, y_max = 1555, 2899   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "H11b"

# Keep only the cells of this sample. The explicit copy avoids writing a new obs
# column into an AnnData view (which forces an implicit copy and emits a warning).
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..N in row order.
# NOTE(review): this assumes the sample's rows are stored in the same order as the
# label IDs of the segmentation mask and that no cell was dropped — confirm.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose index occurs in the cropped mask
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'FibCT': 'yellow',
    'T-B-APC': 'cyan',
    'Plasma': 'red',
    'BV': 'orange'
}

# Colour used for every cluster that has no dedicated colour above
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Give every cluster name an integer index (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors)}

# Lookup table indexed by mask label: the colour index of that cell, or NaN for
# the background (label 0) and for labels that have no matching cell.
mask_labels = cropped_segmentation_mask.astype(np.intp)
label_to_color_idx = np.full(int(mask_labels.max(initial=0)) + 1, np.nan)
cell_indices = adata_mask_subset.obs['cell_index'].to_numpy()
cell_clusters = adata_mask_subset.obs['niche_merged'].values
for position, cell_index in enumerate(cell_indices):
    cluster_label = cell_clusters[position]
    if cluster_label in cluster_label_to_numeric:
        label_to_color_idx[cell_index] = cluster_label_to_numeric[cluster_label]

# Paint all pixels in one indexing pass instead of one full-image comparison per cell
colored_mask = label_to_color_idx[mask_labels]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin index k to colormap entry k, so
# the colours do not depend on which indices happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries (one contour call per cell)
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path without the sample ID; give each section its own
# file name so one figure does not overwrite another.
output_path = "path_to_output_dir.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()

In [None]:
# Extended Figure 4D - Periodontitis section
# Render a cropped region of the sample's segmentation mask with every cell filled
# by the colour of its `niche_merged` cluster and outlined in grey, then save the
# figure as SVG.

# Load the segmentation mask
segmentation_mask = io.imread('path_to_segmentation_mask')

# Specify the crop coordinates
x_min, x_max = 1682, 3360   # X range (columns)
y_min, y_max = 4735, 6413   # Y range (rows)

# Crop the segmentation mask
cropped_segmentation_mask = segmentation_mask[y_min:y_max, x_min:x_max]

# Get the unique cell IDs from the cropped segmentation mask (excluding background)
unique_cell_ids = np.unique(cropped_segmentation_mask)
unique_cell_ids_no_background = unique_cell_ids[unique_cell_ids != 0]  # Exclude background (0)

# Specify the sample ID
sample_id = "P7"

# Keep only the cells of this sample. The explicit copy avoids writing a new obs
# column into an AnnData view (which forces an implicit copy and emits a warning).
adata_sample = adata[adata.obs['imageid'] == sample_id].copy()

# Number the cells 1..N in row order.
# NOTE(review): this assumes the sample's rows are stored in the same order as the
# label IDs of the segmentation mask and that no cell was dropped — confirm.
adata_sample.obs['cell_index'] = np.arange(1, len(adata_sample) + 1)

# Keep only the cells whose index occurs in the cropped mask
adata_mask_subset = adata_sample[adata_sample.obs['cell_index'].isin(unique_cell_ids_no_background)].copy()

# Define the specific cluster colors
specific_cluster_colors = {
    'FibCT': 'yellow',
    'T-B-APC': 'cyan',
    'Plasma': 'red',
    'BV': 'orange'
}

# Colour used for every cluster that has no dedicated colour above
default_cluster_color = "#454143"  # Dark grey

# Get the unique clusters present in the cropped area
present_clusters = adata_mask_subset.obs['niche_merged'].unique()

# Filter the colormap to only include the clusters present in the cropped area
filtered_cluster_colors = {cluster: specific_cluster_colors.get(cluster, default_cluster_color)
                           for cluster in present_clusters}

# Print the filtered clusters and their colors
print(f"Filtered clusters: {list(filtered_cluster_colors.keys())}")

# Give every cluster name an integer index (its position in the colormap)
cluster_label_to_numeric = {cluster: idx for idx, cluster in enumerate(filtered_cluster_colors)}

# Lookup table indexed by mask label: the colour index of that cell, or NaN for
# the background (label 0) and for labels that have no matching cell.
mask_labels = cropped_segmentation_mask.astype(np.intp)
label_to_color_idx = np.full(int(mask_labels.max(initial=0)) + 1, np.nan)
cell_indices = adata_mask_subset.obs['cell_index'].to_numpy()
cell_clusters = adata_mask_subset.obs['niche_merged'].values
for position, cell_index in enumerate(cell_indices):
    cluster_label = cell_clusters[position]
    if cluster_label in cluster_label_to_numeric:
        label_to_color_idx[cell_index] = cluster_label_to_numeric[cluster_label]

# Paint all pixels in one indexing pass instead of one full-image comparison per cell
colored_mask = label_to_color_idx[mask_labels]

# Create a colormap from the filtered cluster colors
colormap = ListedColormap(list(filtered_cluster_colors.values()))

# Create a figure to plot
plt.figure(figsize=(13, 17), facecolor='black')

# Plot the colored mask (clusters). vmin/vmax pin index k to colormap entry k, so
# the colours do not depend on which indices happen to be present in the image.
plt.imshow(
    colored_mask,
    cmap=colormap,
    interpolation='nearest',
    vmin=-0.5,
    vmax=len(filtered_cluster_colors) - 0.5,
)

# Add contours to highlight cell boundaries (one contour call per cell)
for cell_id in unique_cell_ids_no_background:
    # Create a binary mask for each cell
    binary_mask = cropped_segmentation_mask == cell_id
    plt.contour(
        binary_mask,
        levels=[0.5],  # Contour level at 0.5 to highlight cell boundaries
        colors='grey',  # Grey contours for better visibility
        linewidths=0.2,  # Line width for contours
    )

# Hide the axes for better visual presentation
plt.axis('off')

# Save the plot as an SVG file
# NOTE(review): placeholder path without the sample ID; give each section its own
# file name so one figure does not overwrite another.
output_path = "path_to_output_dir.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()