In [None]:
import scimap as sm
import pandas as pd
import sys
import os
import scanpy as sc
import seaborn as sns; sns.set(color_codes=True)
import anndata
# Load the processed AnnData object from disk; every later cell reads from `adata`.
adata = anndata.read_h5ad('/data/vasileiosionat2/Xenium/Drake_outputs/ccProcessed.h5ad')
# Display the per-cell metadata table as the cell output.
adata.obs

In [None]:
# Count the cells whose 'status.3' annotation equals 'healthy'.
# NOTE(review): the variable is called `perio_count` although the filter selects
# 'healthy' - confirm which group was intended.
perio_count = (adata.obs['status.3'] == 'healthy').sum()
print(perio_count)

In [None]:
!pip install openpyxl


In [None]:
import pandas as pd


# Save adata.var to an Excel file
# (presumably this is why openpyxl is installed in the previous cell: pandas uses it to write .xlsx)
adata.var.to_excel("/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Xenium_genes.xlsx")


In [None]:
# Spot check: print the 'total_area_mm2' values of the cells at positions 1000-1050.
print(adata.obs['total_area_mm2'].iloc[1000:1051].tolist())

In [None]:
# List the per-cell metadata columns available in adata.obs.
adata.obs.columns.unique()

In [None]:
# List the distinct niche labels; the colour dictionaries and orderings in later cells are keyed on these.
adata.obs['niche_knn50k10_merged'].unique()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path,
                          rotation_angle=30, flip_horizontal=False, scale_bar_length=200,
                          fallback_color='#e5e5e5'):
    """Draw and save a rotated spatial scatter plot (black background) per sample.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table holds the columns 'sample', 'x_centroid',
        'y_centroid' and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot, one figure each.
    cluster_key : str
        Column of ``obs`` holding the cluster label of every cell.
    cluster_colors : dict
        Mapping from cluster label to a matplotlib colour.
    output_path : str
        Destination file. With a single sample the file is written exactly
        here. With several samples ``_<sample_name>`` is inserted before the
        extension so that the figures no longer overwrite each other.
    rotation_angle : float, default 30
        Counter-clockwise rotation, in degrees, applied to the centroids.
    flip_horizontal : bool, default False
        Mirror the x axis before rotating. The default keeps the historical
        behaviour, where the "flip" matrix was the identity.
    scale_bar_length : float, default 200
        Scale-bar length in coordinate units (the original comment states
        micrometres).
    fallback_color : str, default '#e5e5e5'
        Colour for cells whose label is missing from ``cluster_colors``.

    Raises
    ------
    ValueError
        If a sample name selects no cells.
    """
    import os  # local import keeps the function usable on its own

    sample_names = list(sample_names)
    save_per_sample = len(sample_names) > 1

    # Rotation matrix
    rotation_rad = np.deg2rad(rotation_angle)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad),  np.cos(rotation_rad)]
    ])

    # Optional mirror of the x axis (identity when flip_horizontal is False)
    flip_matrix = np.array([
        [-1 if flip_horizontal else 1, 0],
        [0, 1]
    ])

    # Combined transformation: flip first, then rotate
    combined_matrix = rotation_matrix @ flip_matrix

    for sample_name in sample_names:
        # Subset the data for the current sample
        subset_adata = adata[adata.obs['sample'] == sample_name]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values
        if x_coords.size == 0:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Apply the combined transformation to the (2, n_cells) coordinate matrix
        transformed_coords = combined_matrix @ np.vstack((x_coords, y_coords))
        new_x_coords = transformed_coords[0, :]
        new_y_coords = transformed_coords[1, :]

        x_min, x_max = new_x_coords.min(), new_x_coords.max()
        y_min, y_max = new_y_coords.min(), new_y_coords.max()
        x_range = x_max - x_min
        y_range = y_max - y_min

        # Figure size follows the data aspect ratio; a degenerate extent
        # (e.g. a single cell) falls back to a square figure.
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)

        # Set black background
        fig.patch.set_facecolor('black')
        ax.set_facecolor('black')

        # Convert labels to plain objects before mapping so that unmapped
        # labels become NaN and can be replaced by the fallback colour.
        point_colors = (
            subset_adata.obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(fallback_color)
            .values
        )
        ax.scatter(
            x=new_x_coords,
            y=new_y_coords,
            c=point_colors,
            s=4.9  # Slightly smaller dot size
        )

        # Remove grids, ticks and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Set axis limits
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)

        # Scale bar: starts 1% in from the left edge and sits 1% above the bottom
        scale_bar_x_start = x_min + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = y_min + 0.01 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],  # X coordinates of the scale bar
            [scale_bar_y, scale_bar_y],  # Y coordinates (constant for a horizontal bar)
            color='white',  # Scale bar color
            linewidth=8     # Thickness of the scale bar
        )

        # One file per sample when several samples are requested
        if save_per_sample:
            path_root, path_ext = os.path.splitext(output_path)
            target_path = f"{path_root}_{sample_name}{path_ext}"
        else:
            target_path = output_path

        # Set DPI and save the plot
        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(target_path, dpi=300, format='tiff', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)


# Example usage:
sample_names = ['s2r2_HV184']  # List of sample names
# The '{}' placeholder is filled with a file stem via str.format() in the call below.
output_path = '/data/vasileiosionat2/Xenium/Figures/Cluster_tifs/Lvl1/{}.tif'

# Colour of each level-1 cluster label.
lvl1_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',  # 8-digit hex: the trailing 'FF' is the alpha channel (fully opaque)
    'Other': '#e5e5e5'
}
lvl1_cluster_key = 'Lvl1'

# Generate plots for level 1 clusters
# (sample 's2r2_HV184' is written to the file stem 'HV184C')
generate_spatial_plot(adata, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path.format("HV184C"))



In [None]:
# List the distinct 'Lvl2.5' labels; the palettes in the following cells are keyed on these.
adata.obs['Lvl2.5'].unique().tolist()

In [None]:
# List the per-cell metadata columns as a plain Python list.
adata.obs.columns.unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path, crop_coords=None):
    """Draw and save a spatial scatter plot (white background) per sample.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table holds the columns 'sample', 'x_centroid',
        'y_centroid' and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot, one figure each.
    cluster_key : str
        Column of ``obs`` holding the cluster label of every cell.
    cluster_colors : dict
        Mapping from cluster label to a matplotlib colour. Labels missing
        from the mapping are drawn in "#E5E5E5".
    output_path : str
        Destination file. With a single sample the file is written exactly
        here. With several samples ``_<sample_name>`` is inserted before the
        extension so that the figures no longer overwrite each other.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)`` window (inclusive) on the centroid
        coordinates. When omitted the whole sample is drawn.

    Raises
    ------
    ValueError
        If a sample (after cropping) contains no cells.
    """
    import os  # local import keeps the function usable on its own

    sample_names = list(sample_names)
    save_per_sample = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Crop if coordinates are provided
        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['x_centroid'].values
            y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's zero-size reduction error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r} (crop_coords={crop_coords!r})"
            )

        # Figure size follows the data aspect ratio; a degenerate extent
        # (e.g. a single cell) falls back to a square figure.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)

        # Set white background
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Convert labels to plain objects before mapping: a categorical result
        # would reject a fill value that is not already one of its categories.
        colors = (
            subset_adata.obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna("#E5E5E5")
            .values
        )
        ax.scatter(x_coords, y_coords, c=colors, s=4.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        # One file per sample when several samples are requested
        if save_per_sample:
            path_root, path_ext = os.path.splitext(output_path)
            target_path = f"{path_root}_{sample_name}{path_ext}"
        else:
            target_path = output_path

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(target_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)


# Example usage:
crop_coords = (2200, 3618, 180, 2074)  # (x_min, x_max, y_min, y_max)
sample_names = ['HV205B']  # List of sample names
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/HV205B_{}.pdf'

# Muted background palette shared by all panels. Each panel below overrides only
# the compartment it highlights.
# The neutral grey was previously written as the 8-digit value "#E5E5E5E5"
# (i.e. with an alpha of 0xE5), whereas labels missing from a palette fall back
# to the opaque "#E5E5E5" inside generate_spatial_plot; both now use the same
# opaque grey.
_neutral_grey = "#E5E5E5"
_epithelial_muted = "#e0cbb7"
_vascular_muted = "#e0b7b7"
_immune_muted = '#b7d2e0'
_muted_cluster_colors = {
    'Keratin': _epithelial_muted,
    'Spinous': _epithelial_muted,
    'Basal': _epithelial_muted,
    'Crev': _epithelial_muted,
    'Mel': _neutral_grey,
    'Fib': "#d2e0b7",
    'Neur': _neutral_grey,
    'VEC': _vascular_muted,
    'SMC': _vascular_muted,
    'NK': _immune_muted,
    'T': _immune_muted,
    'Plasma': _immune_muted,
    'B': _immune_muted,
    'Neutrophil': _immune_muted,
    'mAPC': _immune_muted,
    'Mast': _immune_muted,
    'Other': _neutral_grey
}

# Epithelial panel
Epi_cluster_colors = {
    **_muted_cluster_colors,
    'Keratin': "#097969",
    'Spinous': "#BB5566FF",
    'Basal': "#004488FF",
    'Crev': "#FD7901FF"
}

# Stromal panel
Strom_cluster_colors = {
    **_muted_cluster_colors,
    'Fib': "#BB5566FF",
    'VEC': "#004488FF",
    'SMC': "#DDAA33FF"
}

# Lymphoid panel
Lymphoid_cluster_colors = {
    **_muted_cluster_colors,
    'NK': '#DDAA33FF',
    'T': '#DE3163',
    'Plasma': '#191970',
    'B': '#0FFF50'
}

# Myeloid panel
Myeloid_cluster_colors = {
    **_muted_cluster_colors,
    'Neutrophil': '#AA336A',
    'mAPC': '#008000',
    'Mast': '#E4D00A'
}

# Generate plots: one file per highlighted compartment
for panel_name, panel_colors in [
    ("Epi", Epi_cluster_colors),
    ("Strom", Strom_cluster_colors),
    ("Lymphoid", Lymphoid_cluster_colors),
    ("Myeloid", Myeloid_cluster_colors),
]:
    generate_spatial_plot(adata, sample_names, 'Lvl2.5', panel_colors, output_path.format(panel_name), crop_coords)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell boundary table (one row per polygon vertex: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate system of vertex_x / vertex_y
deep_xmin, deep_xmax = 2200, 3618  # Horizontal bounds
deep_ymin, deep_ymax = 180, 2074  # Vertical bounds

# Cells with at least one boundary vertex inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Colours of the niches that are highlighted
unique_cluster_colors = {
    'Fibrous CT' : 'red',
     'Epi-CT': '#00FFFF',
     'Plasma-Fib CT' : '#CF9FFF',
     'Plasma' : 'magenta' ,
     'Spinous' : '#5D3FD3',
     'Keratin' : '#0096FF' ,
     'Crevicular' : '#800080' ,
     'Lymphoid' : 'yellow'
}

# For clusters not in `unique_cluster_colors`, set them to grey
default_color = "#e1e1e1"

# Subset the AnnData object for the sample
sample_id = "HV205B"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Cells are matched through the obs index.
# NOTE(review): this assumes the obs index holds the same identifiers as the
# CSV 'cell_id' column - confirm.
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# Dictionary cell_id -> niche label
cluster_mapping = adata_cropped.obs['niche_knn50k10_merged'].to_dict()

# Keep ALL vertices of every selected cell. Previously only the vertices lying
# inside the window were kept, so cells crossing the border were drawn from a
# truncated vertex list; the axis limits below already clip the drawing.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['niche_knn50k10_merged'] = deep_data['cell_id'].map(cluster_mapping)

# Drop rows with missing clusters (if any cell IDs in the cropped area are not in adata_cropped)
deep_data = deep_data.dropna(subset=['niche_knn50k10_merged'])

fig = plt.figure(figsize=(10, 10))

# NOTE(review): plt.style.use() changes the global matplotlib style, so it also
# affects every figure created by later cells. Kept as-is because later cells
# may rely on it; consider plt.style.context() if that is not intended.
plt.style.use('dark_background')
ax = fig.gca()
fig.patch.set_facecolor('black')

# Draw one filled polygon per cell, coloured by its niche
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_knn50k10_merged'].iloc[0]  # All rows for the same cell_id share one label

    # Get the color for the cluster or default to grey
    color = unique_cluster_colors.get(cluster, default_color)

    ax.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes for better visual presentation
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Set the axis limits to match the cropped area
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Add the scale bar. Because it is attached to ax.transData, both its length
# and its thickness are expressed in data (coordinate) units, not pixels.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

ax.add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Niches_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot (figure background was set to black above)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path, crop_coords=None):
    """Draw and save a spatial scatter plot (white background) per sample.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table holds the columns 'sample', 'x_centroid',
        'y_centroid' and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot, one figure each.
    cluster_key : str
        Column of ``obs`` holding the cluster label of every cell.
    cluster_colors : dict
        Mapping from cluster label to a matplotlib colour. Labels missing
        from the mapping are drawn in "#E5E5E5".
    output_path : str
        Destination file. With a single sample the file is written exactly
        here. With several samples ``_<sample_name>`` is inserted before the
        extension so that the figures no longer overwrite each other.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)`` window (inclusive) on the centroid
        coordinates. When omitted the whole sample is drawn.

    Raises
    ------
    ValueError
        If a sample (after cropping) contains no cells.
    """
    import os  # local import keeps the function usable on its own

    sample_names = list(sample_names)
    save_per_sample = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Crop if coordinates are provided
        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            crop_mask = (
                (x_coords >= x_min) & (x_coords <= x_max) &
                (y_coords >= y_min) & (y_coords <= y_max)
            )
            subset_adata = subset_adata[crop_mask]
            x_coords = subset_adata.obs['x_centroid'].values
            y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's zero-size reduction error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r} (crop_coords={crop_coords!r})"
            )

        # Figure size follows the data aspect ratio; a degenerate extent
        # (e.g. a single cell) falls back to a square figure.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)

        # Set white background
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Convert labels to plain objects before mapping: a categorical result
        # would reject a fill value that is not already one of its categories.
        colors = (
            subset_adata.obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna("#E5E5E5")
            .values
        )
        ax.scatter(x_coords, y_coords, c=colors, s=4.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        # One file per sample when several samples are requested
        if save_per_sample:
            path_root, path_ext = os.path.splitext(output_path)
            target_path = f"{path_root}_{sample_name}{path_ext}"
        else:
            target_path = output_path

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(target_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)


# Example usage:
crop_coords = (2832, 4250, 480, 2374)  # (x_min, x_max, y_min, y_max)
sample_names = ['s1r3']  # List of sample names
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/HV214s_{}.pdf'

# Muted background palette shared by all panels. Each panel below overrides only
# the compartment it highlights.
# The neutral grey was previously written as the 8-digit value "#E5E5E5E5"
# (i.e. with an alpha of 0xE5), whereas labels missing from a palette fall back
# to the opaque "#E5E5E5" inside generate_spatial_plot; both now use the same
# opaque grey.
_neutral_grey = "#E5E5E5"
_epithelial_muted = "#e0cbb7"
_vascular_muted = "#e0b7b7"
_immune_muted = '#b7d2e0'
_muted_cluster_colors = {
    'Keratin': _epithelial_muted,
    'Spinous': _epithelial_muted,
    'Basal': _epithelial_muted,
    'Crev': _epithelial_muted,
    'Mel': _neutral_grey,
    'Fib': "#d2e0b7",
    'Neur': _neutral_grey,
    'VEC': _vascular_muted,
    'SMC': _vascular_muted,
    'NK': _immune_muted,
    'T': _immune_muted,
    'Plasma': _immune_muted,
    'B': _immune_muted,
    'Neutrophil': _immune_muted,
    'mAPC': _immune_muted,
    'Mast': _immune_muted,
    'Other': _neutral_grey
}

# Epithelial panel
Epi_cluster_colors = {
    **_muted_cluster_colors,
    'Keratin': "#097969",
    'Spinous': "#BB5566FF",
    'Basal': "#004488FF",
    'Crev': "#FD7901FF"
}

# Stromal panel
Strom_cluster_colors = {
    **_muted_cluster_colors,
    'Fib': "#BB5566FF",
    'VEC': "#004488FF",
    'SMC': "#DDAA33FF"
}

# Lymphoid panel
Lymphoid_cluster_colors = {
    **_muted_cluster_colors,
    'NK': '#DDAA33FF',
    'T': '#DE3163',
    'Plasma': '#191970',
    'B': '#0FFF50'
}

# Myeloid panel
Myeloid_cluster_colors = {
    **_muted_cluster_colors,
    'Neutrophil': '#AA336A',
    'mAPC': '#008000',
    'Mast': '#E4D00A'
}

# Generate plots: one file per highlighted compartment
for panel_name, panel_colors in [
    ("Epi", Epi_cluster_colors),
    ("Strom", Strom_cluster_colors),
    ("Lymphoid", Lymphoid_cluster_colors),
    ("Myeloid", Myeloid_cluster_colors),
]:
    generate_spatial_plot(adata, sample_names, 'Lvl2.5', panel_colors, output_path.format(panel_name), crop_coords)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell boundary table (one row per polygon vertex: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate system of vertex_x / vertex_y
deep_xmin, deep_xmax = 2832, 4250  # Horizontal bounds
deep_ymin, deep_ymax = 480, 2374  # Vertical bounds

# Cells with at least one boundary vertex inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Colours of the niches that are highlighted
unique_cluster_colors = {
    'Fibrous CT' : 'red',
     'Epi-CT': '#00FFFF',
     'Plasma-Fib CT' : '#CF9FFF',
     'Plasma' : 'magenta' ,
     'Spinous' : '#5D3FD3',
     'Keratin' : '#0096FF' ,
     'Crevicular' : '#800080' ,
     'Lymphoid' : 'yellow'
}

# For clusters not in `unique_cluster_colors`, set them to grey
default_color = "#e1e1e1"

# Subset the AnnData object for the sample
sample_id = "s1r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Cells are matched through the obs index.
# NOTE(review): this assumes the obs index holds the same identifiers as the
# CSV 'cell_id' column - confirm.
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# Dictionary cell_id -> niche label
cluster_mapping = adata_cropped.obs['niche_knn50k10_merged'].to_dict()

# Keep ALL vertices of every selected cell. Previously only the vertices lying
# inside the window were kept, so cells crossing the border were drawn from a
# truncated vertex list; the axis limits below already clip the drawing.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['niche_knn50k10_merged'] = deep_data['cell_id'].map(cluster_mapping)

# Drop rows with missing clusters (if any cell IDs in the cropped area are not in adata_cropped)
deep_data = deep_data.dropna(subset=['niche_knn50k10_merged'])

fig = plt.figure(figsize=(10, 10))

# NOTE(review): plt.style.use() changes the global matplotlib style, so it also
# affects every figure created by later cells. Kept as-is because later cells
# may rely on it; consider plt.style.context() if that is not intended.
plt.style.use('dark_background')
ax = fig.gca()
fig.patch.set_facecolor('black')

# Draw one filled polygon per cell, coloured by its niche
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_knn50k10_merged'].iloc[0]  # All rows for the same cell_id share one label

    # Get the color for the cluster or default to grey
    color = unique_cluster_colors.get(cluster, default_color)

    ax.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes for better visual presentation
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Set the axis limits to match the cropped area
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Add the scale bar. Because it is attached to ax.transData, both its length
# and its thickness are expressed in data (coordinate) units, not pixels.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

ax.add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Niches_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot (figure background was set to black above)
plt.show()

In [None]:
# List the distinct sample identifiers (used for the exclusion list in the next cell).
adata.obs['sample'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
import scimap.plotting as sm
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D

# NOTE(review): the import above rebinds `sm` to scimap.plotting, shadowing the
# top-level `import scimap as sm` of the first cell for every later cell.

# List of SampleIDs to exclude
excluded_sample_ids = ['s2r9_a', 's2r9_b', 'HV140A_C', 'HV140A_D', 's2r4_a', 's2r4_b', 's2r5', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211', 
                       'HV192A_A', 'HV192A_B','HV192A_C']

# Create a subset of the AnnData object excluding the specific SampleIDs
adata_subset = adata[~adata.obs['sample'].isin(excluded_sample_ids)].copy()

# Create a new figure with custom dimensions
# NOTE(review): stacked_barplot below receives its own figsize=(2, 6); if it
# opens a new figure, this one stays empty - confirm it is still needed.
plt.figure(figsize=(50, 6))  # Adjust width and height as needed

# Define the specific order of the niche categories
anticipated_order = [ 'Spinous', 'Keratin', 'Crevicular',  'Fibrous CT',  'Epi-CT', 'Lymphoid', 'Plasma-Fib CT',  'Plasma']

# Reorder the niche categories in the copied AnnData object
adata_subset.obs['niche_knn50k10_merged'] = adata_subset.obs['niche_knn50k10_merged'].astype('category')
adata_subset.obs['niche_knn50k10_merged'] = adata_subset.obs['niche_knn50k10_merged'].cat.reorder_categories(anticipated_order)

# Sort the cells by niche by indexing the AnnData object itself. Assigning a
# sorted table back to `.obs` would permute the metadata rows while leaving the
# expression matrix in its old order, silently misaligning the two.
niche_sorted_positions = adata_subset.obs['niche_knn50k10_merged'].cat.codes.to_numpy().argsort(kind='stable')
adata_subset = adata_subset[niche_sorted_positions].copy()

# Define custom colors for each niche category
colors = {
    'Fibrous CT' : 'red',
     'Epi-CT': '#00FFFF',
     'Plasma-Fib CT' : '#CF9FFF',
     'Plasma' : 'magenta' ,
     'Spinous' : '#5D3FD3',
     'Keratin' : '#0096FF' ,
     'Crevicular' : '#800080' ,
     'Lymphoid' : 'yellow'
    
}

# Plot the stacked bar plot with specified colors
plot = sm.stacked_barplot(adata_subset, x_axis='status.3', y_axis='niche_knn50k10_merged',
                          order_yaxis=anticipated_order, method='percent', plot_tool='matplotlib', color=colors, figsize=(2, 6))

# Get the current axes and set its background color to white
plt.gca().set_facecolor('white')

# Format y-axis ticks as percentages
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

# Get the x-axis limits
x_min, x_max = plt.xlim()

# Plot horizontal dashed guide lines at y = 0, 0.25, 0.5, 0.75 and 1
plt.hlines(y=[0, 0.25, 0.5, 0.75, 1], xmin=x_min, xmax=x_max, linestyle='--', colors='grey')

# Set y-axis tick labels to match the percentages
plt.yticks([0, 0.25, 0.5, 0.75, 1], ['0%', '25%', '50%', '75%', '100%'])

# Modify the borders
plt.gca().spines['top'].set_visible(False)       # Make top border invisible
plt.gca().spines['right'].set_visible(False)     # Make right border invisible
plt.gca().spines['bottom'].set_color('black')    # Set bottom border color to black
plt.gca().spines['left'].set_color('black')      # Set left border color to black

# Adjust y-axis limits to start a little above the lower edge of the outline
y_min, y_max = plt.ylim()
plt.ylim(y_min - 0.03, y_max)

# Remove the x-axis title
plt.xlabel('')

# Increase font size of the tick labels
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)  # Adjust font size as needed

# Rotate x-axis labels diagonally
plt.xticks(rotation=45, ha='right')

# Custom legend handles as circles, sorted by anticipated_order in reverse
sorted_legend_labels = list(reversed(anticipated_order))  # a list, so it can be iterated more than once
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[category], markersize=10) 
                         for category in sorted_legend_labels]

# Add legend with sorted custom handles and labels outside the plot
legend = plt.legend(sorted_legend_handles, sorted_legend_labels, loc='upper left', bbox_to_anchor=(1, 1), markerscale=1, frameon=False, borderpad=2)

# Save the plot as a PDF file; bbox_inches='tight' keeps the legend, which is
# anchored outside the axes, inside the saved page.
plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_niche_stacked_barplot.pdf', format='pdf', bbox_inches='tight')

# Display the plot
plt.show()


In [None]:
# List the distinct 'Lvl4' labels; the heatmap cells below select rows by these names.
adata.obs['Lvl4'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Requires the 'niche_knn50k10_merged' and 'Lvl4' columns in adata.obs.
# Work on an explicit copy: pd.DataFrame(adata.obs) does not copy by default, so
# the column assignment below could otherwise write through to adata.obs.
df = adata.obs.copy()
df['niche_knn50k10_merged'] = df['niche_knn50k10_merged'].astype('category')

# Aggregate counts; observed=False keeps niche / cell-type combinations with zero cells
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Transpose the DataFrame to swap axes (rows: cell types, columns: niches)
niche_groups = niche_groups.T

# Define custom order for the x-axis (after transposing, so this is the niche clusters now)
custom_order = ['Keratin', 'Spinous', 'Epi-CT', 'Crevicular',  'Fibrous CT', 'Plasma-Fib CT',  'Plasma', 'Lymphoid']

# Reindex columns based on the custom order
niche_groups = niche_groups.reindex(columns=custom_order)

# Define subset of clusters for the y-axis
y_axis_clusters = ['Ep.K',  'Ep.Sp', 'Ep.PB',   'Ep.B-PB',  'Ep.prol', 
                   'Ep.B',   'Mel',  'Lang',  'T.IE', 'cDC2', 
                   'Ep.Cr.B',  'Ep.Cr.PB',  'Ep.Cr.Sp.1',  'Ep.Cr.Sp.2',  'Fib.1',  'Fib.2',  'Fib.3',  'Fib.4',  'Fib.5', 'MyoF',
                    'VEC.1', 'VEC.2', 'VEC.3', 'VEC.4', 'VEC.5',   'SMC', 'LEC', 'Neur',  'Mac', 'Mast', 'Mono', 'pDC',  'cDC1',
                     'Pl.1', 'Pl.2',  'PB', 'NK', 'Tc', 'Treg', 'mregDC',    'Th_proximity_to_B', 'Th', 'T.IL7Rhi',
 'T_proximity_to_Fib', 'B']
# .loc raises KeyError if any of these labels is absent from 'Lvl4'
niche_groups_filtered = niche_groups.loc[y_axis_clusters]

# The colour scale saturates at color_max, so the "greater than" label belongs
# on that tick. Previously the 2000 tick was labelled '>2,000' although counts
# between 2000 and 3000 were still drawn in distinct colours.
color_max = 3000
colorbar_ticks = [0, 500, 1000, 2000, color_max]
colorbar_labels = ['0', '500', '1,000', '2,000', '>3,000']

# Plot heatmap with enhanced colormap and custom vmax
plt.figure(figsize=(9, 16))  # Adjusted figsize to accommodate the x-axis labels
sns.heatmap(
    niche_groups_filtered, 
    cmap='coolwarm', 
    cbar_kws={'label': 'Count', 'ticks': colorbar_ticks, 'format': '%.0f'}, 
    linewidths=0.05, 
    linecolor='black', 
    vmax=color_max
)
plt.title('Cell Type Distribution in Niches')
plt.xlabel('Niche')
plt.ylabel('Cell Type')
plt.xticks(rotation=90, ha='right')  # Adjust rotation and alignment of xticks
plt.yticks(rotation=0)

# Customize the color bar tick labels
cbar = plt.gca().collections[0].colorbar
cbar.set_ticks(colorbar_ticks)
cbar.ax.set_yticklabels(colorbar_labels)  # Set custom tick labels

plt.tight_layout()  # Ensures all elements fit within the figure area
# Destination of the PDF (saving is currently disabled)
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Lvl4_Clusters_Niches_Xenium.pdf"
#plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Requires the 'niche_knn50k10_merged' and 'Lvl2.5' columns in adata.obs.
# Work on an explicit copy: pd.DataFrame(adata.obs) does not copy by default, so
# the column assignment below could otherwise write through to adata.obs.
df = adata.obs.copy()
df['niche_knn50k10_merged'] = df['niche_knn50k10_merged'].astype('category')

# Aggregate counts; observed=False keeps niche / cell-type combinations with zero cells
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl2.5'], observed=False).size().unstack(fill_value=0)

# Transpose the DataFrame to swap axes (rows: cell types, columns: niches)
niche_groups = niche_groups.T

# Define custom order for the x-axis (after transposing, so this is the niche clusters now)
custom_order = ['Crevicular', 'Keratin', 'Spinous',  'Epi-CT',  'Fibrous CT', 'Plasma-Fib CT',  'Plasma', 'Lymphoid']

# Reindex columns based on the custom order
niche_groups = niche_groups.reindex(columns=custom_order)

# Define subset of clusters for the y-axis
y_axis_clusters = ['Crev', 'Keratin', 'Spinous', 'Basal', 'Fib', 'VEC', 'SMC', 'mAPC', 'Mast', 'Plasma',  'T', 'B']
# .loc raises KeyError if any of these labels is absent from 'Lvl2.5'
niche_groups_filtered = niche_groups.loc[y_axis_clusters]

# The colour scale saturates at color_max, which is also the top colour-bar tick
color_max = 10000
colorbar_ticks = [0, 2000, 5000, color_max]

# Plot heatmap with enhanced colormap and custom vmax
plt.figure(figsize=(9, 16))  # Adjusted figsize to accommodate the x-axis labels
sns.heatmap( niche_groups_filtered, 
    cmap='coolwarm', 
    cbar_kws={'label': 'Count', 'ticks': colorbar_ticks, 'format': '%.0f'}, 
    linewidths=0.05, 
    linecolor='black', 
    vmax=color_max
)
plt.title('Cell Type Distribution in Niches')
plt.xlabel('Niche')
plt.ylabel('Cell Type')
plt.xticks(rotation=90, ha='right')  # Adjust rotation and alignment of xticks
plt.yticks(rotation=0)

# Customize the color bar tick labels
cbar = plt.gca().collections[0].colorbar
cbar.set_ticks(colorbar_ticks)
cbar.ax.set_yticklabels(['0', '2000', '5,000', '>10,000'])  # Set custom tick labels

plt.tight_layout()  # Ensures all elements fit within the figure area
# Save the plot as a PDF file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lvl2-3_Clusters_Niches_Xenium.pdf"
plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np  # You need this for np.log1p

# Work on an explicit copy of adata.obs: pd.DataFrame(adata.obs) does not copy by
# default, so the column assignment below could otherwise write through to adata.obs.
df = adata.obs.copy()
df['niche_knn50k10_merged'] = df['niche_knn50k10_merged'].astype('category')

# Aggregate counts; observed=False keeps niche / cell-type combinations with zero cells
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl2.5'], observed=False).size().unstack(fill_value=0)

# Transpose the DataFrame (rows: cell types, columns: niches)
niche_groups = niche_groups.T

# NOTE(review): the niche and cell-type labels below ('TAE', 'Im-Str', 'FibCT',
# 'T-B-APC', 'APC', 'Fibroblast') differ from the labels the other cells of this
# notebook use for the same two columns ('Crevicular', 'Fibrous CT', 'mAPC',
# 'Fib', ...). Niches missing from the data become all-NaN columns after
# reindex, and missing cell types make .loc raise KeyError - confirm the labels.

# Custom x-axis (niche) order
custom_order = ['Keratin', 'Spinous', 'Epi-CT', 'TAE',  
                'Im-Str', 'FibCT', 'Plasma', 'T-B-APC']
niche_groups = niche_groups.reindex(columns=custom_order)

# Custom y-axis (cell type) selection
y_axis_clusters = ['Keratin', 'Spinous', 'Basal', 'TAE', 'APC', 'Mast', 'Fibroblast', 'VEC', 'SMC', 'Plasma', 'B', 'T']
niche_groups_filtered = niche_groups.loc[y_axis_clusters]

# Transpose so clusters are columns (x-axis), and niches are rows (y-axis)
niche_groups_flipped = niche_groups_filtered.T  # Now niches are rows, clusters are columns

# Apply log1p transformation (vectorised; replaces the deprecated element-wise applymap)
niche_groups_log = np.log1p(niche_groups_flipped)

# Colour scale limits, in log1p space. Counts below the lower limit all share
# the lowest colour and counts above the upper limit all share the highest.
log_color_min = np.log1p(2000)
log_color_max = np.log1p(20000)

# Plot heatmap
plt.figure(figsize=(7, 5))
sns.heatmap(
    niche_groups_log,
    vmax=log_color_max, 
    vmin=log_color_min,
    cmap='plasma',
    cbar_kws={'label': 'log(Count + 1)'},
    linewidths=0.05,
    linecolor='black'
)

plt.title('Cell Type Distribution in Niches')
# After the second transpose the columns (x-axis) are cell types and the rows
# (y-axis) are niches; the two axis titles were previously swapped.
plt.xlabel('Cell Type')
plt.ylabel('Niche')
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)

# Adjust colorbar ticks (positions in log1p space, labels in raw counts)
cbar = plt.gca().collections[0].colorbar
log_ticks = [log_color_min, np.log1p(10000), log_color_max]
cbar.set_ticks(log_ticks)
cbar.ax.set_yticklabels(['2000', '10,000', '20,000'])

plt.tight_layout()
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_LV3_Clusters_Niches_Log.pdf"
plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata as a DataFrame; Lvl4 is made categorical before grouping
df = pd.DataFrame(adata.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Cell counts per (Lvl4 cluster, niche): rows = clusters, columns = niches
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Clusters and niches to display (these lists also define the plotting order)
selected_clusters = ['Ep.K',  'Ep.Sp', 'Ep.PB',   'Ep.B-PB',  'Ep.prol', 
                   'Ep.B',   'Mel',  'Lang',  'T.IE', 'cDC2', 
                   'Ep.Cr.B',  'Ep.Cr.PB',  'Ep.Cr.Sp.1',  'Ep.Cr.Sp.2',  'Fib.1',  'Fib.2',  'Fib.3',  'Fib.4',  'Fib.5', 'MyoF',
                    'VEC.1', 'VEC.2', 'VEC.3', 'VEC.4', 'VEC.5',   'SMC', 'LEC', 'Neur',  'Mac', 'Mast', 'Mono', 'pDC',  'cDC1',
                     'Pl.1', 'Pl.2',  'PB', 'NK', 'Tc', 'Treg', 'mregDC',    'Th_proximity_to_B', 'Th', 'T.IL7Rhi',
 'T_proximity_to_Fib', 'B']
selected_niches = ['Keratin', 'Spinous', 'Epi-CT', 'Crevicular',  'Fibrous CT', 'Plasma-Fib CT',  'Plasma', 'Lymphoid']

# Labels that are both requested and present in the count table
present_clusters = niche_groups.index.intersection(selected_clusters)
present_niches = niche_groups.columns.intersection(selected_niches)
niche_groups_filtered = niche_groups.loc[present_clusters, present_niches]

# Relative frequency of each cluster across ALL niches (each row sums to 1 before
# filtering), so the displayed values are fractions of the cluster's total cells
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Same row/column selection applied to the relative frequencies
niche_groups_relative_filtered = niche_groups_relative.loc[present_clusters, present_niches]

# Display order: identical to the selection lists above
cluster_order = list(selected_clusters)
niche_order = list(selected_niches)

# Niches on the y-axis, clusters on the x-axis.
# .loc with explicit label lists raises KeyError if a requested label is absent.
heatmap_matrix = niche_groups_relative_filtered.loc[cluster_order, niche_order].T

plt.figure(figsize=(16, 6))
sns.heatmap(
    heatmap_matrix,
    cmap='coolwarm',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05,
    linecolor='black',
    vmax=0.6,
    xticklabels=cluster_order,
    yticklabels=niche_order,
)
plt.title('Relative Cell Type Distribution in Niches')
plt.xlabel('Cell Cluster')
plt.ylabel('Niche')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()  # keeps the rotated tick labels inside the figure
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.svg'
fig = plt.gcf()
# Saving is currently disabled; uncomment to write the SVG
#plt.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path,
                          rotation_angle=-10, flip_horizontal=False, scale_bar_length=200):
    """Draw one black-background spatial scatter plot per sample and save it as TIFF.

    Cell centroids (``adata.obs['x_centroid']`` / ``['y_centroid']``) are optionally
    mirrored, rotated, coloured by ``cluster_key`` and plotted with a white scale bar
    in the lower-left corner.

    Note: a function with the same name is defined earlier in the notebook with a
    30 degree rotation; whichever cell ran last wins. Pass ``rotation_angle``
    explicitly rather than relying on definition order.

    Parameters
    ----------
    adata : AnnData
        Must provide ``obs['sample']``, ``obs['x_centroid']``, ``obs['y_centroid']``
        and ``obs[cluster_key]``.
    sample_names : iterable of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        ``adata.obs`` column used to colour the cells.
    cluster_colors : dict
        Maps each value of ``cluster_key`` to a matplotlib colour.
    output_path : str
        Destination file. If it contains a ``{}`` placeholder, the placeholder is
        filled with the sample name so each sample gets its own file; otherwise the
        path is used as given (with several samples, later ones overwrite earlier ones).
    rotation_angle : float, default -10
        Counter-clockwise rotation applied to the coordinates, in degrees.
    flip_horizontal : bool, default False
        If True, mirror the x coordinates before rotating. The default reproduces
        the previous behaviour, where the "flip" matrix was the identity.
    scale_bar_length : float, default 200
        Scale-bar length in coordinate units (assumed to be micrometres - TODO confirm).

    Raises
    ------
    ValueError
        If a sample name matches no cells.
    """
    rotation_rad = np.deg2rad(rotation_angle)

    # Standard 2-D rotation matrix
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad),  np.cos(rotation_rad)]
    ])

    # Mirror about the y-axis only when requested; otherwise this is the identity
    x_sign = -1 if flip_horizontal else 1
    horizontal_flip_matrix = np.array([
        [x_sign, 0],
        [0, 1]
    ])

    # Flip first, then rotate
    combined_matrix = rotation_matrix @ horizontal_flip_matrix

    for sample_name in sample_names:
        # Cells belonging to the current sample
        subset_adata = adata[adata.obs['sample'] == sample_name]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values
        if len(x_coords) == 0:
            raise ValueError(
                f"No cells found for sample {sample_name!r} in adata.obs['sample']"
            )

        # Apply the combined transformation to the 2 x n coordinate matrix
        coords = np.vstack((x_coords, y_coords))
        transformed_coords = combined_matrix @ coords
        new_x_coords = transformed_coords[0, :]
        new_y_coords = transformed_coords[1, :]

        # Size the figure so its shorter side is 6 inches and the tissue keeps its shape
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        # A degenerate extent (single cell / collinear cells) falls back to a square figure
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)

        # Black background for both the figure and the axes
        fig.patch.set_facecolor('black')
        ax.set_facecolor('black')

        ax.scatter(
            x=new_x_coords,
            y=new_y_coords,
            c=subset_adata.obs[cluster_key].map(cluster_colors),
            s=4.9
        )

        # Strip grid, ticks and spines so only the cells and the scale bar remain
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(new_x_coords.min(), new_x_coords.max())
        ax.set_ylim(new_y_coords.min(), new_y_coords.max())
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar: starts 1% in from the left/bottom edges of the data extent
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.01 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='white',
            linewidth=8
        )

        # One file per sample when the caller supplies a '{}' template
        save_path = output_path.format(sample_name) if '{}' in output_path else output_path

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(save_path, dpi=300, format='tiff', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)


# Example usage: plot the Lvl1 clusters of a single sample.
sample_names = ['s2r1_HV207']  # Values of adata.obs['sample'] to plot
# Path template; the '{}' is filled in at the call below
output_path = '/data/vasileiosionat2/Xenium/Figures/Cluster_tifs/Lvl1/{}.tif'

# Colour for each Lvl1 category ('#990F0FFF' is an 8-digit RGBA hex with full opacity)
lvl1_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}
lvl1_cluster_key = 'Lvl1'

# Generate plots for level 1 clusters.
# The file name is fixed to "HV207" here and is not derived from sample_names.
generate_spatial_plot(adata, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path.format("HV207"))



In [None]:
# Inspect the available per-cell metadata columns
adata.obs.columns

In [None]:
# List the distinct sample names (the values used to subset adata by sample)
adata.obs['sample'].unique().tolist()

In [None]:
# List the distinct values of the 'pt.id' column
adata.obs['pt.id'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
import scimap.plotting as sm
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D

# Samples left out of this summary plot
excluded_sample_ids = ['s2r9_a', 's2r9_b', 'HV140A_C', 'HV140A_D', 's2r4_a', 's2r4_b', 's2r5', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211', 
                       'HV192A_A', 'HV192A_B','HV192A_C']

# Independent copy of the AnnData without the excluded samples
adata_subset = adata[~adata.obs['sample'].isin(excluded_sample_ids)].copy()

# NOTE(review): the barplot call below passes its own figsize=(2, 6); confirm whether
# this separately created figure is still needed.
plt.figure(figsize=(50, 6))

# Stacking order of the Lvl1 categories
anticipated_order = ['Other', 'Epithelial', 'Immune', 'Fibroblast', 'Vascular']

# Make Lvl1 categorical with that category order
# (reorder_categories raises if the existing categories differ from anticipated_order)
adata_subset.obs['Lvl1'] = adata_subset.obs['Lvl1'].astype('category')
adata_subset.obs['Lvl1'] = adata_subset.obs['Lvl1'].cat.reorder_categories(anticipated_order)

# Sort the cells by Lvl1. The whole AnnData is reordered by position so that obs and
# the expression matrix stay aligned; assigning a sorted DataFrame to `.obs` would
# shuffle the metadata rows while leaving X in its original order.
row_order = adata_subset.obs.reset_index(drop=True).sort_values('Lvl1').index.to_numpy()
adata_subset = adata_subset[row_order].copy()

# Colour for each Lvl1 category
colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Stacked percentage barplot of Lvl1 composition per 'status.3' group.
# NOTE(review): in this cell `sm` is `scimap.plotting` (rebound by the import above),
# not the top-level `scimap` package imported in the first cell of the notebook.
plot = sm.stacked_barplot(adata_subset, x_axis='status.3', y_axis='Lvl1',
                          order_yaxis=anticipated_order, method='percent', plot_tool='matplotlib', color=colors, figsize=(2, 6))

# White axes background
plt.gca().set_facecolor('white')

# Format y-axis ticks as percentages (values are fractions, so 1.0 == 100%)
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

# Span the reference lines across the full x range
x_min, x_max = plt.xlim()

# Dashed horizontal reference lines at 0, 25, 50, 75 and 100%
plt.hlines(y=[0, 0.25, 0.5, 0.75, 1], xmin=x_min, xmax=x_max, linestyle='--', colors='grey')

# Explicit tick positions and labels matching the reference lines
plt.yticks([0, 0.25, 0.5, 0.75, 1], ['0%', '25%', '50%', '75%', '100%'])

# Keep only the bottom and left borders, in black
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['bottom'].set_color('black')
plt.gca().spines['left'].set_color('black')

# Extend the lower y limit slightly so the 0% line is not flush with the axis border
y_min, y_max = plt.ylim()
plt.ylim(y_min - 0.03, y_max)

# No x-axis title
plt.xlabel('')

# Larger tick labels on both axes
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

# Rotate x-axis labels diagonally
plt.xticks(rotation=45, ha='right')

# Legend entries as coloured circles, listed in reverse stacking order (top of stack first)
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[category], markersize=10) 
                         for category in reversed(anticipated_order)]
# Materialised as a list so the labels can be iterated more than once
sorted_legend_labels = list(reversed(anticipated_order))

# Legend placed outside the plot area, to the right
legend = plt.legend(sorted_legend_handles, sorted_legend_labels, loc='upper left', bbox_to_anchor=(1, 1), markerscale=1, frameon=False, borderpad=2)

# Save the plot as an SVG file
plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Xenium_stacked_barplot.svg', format='svg')

# Display the plot
plt.show()


In [None]:
import scanpy as sc

# Lvl1 clusters to show, in display order
clusters_of_interest = ['Epithelial', 'Fibroblast', 'Immune', 'Vascular']

# Marker genes, listed three per cluster in the same order as the clusters above
correct_order = [
    'KRT19', 'EHF', 'KRT5', 
    'COL5A2', 'VCAN', 'PCOLCE', 
    'CYTIP', 'PTPRC', 'CXCR4',
    'CD34', 'EGFL7', 'VWF'
]

# Independent copy restricted to the clusters of interest
adata_subset = adata[adata.obs['Lvl1'].isin(clusters_of_interest)].copy()

# Ordered categorical so the dot plot groups follow clusters_of_interest
lvl1_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl1'] = adata_subset.obs['Lvl1'].astype(lvl1_dtype)

# Build the dot plot, swap its axes, then restyle it (greyscale, no dot outlines)
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl1',
        standard_scale='var',   # scale each gene across groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Save the plot as a PDF with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Xenium_Lvl1_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the gzipped boundary table for this sample
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Quick look at the first rows
print(data.head())

# One group of vertex rows per cell_id
grouped = data.groupby('cell_id')

# Dark theme; plt.style.use changes the global style, so it stays active for later cells
plt.style.use('dark_background')

plt.figure(figsize=(12, 10))

# Draw every outline as a filled polygon: dark grey fill, thin white contour
for _, outline in grouped:
    plt.fill(
        outline['vertex_x'].values,
        outline['vertex_y'].values,
        color='darkgrey',
        edgecolor='white',
        linewidth=0.5,
        alpha=0.7,
    )

# NOTE(review): the title says "Nuclei" but the file loaded above is cell_boundaries - confirm
plt.title("Nuclei Masks (Full Dataset)")
plt.xlabel("X Coordinate")
plt.ylabel("Y Coordinate")
plt.axis("equal")  # same scale on both axes
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the gzipped boundary table for this sample
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Quick look at the first rows
print(data.head())

# One group of vertex rows per cell_id
grouped = data.groupby('cell_id')

# Dark theme; plt.style.use changes the global style, so it stays active for later cells
plt.style.use('dark_background')

plt.figure(figsize=(12, 10))

# Draw every outline as a filled polygon: dark grey fill, thin white contour
for _, outline in grouped:
    plt.fill(
        outline['vertex_x'].values,
        outline['vertex_y'].values,
        color='darkgrey',
        edgecolor='white',
        linewidth=0.5,
        alpha=0.7,
    )

# NOTE(review): the title says "Nuclei" but the file loaded above is cell_boundaries - confirm
plt.title("Nuclei Masks (Full Dataset)")
plt.xlabel("X Coordinate")
plt.ylabel("Y Coordinate")
plt.axis("equal")  # same scale on both axes
plt.show()

In [None]:
import pandas as pd

# Load the gzipped nucleus boundary table for sample s2r2_HV184
# (pandas infers the gzip compression from the .gz extension)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Preview the first rows to check the columns
print(data.head())


In [None]:
import pandas as pd

# Load the gzipped transcript table for sample s2r2_HV184 into `data1`
# (kept separate from `data`, which holds the boundary table)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/transcripts.csv.gz"
data1 = pd.read_csv(csv_path)

# Preview the first rows to check the columns
print(data1.head())

In [None]:
# Crop window bounds.
# NOTE: the ROI-drawing loops in the segmentation-mask cells below reuse the names
# xmin/xmax/ymin/ymax as loop variables, so these values are overwritten once those cells run.
xmin, xmax = 4500, 5000  # Horizontal bounds
ymin, ymax = 2500, 3000  # Vertical bounds


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd

# Load the nucleus boundary table; the columns used below are cell_id, vertex_x and vertex_y
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area (same coordinate units as vertex_x / vertex_y)
total_xmin, total_xmax = 200, 3800  # Horizontal bounds
total_ymin, total_ymax = 200, 2500  # Vertical bounds

# Vertices inside the window: used only to find which cells touch the cropped area
total_vertices = data[
    (data['vertex_x'] >= total_xmin) & (data['vertex_x'] <= total_xmax) &
    (data['vertex_y'] >= total_ymin) & (data['vertex_y'] <= total_ymax)
]
total_cell_ids = total_vertices['cell_id'].unique()

# Keep ALL vertices of those cells, including the ones outside the window, so that
# polygons crossing the border are drawn complete and simply clipped by the axes limits.
# (Plotting only the in-window vertices would close border polygons with a straight chord.)
total_data = data[data['cell_id'].isin(total_cell_ids)].copy()
grouped = total_data.groupby('cell_id')

# Fill colour per value of the 'region' annotation
unique_cluster_colors = {
    'Epi': "red",
    'CT': "blue",
}

# Any other region value is drawn in light grey
default_color = "#E5E5E5"

# Subset the AnnData object for the sample
sample_id = "HV205B"  # must be a value of adata.obs['sample']
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells whose obs index (expected to hold the same ids as the CSV 'cell_id' column)
# belongs to the cropped area
adata_cropped = adata_sample[adata_sample.obs.index.isin(total_cell_ids)].copy()
# cell id -> region annotation
cluster_mapping = adata_cropped.obs['region'].to_dict()
# Annotate every vertex row with its cell's region
total_data['region'] = total_data['cell_id'].map(cluster_mapping)

# Drop cells that are in the CSV but have no annotation in adata_cropped
total_data = total_data.dropna(subset=['region'])

# Apply the dark style before creating the figure so the figure itself picks it up
plt.style.use('dark_background')
plt.figure(figsize=(36, 23))

# Draw one filled polygon per nucleus, coloured by region
for cell_id, group in total_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['region'].iloc[0]  # every row of a cell carries the same region

    # Colour for the region, or the default grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])  
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (polygons reaching outside are clipped)
plt.xlim(total_xmin, total_xmax)
plt.ylim(total_ymin, total_ymax)

# Scale bar: the length is in data units because the bar uses the axes' transData.
# NOTE(review): this is 200 um only if the vertex coordinates are in microns - confirm.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# ROIs as (xmin, xmax, ymin, ymax) in the same coordinates as the plot
rois = [
    (2800, 3075, 1300, 1575),  # Lymphoid ROI
    (2300, 2575, 1000, 1275),  # Deep ROI
    (2800, 3075, 225, 500),    # Oral ROI
    (2925, 3200, 1725, 2000)   # Crevicular ROI
]

# Outline each ROI with an unfilled white rectangle.
# Note: this loop rebinds the notebook-level names xmin, xmax, ymin, ymax.
for xmin, xmax, ymin, ymax in rois:
    plt.gca().add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, 
                                      edgecolor='white', facecolor='none', linewidth=2))

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Total_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd

# Load the nucleus boundary table; the columns used below are cell_id, vertex_x and vertex_y
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area (same coordinate units as vertex_x / vertex_y)
total_xmin, total_xmax = 2832, 4250  # Horizontal bounds
total_ymin, total_ymax = 480, 2374  # Vertical bounds

# Vertices inside the window: used only to find which cells touch the cropped area
total_vertices = data[
    (data['vertex_x'] >= total_xmin) & (data['vertex_x'] <= total_xmax) &
    (data['vertex_y'] >= total_ymin) & (data['vertex_y'] <= total_ymax)
]
total_cell_ids = total_vertices['cell_id'].unique()

# Keep ALL vertices of those cells, including the ones outside the window, so that
# polygons crossing the border are drawn complete and simply clipped by the axes limits.
# (Plotting only the in-window vertices would close border polygons with a straight chord.)
total_data = data[data['cell_id'].isin(total_cell_ids)].copy()
grouped = total_data.groupby('cell_id')

# Fill colour per value of the 'region' annotation
unique_cluster_colors = {
    'Epi': "red",
    'CT': "blue",
}

# Any other region value is drawn in light grey
default_color = "#E5E5E5"

# Subset the AnnData object for the sample
sample_id = "s1r3"  # must be a value of adata.obs['sample']
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells whose obs index (expected to hold the same ids as the CSV 'cell_id' column)
# belongs to the cropped area
adata_cropped = adata_sample[adata_sample.obs.index.isin(total_cell_ids)].copy()
# cell id -> region annotation
cluster_mapping = adata_cropped.obs['region'].to_dict()
# Annotate every vertex row with its cell's region
total_data['region'] = total_data['cell_id'].map(cluster_mapping)

# Drop cells that are in the CSV but have no annotation in adata_cropped
total_data = total_data.dropna(subset=['region'])

# Apply the dark style before creating the figure so the figure itself picks it up
plt.style.use('dark_background')
plt.figure(figsize=(36, 23))

# Draw one filled polygon per nucleus, coloured by region
for cell_id, group in total_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['region'].iloc[0]  # every row of a cell carries the same region

    # Colour for the region, or the default grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])  
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (polygons reaching outside are clipped)
plt.xlim(total_xmin, total_xmax)
plt.ylim(total_ymin, total_ymax)

# Scale bar: the length is in data units because the bar uses the axes' transData.
# NOTE(review): this is 200 um only if the vertex coordinates are in microns - confirm.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# ROIs as (xmin, xmax, ymin, ymax) in the same coordinates as the plot
rois = [
    (3300, 3575, 900, 1175),  # Lymphoid ROI
    (2875, 3150, 1400, 1675),  # Deep ROI
    (3475, 3750, 1800, 2075),    # Oral ROI
    (3800, 4075, 750, 1025)   # Crevicular ROI
]

# Outline each ROI with an unfilled white rectangle.
# Note: this loop rebinds the notebook-level names xmin, xmax, ymin, ymax.
for xmin, xmax, ymin, ymax in rois:
    plt.gca().add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, 
                                      edgecolor='white', facecolor='none', linewidth=2))

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Total_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.show()

In [None]:
# Inspect the annotated vertex table left in `total_data` by the most recently run
# total-segmentation-mask cell (depends on which of those cells ran last)
print(total_data)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the nucleus boundary table; the columns used below are cell_id, vertex_x and vertex_y
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the crevicular ROI (same coordinate units as vertex_x / vertex_y)
crev_xmin, crev_xmax = 2925, 3200  # Horizontal bounds
crev_ymin, crev_ymax = 1725, 2000  # Vertical bounds

# Vertices inside the ROI: used only to find which cells touch it
crev_vertices = data[
    (data['vertex_x'] >= crev_xmin) & (data['vertex_x'] <= crev_xmax) &
    (data['vertex_y'] >= crev_ymin) & (data['vertex_y'] <= crev_ymax)
]
crev_cell_ids = crev_vertices['cell_id'].unique()

# Keep ALL vertices of those cells, including the ones outside the ROI, so that
# polygons crossing the border are drawn complete and simply clipped by the axes limits.
# (Plotting only the in-ROI vertices would close border polygons with a straight chord.)
crev_data = data[data['cell_id'].isin(crev_cell_ids)].copy()
grouped = crev_data.groupby('cell_id')

# Fill colour per Lvl2.5 cluster highlighted in this ROI
unique_cluster_colors = {
    'Crev': "#702963",
    'VEC': "#FFAC1C",
    'Fib': "#FFEA00",
    'mAPC': "#097969",
}
# Any other cluster is drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object for the sample
sample_id = "HV205B"  # must be a value of adata.obs['sample']
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells whose obs index (expected to hold the same ids as the CSV 'cell_id' column)
# belongs to the ROI
adata_cropped = adata_sample[adata_sample.obs.index.isin(crev_cell_ids)].copy()
# cell id -> Lvl2.5 cluster
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Annotate every vertex row with its cell's cluster
crev_data['Lvl2.5'] = crev_data['cell_id'].map(cluster_mapping)

# Drop cells that are in the CSV but have no annotation in adata_cropped
crev_data = crev_data.dropna(subset=['Lvl2.5'])

# Apply the dark style before creating the figure so the figure itself picks it up
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw one filled polygon per nucleus, coloured by cluster
for cell_id, group in crev_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every row of a cell carries the same cluster

    # Colour for the cluster, or the default grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])  
plt.yticks([])  
plt.axis('off')

# Restrict the view to the ROI (polygons reaching outside are clipped)
plt.xlim(crev_xmin, crev_xmax)
plt.ylim(crev_ymin, crev_ymax)

# Scale bar: the length is in data units because the bar uses the axes' transData.
# NOTE(review): this is 40 um only if the vertex coordinates are in microns - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Crev_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()



In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the nucleus boundary table; the columns used below are cell_id, vertex_x and vertex_y
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the crevicular ROI (same coordinate units as vertex_x / vertex_y)
crev_xmin, crev_xmax = 3800, 4075  # Horizontal bounds
crev_ymin, crev_ymax = 750, 1025  # Vertical bounds

# Vertices inside the ROI: used only to find which cells touch it
crev_vertices = data[
    (data['vertex_x'] >= crev_xmin) & (data['vertex_x'] <= crev_xmax) &
    (data['vertex_y'] >= crev_ymin) & (data['vertex_y'] <= crev_ymax)
]
crev_cell_ids = crev_vertices['cell_id'].unique()

# Keep ALL vertices of those cells, including the ones outside the ROI, so that
# polygons crossing the border are drawn complete and simply clipped by the axes limits.
# (Plotting only the in-ROI vertices would close border polygons with a straight chord.)
crev_data = data[data['cell_id'].isin(crev_cell_ids)].copy()
grouped = crev_data.groupby('cell_id')

# Fill colour per Lvl2.5 cluster highlighted in this ROI
unique_cluster_colors = {
    'Crev': "#702963",
    'VEC': "#FFAC1C",
    'Fib': "#FFEA00",
    'mAPC': "#097969",
}
# Any other cluster is drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object for the sample
sample_id = "s1r3"  # must be a value of adata.obs['sample']
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells whose obs index (expected to hold the same ids as the CSV 'cell_id' column)
# belongs to the ROI
adata_cropped = adata_sample[adata_sample.obs.index.isin(crev_cell_ids)].copy()
# cell id -> Lvl2.5 cluster
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Annotate every vertex row with its cell's cluster
crev_data['Lvl2.5'] = crev_data['cell_id'].map(cluster_mapping)

# Drop cells that are in the CSV but have no annotation in adata_cropped
crev_data = crev_data.dropna(subset=['Lvl2.5'])

# Apply the dark style before creating the figure so the figure itself picks it up
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw one filled polygon per nucleus, coloured by cluster
for cell_id, group in crev_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every row of a cell carries the same cluster

    # Colour for the cluster, or the default grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])  
plt.yticks([])  
plt.axis('off')

# Restrict the view to the ROI (polygons reaching outside are clipped)
plt.xlim(crev_xmin, crev_xmax)
plt.ylim(crev_ymin, crev_ymax)

# Scale bar: the length is in data units because the bar uses the axes' transData.
# NOTE(review): this is 40 um only if the vertex coordinates are in microns - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Crev_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()



In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Oral-epithelium crop of sample HV205B: nucleus masks coloured by Lvl2.5 cluster.

# Load the nucleus boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
oral_xmin, oral_xmax = 2800, 3075  # Horizontal bounds
oral_ymin, oral_ymax = 225, 500  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
oral_vertices = data[
    (data['vertex_x'] >= oral_xmin) & (data['vertex_x'] <= oral_xmax) &
    (data['vertex_y'] >= oral_ymin) & (data['vertex_y'] <= oral_ymax)
]
oral_cell_ids = oral_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Keratin': "#A7C7E7",
    'Spinous': "#5D3FD3",
    'Basal': "#0096FF",
    'mAPC': "#097969"
}

# Clusters not listed in `unique_cluster_colors` are drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "HV205B"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(oral_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`oral_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
oral_data = data[data['cell_id'].isin(oral_cell_ids)].copy()
oral_data['Lvl2.5'] = oral_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
oral_data = oral_data.dropna(subset=['Lvl2.5'])
grouped = oral_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per nucleus
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(oral_xmin, oral_xmax)
plt.ylim(oral_ymin, oral_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Oral_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Oral-epithelium crop of sample s1r3: nucleus masks coloured by Lvl2.5 cluster.

# Load the nucleus boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
oral_xmin, oral_xmax = 3475, 3750  # Horizontal bounds
oral_ymin, oral_ymax = 1800, 2075  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
oral_vertices = data[
    (data['vertex_x'] >= oral_xmin) & (data['vertex_x'] <= oral_xmax) &
    (data['vertex_y'] >= oral_ymin) & (data['vertex_y'] <= oral_ymax)
]
oral_cell_ids = oral_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Keratin': "#A7C7E7",
    'Spinous': "#5D3FD3",
    'Basal': "#0096FF",
    'mAPC': "#097969"
}

# Clusters not listed in `unique_cluster_colors` are drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "s1r3"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(oral_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`oral_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
oral_data = data[data['cell_id'].isin(oral_cell_ids)].copy()
oral_data['Lvl2.5'] = oral_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
oral_data = oral_data.dropna(subset=['Lvl2.5'])
grouped = oral_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per nucleus
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(oral_xmin, oral_xmax)
plt.ylim(oral_ymin, oral_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Oral_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Deep-tissue crop of sample HV205B: cell masks coloured by Lvl2.5 cluster.

# Load the cell boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
deep_xmin, deep_xmax = 2300, 2575  # Horizontal bounds
deep_ymin, deep_ymax = 1000, 1275  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Fib': "#FFEA00",
    'VEC': "#FFAC1C",
    'mAPC': "#097969",
    'Plasma' : '#7F00FF',
}

# Clusters not listed in `unique_cluster_colors` are drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "HV205B"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`deep_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['Lvl2.5'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
deep_data = deep_data.dropna(subset=['Lvl2.5'])
grouped = deep_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per cell
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Deep_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Deep-tissue crop of sample s1r3: cell masks coloured by Lvl2.5 cluster.

# Load the cell boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
deep_xmin, deep_xmax = 2875, 3150  # Horizontal bounds
deep_ymin, deep_ymax = 1400, 1675  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Fib': "#FFEA00",
    'VEC': "#FFAC1C",
    'mAPC': "#097969",
    'Plasma' : '#7F00FF',
}

# Clusters not listed in `unique_cluster_colors` are drawn in grey
default_color = "#BEBEBE"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "s1r3"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`deep_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['Lvl2.5'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
deep_data = deep_data.dropna(subset=['Lvl2.5'])
grouped = deep_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per cell
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Deep_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Lymphoid-region crop of sample HV205B: nucleus masks coloured by Lvl2.5 cluster.

# Load the nucleus boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
lymph_xmin, lymph_xmax = 2800, 3075  # Horizontal bounds
lymph_ymin, lymph_ymax = 1300, 1575  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
lymph_vertices = data[
    (data['vertex_x'] >= lymph_xmin) & (data['vertex_x'] <= lymph_xmax) &
    (data['vertex_y'] >= lymph_ymin) & (data['vertex_y'] <= lymph_ymax)
]
lymph_cell_ids = lymph_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Plasma': "#7F00FF",   # Plasma cells - purple
    'T': "#8B0000",       # T cells - dark red
    'mAPC': "#097969",    # mAPC - dark green
    'B': '#CF9FFF',       # B cells - light purple
    'Fib': '#FFEA00'      # Fib - yellow
}

# Clusters not listed in `unique_cluster_colors` are drawn in light grey
default_color = "#E5E5E5"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "HV205B"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(lymph_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`lymph_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
lymph_data = data[data['cell_id'].isin(lymph_cell_ids)].copy()
lymph_data['Lvl2.5'] = lymph_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
lymph_data = lymph_data.dropna(subset=['Lvl2.5'])
grouped = lymph_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per nucleus
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(lymph_xmin, lymph_xmax)
plt.ylim(lymph_ymin, lymph_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lymph_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Lymphoid-region crop of sample s1r3: nucleus masks coloured by Lvl2.5 cluster.

# Load the nucleus boundary polygons (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/nucleus_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
lymph_xmin, lymph_xmax = 3300, 3575  # Horizontal bounds
lymph_ymin, lymph_ymax = 900, 1175  # Vertical bounds

# Vertices lying inside the crop window, and the cells they belong to
lymph_vertices = data[
    (data['vertex_x'] >= lymph_xmin) & (data['vertex_x'] <= lymph_xmax) &
    (data['vertex_y'] >= lymph_ymin) & (data['vertex_y'] <= lymph_ymax)
]
lymph_cell_ids = lymph_vertices['cell_id'].unique()

# Clusters that get their own colour
unique_cluster_colors = {
    'Plasma': "#7F00FF",   # Plasma cells - purple
    'T': "#8B0000",       # T cells - dark red
    'mAPC': "#097969",    # mAPC - dark green
    'B': '#CF9FFF',       # B cells - light purple
    'Fib': '#FFEA00'      # Fib - yellow
}

# Clusters not listed in `unique_cluster_colors` are drawn in light grey
default_color = "#E5E5E5"

# Subset the AnnData object to this sample, then to the cells in the crop.
# NOTE(review): assumes adata.obs is indexed by the same cell_id values as the CSV - confirm.
sample_id = "s1r3"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(lymph_cell_ids)].copy()

# cell_id -> Lvl2.5 cluster label
cluster_mapping = adata_cropped.obs['Lvl2.5'].to_dict()

# Use the COMPLETE outline of every cell that touches the crop window.
# Plotting only the in-window vertices (`lymph_vertices`) truncates cells on the
# border and closes them with a straight chord; the axis limits set below
# already clip the full polygons to the window.
lymph_data = data[data['cell_id'].isin(lymph_cell_ids)].copy()
lymph_data['Lvl2.5'] = lymph_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in adata_cropped
lymph_data = lymph_data.dropna(subset=['Lvl2.5'])
grouped = lymph_data.groupby('cell_id')

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# Draw one filled polygon per nucleus
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl2.5'].iloc[0]  # every vertex of a cell carries the same label

    # Cluster colour, falling back to grey
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(lymph_xmin, lymph_xmax)
plt.ylim(lymph_ymin, lymph_ymax)

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium boundary files are documented as
# storing vertices in microns (TODO confirm for this export), which makes it a
# 40 um bar rather than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file with a black background
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lymph_Segmentation_Mask_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
# Distinct Lvl2.5 cluster labels found in adata.obs, shown as a plain list
lvl25_column = adata.obs['Lvl2.5']
lvl25_column.unique().tolist()

In [None]:
# Inspect which obs columns the most recently built `adata_cropped` carries
cropped_obs_columns = adata_cropped.obs.columns
cropped_obs_columns.unique()

In [None]:
# Attach each cell's `region` annotation to the vertex table.
# NOTE(review): `total_data` and `adata_cropped` come from earlier cells - confirm they belong to the same crop.
region_by_cell = adata_cropped.obs['region']
total_data['region'] = total_data['cell_id'].map(region_by_cell)
print(total_data.loc[:, ['cell_id', 'region']].head())

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Oral-epithelium crop of sample HV205B: selected transcripts over nucleus outlines.

# Load the CSV file for transcripts
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the CSV file for the nucleus boundaries
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 2800, 3075  # Horizontal bounds
ymin, ymax = 225, 500   # Vertical bounds

# Keep only transcripts located inside the cropped area
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# ...and, of those, only the genes to be displayed
transcript_of_interest = ["KRT19", "KRT5", "HLA-DRB5", "CNFN"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only nucleus vertices located inside the cropped area
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually define a colour for each transcript
transcript_color_map = {
    "KRT19": "#5D3FD3",
    "HLA-DRB5": "#0FFF50",
    "KRT5": "#0096FF",
    "CNFN": "#40E0D0",
}

# Start plotting with black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw each nucleus as a grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    plt.plot(x, y, color='grey', linewidth=1)

# Draw all selected transcripts in ONE scatter call with a per-point colour.
# A scatter call per row creates one artist per transcript, which is very slow
# and bloats the SVG; point order (and so the overlap order) is unchanged.
transcript_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
plt.scatter(
    filtered_transcripts['x_location'].to_numpy(),
    filtered_transcripts['y_location'].to_numpy(),
    c=transcript_colors,
    s=2,
    alpha=0.7,
)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium coordinates are documented as
# microns (TODO confirm for this export), which makes it a 40 um bar.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Oral_transcripts_Mask_HV205B.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot.
# NOTE(review): tight_layout() runs after savefig(), so it affects only the
# on-screen figure, not the SVG written above.
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Oral-epithelium crop of sample s1r3_HV214As: selected transcripts over nucleus outlines.

# Load the CSV file for transcripts
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the CSV file for the nucleus boundaries
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/nucleus_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 3475, 3750  # Horizontal bounds
ymin, ymax = 1800, 2075  # Vertical bounds

# Keep only transcripts located inside the cropped area
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# ...and, of those, only the genes to be displayed
transcript_of_interest = ["KRT19", "KRT5", "HLA-DRB5", "CNFN"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only nucleus vertices located inside the cropped area
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually define a colour for each transcript
transcript_color_map = {
    "KRT19": "#5D3FD3",
    "HLA-DRB5": "#0FFF50",
    "KRT5": "#0096FF",
    "CNFN": "#40E0D0",
}

# Start plotting with black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw each nucleus as a grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    plt.plot(x, y, color='grey', linewidth=1)

# Draw all selected transcripts in ONE scatter call with a per-point colour.
# A scatter call per row creates one artist per transcript, which is very slow
# and bloats the SVG; point order (and so the overlap order) is unchanged.
transcript_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
plt.scatter(
    filtered_transcripts['x_location'].to_numpy(),
    filtered_transcripts['y_location'].to_numpy(),
    c=transcript_colors,
    s=2,
    alpha=0.7,
)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium coordinates are documented as
# microns (TODO confirm for this export), which makes it a 40 um bar rather
# than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Oral_transcripts_Mask_HV214As.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot.
# NOTE(review): tight_layout() runs after savefig(), so it affects only the
# on-screen figure, not the SVG written above.
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Deep-tissue crop of sample HV205B: selected transcripts over nucleus outlines.

# Load the CSV file for transcripts
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the CSV file for the nucleus boundaries
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 2300, 2575  # Horizontal bounds
ymin, ymax = 1000, 1275  # Vertical bounds

# Keep only transcripts located inside the cropped area
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# ...and, of those, only the genes to be displayed
transcript_of_interest = ["VCAN", "VWF", "PRDM1", "HLA-DRB5"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only nucleus vertices located inside the cropped area
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually define a colour for each transcript
transcript_color_map = {
    "VCAN": "#FF5F1F",
    "VWF": "#FFFF00",
    "PRDM1": "#00FFFF",
    "HLA-DRB5": "#0FFF50"
}

# Start plotting with black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw each nucleus as a thin grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    plt.plot(x, y, color='grey', linewidth=0.25)

# Draw all selected transcripts in ONE scatter call with a per-point colour.
# A scatter call per row creates one artist per transcript, which is very slow
# and bloats the SVG; point order (and so the overlap order) is unchanged.
transcript_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
plt.scatter(
    filtered_transcripts['x_location'].to_numpy(),
    filtered_transcripts['y_location'].to_numpy(),
    c=transcript_colors,
    s=2,
    alpha=0.7,
)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Scale bar. AnchoredSizeBar measures its length in data units (transData), so
# this bar is 40 coordinate units long. Xenium coordinates are documented as
# microns (TODO confirm for this export), which makes it a 40 um bar rather
# than 40 pixels.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Deep_transcripts_Mask_HV205B.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot.
# NOTE(review): tight_layout() runs after savefig(), so it affects only the
# on-screen figure, not the SVG written above.
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Figure 2 panel: selected transcripts drawn as coloured points over boundary
# outlines, for one cropped window of the s1r3_HV214As Xenium output.

# Load the transcript table
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the boundary polygons (one row per polygon vertex).
# NOTE(review): this reads cell_boundaries.csv.gz although the variables are
# named "nucleus"/"nuclei" — confirm which boundary file is intended.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 2875, 3150  # Horizontal bounds
ymin, ymax = 1400, 1675  # Vertical bounds

# Keep only the transcripts located inside the crop window
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts that should be drawn
transcript_of_interest = ["VCAN", "VWF", "PRDM1", "HLA-DRB5"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only boundary vertices inside the crop window; polygons that cross the
# window edge are therefore drawn truncated.
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually assigned colour for each transcript
transcript_color_map = {
    "VCAN": "#FF5F1F",
    "VWF": "#FFFF00",
    "PRDM1": "#00FFFF",
    "HLA-DRB5": "#0FFF50"
}

# Fail early with a clear message if a plotted transcript has no colour
missing_colors = set(transcript_of_interest) - set(transcript_color_map)
if missing_colors:
    raise KeyError(f"No color defined for transcripts: {sorted(missing_colors)}")

# Black background (this changes the global matplotlib style for later cells too)
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# Draw each boundary as a thin grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    ax.plot(group['vertex_x'].values, group['vertex_y'].values, color='grey', linewidth=0.25)

# Draw all selected transcripts in a single scatter call (one artist instead of
# one per transcript); points keep the row order of the table.
if not filtered_transcripts.empty:
    ax.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=filtered_transcripts['feature_name'].map(transcript_color_map).tolist(),
        s=2,
        alpha=0.7,
    )

# Equal aspect ratio to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove axis ticks and tick labels
ax.set_xticks([])
ax.set_yticks([])

# Scale bar. With ax.transData the length is measured in data (axis) units,
# i.e. the same units as x_location / vertex_x, not in image pixels.
# NOTE(review): presumably these coordinates are microns, making this a 40 um
# bar with no 0.2125 um/pixel conversion needed — confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Apply the layout BEFORE saving so the SVG matches what is displayed.
# NOTE(review): the file name says HV214Bs while the inputs come from
# s1r3_HV214As — confirm the sample label.
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Deep_transcripts_Mask_HV214Bs.svg"
fig.tight_layout()
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Figure 2 panel: selected transcripts drawn as coloured points over nucleus
# outlines, for one cropped window of the HV205B Xenium output.

# Load the transcript table
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the nucleus boundary polygons (one row per polygon vertex)
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 2925, 3200  # Horizontal bounds
ymin, ymax = 1725, 2000  # Vertical bounds

# Keep only the transcripts located inside the crop window
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts that should be drawn
transcript_of_interest = ["ODAM", "VCAN", "VWF", "HLA-DRB5"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only boundary vertices inside the crop window; polygons that cross the
# window edge are therefore drawn truncated.
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually assigned colour for each transcript
transcript_color_map = {
    "ODAM": "#DA70D6",
    "VCAN": "#FF5F1F",
    "HLA-DRB5": "#0FFF50",
    "VWF": "#FFFF00",
}

# Fail early with a clear message if a plotted transcript has no colour
missing_colors = set(transcript_of_interest) - set(transcript_color_map)
if missing_colors:
    raise KeyError(f"No color defined for transcripts: {sorted(missing_colors)}")

# Black background (this changes the global matplotlib style for later cells too)
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# Draw each nucleus as a thin grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    ax.plot(group['vertex_x'].values, group['vertex_y'].values, color='grey', linewidth=0.25)

# Draw all selected transcripts in a single scatter call (one artist instead of
# one per transcript); points keep the row order of the table.
if not filtered_transcripts.empty:
    ax.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=filtered_transcripts['feature_name'].map(transcript_color_map).tolist(),
        s=2,
        alpha=0.7,
    )

# Equal aspect ratio to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove axis ticks and tick labels
ax.set_xticks([])
ax.set_yticks([])

# Scale bar. With ax.transData the length is measured in data (axis) units,
# i.e. the same units as x_location / vertex_x, not in image pixels.
# NOTE(review): presumably these coordinates are microns, making this a 40 um
# bar with no 0.2125 um/pixel conversion needed — confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Apply the layout BEFORE saving so the SVG matches what is displayed
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Crev_transcripts_Mask_HV205B.svg"
fig.tight_layout()
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Figure 2 panel: selected transcripts drawn as coloured points over boundary
# outlines, for one cropped window of the s1r3_HV214As Xenium output.

# Load the transcript table
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the boundary polygons (one row per polygon vertex).
# NOTE(review): this reads cell_boundaries.csv.gz although the variables are
# named "nucleus"/"nuclei" — confirm which boundary file is intended.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r3_HV214As/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 3800, 4075  # Horizontal bounds
ymin, ymax = 750, 1025  # Vertical bounds

# Keep only the transcripts located inside the crop window
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts that should be drawn
transcript_of_interest = ["ODAM", "VCAN", "VWF", "HLA-DRB5"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only boundary vertices inside the crop window; polygons that cross the
# window edge are therefore drawn truncated.
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually assigned colour for each transcript
transcript_color_map = {
    "ODAM": "#DA70D6",
    "VCAN": "#FF5F1F",
    "HLA-DRB5": "#0FFF50",
    "VWF": "#FFFF00",
}

# Fail early with a clear message if a plotted transcript has no colour
missing_colors = set(transcript_of_interest) - set(transcript_color_map)
if missing_colors:
    raise KeyError(f"No color defined for transcripts: {sorted(missing_colors)}")

# Black background (this changes the global matplotlib style for later cells too)
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# Draw each boundary as a thin grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    ax.plot(group['vertex_x'].values, group['vertex_y'].values, color='grey', linewidth=0.25)

# Draw all selected transcripts in a single scatter call (one artist instead of
# one per transcript); points keep the row order of the table.
if not filtered_transcripts.empty:
    ax.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=filtered_transcripts['feature_name'].map(transcript_color_map).tolist(),
        s=2,
        alpha=0.7,
    )

# Equal aspect ratio to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove axis ticks and tick labels
ax.set_xticks([])
ax.set_yticks([])

# Scale bar. With ax.transData the length is measured in data (axis) units,
# i.e. the same units as x_location / vertex_x, not in image pixels.
# NOTE(review): presumably these coordinates are microns, making this a 40 um
# bar with no 0.2125 um/pixel conversion needed — confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Apply the layout BEFORE saving so the SVG matches what is displayed.
# NOTE(review): the file name says HV214Bs while the inputs come from
# s1r3_HV214As — confirm the sample label.
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Crev_transcripts_Mask_HV214Bs.svg"
fig.tight_layout()
fig.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Figure 2 panel: selected transcripts drawn as coloured points over nucleus
# outlines, for one cropped window of the HV205B Xenium output.

# Load the transcript table
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the nucleus boundary polygons (one row per polygon vertex)
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV205B/nucleus_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Define the boundaries of the cropped area
xmin, xmax = 2800, 3075  # Horizontal bounds
ymin, ymax = 1300, 1575  # Vertical bounds

# Keep only the transcripts located inside the crop window
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts that should be drawn
transcript_of_interest = ["TRAC", "BANK1", "HLA-DRB5", "PRDM1"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Keep only boundary vertices inside the crop window; polygons that cross the
# window edge are therefore drawn truncated.
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually assigned colour for each transcript
transcript_color_map = {
    'BANK1': '#F9F6EE',
    "TRAC": "#E0115F",
    "HLA-DRB5": "#0FFF50",
    "PRDM1": "#00FFFF",
}

# Fail early with a clear message if a plotted transcript has no colour
missing_colors = set(transcript_of_interest) - set(transcript_color_map)
if missing_colors:
    raise KeyError(f"No color defined for transcripts: {sorted(missing_colors)}")

# Black background (this changes the global matplotlib style for later cells too)
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# Draw each nucleus as a thin grey outline (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    ax.plot(group['vertex_x'].values, group['vertex_y'].values, color='grey', linewidth=0.25)

# Draw all selected transcripts in a single scatter call (one artist instead of
# one per transcript); points keep the row order of the table.
if not filtered_transcripts.empty:
    ax.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=filtered_transcripts['feature_name'].map(transcript_color_map).tolist(),
        s=2,
        alpha=0.7,
    )

# Equal aspect ratio to avoid distortion
ax.set_aspect('equal', adjustable='box')

# Remove axis ticks and tick labels
ax.set_xticks([])
ax.set_yticks([])

# Scale bar. With ax.transData the length is measured in data (axis) units,
# i.e. the same units as x_location / vertex_x, not in image pixels.
# NOTE(review): presumably these coordinates are microns, making this a 40 um
# bar with no 0.2125 um/pixel conversion needed — confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Apply the layout BEFORE saving so the SVG matches what is displayed
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lymph_transcripts_Mask_HV205B.svg"
fig.tight_layout()
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot
plt.show()


In [None]:
# List the distinct labels present in the 'Lvl2.5' annotation column
adata.obs['Lvl2.5'].unique().tolist()

In [None]:
# Show the column labels of adata_cropped.obs
# NOTE(review): adata_cropped is not created in the nearby cells — confirm it is defined before this cell runs.
adata_cropped.obs.columns.unique()

In [None]:
# Attach a 'region' column to total_data by looking each cell_id up in
# adata_cropped.obs['region'] (matched on that Series' index); ids without a
# match become NaN.
# NOTE(review): total_data and adata_cropped are not created in the nearby cells — confirm they are defined before this cell runs.
total_data['region'] = total_data['cell_id'].map(adata_cropped.obs['region'])
print(total_data[['cell_id', 'region']].head())

In [None]:
import scanpy as sc

# Observation names must be unique before subsetting
adata.obs_names_make_unique()

# Work on a copy restricted to the Lvl2.5 populations of interest
desired_clusters = [ 'Plasma', 'T', 'VEC', 'Fib', 'mAPC', 'Mast', 'SMC', 'Basal', 'Spinous', 'Keratin', 'NK', 'Neur', 'Crev', 'B', 'Mel']
keep_mask = adata.obs['Lvl2.5'].isin(desired_clusters)
adata_subset = adata[keep_mask].copy()
adata_subset.obs['Lvl2.5'] = adata_subset.obs['Lvl2.5'].astype('category')

# Log-transform only when .uns carries no 'log1p' record yet
if 'log1p' not in adata_subset.uns_keys():
    sc.pp.log1p(adata_subset)

# If a 'log1p' record exists without a 'base' entry, record the natural base
if 'log1p' in adata_subset.uns_keys():
    if 'base' not in adata_subset.uns['log1p']:
        adata_subset.uns['log1p']['base'] = 2.718281828459045  # e

# Wilcoxon ranking of genes for every Lvl2.5 group
sc.tl.rank_genes_groups(adata_subset, 'Lvl2.5', method='wilcoxon', use_raw=False)

# Ten best-ranked gene names per group
ranked_names = adata_subset.uns['rank_genes_groups']['names']
top_genes_per_cluster = {
    cluster: ranked_names[cluster][:10]
    for cluster in adata_subset.obs['Lvl2.5'].cat.categories
}

# Dendrogram over the groups; its leaf order drives the gene selection order
sc.tl.dendrogram(adata_subset, groupby='Lvl2.5')
cluster_order = adata_subset.uns['dendrogram_Lvl2.5']['categories_ordered']

# Walk the groups in dendrogram order and take, for each, up to 5 of its top
# genes that no earlier group has already claimed.
unique_genes = set()
genes_for_plot = []
for cluster in cluster_order:
    if cluster not in top_genes_per_cluster:
        continue
    n_taken = 0
    for gene in top_genes_per_cluster[cluster]:
        if n_taken >= 5:
            break
        if gene in unique_genes:
            continue
        unique_genes.add(gene)
        genes_for_plot.append(gene)
        n_taken += 1

# Dot plot of the selected genes (genes on rows because swap_axes=True)
sc.pl.dotplot(
    adata_subset,
    var_names=genes_for_plot,
    groupby='Lvl2.5',
    dendrogram=True,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
    swap_axes=True,
)
print(top_genes_per_cluster)

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np

# Font settings: Arial, with TrueType (type 42) embedding so text stays editable
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# Lvl2.5 groups to show, in display order
clusters_of_interest = ['Keratin', 'Spinous', 'Basal', 'Crev', 'Plasma', 'T', 'B', 'mAPC', 'VEC', 'Mast',  'Fib', 'SMC']

# Genes to show, in display order
correct_order = [
    'CNFN', 'EHF', 'AQP3',
    'KRT19', 'SDC1', 'KRT5', 'COL17A1',
    'ODAM', 'PRDM1', 'MZB1', 'TRAC', 'CD3E',
    'BANK1', 'HLA-DRB5', 'MS4A6A', 'CD14',
    'CD34', 'VWF', 'CPA3', 'MS4A2', 'VCAN', 'COL16A1', 'MYH11', 'ACTA2',
]

# Copy of the data restricted to the chosen groups
in_selected_clusters = adata.obs['Lvl2.5'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()

# Impose the display order through an ordered categorical dtype
cluster_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl2.5'] = adata_subset.obs['Lvl2.5'].astype(cluster_dtype)

# Build the dot plot: per-gene standard scaling, colour scale limited to
# [0.2, 0.8], axes swapped, greyscale dots without outlines or grid.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl2.5',
        standard_scale='var',
        vmin=0.2,
        vmax=0.8,
        figsize=(6, 10),
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Save as PDF with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lvl2.5_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
# List the distinct labels present in the 'niche_knn50k10_merged' column
adata.obs['niche_knn50k10_merged'].unique().tolist()

In [None]:
import scanpy as sc

# Old niche label -> new display label
replacement_dict = {
    'Crevicular': 'TAE',
    'Plasma-Fib CT': 'Im-Str',
    'Fibrous CT': 'FibCT',
    'Lymphoid': 'T-B-APC',
}

# Apply the renaming to the 'niche_knn50k10_merged' column (overwrites it in adata.obs)
renamed_niches = adata.obs['niche_knn50k10_merged'].replace(replacement_dict)
adata.obs['niche_knn50k10_merged'] = renamed_niches

# Show the labels present after renaming
print(adata.obs['niche_knn50k10_merged'].unique().tolist())

In [None]:
# List the distinct labels present in the 'Lvl2.5' annotation column
adata.obs['Lvl2.5'].unique().tolist()

In [None]:
import scanpy as sc

# Old Lvl2.5 label -> new display label
replacement_dict = {
    'Fib': 'Fibroblast',
    'mAPC': 'APC',
    'Crev': 'TAE',
}

# Apply the renaming to the 'Lvl2.5' column (overwrites it in adata.obs)
renamed_lvl25 = adata.obs['Lvl2.5'].replace(replacement_dict)
adata.obs['Lvl2.5'] = renamed_lvl25

# Show the labels present after renaming
print(adata.obs['Lvl2.5'].unique().tolist())

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np

# Font settings: Arial, with TrueType (type 42) embedding so text stays editable
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# Lvl2.5 groups to show, in display order
clusters_of_interest = ['Keratin', 'Spinous', 'Basal', 'TAE', 'APC', 'Mast', 'Fibroblast', 'VEC', 'SMC', 'Plasma', 'B', 'T']

# Genes to show, in display order
correct_order = [
    'CNFN', 'EHF', 'AQP3',
    'KRT19', 'SDC1', 'KRT5', 'COL17A1',
    'ODAM', 'HLA-DRB5', 'MS4A6A', 'CD14', 'CPA3', 'MS4A2', 'VCAN', 'COL16A1',
    'CD34', 'VWF', 'MYH11', 'ACTA2', 'PRDM1', 'MZB1',
    'BANK1', 'TRAC', 'CD3E',
]

# Copy of the data restricted to the chosen groups
in_selected_clusters = adata.obs['Lvl2.5'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()

# Impose the display order through an ordered categorical dtype
cluster_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl2.5'] = adata_subset.obs['Lvl2.5'].astype(cluster_dtype)

# Build the dot plot: per-gene standard scaling, colour scale limited to
# [0.2, 0.8], axes swapped, greyscale dots without outlines or grid.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl2.5',
        standard_scale='var',
        vmin=0.2,
        vmax=0.8,
        figsize=(6, 7),
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Save as PDF with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lvl2.5_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
# List the distinct labels present in the 'Lvl2' annotation column
adata.obs['Lvl2'].unique().tolist()

In [None]:
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.stats import ranksums
from statsmodels.stats.multitest import multipletests


def _as_dense(matrix):
    """Return an expression matrix as a dense ndarray, whether it is stored sparse or dense."""
    return matrix.toarray() if sparse.issparse(matrix) else np.asarray(matrix)


# Boolean masks selecting the two epithelial groups being compared
crevicular_indices = adata.obs['Lvl2'] == 'Crevicular Epi'
oral_indices = adata.obs['Lvl2'] == 'Oral Epi'

# An empty group would only produce NaN statistics, so stop with a clear message
if not crevicular_indices.any() or not oral_indices.any():
    raise ValueError("Both 'Crevicular Epi' and 'Oral Epi' cells must be present in adata.obs['Lvl2']")

# Dense (cells x genes) expression matrices for each group
crev_expr = _as_dense(adata[crevicular_indices, :].X)
oral_expr = _as_dense(adata[oral_indices, :].X)

# Gene names, in the same order as the columns of the expression matrices
gene_names = adata.var_names.tolist()

log2fc = []    # log2 fold change per gene
p_values = []  # rank-sum p-value per gene

# Wilcoxon rank-sum test and fold change, gene by gene
# NOTE(review): the fold change is log2(mean / mean) of the values stored in X;
# confirm whether X holds raw or log-transformed expression.
for gene in range(crev_expr.shape[1]):
    crev_gene_expression = crev_expr[:, gene]
    oral_gene_expression = oral_expr[:, gene]

    stat, p_val = ranksums(crev_gene_expression, oral_gene_expression)

    mean_crev = np.mean(crev_gene_expression)
    mean_oral = np.mean(oral_gene_expression)
    # A zero mean in either group yields +/-inf (or NaN when both are zero);
    # those values are kept as-is, only the numpy warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        log2_fold_change = np.log2(mean_crev / mean_oral)

    log2fc.append(log2_fold_change)
    p_values.append(p_val)

# Benjamini-Hochberg correction for multiple testing
_, p_values_adj, _, _ = multipletests(p_values, method='fdr_bh')

# Collect the per-gene results
dge_results = pd.DataFrame({
    'Gene Name': gene_names,
    'Log2 Fold Change (Crev vs Oral)': log2fc,
    'P-Value': p_values,
    'Adjusted P-Value (FDR)': p_values_adj
})

# Genes passing the FDR < 0.01 threshold
significant_genes = dge_results[dge_results['Adjusted P-Value (FDR)'] < 0.01]

# Display results
print(dge_results.head())  # Show the first few results
print(f"Number of significant genes (FDR < 0.01): {len(significant_genes)}")

In [None]:
# Rows of dge_results whose adjusted p-value ('Adjusted P-Value (FDR)') is exactly 0
pval_0 = dge_results[dge_results['Adjusted P-Value (FDR)'] == 0]

In [None]:
# Replacement values for adjusted p-values that are exactly 0: one value per
# row of pval_0, of the form 10**-k with k drawn from a normal distribution
# (mean 295, sd 5) and rounded to an integer, i.e. scattered around 1e-295.
# A fixed seed keeps the values (and any figure built from them) reproducible.
# NOTE(review): these are placeholder values, not computed statistics.
jitter_rng = np.random.default_rng(0)
jitter_exponents = np.round(jitter_rng.normal(loc=295, scale=5, size=pval_0.shape[0]), 0)
pval_jitter = 1 * 10 ** -jitter_exponents

In [None]:
# Overwrite every adjusted p-value equal to 0 with the jittered replacements
fdr_column = 'Adjusted P-Value (FDR)'
zero_fdr_rows = dge_results[fdr_column] == 0
dge_results.loc[zero_fdr_rows, fdr_column] = pval_jitter

In [None]:
# Display the differential-expression results table
dge_results

In [None]:
# Keep rows whose log2 fold change lies outside [-2, 2]
# NOTE(review): most_significant is not created in the nearby cells — confirm it is defined before this cell runs.
lfc_column = 'Log2 Fold Change (Crev vs Oral)'
strong_change = most_significant[lfc_column].abs() > 2
filtered_genes = most_significant[strong_change]

print("\nFiltered Most Significant Genes:")
print(filtered_genes[['Gene Name', 'Log2 Fold Change (Crev vs Oral)', 'Adjusted P-Value (FDR)']])

In [None]:
# Show the result row(s) for the gene 'ODAM'
is_odam = dge_results['Gene Name'].eq('ODAM')
odam_row = dge_results.loc[is_odam]
print(odam_row)

In [None]:
!pip install adjustText

In [None]:
from brokenaxes import brokenaxes
import matplotlib.pyplot as plt
import numpy as np
from adjustText import adjust_text

# Genes to label on the plot
genes_of_interest = ['ICAM1', 'LAMC2', "ODAM", "FGFBP1", "ODAPH", "SAA2", "CXCL2", "SAA1", "CXCL1", "CSF3", "ADAM28", "SERPINB2", "CXCL14", "KLK11", "CFHR3", "C3", "CXCL6"]
genes_to_label = set(genes_of_interest)

# Columns of the differential-expression table used for the plot
log2fc_values = dge_results['Log2 Fold Change (Crev vs Oral)']
pvalues = dge_results['Adjusted P-Value (FDR)']
gene_names = dge_results['Gene Name']

# -log10 of the adjusted p-values; values below 1e-300 are clamped first, so
# the plotted value never exceeds 300.
# NOTE(review): the upper axis segment runs to 310, but the clamp caps points at 300 — confirm this is intended.
log10_pvalues = -np.log10(np.maximum(pvalues, 1e-300))

# Figure with a broken y-axis: lower segment -1..85, upper segment 280..310
fig = plt.figure(figsize=(10, 8))
bax = brokenaxes(ylims=((-1, 85), (280, 310)), hspace=0.02)

# Point colours: blue below -2, red above 2, grey otherwise
colors = [
    'blue' if lfc < -2 else 'red' if lfc > 2 else 'grey'
    for lfc in log2fc_values
]

# Text labels for the genes of interest. Each label is added to the axis
# segment whose y-range contains the point; points in the gap get no label.
# Loop names are chosen so the notebook-level `log2fc` list is not overwritten.
texts = []
for lfc, neg_log10_p, gene in zip(log2fc_values, log10_pvalues, gene_names):
    if gene not in genes_to_label:
        continue
    for segment_ax in bax.axs:
        y_low, y_high = segment_ax.get_ylim()
        if y_low < neg_log10_p < y_high:
            texts.append(segment_ax.text(lfc, neg_log10_p, gene, fontsize=9,
                                         alpha=0.9, color='black', weight='bold', ha='right'))
            break

# Scatter plot
bax.scatter(log2fc_values, log10_pvalues, c=colors, alpha=0.7)

# Move labels apart to avoid overlaps, with arrows back to the points
adjust_text(texts, only_move={'points': 'xy', 'text': 'xy'},
            arrowprops=dict(arrowstyle="->", color='black'))

# Labels and title
bax.set_xlabel('Log2 Fold Change', fontsize=14, color='black')
bax.set_ylabel('-Log10(Adjusted P-Value)', fontsize=14)
bax.fig.suptitle('Volcano Plot: Log2 Fold Change vs Adjusted P-Value (FDR)', fontsize=16)

# Save the plot
plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure3/OralvsCrevicular_Volcano.svg', format='svg')

# Show the plot
plt.show()




In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np

# Font settings: Arial, with TrueType (type 42) embedding so text stays editable
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# Lvl5 groups to show, in display order
clusters_of_interest = [
    'Ep.B.1', 'Ep.B.2', 'Ep.B-PB', 'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.prol', 'Ep.Sp', 'Ep.K',
    'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB',
]

# Genes to show, in display order.
# NOTE(review): several genes appear more than once (e.g. 'S100A16', 'SLPI',
# 'COL17A1', 'EHF') — confirm the repeats are intended.
correct_order = [
    'KRT5', 'COL17A1', 'DST', 'ITGB4', 'CXCL14', 'C1R', 'IL1R2',
    'CAVIN1', 'PDPN', 'BASP1',
    'SLC26A2', 'MYC', 'CFH', 'PTN',
    'MKI67', 'CDK1', 'AQP3', 'KRT19', 'FGFBP1', 'S100A16', 'LY6D', 'SDC1',
    'CYP4B1', 'MAMDC2', 'SERPINB3', 'NOD2',
    'S100A16', 'SLPI', 'IL1RN', 'ANXA1',
    'C15orf48', 'IL36A', 'CNFN', 'IL36G',
    'SERPINB2', 'EHF', 'ERBB2', 'ODAM', 'ODAPH', 'LAMC2', 'CXCL1', 'CXCL2', 'CXCL6', 'SLPI',
    'IL1A', 'IL36G', 'C15orf48', 'ANXA1', 'DNASE1L3',
    'COL17A1', 'DST', 'C1R', 'ITGB4', 'SAA1', 'SAA2',
    'CYP4B1', 'CFH', 'EHF', 'CLEC7A', 'CFHR3',
]

# Copy of the data restricted to the chosen groups
in_selected_clusters = adata.obs['Lvl5'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()

# Impose the display order through an ordered categorical dtype
cluster_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl5'] = adata_subset.obs['Lvl5'].astype(cluster_dtype)

# Build the dot plot: per-gene standard scaling, colour scale limited to
# [0.2, 0.8], axes NOT swapped, greyscale dots without outlines or grid.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl5',
        standard_scale='var',
        vmin=0.2,
        vmax=0.8,
        figsize=(20, 4),
    )
    .swap_axes(swap_axes=False)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Show the plot
dotplot.show()

# Save as PDF with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl5_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np

# Switch matplotlib back to its default style
plt.style.use('default')

# Lvl4 groups to show, in display order
clusters_of_interest = ['Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB']

# Genes to show, in display order
correct_order = [
    'ODAM', 'ODAPH', 'LAMC2', 'CXCL1', 'CXCL2', 'CXCL6', 'SLPI',
    'IL1A', 'IL36G', 'C15orf48', 'ANXA1', 'DNASE1L3',
    'COL17A1', 'DST', 'C1R', 'ITGB4', 'SAA1', 'SAA2',
    'CYP4B1', 'CFH', 'EHF', 'KRT19', 'CLEC7A', 'CFHR3',
]

# Copy of the data restricted to the chosen groups
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()

# Impose the display order through an ordered categorical dtype
cluster_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(cluster_dtype)

# Build the dot plot: per-gene standard scaling, colour scale limited to
# [0.2, 0.8], axes swapped, greyscale dots without outlines or grid.
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

# Text and spine settings, applied after the plot object is configured and before it is shown
plt.rcParams.update({
    'font.size': 14,
    'text.color': 'black',
    'font.weight': 'normal',
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.linewidth': 2,
})

# Show the plot
dotplot.show()

# Save as SVG with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Crev_Epi_Lvl4_dotplot.svg'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import scanpy as sc

# Start the coarse 'Epithelial_layers' annotation as an independent copy of 'Lvl4'
adata.obs['Epithelial_layers'] = adata.obs['Lvl4'].copy()

# List the labels currently present so the layer mapping can be checked against them
unique_categories = adata.obs['Epithelial_layers'].unique()
print("Original Categories in 'Lvl4':", unique_categories)

In [None]:
# Collapse the fine-grained 'Lvl4' epithelial clusters into four coarse layers,
# stored in adata.obs['Epithelial_layers']. Labels that have no entry in the
# lookup below are carried over unchanged.

# Coarse layer -> the Lvl4 clusters assigned to it
layer_members = {
    'Ep.Cr.lower': ['Ep.Cr.B', 'Ep.Cr.PB'],
    'Ep.Cr.upper': ['Ep.Cr.Sp.1', 'Ep.Cr.Sp.2'],
    'Ep.Oral.lower': [
        'Ep.B.1', 'Ep.B.2', 'Ep.B.3', 'Ep.B.4', 'Ep.B-PB',
        'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.PB.4', 'Ep.prol',
    ],
    'Ep.Oral.upper': ['Ep.Sp', 'Ep.K'],
}

# Flatten into the {Lvl4 cluster: layer} lookup consumed by Series.map
category_mapping = {
    member: layer
    for layer, members in layer_members.items()
    for member in members
}

# Begin from a categorical copy of Lvl4
adata.obs['Epithelial_layers'] = adata.obs['Lvl4'].copy().astype('category')

# Translate mapped labels; fall back to the original label wherever the lookup has no entry
adata.obs['Epithelial_layers'] = (
    adata.obs['Epithelial_layers']
    .map(category_mapping)
    .fillna(adata.obs['Epithelial_layers'])
)

# Re-cast so the category list reflects only the labels that remain
adata.obs['Epithelial_layers'] = adata.obs['Epithelial_layers'].astype('category')

# Show the resulting categories for a visual check
print("Updated Categories in 'Epithelial_layers':", adata.obs['Epithelial_layers'].cat.categories)


In [None]:
# Inspect the distinct layer labels now present in 'Epithelial_layers'
adata.obs['Epithelial_layers'].unique().tolist()

In [None]:
# Inspect the distinct cluster labels present in 'Lvl4'
adata.obs['Lvl4'].unique().tolist()

In [None]:
import scanpy as sc

# Marker-gene dot plot for the four coarse layers in adata.obs['Epithelial_layers']:
# rank genes per layer (Wilcoxon), pick up to 10 not-yet-used genes per layer in
# dendrogram order, and plot them.

# Step 1: Ensure observation names are unique
adata.obs_names_make_unique()

# Step 2: Subset the adata object to only the specific layers and create a copy
desired_clusters = ['Ep.Oral.upper', 'Ep.Oral.lower', 'Ep.Cr.upper', 'Ep.Cr.lower']

adata_subset = adata[adata.obs['Epithelial_layers'].isin(desired_clusters)].copy()

# Ensure 'Epithelial_layers' is categorical (also drops categories absent from the subset)
adata_subset.obs['Epithelial_layers'] = adata_subset.obs['Epithelial_layers'].astype('category')

# Step 3: Log-transform only when .uns carries no 'log1p' record.
# NOTE(review): this treats the presence of the 'log1p' key as proof that X is
# already log-transformed; confirm that holds for this object.
if 'log1p' not in adata_subset.uns_keys():
    sc.pp.log1p(adata_subset)  # Apply log transformation if not already done

# Manually set the base in log1p if missing
if 'log1p' in adata_subset.uns_keys() and 'base' not in adata_subset.uns['log1p']:
    adata_subset.uns['log1p']['base'] = 2.718281828459045  # Default to e

# Step 4: Perform differential expression analysis between the selected layers
sc.tl.rank_genes_groups(adata_subset, 'Epithelial_layers', method='wilcoxon', use_raw=False)

# Step 5: Collect the ranked gene names reported for each layer
top_genes_per_cluster = {}
for cluster in adata_subset.obs['Epithelial_layers'].cat.categories:
    top_genes_per_cluster[cluster] = adata_subset.uns['rank_genes_groups']['names'][cluster]

# Step 6: Generate the dendrogram to get the layer order
sc.tl.dendrogram(adata_subset, groupby='Epithelial_layers')

# Step 7: Retrieve the layer order based on the dendrogram
cluster_order = adata_subset.uns['dendrogram_Epithelial_layers']['categories_ordered']

# Step 8: Walk the layers in dendrogram order and take up to 10 genes each,
# skipping any gene already claimed by an earlier layer
unique_genes = set()
genes_for_plot = []

for cluster in cluster_order:
    if cluster in top_genes_per_cluster:
        cluster_genes = top_genes_per_cluster[cluster]
        genes_for_this_cluster = []
        for gene in cluster_genes:
            if len(genes_for_this_cluster) >= 10:
                break
            if gene not in unique_genes:
                unique_genes.add(gene)
                genes_for_this_cluster.append(gene)
        genes_for_plot.extend(genes_for_this_cluster)

# Step 9: Draw the dot plot with the selected genes
sc.pl.dotplot(adata_subset, var_names=genes_for_plot, groupby='Epithelial_layers', 
                 dendrogram=True, use_raw=False, cmap="vlag", 
                 standard_scale='var', swap_axes=True)
# Prints the full ranked gene-name arrays collected above, not just the plotted genes
print(top_genes_per_cluster)

In [None]:
import scanpy as sc

# Marker-gene dot plot for selected epithelial clusters in adata.obs['Lvl5']:
# rank genes per cluster (Wilcoxon), pick up to 5 not-yet-used genes per cluster
# in dendrogram order, and plot them.

# Step 1: Ensure observation names are unique
adata.obs_names_make_unique()

# Step 2: Subset the adata object to only the specific clusters and create a copy
desired_clusters = ['Ep.B.1',  'Ep.B.2', 'Ep.B-PB', 'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.prol', 'Ep.Sp', 'Ep.K', 
                        'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB']

adata_subset = adata[adata.obs['Lvl5'].isin(desired_clusters)].copy()

# Ensure 'Lvl5' is categorical (also drops categories absent from the subset)
adata_subset.obs['Lvl5'] = adata_subset.obs['Lvl5'].astype('category')

# Step 3: Log-transform only when .uns carries no 'log1p' record.
# NOTE(review): this treats the presence of the 'log1p' key as proof that X is
# already log-transformed; confirm that holds for this object.
if 'log1p' not in adata_subset.uns_keys():
    sc.pp.log1p(adata_subset)  # Apply log transformation if not already done

# Manually set the base in log1p if missing
if 'log1p' in adata_subset.uns_keys() and 'base' not in adata_subset.uns['log1p']:
    adata_subset.uns['log1p']['base'] = 2.718281828459045  # Default to e

# Step 4: Perform differential expression analysis between the selected clusters
sc.tl.rank_genes_groups(adata_subset, 'Lvl5', method='wilcoxon', use_raw=False)

# Step 5: Collect the ranked gene names reported for each cluster
top_genes_per_cluster = {}
for cluster in adata_subset.obs['Lvl5'].cat.categories:
    top_genes_per_cluster[cluster] = adata_subset.uns['rank_genes_groups']['names'][cluster]

# Step 6: Generate the dendrogram to get cluster order
sc.tl.dendrogram(adata_subset, groupby='Lvl5')

# Step 7: Retrieve the cluster order based on the dendrogram
cluster_order = adata_subset.uns['dendrogram_Lvl5']['categories_ordered']

# Step 8: Walk the clusters in dendrogram order and take up to 5 genes each,
# skipping any gene already claimed by an earlier cluster
unique_genes = set()
genes_for_plot = []

for cluster in cluster_order:
    if cluster in top_genes_per_cluster:
        cluster_genes = top_genes_per_cluster[cluster]
        genes_for_this_cluster = []
        for gene in cluster_genes:
            if len(genes_for_this_cluster) >= 5:
                break
            if gene not in unique_genes:
                unique_genes.add(gene)
                genes_for_this_cluster.append(gene)
        genes_for_plot.extend(genes_for_this_cluster)

# Step 9: Draw the dot plot with the selected genes
sc.pl.dotplot(adata_subset, var_names=genes_for_plot, groupby='Lvl5', 
                 dendrogram=True, use_raw=False, cmap="vlag", 
                 standard_scale='var', swap_axes=True)
# Prints the full ranked gene-name arrays collected above, not just the plotted genes
print(top_genes_per_cluster)

In [None]:
# Merge the numbered basal (Ep.B.1-4) and parabasal (Ep.PB.1-4) sub-clusters of
# 'Lvl4' into single 'Ep.B' / 'Ep.PB' labels. This overwrites adata.obs['Lvl4']
# in place, so every later cell sees the merged labels.

# Lookup of the form {'Ep.PB.1': 'Ep.PB', ..., 'Ep.B.4': 'Ep.B'}
replacement_dict = {
    f'Ep.{layer}.{number}': f'Ep.{layer}'
    for layer in ('PB', 'B')
    for number in range(1, 5)
}

# Cast to plain strings first, then swap only the labels listed in the lookup
adata.obs['Lvl4'] = adata.obs['Lvl4'].astype(str).replace(replacement_dict)

# Check the unique values in 'Lvl4' to confirm the merge
print(adata.obs['Lvl4'].unique())


In [None]:
import scanpy as sc

# Marker-gene dot plot for selected epithelial clusters in adata.obs['Lvl4']
# (using the merged 'Ep.B' / 'Ep.PB' labels): rank genes per cluster (Wilcoxon),
# pick up to 5 not-yet-used genes per cluster in dendrogram order, and plot them.

# Step 1: Ensure observation names are unique
adata.obs_names_make_unique()

# Step 2: Subset the adata object to only the specific clusters and create a copy
desired_clusters = ['Ep.B', 'Ep.B-PB', 'Ep.PB', 'Ep.prol', 'Ep.Sp', 'Ep.K', 
                        'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB']

adata_subset = adata[adata.obs['Lvl4'].isin(desired_clusters)].copy()

# Ensure 'Lvl4' is categorical (also drops categories absent from the subset)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype('category')

# Step 3: Log-transform only when .uns carries no 'log1p' record.
# NOTE(review): this treats the presence of the 'log1p' key as proof that X is
# already log-transformed; confirm that holds for this object.
if 'log1p' not in adata_subset.uns_keys():
    sc.pp.log1p(adata_subset)  # Apply log transformation if not already done

# Manually set the base in log1p if missing
if 'log1p' in adata_subset.uns_keys() and 'base' not in adata_subset.uns['log1p']:
    adata_subset.uns['log1p']['base'] = 2.718281828459045  # Default to e

# Step 4: Perform differential expression analysis between the selected clusters
sc.tl.rank_genes_groups(adata_subset, 'Lvl4', method='wilcoxon', use_raw=False)

# Step 5: Collect the ranked gene names reported for each cluster
top_genes_per_cluster = {}
for cluster in adata_subset.obs['Lvl4'].cat.categories:
    top_genes_per_cluster[cluster] = adata_subset.uns['rank_genes_groups']['names'][cluster]

# Step 6: Generate the dendrogram to get cluster order
sc.tl.dendrogram(adata_subset, groupby='Lvl4')

# Step 7: Retrieve the cluster order based on the dendrogram
cluster_order = adata_subset.uns['dendrogram_Lvl4']['categories_ordered']

# Step 8: Walk the clusters in dendrogram order and take up to 5 genes each,
# skipping any gene already claimed by an earlier cluster
unique_genes = set()
genes_for_plot = []

for cluster in cluster_order:
    if cluster in top_genes_per_cluster:
        cluster_genes = top_genes_per_cluster[cluster]
        genes_for_this_cluster = []
        for gene in cluster_genes:
            if len(genes_for_this_cluster) >= 5:
                break
            if gene not in unique_genes:
                unique_genes.add(gene)
                genes_for_this_cluster.append(gene)
        genes_for_plot.extend(genes_for_this_cluster)

# Step 9: Draw the dot plot with the selected genes
sc.pl.dotplot(adata_subset, var_names=genes_for_plot, groupby='Lvl4', 
                 dendrogram=True, use_raw=False, cmap="vlag", 
                 standard_scale='var', swap_axes=True)
# Prints the full ranked gene-name arrays collected above, not just the plotted genes
print(top_genes_per_cluster)

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np

# Curated-gene dot plot across the four coarse epithelial layers, saved as SVG.
# `pd` is not imported in this cell; it must already be in scope from an earlier cell.

plt.style.use('default')  # Reset matplotlib to its default style

# Define the subset of layers you want to include and their desired order
clusters_of_interest = ['Ep.Oral.lower', 'Ep.Cr.lower', 'Ep.Cr.upper', 'Ep.Oral.upper']

# Define the subset of genes you want to plot in the desired order
correct_order = [
'S100A16','KRT5', 'DST', 'MYC', 'EGFR', 'COL17A1',  'CXCL14',
'FGFBP1', 'CFHR3', 'C1R', 'SAA1', 'SAA2',
'ADAM28', 'CFH', 'ODAM', 'ODAPH', 'CXCL1', 'CXCL6',
'CSF3', 'DNASE1L3', 'LAMC2', 'IL1A', 'ANXA1', 'SERPINB2', 'CNFN', 'ELF5', 'IL36G', 'IL36A', 'C15orf48'
]

# Subset the AnnData object to include only the layers of interest
adata_subset = adata[adata.obs['Epithelial_layers'].isin(clusters_of_interest)].copy()

# Impose the layer order given in clusters_of_interest via an ordered categorical
adata_subset.obs['Epithelial_layers'] = adata_subset.obs['Epithelial_layers'].astype(
    pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
)

# Create the DotPlot
dotplot = sc.pl.DotPlot(
    adata_subset,
    var_names=correct_order,       # Genes, in the curated order
    groupby='Epithelial_layers',                # One group per epithelial layer
    standard_scale='var',          # Scale each gene across groups
    vmin=0.2,                      # Lower limit of the colour scale
    vmax=0.8                       # Upper limit of the colour scale
)

# Swap which axis carries genes and which carries groups
dotplot = dotplot.swap_axes(swap_axes=True)

# Modify the style for grayscale and remove outlines
dotplot = dotplot.style(
    cmap="Greys",               # Use grayscale colormap
    dot_edge_color=None,        # No explicit edge colour
    dot_edge_lw=0,              # No line width for edges
    grid=False,                 # Disable grid lines
    dot_min=0.1,                # Passed through as DotPlot.style's dot_min
    dot_max=0.8                 # Passed through as DotPlot.style's dot_max
)
# Global rcParams change: stays in effect for every later plot in this kernel
plt.rcParams.update({'font.size': 14, 'text.color': 'black', 'font.weight': 'normal', 'axes.spines.right' : False, 'axes.spines.top': False, 'axes.linewidth': 2})


# Show the plot
dotplot.show()

# Save the plot as SVG with a transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Or_Crev_layers_dotplot.svg'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
# List the annotation columns available in adata.obs
adata.obs.columns.unique().tolist()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import os

# Small-multiples spatial map for one sample: one panel per selected cluster,
# with that cluster's cells in red on top of all remaining cells in grey.
# The finished figure is written as a single page to `pdf_filename`.

sample_name = 'HV140A_A'   # sample to plot; also used in the figure title
cluster_key = 'Lvl5'       # one column drives BOTH panel selection and cell highlighting
rotation_angle = 30        # rotation applied to the centroids, in degrees

# Output locations
save_directory = '/data/vasileiosionat2/Xenium/Figures/'
pdf_filename = os.path.join(save_directory, 'all_clusters_4rows.pdf')
tiff_filename = os.path.join(save_directory, 'all_clusters_4rows.tiff')
os.makedirs(save_directory, exist_ok=True)

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]
cluster_labels = adata_sample.obs[cluster_key]

# Clusters that get a panel. The labels are read from the same column that is
# used for the masks below; reading them from 'Lvl4' (as before) produced panels
# whose label never matched any 'Lvl5' value once the two columns diverged.
panel_clusters = [
    'Ep.B.1', 'Ep.B.2', 'Ep.B-PB', 'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.prol',
    'Ep.Sp', 'Ep.K', 'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB',
    'Fib.3', 'VEC.2', 'VEC.5', 'cDC2', 'Lang', 'MyoF', 'Mac',
]
unique_clusters = cluster_labels[cluster_labels.isin(panel_clusters)].unique()
print(unique_clusters)

# Reorder clusters (if a custom order is wanted, replace `sorted(unique_clusters)`)
ordered_clusters = sorted(unique_clusters)
if not ordered_clusters:
    raise ValueError(
        f"None of the requested clusters occur in '{cluster_key}' for sample {sample_name!r}"
    )

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 5
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Rotate the centroids once; every panel reuses the same coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Aspect value handed to every panel.
# NOTE(review): a numeric aspect of x_range / y_range is not the same as
# 'equal'; confirm the resulting panel shape is what the figure needs.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

with PdfPages(pdf_filename) as pdf:
    # squeeze=False keeps `axes` two-dimensional even for a single row or column
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(26, 26), squeeze=False)
    fig.patch.set_facecolor('white')
    axes = axes.flatten()

    for ax, cluster in zip(axes, ordered_clusters):
        ax.set_facecolor('white')

        # Remove the outline
        for spine in ax.spines.values():
            spine.set_visible(False)

        in_cluster = cluster_labels == cluster

        # Grey dots: every cell that is not in the current cluster.
        # rasterized=True keeps the PDF size manageable for large cell counts.
        ax.scatter(
            x=new_x_coords[~in_cluster],
            y=new_y_coords[~in_cluster],
            c='#C0C0C0',
            s=3,
            rasterized=True,
        )

        # Red dots: cells of the current cluster, drawn on top
        ax.scatter(
            x=new_x_coords[in_cluster],
            y=new_y_coords[in_cluster],
            c='red',
            s=9,
            rasterized=True,
        )

        ax.set_aspect(aspect_ratio)

        # Cluster name near the bottom edge of the panel
        ax.text(
            0.5, 0.02, f'{cluster}',
            horizontalalignment='center',
            verticalalignment='center',
            transform=ax.transAxes,
            color='black', fontsize=20, weight='bold'
        )

        # No grid, ticks or tick labels
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])

    # Hide grid cells that have no cluster assigned
    for ax in axes[len(ordered_clusters):]:
        ax.set_visible(False)

    # Adjust the spacing between subplots
    fig.subplots_adjust(
        left=0.05,
        right=0.95,
        top=0.95,
        bottom=0.05,
        wspace=0.1,
        hspace=0.05
    )

    # Title names the sample actually plotted and is drawn in black so that it
    # is visible on the white figure background
    fig.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

    # Write the page before plt.show(), which may close the figure on some backends
    pdf.savefig(fig, dpi=300, bbox_inches='tight')

    # Optional TIFF export
    #fig.savefig(tiff_filename, dpi=300, format='tiff')

    plt.show()
    plt.close(fig)

In [None]:
# Inspect the distinct niche labels present in 'niche_cc14'
adata.obs['niche_cc14'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import os

# Small-multiples spatial map for one sample: one panel per niche, with that
# niche's cells in red on top of all remaining cells in grey.
# The finished figure is written as a single page to `pdf_filename`.

sample_name = 'HV140A_A'   # sample to plot; also used in the figure title
cluster_key = 'niche_cc14' # column used for panel selection and cell highlighting
rotation_angle = 30        # rotation applied to the centroids, in degrees

# Output locations. The file name is specific to this niche figure so that it
# does not overwrite the per-cluster PDF written to the same directory.
save_directory = '/data/vasileiosionat2/Xenium/Figures/'
pdf_filename = os.path.join(save_directory, 'all_niches_cc14.pdf')
tiff_filename = os.path.join(save_directory, 'all_niches_cc14.tiff')
os.makedirs(save_directory, exist_ok=True)

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]
cluster_labels = adata_sample.obs[cluster_key]

# Niches that get a panel
panel_niches = [
    'Lymphoid',
    'Plasma',
    'Subepi CT (crevicular)',
    'Lymphoid-CT interface',
    'Subepi CT (oral)',
    'Plasma-CT interface',
    'BV',
    'Fibrous CT',
    'Myeloid',
    'Parabasal',
    'Basal',
    'Spinous',
    'Keratin',
    'Crevicular',
]
unique_clusters = cluster_labels[cluster_labels.isin(panel_niches)].unique()
print(unique_clusters)

# Reorder niches (if a custom order is wanted, replace `sorted(unique_clusters)`)
ordered_clusters = sorted(unique_clusters)
if not ordered_clusters:
    raise ValueError(
        f"None of the requested niches occur in '{cluster_key}' for sample {sample_name!r}"
    )

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 8
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Rotate the centroids once; every panel reuses the same coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Aspect value handed to every panel.
# NOTE(review): a numeric aspect of x_range / y_range is not the same as
# 'equal'; confirm the resulting panel shape is what the figure needs.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

with PdfPages(pdf_filename) as pdf:
    # squeeze=False keeps `axes` two-dimensional even for a single row or column
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(22, 56), squeeze=False)
    fig.patch.set_facecolor('white')
    axes = axes.flatten()

    for ax, cluster in zip(axes, ordered_clusters):
        ax.set_facecolor('white')

        # Remove the outline
        for spine in ax.spines.values():
            spine.set_visible(False)

        in_cluster = cluster_labels == cluster

        # Grey dots: every cell that is not in the current niche.
        # rasterized=True keeps the PDF size manageable for large cell counts.
        ax.scatter(
            x=new_x_coords[~in_cluster],
            y=new_y_coords[~in_cluster],
            c='#C0C0C0',
            s=3,
            rasterized=True,
        )

        # Red dots: cells of the current niche, drawn on top
        ax.scatter(
            x=new_x_coords[in_cluster],
            y=new_y_coords[in_cluster],
            c='red',
            s=9,
            rasterized=True,
        )

        ax.set_aspect(aspect_ratio)

        # Niche name near the bottom edge of the panel
        ax.text(
            0.5, 0.02, f'{cluster}',
            horizontalalignment='center',
            verticalalignment='center',
            transform=ax.transAxes,
            color='black', fontsize=20, weight='bold'
        )

        # No grid, ticks or tick labels
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])

    # Hide grid cells that have no niche assigned
    for ax in axes[len(ordered_clusters):]:
        ax.set_visible(False)

    # Adjust the spacing between subplots
    fig.subplots_adjust(
        left=0.05,
        right=0.95,
        top=0.95,
        bottom=0.05,
        wspace=0.1,
        hspace=0.05
    )

    # Title names the sample actually plotted and is drawn in black so that it
    # is visible on the white figure background
    fig.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

    # Write the page before plt.show(), which may close the figure on some backends
    pdf.savefig(fig, dpi=300, bbox_inches='tight')

    # Optional TIFF export
    #fig.savefig(tiff_filename, dpi=300, format='tiff')

    plt.show()
    plt.close(fig)
In [None]:
# Heatmap of absolute cell counts: Lvl4 cell types (rows) x niche_cc14 niches (columns).
# Relies on `pd`, `sns` and `plt` imported by earlier cells.

# Work on an explicit copy: pd.DataFrame(adata.obs) does not copy the data, so
# the dtype cast below could otherwise leak back into adata.obs
df = adata.obs.copy()
df['niche_cc14'] = df['niche_cc14'].astype('category')

# Count cells per (niche, cell type); observed=False keeps empty categories as zero counts
niche_groups = df.groupby(['niche_cc14', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Flag cell types to leave out of the plot. The fragments are matched literally
# (regex=False), so the '.' in 'B.4' no longer matches an arbitrary character.
lvl4_names = df['Lvl4'].astype(str)
removal_mask = (
    lvl4_names.str.contains('Mix', regex=False)
    | lvl4_names.str.contains('prol', regex=False)
    | lvl4_names.str.contains('B.4', regex=False)
    | lvl4_names.str.startswith('Pl.3')
    | lvl4_names.str.startswith('Pl.4')
    | lvl4_names.str.startswith('Ep.B.3')
    | lvl4_names.str.startswith('Oth.')
    | lvl4_names.str.startswith('Unclea')
)

# Get the unique clusters to be removed
clusters_to_remove = df.loc[removal_mask, 'Lvl4'].unique()

# Drop those cell types, then transpose so cell types become the rows
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')
niche_groups_filtered = niche_groups_filtered.T

# Column (niche) order for the x-axis; a niche missing from the data becomes an all-NaN column
custom_order = ['Basal', 'Parabasal', 'Spinous', 'Keratin', 'Crevicular', 'BV',
                'Fibrous CT', 'Subepi CT (oral)', 'Subepi CT (crevicular)',
                'Myeloid', 'Lymphoid', 'Plasma', 'Lymphoid-CT interface',
                'Plasma-CT interface']
niche_groups_filtered = niche_groups_filtered.reindex(columns=custom_order)

# Plot the heatmap; counts above vmax share the top colour
fig, ax = plt.subplots(figsize=(16, 20))
sns.heatmap(
    niche_groups_filtered,
    ax=ax,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 5000, 10000, 15000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=15000,
)
ax.set_title('Cell Type Distribution in Niches')
ax.set_xlabel('Niche Cluster')
ax.set_ylabel('Cell Type')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the top colour-bar tick to show that the scale is clipped at 15000
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 5000, 10000, 15000])
cbar.ax.set_yticklabels(['0', '5000', '10000', '>15000'])

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of how each selected Lvl4 cluster is distributed over the niche_cc14
# niches (each row is normalised to sum to 1).

# Work on an explicit copy: pd.DataFrame(adata.obs) does not copy the data, so
# the dtype cast below could otherwise leak back into adata.obs
df = adata.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Count cells per (cluster, niche); observed=False keeps empty categories as zero counts
niche_groups = df.groupby(['Lvl4', 'niche_cc14'], observed=False).size().unstack(fill_value=0)

# Identify clusters to remove
removal_mask = df['Lvl4'].str.startswith('Oth.') | df['Lvl4'].str.startswith('Neuro')
clusters_to_remove = df.loc[removal_mask, 'Lvl4'].unique()

# Filter out these clusters from the plot
niche_groups_filtered = niche_groups[~niche_groups.index.isin(clusters_to_remove)]

# Specific Lvl4 clusters to show, in display order
clusters_to_show = ['Ep.B.1',  'Ep.B.2', 'Ep.B-PB', 'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.prol', 'Ep.Sp', 'Ep.K',
                    'Ep.Cr.B', 'Ep.Cr.PB', 'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2',  'Fib.3', 'VEC.2', 'VEC.5', 'cDC2', 'Lang', 'MyoF', 'Mac', 'Mac.Neut.Mix']

# Keep only the labels that exist. `.loc` with a list raises KeyError when any
# label is missing, which happens here once the numbered Ep.B.* / Ep.PB.*
# labels have been merged in 'Lvl4' by an earlier cell.
missing_clusters = [name for name in clusters_to_show if name not in niche_groups_filtered.index]
if missing_clusters:
    print(f"Skipping clusters not present in 'Lvl4': {missing_clusters}")
present_clusters = [name for name in clusters_to_show if name in niche_groups_filtered.index]
niche_groups_filtered = niche_groups_filtered.loc[present_clusters]

# Calculate relative frequencies (row-wise fractions)
niche_groups_relative = niche_groups_filtered.div(niche_groups_filtered.sum(axis=1), axis=0)

# Plot the heatmap
fig, ax = plt.subplots(figsize=(15, 21))  # sized to leave room for the x-axis labels
sns.heatmap(niche_groups_relative, ax=ax, cmap='plasma', cbar_kws={'label': ''}, linewidths=0.05, linecolor='black')
ax.set_title('')
ax.set_xlabel('')
ax.set_ylabel('')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
plt.tight_layout()  # Ensures all elements fit within the figure area
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the fraction of each selected Lvl4 cluster found in each epithelial
# niche. Fractions are relative to the cluster's total over ALL niches, so a
# row does not have to sum to 1 across the plotted columns. Saved as SVG.

# Work on an explicit copy: pd.DataFrame(adata.obs) does not copy the data, so
# the dtype cast below could otherwise leak back into adata.obs
df = adata.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Step 1: Count cells per (cluster, niche); observed=False keeps empty categories as zero counts
niche_groups = df.groupby(['Lvl4', 'niche_cc14'], observed=False).size().unstack(fill_value=0)

# Step 2: Total number of cells per cluster (across all niches)
total_cells_per_cluster = niche_groups.sum(axis=1)

# Step 3: Relative frequency of each niche within each cluster
niche_groups_relative = niche_groups.div(total_cells_per_cluster, axis=0)

# Step 4: Niches and clusters to plot, in display order
niches_to_plot = ['Basal', 'Parabasal', 'Spinous', 'Keratin', 'Crevicular', 'Subepi CT (oral)', 'Subepi CT (crevicular)']
clusters_to_show = ['Ep.B', 'Ep.prol', 'Ep.B-PB', 'Ep.PB', 'Ep.Sp', 'Ep.K',
                    'Ep.Cr.B', 'Ep.Cr.PB', 'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Fib.3', 'VEC.2', 'cDC2', 'Lang', 'Mel']

# Step 5: Keep only the niches that are present in the dataset
valid_niches = [niche for niche in niches_to_plot if niche in niche_groups.columns]
print(f"Valid niches to plot: {valid_niches}")

# Step 6: Keep only the clusters that are present. `.loc` with a list raises
# KeyError when any requested label is missing, so absent labels are reported
# and skipped instead.
missing_clusters = [name for name in clusters_to_show if name not in niche_groups_relative.index]
if missing_clusters:
    print(f"Skipping clusters not present in 'Lvl4': {missing_clusters}")
valid_clusters = [name for name in clusters_to_show if name in niche_groups_relative.index]

# Step 7: Select rows and columns in the custom order
niche_groups_filtered = niche_groups_relative.loc[valid_clusters, valid_niches]

# Step 8: Drop clusters whose name starts with 'Oth.' or 'Neuro'
removal_mask = df['Lvl4'].str.startswith('Oth.') | df['Lvl4'].str.startswith('Neuro')
clusters_to_remove = df.loc[removal_mask, 'Lvl4'].unique()
niche_groups_filtered = niche_groups_filtered[~niche_groups_filtered.index.isin(clusters_to_remove)]

# Step 9: Plot the heatmap with the filtered data and custom orders
fig, ax = plt.subplots(figsize=(10, 14))  # sized to leave room for the x-axis labels
sns.heatmap(niche_groups_filtered, ax=ax, cmap='coolwarm', cbar_kws={'label': ''}, linewidths=0.05, linecolor='black')
ax.set_title('Relative abundance of clusters in Epithelial niches')
ax.set_xlabel('')
ax.set_ylabel('')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
plt.tight_layout()  # Ensures all elements fit within the figure area

# Save the plot as an SVG file before showing it
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_clusters_Epi.svg"
fig.savefig(output_path, format='svg')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell-boundary polygons of one sample inside a rectangular crop,
# coloured by niche ('niche_cc14'), add a scale bar and two ROI rectangles,
# and save the result as an SVG on a black background.

# Load the boundary-vertex table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the same units as vertex_x / vertex_y
deep_xmin, deep_xmax = 1200, 3600  # Horizontal bounds
deep_ymin, deep_ymax = 100, 2700  # Vertical bounds

# Vertices that fall inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()
# NOTE(review): `deep_data` (all vertices of every cell touching the crop) is
# overwritten below by `deep_vertices.copy()` and `grouped` is not used again in
# this cell. The plotted polygons are therefore built only from the vertices
# inside the crop - confirm that truncating border cells is intended.
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Niches drawn in a dedicated colour
unique_cluster_colors = {
   'Basal': 'yellow', 
   'Parabasal' : 'yellow', 
    'Spinous': 'yellow' , 
    'Keratin': 'yellow', 
    'Crevicular': '#FF00FF', 
}

# Colour for every niche not listed in `unique_cluster_colors`
default_color = "#4682B4"

# Subset the AnnData object for the sample
sample_id = "HV140A_A"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the CSV cell IDs
# (assumes obs_names and the CSV 'cell_id' use the same identifiers - TODO confirm)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# Dictionary obs index -> niche_cc14 label from adata_cropped
cluster_mapping = adata_cropped.obs['niche_cc14'].to_dict()

# Add a niche column to the in-crop vertex table
deep_data = deep_vertices.copy()
deep_data['niche_cc14'] = deep_data['cell_id'].map(cluster_mapping)

# Drop rows with missing niche (cell IDs in the cropped area that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['niche_cc14'])

# Create the figure
plt.figure(figsize=(10, 10))

# Switch to the dark style.
# NOTE(review): this runs after plt.figure(), and it changes the global style
# for every later plot in the kernel until another style is applied.
plt.style.use('dark_background')

# Fill one polygon per cell, coloured by its niche
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_cc14'].iloc[0]  # All rows for the same cell_id should have the same niche
    
    # Dedicated colour if the niche has one, otherwise the default colour
    color = unique_cluster_colors.get(cluster, default_color)
    
    # Plot the mask
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])  
plt.yticks([])  


# Hide the axes for better visual presentation
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Add the scale bar.
# NOTE(review): the bar length is given in data coordinates (transData), and
# 40 / 0.2125 is about 188 rather than 200 - confirm the physical length this
# bar is meant to represent.
scale_bar_length_px = 200  # Calculated from 40 microns / 0.2125 microns per pixel
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# ROIs as (xmin, xmax, ymin, ymax), outlined on top of the tissue
rois = [
    (2000, 2600, 280, 880),  # Lymphoid ROI
    (2100, 2600, 2000, 2500),  # Deep ROI
]

# Draw the squares with no fill and white outline
for xmin, xmax, ymin, ymax in rois:
    plt.gca().add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, 
                                      edgecolor='white', facecolor='none', linewidth=2))
# Save the plot as an SVG file; facecolor is passed explicitly for the saved file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of one cropped region of sample HV140A_A, filled by
# their `Lvl4` cluster, and save the result as an SVG on a black background.
# Relies on `adata` (AnnData) already being loaded earlier in the notebook.

# Per-vertex cell boundary table (columns used: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate units of the boundary table
deep_xmin, deep_xmax = 2000, 2600  # Horizontal bounds
deep_ymin, deep_ymax = 280, 880  # Vertical bounds

# A cell is part of the crop when at least one of its vertices lies inside it
# (`between` is inclusive on both ends, like the original >= / <= tests).
deep_vertices = data[
    data['vertex_x'].between(deep_xmin, deep_xmax) &
    data['vertex_y'].between(deep_ymin, deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Fill colour for each cluster that should stand out
unique_cluster_colors = {
    'Ep.B': '#00FF7F',
    'Ep.prol': '#FF69B4',
    'Ep.PB': '#F88379',
    'Ep.B-PB': '#FFFF00',
    'Ep.Sp': '#FF00FF',
    'Ep.K': '#D70040',
}

# Fallback fill for every other cluster (this is blue, not grey)
default_color = "blue"

# Restrict the AnnData object to this sample
sample_id = "HV140A_A"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells touching the crop. The match is made on `obs.index`
# (assumes the index holds the same IDs as the CSV `cell_id` - TODO confirm).
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl4 cluster label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outline of every cell touching the crop. Using only the
# in-crop vertices (as before) fills boundary cells from truncated polygons;
# the axis limits set below clip the full outlines at the crop edge instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in `adata_cropped`
deep_data = deep_data.dropna(subset=['Lvl4'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style before the figure exists so it affects the figure too
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# One filled polygon per cell; all rows of a cell share the same cluster label
for cell_id, group in grouped:
    cluster = group['Lvl4'].iloc[0]
    ax.fill(
        group['vertex_x'].values,
        group['vertex_y'].values,
        color=unique_cluster_colors.get(cluster, default_color),
        edgecolor='black',
        linewidth=0.5,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
ax.set_aspect('equal', adjustable='box')

# No ticks, no axes frame
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Show exactly the cropped window
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Scale bar. The length is given in DATA units because the bar is anchored to
# `ax.transData`; it is not a pixel count. The earlier note "40 microns /
# 0.2125 microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the boundary coordinates are already in microns,
# making this a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Oral_Epi_Clusters_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
fig.patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of the crevicular region of sample HV140A_A, filled by
# their `Lvl4` cluster, and save the result as an SVG on a black background.
# Relies on `adata` (AnnData) already being loaded earlier in the notebook.

# Per-vertex cell boundary table (columns used: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate units of the boundary table
deep_xmin, deep_xmax = 2100, 2600  # Horizontal bounds
deep_ymin, deep_ymax = 2000, 2500  # Vertical bounds

# A cell is part of the crop when at least one of its vertices lies inside it
# (`between` is inclusive on both ends, like the original >= / <= tests).
deep_vertices = data[
    data['vertex_x'].between(deep_xmin, deep_xmax) &
    data['vertex_y'].between(deep_ymin, deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Fill colour for each cluster that should stand out
unique_cluster_colors = {
    'Ep.Cr.B': 'cyan',
    'Ep.Cr.PB': '#FFBF00',
    'Ep.Cr.Sp.1': '#E34234',
    'Ep.Cr.Sp.2': '#4CBB17',
}

# Fallback fill for every other cluster (this is blue, not grey)
default_color = "blue"

# Restrict the AnnData object to this sample
sample_id = "HV140A_A"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells touching the crop. The match is made on `obs.index`
# (assumes the index holds the same IDs as the CSV `cell_id` - TODO confirm).
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl4 cluster label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outline of every cell touching the crop. Using only the
# in-crop vertices (as before) fills boundary cells from truncated polygons;
# the axis limits set below clip the full outlines at the crop edge instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no cluster label in `adata_cropped`
deep_data = deep_data.dropna(subset=['Lvl4'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style before the figure exists so it affects the figure too
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# One filled polygon per cell; all rows of a cell share the same cluster label
for cell_id, group in grouped:
    cluster = group['Lvl4'].iloc[0]
    ax.fill(
        group['vertex_x'].values,
        group['vertex_y'].values,
        color=unique_cluster_colors.get(cluster, default_color),
        edgecolor='black',
        linewidth=0.5,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
ax.set_aspect('equal', adjustable='box')

# No ticks, no axes frame
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Show exactly the cropped window
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Scale bar. The length is given in DATA units because the bar is anchored to
# `ax.transData`; it is not a pixel count. The earlier note "40 microns /
# 0.2125 microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the boundary coordinates are already in microns,
# making this a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
ax.add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Crev_Epi_Clusters_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
fig.patch.set_facecolor('black')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Overlay selected transcripts (one colour per gene) on grey cell outlines for
# one cropped region of sample HV140A_A and save the figure as an SVG.

# Transcript table (columns used: x_location, y_location, feature_name)
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Boundary table. The variables are called "nucleus"/"nuclei", but the file
# that is read is cell_boundaries.csv.gz.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Crop window, expressed in the coordinate units of both tables
xmin, xmax = 2000, 2600  # Horizontal bounds
ymin, ymax = 280, 880  # Vertical bounds

# Transcripts located inside the crop (bounds inclusive)
filtered_transcripts = transcripts[
    transcripts['x_location'].between(xmin, xmax) &
    transcripts['y_location'].between(ymin, ymax)
]

# ... restricted to the genes to display
transcript_of_interest = ["IL36A", "SERPINB3", "CXCL14", "MKI67", "MYC"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the crop (bounds inclusive)
filtered_nuclei = nuclei[
    nuclei['vertex_x'].between(xmin, xmax) &
    nuclei['vertex_y'].between(ymin, ymax)
]

# Manually chosen colour for each gene
transcript_color_map = {
    "IL36A": "red",
    "SERPINB3": "#0FFF50",
    "CXCL14": "yellow",
    "MKI67": "cyan",
    "MYC": '#DA70D6',
}

# Black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Thin grey outline per cell (line only, no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    plt.plot(group['vertex_x'].values, group['vertex_y'].values,
             color='grey', linewidth=0.25)

# Draw all selected transcripts in ONE scatter call with per-point colours.
# Row order (and therefore draw order) is unchanged from the previous per-row
# loop, but only a single artist is created instead of one per transcript.
if not filtered_transcripts.empty:
    point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
    plt.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=point_colors,
        s=3,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
plt.gca().set_aspect('equal', adjustable='box')

# Remove axis ticks
plt.xticks([])
plt.yticks([])

# Scale bar. The length is given in DATA units because the bar is anchored to
# `transData`; it is not a pixel count. The earlier note "40 microns / 0.2125
# microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the coordinates are already in microns, making this
# a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_oral_transcripts1_HV140A.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Overlay selected transcripts (one colour per gene) on grey cell outlines for
# one cropped region of sample HV140A_A and save the figure as an SVG.

# Transcript table (columns used: x_location, y_location, feature_name)
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Boundary table. The variables are called "nucleus"/"nuclei", but the file
# that is read is cell_boundaries.csv.gz.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Crop window, expressed in the coordinate units of both tables
xmin, xmax = 2000, 2600  # Horizontal bounds
ymin, ymax = 280, 880  # Vertical bounds

# Transcripts located inside the crop (bounds inclusive)
filtered_transcripts = transcripts[
    transcripts['x_location'].between(xmin, xmax) &
    transcripts['y_location'].between(ymin, ymax)
]

# ... restricted to the genes to display
transcript_of_interest = ["IL36G", "SERPINB2", "COL17A1", "FGFBP1"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the crop (bounds inclusive)
filtered_nuclei = nuclei[
    nuclei['vertex_x'].between(xmin, xmax) &
    nuclei['vertex_y'].between(ymin, ymax)
]

# Manually chosen colour for each gene.
# NOTE(review): FGFBP1 was "#C", which is not a valid matplotlib colour and
# made the plot raise ValueError on the first FGFBP1 transcript. "cyan" is a
# placeholder that is distinct from the other three - confirm the intended hex.
transcript_color_map = {
    "IL36G": "#DA70D6",
    "SERPINB2": "#FFFF00",
    "COL17A1": "#0FFF50",
    "FGFBP1": "cyan",
}

# Black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Thin grey outline per cell (line only, no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    plt.plot(group['vertex_x'].values, group['vertex_y'].values,
             color='grey', linewidth=0.25)

# Draw all selected transcripts in ONE scatter call with per-point colours.
# Row order (and therefore draw order) is unchanged from the previous per-row
# loop, but only a single artist is created instead of one per transcript.
if not filtered_transcripts.empty:
    point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
    plt.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=point_colors,
        s=3,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
plt.gca().set_aspect('equal', adjustable='box')

# Remove axis ticks
plt.xticks([])
plt.yticks([])

# Scale bar. The length is given in DATA units because the bar is anchored to
# `transData`; it is not a pixel count. The earlier note "40 microns / 0.2125
# microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the coordinates are already in microns, making this
# a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Oral_transcripts2_HV140A.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Overlay selected transcripts (one colour per gene) on grey cell outlines for
# the crevicular region of sample HV140A_A and save the figure as an SVG.

# Transcript table (columns used: x_location, y_location, feature_name)
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Boundary table. The variables are called "nucleus"/"nuclei", but the file
# that is read is cell_boundaries.csv.gz.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Crop window, expressed in the coordinate units of both tables
xmin, xmax = 2100, 2600  # Horizontal bounds
ymin, ymax = 2000, 2500  # Vertical bounds

# Transcripts located inside the crop (bounds inclusive)
filtered_transcripts = transcripts[
    transcripts['x_location'].between(xmin, xmax) &
    transcripts['y_location'].between(ymin, ymax)
]

# ... restricted to the genes to display
transcript_of_interest = ["ODAM", "CXCL1", "COL17A1", "LAMC2"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the crop (bounds inclusive)
filtered_nuclei = nuclei[
    nuclei['vertex_x'].between(xmin, xmax) &
    nuclei['vertex_y'].between(ymin, ymax)
]

# Manually chosen colour for each gene
transcript_color_map = {
    "COL17A1": "cyan",
    "CXCL1": "#FF5F1F",
    "ODAM": "#DA70D6",
    "LAMC2": "#FFFF00",
}

# Black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Thin grey outline per cell (line only, no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    plt.plot(group['vertex_x'].values, group['vertex_y'].values,
             color='grey', linewidth=0.25)

# Draw all selected transcripts in ONE scatter call with per-point colours.
# Row order (and therefore draw order) is unchanged from the previous per-row
# loop, but only a single artist is created instead of one per transcript.
if not filtered_transcripts.empty:
    point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
    plt.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=point_colors,
        s=3,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
plt.gca().set_aspect('equal', adjustable='box')

# Remove axis ticks
plt.xticks([])
plt.yticks([])

# Scale bar. The length is given in DATA units because the bar is anchored to
# `transData`; it is not a pixel count. The earlier note "40 microns / 0.2125
# microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the coordinates are already in microns, making this
# a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Crev_transcripts1_HV140A.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Overlay selected transcripts (one colour per gene) on grey cell outlines for
# the crevicular region of sample HV140A_A and save the figure as an SVG.

# Transcript table (columns used: x_location, y_location, feature_name)
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Boundary table. The variables are called "nucleus"/"nuclei", but the file
# that is read is cell_boundaries.csv.gz.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/HV140A_A/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Crop window, expressed in the coordinate units of both tables
xmin, xmax = 2100, 2600  # Horizontal bounds
ymin, ymax = 2000, 2500  # Vertical bounds

# Transcripts located inside the crop (bounds inclusive)
filtered_transcripts = transcripts[
    transcripts['x_location'].between(xmin, xmax) &
    transcripts['y_location'].between(ymin, ymax)
]

# ... restricted to the genes to display
transcript_of_interest = ["SAA1", "CFHR3", "IL1A", "DNASE1L3"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the crop (bounds inclusive)
filtered_nuclei = nuclei[
    nuclei['vertex_x'].between(xmin, xmax) &
    nuclei['vertex_y'].between(ymin, ymax)
]

# Manually chosen colour for each gene
transcript_color_map = {
    "SAA1": "#DA70D6",
    "CFHR3": "red",
    "IL1A": "#0FFF50",
    "DNASE1L3": "#FFFF00",
}

# Black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Thin grey outline per cell (line only, no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    plt.plot(group['vertex_x'].values, group['vertex_y'].values,
             color='grey', linewidth=0.25)

# Draw all selected transcripts in ONE scatter call with per-point colours.
# Row order (and therefore draw order) is unchanged from the previous per-row
# loop, but only a single artist is created instead of one per transcript.
if not filtered_transcripts.empty:
    point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
    plt.scatter(
        filtered_transcripts['x_location'].to_numpy(),
        filtered_transcripts['y_location'].to_numpy(),
        c=point_colors,
        s=3,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
plt.gca().set_aspect('equal', adjustable='box')

# Remove axis ticks
plt.xticks([])
plt.yticks([])

# Scale bar. The length is given in DATA units because the bar is anchored to
# `transData`; it is not a pixel count. The earlier note "40 microns / 0.2125
# microns per pixel" would give ~188, not 40.
# NOTE(review): presumably the coordinates are already in microns, making this
# a 40 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)
plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Crev_transcripts2_HV140A.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import scanpy as sc
# Start plotting with black background
import matplotlib.pyplot as plt
import numpy as np

# Greyscale dot plot of marker genes across the epithelial `Lvl4` clusters,
# saved as a PDF. Relies on `adata` and `pd` from earlier notebook cells.

# Font settings: fonttype 42 stores text as TrueType in PDF/PS output
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

# Clusters to include, in the order they should appear on the plot
clusters_of_interest = [
    'Ep.B', 'Ep.prol', 'Ep.B-PB', 'Ep.PB', 'Ep.Sp', 'Ep.K',
    'Ep.Cr.B', 'Ep.Cr.PB', 'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2',
]

# Genes to plot, in display order
correct_order = [
    'KRT5', 'COL17A1', 'ITGB4', 'CXCL14', 'MKI67', 'CDK1', 'AQP3', 'KRT19',
    'FGFBP1', 'S100A16', 'SLPI', 'MYC',
    'SERPINB2', 'SERPINB3', 'SLPI', 'NOD2', 'IL36A', 'CNFN', 'IL36G',
    'SAA1', 'SAA2',
    'CFH', 'CFHR3', 'ODAM', 'ODAPH', 'LAMC2', 'CXCL1', 'CXCL2', 'CXCL6',
    'IL1A', 'DNASE1L3',
]

# 'SLPI' is listed twice above; keep only the first occurrence of any repeated
# gene so each gene gets exactly one column.
# NOTE(review): confirm the first position is the intended one for SLPI.
correct_order = list(dict.fromkeys(correct_order))

# Keep only the cells belonging to the clusters of interest
adata_subset = adata[adata.obs['Lvl4'].isin(clusters_of_interest)].copy()

# Make `Lvl4` an ordered categorical so the groups follow the order above
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(
    pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
)

# Build the dot plot object
dotplot = sc.pl.DotPlot(
    adata_subset,
    var_names=correct_order,       # Genes
    groupby='Lvl4',                # Clusters
    standard_scale='var',          # Scale each gene across groups
    vmin=0.1,
    vmax=1,                        # Upper bound of the colour scale
    figsize=(14, 4)
)

# swap_axes=False keeps the default orientation (axes are NOT transposed)
dotplot = dotplot.swap_axes(swap_axes=False)

# Greyscale styling without dot outlines
dotplot = dotplot.style(
    cmap="Greys",               # Greyscale colormap
    dot_edge_color=None,        # No dot outline colour
    dot_edge_lw=0,              # No line width for edges
    grid=False,                 # No grid lines
    dot_min=0.1,                # Minimum dot size
    dot_max=0.8                 # Maximum dot size
)


# Show the plot
dotplot.show()

# Save the plot as a PDF (despite older notes mentioning SVG) with a
# transparent background
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl4_dotplot_horizontal.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
# Read the cell boundary table of sample s2r3 and display it for inspection
# (the bare `data` expression renders the frame as the cell output).
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)
data

In [None]:
# List the distinct values of the `sample` column in `adata.obs`
adata.obs['sample'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of a large region of sample s2r3, filled by their
# `niche_cc14` niche label, with a white square marking a region of interest.
# Relies on `adata` (AnnData) already being loaded earlier in the notebook.

# Per-vertex cell boundary table (columns used: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate units of the boundary table
deep_xmin, deep_xmax = 900, 2900  # Horizontal bounds
deep_ymin, deep_ymax = 2600, 4600  # Vertical bounds

# A cell is part of the crop when at least one of its vertices lies inside it
# (`between` is inclusive on both ends, like the original >= / <= tests).
deep_vertices = data[
    data['vertex_x'].between(deep_xmin, deep_xmax) &
    data['vertex_y'].between(deep_ymin, deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Fill colour for each niche that should stand out
unique_cluster_colors = {
    'Lymphoid': 'cyan',
    'Plasma': '#FF00FF',
    'Crevicular': 'yellow',
}

# Fallback fill for every other niche (#4682B4 is a steel blue, not grey)
default_color = "#4682B4"

# Restrict the AnnData object to this sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells touching the crop. The match is made on `obs.index`
# (assumes the index holds the same IDs as the CSV `cell_id` - TODO confirm).
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> niche_cc14 label
cluster_mapping = adata_cropped.obs['niche_cc14'].to_dict()

# Annotate the COMPLETE outline of every cell touching the crop. Using only the
# in-crop vertices (as before) fills boundary cells from truncated polygons;
# the axis limits set below clip the full outlines at the crop edge instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['niche_cc14'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no niche label in `adata_cropped`
deep_data = deep_data.dropna(subset=['niche_cc14'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style before the figure exists so it affects the figure too
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# One filled polygon per cell; all rows of a cell share the same niche label
for cell_id, group in grouped:
    cluster = group['niche_cc14'].iloc[0]
    ax.fill(
        group['vertex_x'].values,
        group['vertex_y'].values,
        color=unique_cluster_colors.get(cluster, default_color),
        edgecolor='black',
        linewidth=0.5,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
ax.set_aspect('equal', adjustable='box')

# No ticks, no axes frame
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Show exactly the cropped window
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Scale bar. The length is given in DATA units because the bar is anchored to
# `ax.transData`; it is not a pixel count, and the earlier note "40 microns /
# 0.2125 microns per pixel" does not produce 200.
# NOTE(review): presumably the boundary coordinates are already in microns,
# making this a 200 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)
ax.add_artist(scalebar)

# Regions of interest as (xmin, xmax, ymin, ymax) in data coordinates
rois = [
    (2080, 2380, 3600, 3900),  # Lymphoid ROI
]

# Outline each ROI with an unfilled white square
for xmin, xmax, ymin, ymax in rois:
    ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                               edgecolor='white', facecolor='none', linewidth=2))

# Target path for the SVG. Saving is intentionally left disabled here, as in
# the original cell; uncomment the next line to write the file.
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_{sample_id}.svg"
#fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
fig.patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of a region of sample s2r3, filled by their
# `niche_cc14` niche label, with a white square marking a region of interest,
# and save the result as an SVG on a black background.
# Relies on `adata` (AnnData) already being loaded earlier in the notebook.

# Per-vertex cell boundary table (columns used: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the coordinate units of the boundary table
deep_xmin, deep_xmax = 1500, 3000  # Horizontal bounds
deep_ymin, deep_ymax = 2700, 4200  # Vertical bounds

# A cell is part of the crop when at least one of its vertices lies inside it
# (`between` is inclusive on both ends, like the original >= / <= tests).
deep_vertices = data[
    data['vertex_x'].between(deep_xmin, deep_xmax) &
    data['vertex_y'].between(deep_ymin, deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Fill colour for each niche that should stand out
unique_cluster_colors = {
    'Lymphoid': 'cyan',
    'Plasma': '#FF00FF',
    'Crevicular': 'yellow',
}

# Fallback fill for every other niche (#4682B4 is a steel blue, not grey)
default_color = "#4682B4"

# Restrict the AnnData object to this sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()

# Keep the cells touching the crop. The match is made on `obs.index`
# (assumes the index holds the same IDs as the CSV `cell_id` - TODO confirm).
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> niche_cc14 label
cluster_mapping = adata_cropped.obs['niche_cc14'].to_dict()

# Annotate the COMPLETE outline of every cell touching the crop. Using only the
# in-crop vertices (as before) fills boundary cells from truncated polygons;
# the axis limits set below clip the full outlines at the crop edge instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()
deep_data['niche_cc14'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no niche label in `adata_cropped`
deep_data = deep_data.dropna(subset=['niche_cc14'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style before the figure exists so it affects the figure too
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(10, 10))

# One filled polygon per cell; all rows of a cell share the same niche label
for cell_id, group in grouped:
    cluster = group['niche_cc14'].iloc[0]
    ax.fill(
        group['vertex_x'].values,
        group['vertex_y'].values,
        color=unique_cluster_colors.get(cluster, default_color),
        edgecolor='black',
        linewidth=0.5,
        alpha=0.7,
    )

# Equal aspect ratio so the tissue is not distorted
ax.set_aspect('equal', adjustable='box')

# No ticks, no axes frame
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')

# Show exactly the cropped window
ax.set_xlim(deep_xmin, deep_xmax)
ax.set_ylim(deep_ymin, deep_ymax)

# Scale bar. The length is given in DATA units because the bar is anchored to
# `ax.transData`; it is not a pixel count, and the earlier note "40 microns /
# 0.2125 microns per pixel" does not produce 200.
# NOTE(review): presumably the boundary coordinates are already in microns,
# making this a 200 micron bar - confirm against the Xenium output spec.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    ax.transData,
    scale_bar_length_px,  # Length in data units
    "",  # No label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

# Add the bar once (it was previously added to the axes twice)
ax.add_artist(scalebar)

# Regions of interest as (xmin, xmax, ymin, ymax) in data coordinates
rois = [
    (1800, 2300, 3500, 4000),  # Lymphoid ROI
]

# Outline each ROI with an unfilled white square
for xmin, xmax, ymin, ymax in rois:
    ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                               edgecolor='white', facecolor='none', linewidth=2))

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_Lymphoid_Niches_{sample_id}.svg"
fig.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
fig.patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside a rectangular crop, coloured by
# the `Lvl4` annotation, outline the lymphoid ROI and export the figure as SVG.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1500, 3000  # Horizontal bounds
deep_ymin, deep_ymax = 2700, 4200  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Clusters that get their own colour
unique_cluster_colors = {
    'B': 'cyan', 'PB': 'yellow', 'Pl.1': '#FF5F15', 'Pl.2': '#FF5F15', 'Th_proximity_to_B': 'magenta'
}

# Every cluster not listed above is drawn in this fallback colour (steel blue)
default_color = "#4682B4"

# Subset the AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the cropped cell IDs
# (assumes the obs index holds the same IDs as the CSV `cell_id` column)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> Lvl4 label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['Lvl4'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the cluster, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment derived this value as "40 microns / 0.2125
# microns per pixel" (~188), which does not match 200. Xenium boundary vertices
# are normally exported in microns, in which case this bar spans 200 um - confirm.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

# Register the bar once (it used to be added twice, drawing it twice)
plt.gca().add_artist(scalebar)

# Regions of interest as (xmin, xmax, ymin, ymax)
rois = [
    (1800, 2300, 3500, 4000),  # Lymphoid ROI
]

# Draw the squares with no fill and white outline
for xmin, xmax, ymin, ymax in rois:
    plt.gca().add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                      edgecolor='white', facecolor='none', linewidth=2))

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_Lymphoid_Clusters_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside the small lymphoid crop,
# coloured by the `Lvl4` annotation, and export the figure as SVG.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1800, 2300  # Horizontal bounds
deep_ymin, deep_ymax = 3500, 4000  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Clusters that get their own colour
unique_cluster_colors = {
    'B': 'cyan', 'PB': 'yellow', 'Pl.1': '#FF5F15', 'Pl.2': '#FF5F15', 'Th_proximity_to_B': 'magenta'
}

# Every cluster not listed above is drawn in this fallback colour (steel blue)
default_color = "#4682B4"

# Subset the AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the cropped cell IDs
# (assumes the obs index holds the same IDs as the CSV `cell_id` column)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> Lvl4 label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['Lvl4'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the cluster, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment ("40 microns / 0.2125 microns per pixel")
# does not correspond to the value 50. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 50 um - confirm.
scale_bar_length_px = 50
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=15,
    label_top=True,
)

# Register the bar once (it used to be added twice, drawing it twice)
plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_Lymphoid_clusters__small_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
# List the distinct values of the `final_label_citeSeq` column of `merged.obs`.
# NOTE(review): in this part of the notebook `merged` is first assigned in a
# later cell (sc.read_h5ad); unless an earlier cell also loads it, this cell
# fails on a fresh top-to-bottom run.
merged.obs['final_label_citeSeq'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside the small lymphoid crop,
# coloured by the `final_label_citeSeq` annotation of `merged`, and export
# the figure as SVG.
# NOTE(review): in this part of the notebook `merged` is first assigned in a
# later cell (sc.read_h5ad); make sure it is loaded before running this cell.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1800, 2300  # Horizontal bounds
deep_ymin, deep_ymax = 3500, 4000  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Labels that get their own colour
unique_cluster_colors = {
    'B Naive': '#0FFF50', 'B Memory 2': 'cyan', 'B Memory 1': 'cyan',
    'Plasmablast': 'yellow', 'Plasma 1': '#FF5F15', 'Plasma 2': '#FF5F15',
    'CD4 TFreg': 'magenta', 'CD4 IL17A+ FOXP3+': 'pink',
    'CD4 T naive': 'pink', 'CD4 TCM': 'pink', 'CD4 TFH': 'red',
}

# Every label not listed above is drawn in this fallback colour (steel blue)
default_color = "#4682B4"

# Subset the integrated AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = merged[merged.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `merged.obs`

# Strip the trailing '-Xenium' suffix so the obs index can be compared with the CSV cell IDs
adata_sample.obs.index = adata_sample.obs.index.str.replace(r'-Xenium$', '', regex=True)

# Keep the cells whose obs index appears among the cropped cell IDs
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> final_label_citeSeq label
cluster_mapping = adata_cropped.obs['final_label_citeSeq'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['final_label_citeSeq'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['final_label_citeSeq'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['final_label_citeSeq'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the label, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment ("40 microns / 0.2125 microns per pixel")
# does not correspond to the value 50. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 50 um - confirm.
scale_bar_length_px = 50
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=15,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_CiteSeq_small{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside the small lymphoid crop,
# coloured by the T-cell / mregDC labels of `final_label_citeSeq` in `merged`.
# The figure is only shown; the save call at the bottom is disabled.
# NOTE(review): in this part of the notebook `merged` is first assigned in a
# later cell (sc.read_h5ad); make sure it is loaded before running this cell.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1800, 2300  # Horizontal bounds
deep_ymin, deep_ymax = 3500, 4000  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Labels that get their own colour (the repeated 'CD4 T naive' key was removed;
# both occurrences mapped to the same colour)
unique_cluster_colors = {
    'CD4 TFreg': 'orange', 'mregDC': 'cyan',
    'CD4 IL17A+ FOXP3+': 'purple', 'CD4 T naive': 'purple',
    'CD4 Treg': 'red', 'CD4 TCM': 'purple',
    'CD8 T': 'yellow', 'CD8 T IKZF2+': 'yellow', 'CD8 T naive': 'yellow',
    'CD8 T TNF+': 'yellow', 'CD8 TRM': 'yellow',
    'CD4 TFH': '#39e75f',
}

# Every label not listed above is drawn in light grey
default_color = "#e1e1e1"

# Subset the integrated AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = merged[merged.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `merged.obs`

# Strip the trailing '-Xenium' suffix so the obs index can be compared with the CSV cell IDs
adata_sample.obs.index = adata_sample.obs.index.str.replace(r'-Xenium$', '', regex=True)

# Keep the cells whose obs index appears among the cropped cell IDs
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> final_label_citeSeq label
cluster_mapping = adata_cropped.obs['final_label_citeSeq'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['final_label_citeSeq'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['final_label_citeSeq'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['final_label_citeSeq'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the label, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment computed "40 microns / 0.2125 microns per
# pixel" (~188), yet the value used is 40. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 40 um - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=10,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Target path for the SVG export (export currently disabled).
# NOTE(review): this file name is shared with other cells of the notebook and
# does not describe this figure; pick a dedicated name before re-enabling the save.
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl4{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside the large crop, coloured by the
# `niche_cc14` annotation. The figure is only shown; the save call is disabled.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1500, 3000  # Horizontal bounds
deep_ymin, deep_ymax = 2700, 4200  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Niches that get their own colour
unique_cluster_colors = {
    'Lymphoid': 'cyan',
    'Plasma': '#FF00FF',
    'Crevicular': 'yellow'
}

# Every niche not listed above is drawn in this fallback colour (steel blue)
default_color = "#4682B4"

# Subset the AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the cropped cell IDs
# (assumes the obs index holds the same IDs as the CSV `cell_id` column)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> niche_cc14 label
cluster_mapping = adata_cropped.obs['niche_cc14'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['niche_cc14'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['niche_cc14'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_cc14'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the niche, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment derived this value as "40 microns / 0.2125
# microns per pixel" (~188), which does not match 200. Xenium boundary vertices
# are normally exported in microns, in which case this bar spans 200 um - confirm.
scale_bar_length_px = 200
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Target path for the SVG export (export currently disabled)
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside a 300 x 300 crop, coloured by
# the `Lvl4` annotation. The figure is only shown; the save call is disabled.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 2080, 2380  # Horizontal bounds
deep_ymin, deep_ymax = 3600, 3900  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Clusters that get their own colour
unique_cluster_colors = {
    'PB': 'magenta',
    'Pl.1': 'purple',
    'Pl.2': 'purple',
    'Th': 'green',
    "Th_proximity_to_B": 'green',
    'B': 'red',
}

# Every cluster not listed above is drawn in light grey
default_color = "#e1e1e1"

# Subset the AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the cropped cell IDs
# (assumes the obs index holds the same IDs as the CSV `cell_id` column)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> Lvl4 label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['Lvl4'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the cluster, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment computed "40 microns / 0.2125 microns per
# pixel" (~188), yet the value used is 40. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 40 um - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=10,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Target path for the SVG export (export currently disabled).
# NOTE(review): this file name is shared with other cells of the notebook;
# pick a dedicated name before re-enabling the save to avoid overwriting.
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl4{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
# List the distinct column names of `merged.obs`.
# NOTE(review): `merged` is assigned in the cell that follows; run that cell first.
merged.obs.columns.unique().tolist()

In [None]:
# Read the h5ad file into `merged` (presumably the Harmony-integrated
# Xenium + CITE-seq object, judging by the file name - confirm).
# NOTE(review): several cells above this one already use `merged`; on a fresh
# kernel this load has to run before them.
merged = sc.read_h5ad('/data/vasileiosionat2/Xenium/Integration_2025_only_stromal/Modified_citeseq_files/Xenium_citeSeq_total_harmony.h5ad')

In [None]:
# Display the per-observation metadata table of `merged`
merged.obs

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside a 300 x 300 crop, coloured by
# the CD4 T-cell labels of `final_label_citeSeq` in `merged`.
# The figure is only shown; the save call at the bottom is disabled.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 2080, 2380  # Horizontal bounds
deep_ymin, deep_ymax = 3600, 3900  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Labels that get their own colour
unique_cluster_colors = {
    'CD4 T naive': 'green',
    'CD4 TFH': 'yellow',
    'CD4 TCM': 'red',
    'CD4 TFreg': 'cyan',
    'CD4 IL17A+ FOXP3+': 'magenta',
}

# Every label not listed above is drawn in light grey
default_color = "#e1e1e1"

# Subset the integrated AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = merged[merged.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `merged.obs`

# Strip the trailing '-Xenium' suffix so the obs index can be compared with the CSV cell IDs
adata_sample.obs.index = adata_sample.obs.index.str.replace(r'-Xenium$', '', regex=True)

# Keep the cells whose obs index appears among the cropped cell IDs
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> final_label_citeSeq label
cluster_mapping = adata_cropped.obs['final_label_citeSeq'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['final_label_citeSeq'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['final_label_citeSeq'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['final_label_citeSeq'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the label, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment computed "40 microns / 0.2125 microns per
# pixel" (~188), yet the value used is 40. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 40 um - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=10,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Target path for the SVG export (export currently disabled).
# NOTE(review): this file name is shared with other cells of the notebook and
# does not describe this figure; pick a dedicated name before re-enabling the save.
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl4{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
# Count the occurrences of each `final_label_citeSeq` value in `adata_cropped`.
# NOTE(review): `adata_cropped` is reassigned by every plotting cell, so the
# counts describe whichever crop was built by the most recently executed one.
cell_type_counts = adata_cropped.obs['final_label_citeSeq'].value_counts()
print(cell_type_counts)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Draw the cell outlines of sample s2r3 inside a 300 x 300 crop, coloured by
# the `Lvl4` annotation. The figure is only shown; the save call is disabled.

# Load the boundary table (columns used below: cell_id, vertex_x, vertex_y)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r3/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Crop window, expressed in the same coordinates as vertex_x / vertex_y
deep_xmin, deep_xmax = 1350, 1650  # Horizontal bounds
deep_ymin, deep_ymax = 3550, 3850  # Vertical bounds

# Vertices lying inside the crop window
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
# Cells with at least one vertex inside the window, together with ALL of their
# vertices (including the ones that fall outside the window)
deep_cell_ids = deep_vertices['cell_id'].unique()
deep_data = data[data['cell_id'].isin(deep_cell_ids)]
grouped = deep_data.groupby('cell_id')

# Clusters that get their own colour
unique_cluster_colors = {
    'PB': '#FF00FF',
    'Pl.1': '#AA336A',
    'Pl.2': '#AA336A',
    'Th': '#00A36C',
    "Th_proximity_to_B": '#00A36C',
    'Fib.2': 'yellow',
    'VEC.1': 'blue',
    'B': '#FF4433',
    'mregDC': '#00FFFF',
}

# Every cluster not listed above is drawn in light grey
default_color = "#e1e1e1"

# Subset the AnnData object for the sample
sample_id = "s2r3"  # Replace with your sample's ID
adata_sample = adata[adata.obs['sample'] == sample_id].copy()  # Ensure `sample_id` exists in `adata.obs`

# Keep the cells whose obs index appears among the cropped cell IDs
# (assumes the obs index holds the same IDs as the CSV `cell_id` column)
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()
# cell_id -> Lvl4 label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Annotate the COMPLETE outlines. Previously only the in-window vertices were
# kept, so cells straddling the crop edge were filled from a partial vertex
# list; the axis limits set below already clip whatever lies outside the window.
deep_data = deep_data.copy()
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)

# Drop cells that have no label (cell IDs in the crop that are not in adata_cropped)
deep_data = deep_data.dropna(subset=['Lvl4'])

plt.figure(figsize=(10, 10))

# Set the style for black background
plt.style.use('dark_background')

# One filled polygon per cell
for cell_id, group in deep_data.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # All rows for the same cell_id carry the same label

    # Colour of the cluster, or the fallback colour
    color = unique_cluster_colors.get(cluster, default_color)

    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes frame
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Set the axis limits to match the cropped area
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. Built on `transData`, so its length and `size_vertical` are in
# data units (the units of vertex_x / vertex_y), not screen pixels.
# NOTE(review): the earlier comment computed "40 microns / 0.2125 microns per
# pixel" (~188), yet the value used is 40. Xenium boundary vertices are normally
# exported in microns, in which case this bar spans 40 um - confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # No text label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=10,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Target path for the SVG export (export currently disabled).
# NOTE(review): this file name is shared with other cells of the notebook;
# pick a dedicated name before re-enabling the save to avoid overwriting.
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Total_Epi_Lvl4{sample_id}.svg"
#plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()


In [None]:
# Display the boundary table loaded by the most recently executed plotting cell
data

In [None]:
# List the distinct sample identifiers stored in `adata.obs['sample']`
adata.obs['sample'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4200, 5000  # Horizontal bounds
deep_ymin, deep_ymax = 2400, 3500  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the Lvl1 clusters that get their own color
unique_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Clusters not in `unique_cluster_colors` fall back to this grey
default_color = "#e1e1e1"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r2_HV184"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl1 cluster label
cluster_mapping = adata_cropped.obs['Lvl1'].to_dict()

# Attach the cluster label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['Lvl1'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['Lvl1'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its cluster
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl1'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Major_clusters_Lvl1{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4300, 4900  # Horizontal bounds
deep_ymin, deep_ymax = 2300, 3250  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the Lvl4 clusters that get their own color
unique_cluster_colors = {
    'B': '#7DF9FF',
    'Th': '#0818A8',
    'Th_proximity_to_B': '#0096FF',
    'mregDC': '#40E0D0',
    'Pl.1': '#800020',
    'Pl.2': '#F88379',
    'PB': '#FF0000',
    'Fib.2': '#DFFF00',
    'Mac': '#8B8000',
    'MyoF': '#FFC000'
}

# Clusters not in `unique_cluster_colors` fall back to this dark grey
default_color = "#454143"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r2_HV184"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl4 cluster label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Attach the cluster label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['Lvl4'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its cluster
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Lvl4_Xenium_TLS_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Load the CSV file for transcripts
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the boundary polygons drawn as outlines. Note that this reads the
# cell_boundaries file, despite the `nucleus` / `nuclei` variable names.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r2_HV184/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Bounds of the cropped area
xmin, xmax = 4200, 5000  # Horizontal bounds
ymin, ymax = 2400, 3500  # Vertical bounds

# Transcripts located inside the cropped area
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts to plot
transcript_of_interest = ["KRT19", "PTPRC", "VCAN", "VWF"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the cropped area
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually defined color for each transcript
transcript_color_map = {
    "KRT19": "#FFEA00",
    "PTPRC": "#008000",
    "VCAN": "#0000FF",
    "VWF": "#990F0FFF",
}

# Start plotting with black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw the boundaries as thin grey outlines (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    plt.plot(x, y, color='grey', linewidth=0.25)

# Draw every selected transcript in ONE scatter call with a per-point color.
# The table was already restricted to `transcript_of_interest` above, so every
# feature_name has an entry in the color map; points keep their row order, so
# overlaps are drawn exactly as a row-by-row loop would draw them, without
# creating one matplotlib artist per transcript.
point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
plt.scatter(
    filtered_transcripts['x_location'].values,
    filtered_transcripts['y_location'].values,
    c=point_colors,
    s=3,
    alpha=0.7,
)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in x_location / vertex_x units, not in pixels.
# NOTE(review): 10x documents Xenium coordinates in microns, which would make
# this a 40 micron bar with no pixel conversion involved -- confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Xenium_major_transcripts_HV184.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s1r2_HV187A/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 1500, 2600  # Horizontal bounds
deep_ymin, deep_ymax = 450, 1250  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the Lvl1 clusters that get their own color
unique_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Clusters not in `unique_cluster_colors` fall back to this grey
default_color = "#e1e1e1"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
# NOTE(review): the other plotting cells use IDs such as "s2r2_HV184", and this
# sample's folder is "s1r2_HV187A"; confirm that "s1r2" is the value stored in
# adata.obs['sample'], otherwise every cell is dropped below and the plot is empty.
sample_id = "s1r2"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl1 cluster label
cluster_mapping = adata_cropped.obs['Lvl1'].to_dict()

# Attach the cluster label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['Lvl1'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['Lvl1'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its cluster
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl1'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Major_clusters_Lvl1{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4000, 4800  # Horizontal bounds
deep_ymin, deep_ymax = 1850, 2950  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the Lvl1 clusters that get their own color
unique_cluster_colors = {
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Epithelial': '#FFEA00',
    'Vascular': '#990F0FFF',
    'Other': '#e5e5e5'
}

# Clusters not in `unique_cluster_colors` fall back to this grey
default_color = "#e1e1e1"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r1_HV207"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl1 cluster label
cluster_mapping = adata_cropped.obs['Lvl1'].to_dict()

# Attach the cluster label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['Lvl1'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['Lvl1'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its cluster
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl1'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Major_clusters_Lvl1{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4000, 4800  # Horizontal bounds
deep_ymin, deep_ymax = 1850, 2950  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the niches that get their own color
unique_cluster_colors = {
    'Fibrous CT': 'red',
    'Epi-CT': '#00FFFF',
    'Plasma-Fib CT': '#CF9FFF',
    'Plasma': 'magenta',
    'Spinous': '#5D3FD3',
    'Keratin': '#0096FF',
    'Crevicular': '#800080',
    'Lymphoid': 'yellow'
}

# Niches not in `unique_cluster_colors` fall back to this grey
default_color = "#e1e1e1"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r1_HV207"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> niche label
cluster_mapping = adata_cropped.obs['niche_knn50k10_merged'].to_dict()

# Attach the niche label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['niche_knn50k10_merged'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['niche_knn50k10_merged'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its niche
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_knn50k10_merged'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Niches_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4100, 4700  # Horizontal bounds
deep_ymin, deep_ymax = 1600, 2550  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the niches that get their own color
unique_cluster_colors = {
    'Plasma-Fib CT': '#7F00FF',
    'Plasma': 'red',
    'Crevicular': '#D3D3D3',
    'Lymphoid': 'cyan'
}

# Niches not in `unique_cluster_colors` fall back to this dark grey
default_color = "#454143"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r1_HV207"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> niche label
cluster_mapping = adata_cropped.obs['niche_knn50k10_merged'].to_dict()

# Attach the niche label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['niche_knn50k10_merged'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['niche_knn50k10_merged'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its niche
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['niche_knn50k10_merged'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_Niches_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
# List the distinct labels stored in adata.obs['Lvl4'] (the labels the Lvl4
# color dictionaries in the plotting cells are keyed by).
adata.obs['Lvl4'].unique().tolist()

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import pandas as pd
import seaborn as sns

# Load the cell-boundary vertex table for this sample (one row per polygon vertex)
csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/cell_boundaries.csv.gz"
data = pd.read_csv(csv_path)

# Bounds of the cropped area, in the coordinate units of vertex_x / vertex_y
deep_xmin, deep_xmax = 4100, 4700  # Horizontal bounds
deep_ymin, deep_ymax = 1600, 2550  # Vertical bounds

# Cells with at least one boundary vertex inside the crop
deep_vertices = data[
    (data['vertex_x'] >= deep_xmin) & (data['vertex_x'] <= deep_xmax) &
    (data['vertex_y'] >= deep_ymin) & (data['vertex_y'] <= deep_ymax)
]
deep_cell_ids = deep_vertices['cell_id'].unique()

# Keep the COMPLETE outline of each of those cells. Filling only the in-crop
# vertices cuts border cells short and leaves notches along the frame; the axis
# limits set below clip the full outlines to the crop instead.
deep_data = data[data['cell_id'].isin(deep_cell_ids)].copy()

# Colors for the Lvl4 clusters that get their own color
unique_cluster_colors = {
    'B': '#7DF9FF',
    'Th': '#0818A8',
    'Th_proximity_to_B': '#0096FF',
    'mregDC': '#40E0D0',
    'Pl.1': '#800020',
    'Pl.2': '#F88379',
    'PB': '#FF0000',
    'Fib.2': '#DFFF00',
    'Mac': '#8B8000',
    'MyoF': '#FFC000'
}

# Clusters not in `unique_cluster_colors` fall back to this dark grey
default_color = "#454143"

# Subset the AnnData object to this sample, then to the cells touching the crop.
# The lookup assumes adata.obs is indexed by the same IDs as the CSV `cell_id` column.
sample_id = "s2r1_HV207"
adata_sample = adata[adata.obs['sample'] == sample_id].copy()
adata_cropped = adata_sample[adata_sample.obs.index.isin(deep_cell_ids)].copy()

# cell_id -> Lvl4 cluster label
cluster_mapping = adata_cropped.obs['Lvl4'].to_dict()

# Attach the cluster label to every vertex row and drop cells that have no
# annotation in adata_cropped
deep_data['Lvl4'] = deep_data['cell_id'].map(cluster_mapping)
deep_data = deep_data.dropna(subset=['Lvl4'])
grouped = deep_data.groupby('cell_id')

# Apply the dark style BEFORE creating the figure so it takes effect for this
# figure even when no earlier cell has set it
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Fill each cell outline with the color of its cluster
for cell_id, group in grouped:
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    cluster = group['Lvl4'].iloc[0]  # every vertex row of a cell carries the same label
    color = unique_cluster_colors.get(cluster, default_color)
    plt.fill(x, y, color=color, edgecolor='black', linewidth=0.5, alpha=0.7)

# Equal aspect ratio to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove ticks and hide the axes
plt.xticks([])
plt.yticks([])
plt.axis('off')

# Restrict the view to the cropped area (this also clips border cells)
plt.xlim(deep_xmin, deep_xmax)
plt.ylim(deep_ymin, deep_ymax)

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in vertex_x / vertex_y units, not in pixels.
# NOTE(review): 10x documents Xenium boundary coordinates in microns, which
# would make this a 100 micron bar -- confirm before captioning the figure.
scale_bar_length_px = 100
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=20,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_Lvl4_TLS_{sample_id}.svg"
plt.savefig(output_path, format='svg', facecolor='black')

# Show the plot with a black background
plt.gcf().patch.set_facecolor('black')
plt.show()

In [None]:
# List the distinct labels stored in adata.obs['niche_knn50k10_merged'] (the
# labels the niche color dictionaries in the plotting cells are keyed by).
adata.obs['niche_knn50k10_merged'].unique().tolist()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar

# Load the CSV file for transcripts
transcript_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/transcripts.csv.gz"
transcripts = pd.read_csv(transcript_csv_path)

# Load the boundary polygons drawn as outlines. Note that this reads the
# cell_boundaries file, despite the `nucleus` / `nuclei` variable names.
nucleus_csv_path = "/data/vasileiosionat2/Xenium/Plotting_Paper_figures/s2r1-HV207/cell_boundaries.csv.gz"
nuclei = pd.read_csv(nucleus_csv_path)

# Bounds of the cropped area
xmin, xmax = 4000, 4800  # Horizontal bounds
ymin, ymax = 1850, 2950  # Vertical bounds

# Transcripts located inside the cropped area
filtered_transcripts = transcripts[
    (transcripts['x_location'] >= xmin) & (transcripts['x_location'] <= xmax) &
    (transcripts['y_location'] >= ymin) & (transcripts['y_location'] <= ymax)
]

# Keep only the transcripts to plot
transcript_of_interest = ["KRT19", "PTPRC", "VCAN", "VWF"]
filtered_transcripts = filtered_transcripts[filtered_transcripts['feature_name'].isin(transcript_of_interest)]

# Boundary vertices located inside the cropped area
filtered_nuclei = nuclei[
    (nuclei['vertex_x'] >= xmin) & (nuclei['vertex_x'] <= xmax) &
    (nuclei['vertex_y'] >= ymin) & (nuclei['vertex_y'] <= ymax)
]

# Manually defined color for each transcript
transcript_color_map = {
    "KRT19": "#FFEA00",
    "PTPRC": "#008000",
    "VCAN": "#0000FF",
    "VWF": "#990F0FFF",
}

# Start plotting with black background
plt.style.use('dark_background')
plt.figure(figsize=(10, 10))

# Draw the boundaries as thin grey outlines (no fill)
for cell_id, group in filtered_nuclei.groupby('cell_id'):
    x = group['vertex_x'].values
    y = group['vertex_y'].values
    plt.plot(x, y, color='grey', linewidth=0.25)

# Draw every selected transcript in ONE scatter call with a per-point color.
# The table was already restricted to `transcript_of_interest` above, so every
# feature_name has an entry in the color map; points keep their row order, so
# overlaps are drawn exactly as a row-by-row loop would draw them, without
# creating one matplotlib artist per transcript.
point_colors = filtered_transcripts['feature_name'].map(transcript_color_map).tolist()
plt.scatter(
    filtered_transcripts['x_location'].values,
    filtered_transcripts['y_location'].values,
    c=point_colors,
    s=3,
    alpha=0.7,
)

# Set the aspect ratio to be equal to avoid distortion
plt.gca().set_aspect('equal', adjustable='box')

# Remove axes labels and ticks
plt.xticks([])
plt.yticks([])

# Scale bar. It is anchored in data coordinates (transData), so its length is
# expressed in x_location / vertex_x units, not in pixels.
# NOTE(review): 10x documents Xenium coordinates in microns, which would make
# this a 40 micron bar with no pixel conversion involved -- confirm.
scale_bar_length_px = 40
scalebar = AnchoredSizeBar(
    plt.gca().transData,
    scale_bar_length_px,  # Length in data units
    "",  # Label
    loc="lower left",
    pad=0.5,
    color="white",
    frameon=False,
    size_vertical=5,
    label_top=True,
)

plt.gca().add_artist(scalebar)

# Save the plot as an SVG file
output_path = f"/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Xenium_major_transcripts_HV207.svg"
plt.savefig(output_path, format='svg', facecolor='black', bbox_inches='tight')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
import scanpy as sc

# Step 1: Ensure all observation names are unique
adata.obs_names_make_unique()

# Step 2: Log-transform the data only if this has not been done yet.
# sc.pp.log1p records a completed transform under adata.uns['log1p']; calling it again only
# warns and then transforms the already-logged matrix a second time, so re-running this cell
# (or running the sibling dotplot cell) must not repeat it.
# NOTE(review): this cannot detect a matrix that was log-transformed before the .h5ad was
# written without that uns entry — confirm the state of the loaded file.
if 'log1p' not in adata.uns:
    sc.pp.log1p(adata)

# Step 3: Perform differential expression analysis using the Wilcoxon method
sc.tl.rank_genes_groups(adata, 'niche_cc14', method='wilcoxon', use_raw=False)

# Step 4: Extract top marker genes for each cluster (optional for reference)
top_genes_per_cluster = {}
for cluster in adata.obs['niche_cc14'].cat.categories:
    top_genes_per_cluster[cluster] = adata.uns['rank_genes_groups']['names'][cluster][:10]  # Top 10 genes

# Step 5: Generate dendrogram for cluster ordering
sc.tl.dendrogram(adata, groupby='niche_cc14')

# Step 6: Retrieve the cluster order based on the dendrogram
cluster_order = adata.uns['dendrogram_niche_cc14']['categories_ordered']

# Step 7: Ligand genes to show in the dotplot
custom_genes = ['ANPEP', 'ANXA1', 'APOA5', 'C3', 'CAMP', 'CCL19', 'CCL5', 'CCN1', 'CD274',
                'CD28', 'CD34', 'CD70', 'CD86', 'CFH', 'COL16A1', 'COL17A1', 'COL5A2', 'COL8A1',
                'CSF3', 'CXCL1', 'CXCL10', 'CXCL12', 'CXCL14', 'CXCL2', 'CXCL5', 'CXCL6', 'CXCL9', 'EDN1',
                'GDF15', 'HLA-DMB', 'HLA-DQA2', 'HLA-DQB2', 'HLA-DRB5', 'ICAM1', 'IGF1', 'IL1A', 'IL1RN',
                'IL23A', 'IL33', 'ITGB2', 'KNG1', 'LAMC2', 'LPL', 'MMRN2', 'PECAM1', 'POSTN', 'PRG4',
                'PTN', 'PTPRC', 'RETN', 'S100A12', 'SAA1', 'SLAMF7', 'SLPI', 'TFF2', 'THBS2', 'THY1', 'TNC', 'VCAN']

# Filter the custom list to include only genes present in the dataset
genes_for_plot = [gene for gene in custom_genes if gene in adata.var_names]

# Step 8: Plot the dotplot with the chosen genes (genes on rows because swap_axes=True)
sc.pl.dotplot(
    adata,
    var_names=genes_for_plot,
    groupby='niche_cc14',
    dendrogram=True,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
    swap_axes=True,
    dot_max=0.7
)

In [None]:
import scanpy as sc

# Step 1: Ensure all observation names are unique
adata.obs_names_make_unique()

# Step 2: Log-transform the data only if this has not been done yet.
# sc.pp.log1p records a completed transform under adata.uns['log1p']; calling it again only
# warns and then transforms the already-logged matrix a second time, so this cell must not
# repeat a transform performed by an earlier cell or an earlier run.
# NOTE(review): this cannot detect a matrix that was log-transformed before the .h5ad was
# written without that uns entry — confirm the state of the loaded file.
if 'log1p' not in adata.uns:
    sc.pp.log1p(adata)

# Step 3: Perform differential expression analysis using the Wilcoxon method
sc.tl.rank_genes_groups(adata, 'niche_cc14', method='wilcoxon', use_raw=False)

# Step 4: Extract top marker genes for each cluster (optional for reference)
top_genes_per_cluster = {}
for cluster in adata.obs['niche_cc14'].cat.categories:
    top_genes_per_cluster[cluster] = adata.uns['rank_genes_groups']['names'][cluster][:10]  # Top 10 genes

# Step 5: Generate dendrogram for cluster ordering
sc.tl.dendrogram(adata, groupby='niche_cc14')

# Step 6: Retrieve the cluster order based on the dendrogram
cluster_order = adata.uns['dendrogram_niche_cc14']['categories_ordered']

# Step 7: Receptor genes to show in the dotplot
custom_genes = ['SELE', 'EGFR', 'FPR1', 'LPL', 'CD19', 'ITGB2', 'VSIG4', 'CCR7',
                'ACKR1', 'SDC1', 'TLR2', 'ITGB5', 'TLR4', 'PDCD1', 'CD86', 'CD247',
                'CD4', 'SELL', 'CD27', 'CD28', 'CTLA4', 'CFB', 'ITGA1', 'CD93', 'CSF3R',
                'CSF2RA', 'CXCR4', 'EDNRB', 'ADGRL4', 'ERBB2', 'LAG3', 'IL2RA', 'ITGB4', 'IL1R2',
                'IL23R', 'LILRB2', 'IL1RL1', 'ICAM1', 'THY1', 'CLEC14A', 'PECAM1', 'MRC1', 'SLAMF7']

# Filter the custom list to include only genes present in the dataset
genes_for_plot = [gene for gene in custom_genes if gene in adata.var_names]

# Step 8: Plot the dotplot with the chosen genes (genes on rows because swap_axes=True)
sc.pl.dotplot(
    adata,
    var_names=genes_for_plot,
    groupby='niche_cc14',
    dendrogram=True,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
    swap_axes=True,
    dot_max=0.7
)

In [None]:
# Show the distinct column names of adata.obs as a plain Python list
adata.obs.columns.unique().tolist()

In [None]:
import pandas as pd
import numpy as np
import os

# Assuming adata is already loaded
# List of ligand-receptor pairs
ligand_receptor_pairs = [
    ('ANPEP', 'SELE'),
    ('ANXA1', 'EGFR'),
    ('ANXA1', 'FPR1'),
    ('APOA5', 'LPL'),
    ('C3', 'CD19'),
    ('C3', 'ITGB2'),
    ('C3', 'VSIG4'),
    ('CAMP', 'EGFR'),
    ('CCL19', 'CCR7'),
    ('CCL5', 'ACKR1'),
    ('CCL5', 'SDC1'),
    ('CCN1', 'TLR2'),
    ('CCN1', 'ITGB5'),
    ('CCN1', 'TLR4'),
    ('CCN1', 'ITGB2'),
    ('CD274', 'PDCD1'),
    ('CD28', 'CD86'),
    ('CD28', 'CD247'),
    ('CD28', 'CD4'),
    ('CD34', 'SELL'),
    ('CD34', 'SELE'),
    ('CD70', 'CD27'),
    ('CD86', 'CD28'),
    ('CD86', 'CTLA4'),
    ('CFH', 'CFB'),
    ('CFH', 'SELL'),
    ('COL16A1', 'ITGA1'),
    ('COL17A1', 'ITGA1'),
    ('COL5A2', 'SDC1'),
    ('COL5A2', 'CD93'),
    ('COL5A2', 'ITGA1'),
    ('COL8A1', 'ITGA1'),
    ('CSF3', 'CSF3R'),
    ('CSF3', 'CSF2RA'),
    ('CXCL1', 'ACKR1'),
    ('CXCL10', 'ACKR1'),
    ('CXCL12', 'CXCR4'),
    ('CXCL12', 'CD4'),
    ('CXCL14', 'CXCR4'),
    ('CXCL2', 'ACKR1'),
    ('CXCL5', 'ACKR1'),
    ('CXCL6', 'ACKR1'),
    ('CXCL9', 'ACKR1'),
    ('EDN1', 'EDNRB'),
    ('EDN1', 'ADGRL4'),
    ('EDN1', 'EGFR'),
    ('GDF15', 'ERBB2'),
    ('HLA-DMB', 'CD4'),
    ('HLA-DQA2', 'LAG3'),
    ('HLA-DQA2', 'CD4'),
    ('HLA-DQB2', 'LAG3'),
    ('HLA-DRB5', 'LAG3'),
    ('HLA-DRB5', 'CD4'),
    ('ICAM1', 'EGFR'),
    ('ICAM1', 'IL2RA'),
    ('ICAM1', 'ITGB2'),
    ('IGF1', 'ITGB4'),
    ('IL1A', 'IL1R2'),
    ('IL1RN', 'IL1R2'),
    ('IL23A', 'IL23R'),
    ('IL23A', 'LILRB2'),
    ('IL33', 'IL1RL1'),
    ('ITGB2', 'ICAM1'),
    ('ITGB2', 'THY1'),
    ('KNG1', 'ITGB2'),
    ('KNG1', 'CD93'),
    ('LAMC2', 'ITGB4'),
    ('LPL', 'SDC1'),
    ('MMRN2', 'CD93'),
    ('MMRN2', 'CLEC14A'),
    ('PECAM1', 'PECAM1'),
    ('POSTN', 'ITGB5'),
    ('PRG4', 'TLR2'),
    ('PRG4', 'TLR4'),
    ('PTN', 'SDC1'),
    ('PTPRC', 'MRC1'),
    ('PTPRC', 'CD247'),
    ('PTPRC', 'CD4'),
    ('RETN', 'TLR4'),
    ('S100A12', 'TLR4'),
    ('SAA1', 'FPR1'),
    ('SAA1', 'TLR2'),
    ('SLAMF7', 'SLAMF7'),
    ('SLPI', 'CD4'),
    ('TFF2', 'CXCR4'),
    ('THBS2', 'SDC1'),
    ('THY1', 'ITGB2'),
    ('TNC', 'SDC1'),
    ('TNC', 'EGFR'),
    ('VCAN', 'EGFR'),
    ('VCAN', 'SELL'),
    ('VCAN', 'TLR2')
]

# Directory to save files (created if missing, because DataFrame.to_csv does not create folders)
output_dir = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/"
os.makedirs(output_dir, exist_ok=True)

# Map each ligand to the list of its receptors, in the order the pairs are listed
ligand_dict = {}
for ligand, receptor in ligand_receptor_pairs:
    ligand_dict.setdefault(ligand, []).append(receptor)

# Expression values are read from adata.raw when it exists, otherwise from adata itself
expression_source = adata.raw if adata.raw is not None else adata


def _is_measured(gene):
    """Return True when `gene` is in adata.var_names and in the matrix the values are read from."""
    return gene in adata.var_names and gene in expression_source.var_names


def _expressed_mask(gene):
    """Return a 1-D boolean array, one entry per cell, that is True where the value of `gene` is > 0."""
    values = expression_source[:, gene].X
    if hasattr(values, 'toarray'):  # sparse matrix -> dense
        values = values.toarray()
    # Flatten the (n_cells, 1) slice so it is stored as an ordinary column
    return np.asarray(values).ravel() > 0


# Write one CSV per ligand: cell metadata plus a True/False column for the ligand and each receptor
for ligand, receptors in ligand_dict.items():
    data = adata.obs[['cell_id', 'x_centroid', 'y_centroid', 'sample', 'niche_cc14', 'Lvl4']].copy()

    # Add a column for the ligand's expression
    if _is_measured(ligand):
        data[ligand] = _expressed_mask(ligand)
    else:
        # The file is still written, but without a column for the ligand
        print(f"Warning: ligand {ligand} not found in the expression matrix; no {ligand} column written")

    # Add a column for each receptor's expression
    for receptor in receptors:
        if _is_measured(receptor):
            data[receptor] = _expressed_mask(receptor)  # Change `> 0` in _expressed_mask to use another threshold

    # Save to a CSV file
    file_name = os.path.join(output_dir, f"{ligand}_receptors.csv")
    data.to_csv(file_name, index=False)
    print(f"Saved {ligand} receptors to {file_name}")


In [None]:
import pandas as pd
import os

# Directory where ligand-receptor CSV files are saved
input_dir = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/"

# Directory to save files for each sample.
# The cells that consume these files read them from .../Ligand-Receptor/Samples/<sample>/,
# so they are written there (the previous .../Outputs/Samples/ location was never read back).
output_base_dir = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Samples/"

# List of ligand-receptor files found directly in input_dir
ligand_files = [f for f in os.listdir(input_dir) if f.endswith('_receptors.csv')]

# Loop through each ligand-receptor file
for ligand_file in ligand_files:
    ligand_path = os.path.join(input_dir, ligand_file)

    # Load the ligand-receptor data into a DataFrame
    data = pd.read_csv(ligand_path)

    # File name without the '.csv' extension, e.g. 'C3_receptors'
    file_stem = os.path.splitext(ligand_file)[0]

    # One group per sample, in order of first appearance; rows with a missing sample are skipped
    for sample, sample_data in data.groupby('sample', sort=False):
        # Create a folder for the sample if it doesn't exist
        sample_folder = os.path.join(output_base_dir, str(sample))
        os.makedirs(sample_folder, exist_ok=True)

        # Save the rows of this sample as '<ligand>_receptors_<sample>.csv'
        sample_file_name = f"{file_stem}_{sample}.csv"
        sample_file_path = os.path.join(sample_folder, sample_file_name)
        sample_data.to_csv(sample_file_path, index=False)

        print(f"Saved {sample_file_name} to {sample_folder}")

In [None]:
import pandas as pd
import os

# Define the ligands and receptors
ligands = ['ANPEP', 'ANXA1', 'APOA5', 'C3', 'CAMP', 'CCL19', 'CCL5', 'CCN1', 'CD274',
                'CD28', 'CD34', 'CD70', 'CD86', 'CFH', 'COL16A1', 'COL17A1', 'COL5A2', 'COL8A1', 
                'CSF3', 'CXCL1', 'CXCL10', 'CXCL12', 'CXCL14', 'CXCL2', 'CXCL5', 'CXCL6', 'CXCL9', 'EDN1', 
                'GDF15', 'HLA-DMB', 'HLA-DQA2', 'HLA-DQB2', 'HLA-DRB5', 'ICAM1', 'IGF1', 'IL1A', 'IL1RN', 
                'IL23A', 'IL33', 'ITGB2', 'KNG1', 'LAMC2', 'LPL', 'MMRN2', 'PECAM1', 'POSTN', 'PRG4', 
                'PTN', 'PTPRC', 'RETN', 'S100A12', 'SAA1', 'SLAMF7', 'SLPI', 'TFF2', 'THBS2', 'THY1', 'TNC', 'VCAN']  # Replace with your actual list of ligands 

# Define the directory where your receptor CSVs are stored
csv_directory = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Samples/HV220A_B'

# Rows of [ligand name, number of ligand-positive cells]
ligand_table = []

# Iterate through each ligand
for ligand in ligands:
    print(f"Processing ligand: {ligand}")

    # Path to the ligand's receptor CSV
    receptor_csv_path = os.path.join(csv_directory, f"{ligand}_receptors_HV220A_B.csv")

    # Ligands without a file are left out of the table
    if not os.path.exists(receptor_csv_path):
        continue

    # Read the receptor CSV
    receptor_df = pd.read_csv(receptor_csv_path)

    # A file may have been written without a column for the ligand itself; skip it instead of failing
    if ligand not in receptor_df.columns:
        print(f"No {ligand} column in {receptor_csv_path}. Skipping.")
        continue

    # Count the number of cells that express the ligand (number of True values in the ligand column)
    ligand_cell_count = receptor_df[ligand].sum()

    ligand_table.append([ligand, ligand_cell_count])

# Convert the ligand_table to a DataFrame
ligand_table_df = pd.DataFrame(ligand_table, columns=['Ligand Name', 'Ligand Cell Count'])

# The table is not written to disk here; enable the next line to save it.
output_table_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Stats.csv'
# ligand_table_df.to_csv(output_table_path, index=False)
print(f"Ligand table built for {len(ligand_table_df)} ligands (not written to {output_table_path})")

# Last expression of the cell, so the table is displayed
ligand_table_df


In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.neighbors import BallTree

# Define the ligands and directory
ligands = ['ANPEP', 'ANXA1', 'APOA5', 'C3', 'CAMP', 'CCL19', 'CCL5', 'CCN1', 'CD274',
           'CD28', 'CD34', 'CD70', 'CD86', 'CFH', 'COL16A1', 'COL17A1', 'COL5A2', 'COL8A1',
           'CSF3', 'CXCL1', 'CXCL10', 'CXCL12', 'CXCL14', 'CXCL2', 'CXCL5', 'CXCL6', 'CXCL9',
           'EDN1', 'GDF15', 'HLA-DMB', 'HLA-DQA2', 'HLA-DQB2', 'HLA-DRB5', 'ICAM1', 'IGF1',
           'IL1A', 'IL1RN', 'IL23A', 'IL33', 'ITGB2', 'KNG1', 'LAMC2', 'LPL', 'MMRN2',
           'PECAM1', 'POSTN', 'PRG4', 'PTN', 'PTPRC', 'RETN', 'S100A12', 'SAA1', 'SLAMF7',
           'SLPI', 'TFF2', 'THBS2', 'THY1', 'TNC', 'VCAN']

csv_directory = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Samples/HV220A_B'
output_table_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Stats.csv'

# Rows of [ligand name, ligand-positive cell count, total neighbors summed over ligand-positive cells]
ligand_table = []

# Process each ligand CSV
for ligand in ligands:
    print(f"Processing ligand: {ligand}")

    receptor_csv_path = os.path.join(csv_directory, f"{ligand}_receptors_HV220A_B.csv")

    if not os.path.exists(receptor_csv_path):
        print(f"File for {ligand} not found.")
        continue

    receptor_df = pd.read_csv(receptor_csv_path)

    # Ensure coordinate columns exist
    if not {'x_centroid', 'y_centroid'}.issubset(receptor_df.columns):
        print(f"Missing coordinates for ligand {ligand}. Skipping.")
        continue

    # Count ligand-positive cells (0 when the file has no column for the ligand)
    ligand_cell_count = receptor_df[ligand].sum() if ligand in receptor_df else 0

    # Mask of ligand-positive cells. When the ligand column is absent the mask is all False,
    # so the ligand is reported with zero cells instead of raising KeyError.
    if ligand in receptor_df.columns:
        ligand_positive = receptor_df[ligand] == True
    else:
        ligand_positive = pd.Series(False, index=receptor_df.index)

    # Handle NaN in coordinates; a constant z=0 column is added when the file has no 'z'.
    # NOTE(review): filling missing x/y with 0 places those cells at the origin — confirm this is intended.
    receptor_df['z'] = receptor_df['z'].fillna(0) if 'z' in receptor_df.columns else 0
    receptor_df[['x_centroid', 'y_centroid', 'z']] = receptor_df[['x_centroid', 'y_centroid', 'z']].fillna(0)

    # Coordinates of the ligand-positive cells only
    data_img = receptor_df.loc[ligand_positive, ['x_centroid', 'y_centroid', 'z']]

    if data_img.empty:
        print(f"No true ligand cells for {ligand}.")
        ligand_table.append([ligand, ligand_cell_count, 0])
        continue

    # Build the tree on all cells and query a radius of 10 (coordinate units) around each ligand-positive cell
    kdt = BallTree(receptor_df[['x_centroid', 'y_centroid', 'z']], metric='euclidean')
    ind = kdt.query_radius(data_img, r=10, return_distance=False)

    # Each query cell is returned as its own neighbor (distance 0), hence the -1 per cell
    num_total_neighbors = sum(len(neighbors) - 1 for neighbors in ind)

    ligand_table.append([ligand, ligand_cell_count, num_total_neighbors])

# Save results
ligand_table_df = pd.DataFrame(ligand_table, columns=['Ligand Name', 'Ligand Cell Count', 'Total Neighbors'])
ligand_table_df.to_csv(output_table_path, index=False)

print(f"Ligand table saved to {output_table_path}")


In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.neighbors import BallTree

# Define ligands and receptors
ligands = ['ANPEP', 'ANXA1', 'APOA5', 'C3', 'CAMP', 'CCL19', 'CCL5', 'CCN1', 'CD274',
           'CD28', 'CD34', 'CD70', 'CD86', 'CFH', 'COL16A1', 'COL17A1', 'COL5A2', 'COL8A1',
           'CSF3', 'CXCL1', 'CXCL10', 'CXCL12', 'CXCL14', 'CXCL2', 'CXCL5', 'CXCL6', 'CXCL9',
           'EDN1', 'GDF15', 'HLA-DMB', 'HLA-DQA2', 'HLA-DQB2', 'HLA-DRB5', 'ICAM1', 'IGF1',
           'IL1A', 'IL1RN', 'IL23A', 'IL33', 'ITGB2', 'KNG1', 'LAMC2', 'LPL', 'MMRN2',
           'PECAM1', 'POSTN', 'PRG4', 'PTN', 'PTPRC', 'RETN', 'S100A12', 'SAA1', 'SLAMF7',
           'SLPI', 'TFF2', 'THBS2', 'THY1', 'TNC', 'VCAN']

receptors = ['SELE', 'EGFR', 'FPR1', 'LPL', 'CD19', 'ITGB2', 'VSIG4', 'CCR7', 'ACKR1', 'SDC1',
             'TLR2', 'ITGB5', 'TLR4', 'PDCD1', 'CD86', 'CD247', 'CD4', 'SELL', 'CD27', 'CD28',
             'CTLA4', 'CFB', 'ITGA1', 'CD93', 'CSF3R', 'CSF2RA', 'CXCR4', 'EDNRB', 'ADGRL4',
             'ERBB2', 'LAG3', 'IL2RA', 'ITGB4', 'IL1R2', 'IL23R', 'LILRB2', 'IL1RL1', 'ICAM1',
             'THY1', 'CLEC14A', 'PECAM1', 'MRC1', 'SLAMF7']

base_directory = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Samples/'
output_directory = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Neighbors/'

# Iterate over each sample folder
for sample_folder in os.listdir(base_directory):
    sample_path = os.path.join(base_directory, sample_folder)

    # Check if it's a folder
    if not os.path.isdir(sample_path):
        continue

    # One row per ligand:
    # [name, ligand-positive cell count, mean neighbors per ligand cell, mean <receptor>-positive neighbors ...]
    ligand_table = []

    # Process each ligand CSV in the current sample
    for ligand in ligands:
        print(f"Processing ligand: {ligand} for sample: {sample_folder}")

        receptor_csv_path = os.path.join(sample_path, f"{ligand}_receptors_{sample_folder}.csv")

        if not os.path.exists(receptor_csv_path):
            print(f"File for {ligand} not found in sample {sample_folder}.")
            continue

        receptor_df = pd.read_csv(receptor_csv_path)

        # Ensure coordinate columns exist
        if not {'x_centroid', 'y_centroid'}.issubset(receptor_df.columns):
            print(f"Missing coordinates for ligand {ligand} in sample {sample_folder}. Skipping.")
            continue

        # Count ligand-positive cells (0 when the file has no column for the ligand)
        ligand_cell_count = receptor_df[ligand].sum() if ligand in receptor_df else 0

        # Positional mask of ligand-positive cells. When the ligand column is absent the mask is
        # all False, so the ligand gets an all-zero row instead of raising KeyError.
        if ligand in receptor_df.columns:
            ligand_positive = (receptor_df[ligand] == True).to_numpy()
        else:
            ligand_positive = np.zeros(len(receptor_df), dtype=bool)
        # Row positions of the ligand-positive cells, in the same order as the rows of data_img
        ligand_positions = np.flatnonzero(ligand_positive)

        # Handle NaN in coordinates; a constant z=0 column is added when the file has no 'z'.
        # NOTE(review): filling missing x/y with 0 places those cells at the origin — confirm this is intended.
        receptor_df['z'] = receptor_df['z'].fillna(0) if 'z' in receptor_df.columns else 0
        receptor_df[['x_centroid', 'y_centroid', 'z']] = receptor_df[['x_centroid', 'y_centroid', 'z']].fillna(0)

        # Coordinates of the ligand-positive cells only
        data_img = receptor_df.loc[ligand_positive, ['x_centroid', 'y_centroid', 'z']]

        if data_img.empty:
            print(f"No true ligand cells for {ligand} in sample {sample_folder}.")
            ligand_row = [ligand, ligand_cell_count, 0] + [0] * len(receptors)
            ligand_table.append(ligand_row)
            continue

        # Build the tree on all cells and query a radius of 40 (coordinate units) around each
        # ligand-positive cell. `ind[i]` holds row positions of the cells near the i-th ligand cell.
        kdt = BallTree(receptor_df[['x_centroid', 'y_centroid', 'z']], metric='euclidean')
        ind = kdt.query_radius(data_img, r=40, return_distance=False)

        # Total neighbors per ligand cell; each query cell is returned as its own neighbor, hence the -1
        total_neighbors_per_ligand_cell = [max(len(neighbors) - 1, 0) for neighbors in ind]
        avg_total_neighbors = np.mean(total_neighbors_per_ligand_cell) if total_neighbors_per_ligand_cell else 0

        receptor_neighbor_counts_per_ligand_cell = []
        for receptor in receptors:
            # Each ligand file only carries columns for that ligand's own receptors
            if receptor not in receptor_df.columns:
                receptor_neighbor_counts_per_ligand_cell.append(0)
                continue

            receptor_positive = (receptor_df[receptor] == True).to_numpy()

            # Count receptor-positive neighbors, leaving out the ligand cell itself by position.
            # Previously 1 was subtracted unconditionally, which undercounted by one whenever the
            # ligand cell was not receptor-positive itself (and could give -1).
            receptor_counts_per_ligand_cell = [
                int(receptor_positive[neighbors[neighbors != self_position]].sum())
                for self_position, neighbors in zip(ligand_positions, ind)
            ]
            avg_receptor_neighbors = np.mean(receptor_counts_per_ligand_cell) if receptor_counts_per_ligand_cell else 0
            receptor_neighbor_counts_per_ligand_cell.append(avg_receptor_neighbors)

        ligand_row = [ligand, ligand_cell_count, avg_total_neighbors] + receptor_neighbor_counts_per_ligand_cell
        ligand_table.append(ligand_row)

    # Save results for the current sample
    columns = ['Ligand Name', 'Ligand Cell Count', 'Average Total Neighbors per Ligand Cell'] + [f'Average {receptor} Neighbors per Ligand Cell' for receptor in receptors]
    ligand_table_df = pd.DataFrame(ligand_table, columns=columns)

    # Ensure output folder exists
    output_sample_path = os.path.join(output_directory, f"{sample_folder}")
    os.makedirs(output_sample_path, exist_ok=True)

    output_file_path = os.path.join(output_sample_path, f"Ligand_Stats_with_receptors_{sample_folder}.csv")
    ligand_table_df.to_csv(output_file_path, index=False)

    print(f"Ligand table with receptor neighbors for sample {sample_folder} saved to {output_file_path}")


In [None]:
# Display the ligand_table_df currently held in the kernel (it is assigned by an earlier cell)
ligand_table_df

In [None]:
import os
import pandas as pd

# Define the directory containing the CSV files
root_dir = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Neighbors'  # Change this to the folder path


# Function to process each CSV file
def process_csv(file_path):
    """Shorten the 'Average <name> Neighbors per Ligand Cell' column headers of one CSV, in place.

    The file at `file_path` is read, every column whose header matches that pattern is
    re-added under just `<name>`, the long-named columns are dropped, the first rows are
    printed, and the result is written back to the same path (without the index).
    Any exception is caught and reported with a printed message; nothing is returned.
    """
    try:
        table = pd.read_csv(file_path)

        # Columns carrying the long 'Average ... Neighbors per Ligand Cell' header
        long_named = table.filter(regex=r'^Average .+ Neighbors per Ligand Cell$')

        # Strip the fixed prefix and suffix so only the middle part of each header remains
        short_names = (
            long_named.columns
            .str.replace(r'^Average ', '', regex=True)
            .str.replace(r' Neighbors per Ligand Cell$', '', regex=True)
        )

        # Add the data under the short names, then remove the long-named originals
        table[short_names] = long_named
        table = table.drop(columns=long_named.columns)

        # Preview of the result
        print(f"Modified DataFrame for {file_path}:")
        print(table.head())

        # Overwrite the input file with the renamed table
        table.to_csv(file_path, index=False)

    except Exception as error:
        print(f"Error processing {file_path}: {error}")

# Visit root_dir and every subdirectory below it, handing each '.csv' file to process_csv
for root, dirs, files in os.walk(root_dir):
    for file_path in (os.path.join(root, name) for name in files if name.endswith('.csv')):
        process_csv(file_path)


In [None]:
# Show the distinct values of the 'sample' column of adata.obs as a plain Python list
adata.obs['sample'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Neighbors/s1r1_b/Ligand_Stats_with_receptors_s1r1_b.csv')

# Create a list of ligand-receptor pairs
ligand_receptor_pairs = [
    ('ANPEP', 'SELE'), ('ANXA1', 'EGFR'), ('ANXA1', 'FPR1'), ('APOA5', 'LPL'),
    ('C3', 'CD19'), ('C3', 'ITGB2'), ('C3', 'VSIG4'), ('CAMP', 'EGFR'),
    ('CCL19', 'CCR7'), ('CCL5', 'ACKR1'), ('CCL5', 'SDC1'), ('CCN1', 'TLR2'),
    ('CCN1', 'ITGB5'), ('CCN1', 'TLR4'), ('CCN1', 'ITGB2'), ('CD274', 'PDCD1'),
    ('CD28', 'CD86'), ('CD28', 'CD247'), ('CD28', 'CD4'), ('CD34', 'SELL'),
    ('CD34', 'SELE'), ('CD70', 'CD27'), ('CD86', 'CD28'), ('CD86', 'CTLA4'),
    ('CFH', 'CFB'), ('CFH', 'SELL'), ('COL16A1', 'ITGA1'), ('COL17A1', 'ITGA1'),
    ('COL5A2', 'SDC1'), ('COL5A2', 'CD93'), ('COL5A2', 'ITGA1'), ('COL8A1', 'ITGA1'),
    ('CSF3', 'CSF3R'), ('CSF3', 'CSF2RA'), ('CXCL1', 'ACKR1'), ('CXCL10', 'ACKR1'),
    ('CXCL12', 'CXCR4'), ('CXCL12', 'CD4'), ('CXCL14', 'CXCR4'), ('CXCL2', 'ACKR1'),
    ('CXCL5', 'ACKR1'), ('CXCL6', 'ACKR1'), ('CXCL9', 'ACKR1'), ('EDN1', 'EDNRB'),
    ('EDN1', 'ADGRL4'), ('EDN1', 'EGFR'), ('GDF15', 'ERBB2'), ('HLA-DMB', 'CD4'),
    ('HLA-DQA2', 'LAG3'), ('HLA-DQA2', 'CD4'), ('HLA-DQB2', 'LAG3'), ('HLA-DRB5', 'LAG3'),
    ('HLA-DRB5', 'CD4'), ('ICAM1', 'EGFR'), ('ICAM1', 'IL2RA'), ('ICAM1', 'ITGB2'),
    ('IGF1', 'ITGB4'), ('IL1A', 'IL1R2'), ('IL1RN', 'IL1R2'), ('IL23A', 'IL23R'),
    ('IL23A', 'LILRB2'), ('IL33', 'IL1RL1'), ('ITGB2', 'ICAM1'), ('ITGB2', 'THY1'),
    ('KNG1', 'ITGB2'), ('KNG1', 'CD93'), ('LAMC2', 'ITGB4'), ('LPL', 'SDC1'),
    ('MMRN2', 'CD93'), ('MMRN2', 'CLEC14A'), ('PECAM1', 'PECAM1'), ('POSTN', 'ITGB5'),
    ('PRG4', 'TLR2'), ('PRG4', 'TLR4'), ('PTN', 'SDC1'), ('PTPRC', 'MRC1'),
    ('PTPRC', 'CD247'), ('PTPRC', 'CD4'), ('RETN', 'TLR4'), ('S100A12', 'TLR4'),
    ('SAA1', 'FPR1'), ('SAA1', 'TLR2'), ('SLAMF7', 'SLAMF7'), ('SLPI', 'CD4'),
    ('TFF2', 'CXCR4'), ('THBS2', 'SDC1'), ('THY1', 'ITGB2'), ('TNC', 'SDC1'),
    ('TNC', 'EGFR'), ('VCAN', 'EGFR'), ('VCAN', 'SELL'), ('VCAN', 'TLR2')
]

# Collect one [ligand, receptor, value] row for every listed pair whose ligand appears in the
# 'Ligand Name' column of df and whose receptor is a column of df
heatmap_data = [
    [ligand, receptor, df.loc[df['Ligand Name'] == ligand, receptor].values[0]]
    for ligand, receptor in ligand_receptor_pairs
    if ligand in df['Ligand Name'].values and receptor in df.columns
]

# Long-format table of the collected rows
heatmap_df = pd.DataFrame(heatmap_data, columns=['Ligand', 'Receptor', 'Interaction'])

# Reshape to a ligand (rows) x receptor (columns) matrix; unlisted combinations stay empty
heatmap_df_pivot = heatmap_df.pivot(index='Ligand', columns='Receptor', values='Interaction')

# Draw the annotated heatmap
plt.figure(figsize=(25, 20))
sns.heatmap(data=heatmap_df_pivot, annot=True, fmt='.2f', cmap='coolwarm', cbar=True)
plt.title('Ligand-Receptor Interaction Heatmap')
plt.tight_layout()
plt.show()


In [None]:
# List samples where 'status.3' is 'healthy'
healthy_samples = adata.obs.loc[adata.obs['status.3'] == 'healthy', 'sample'].unique().tolist()
print(healthy_samples)

In [None]:
# Collect the distinct sample names of the cells whose 'status.3' value is 'perio'
is_perio = adata.obs['status.3'] == 'perio'
perio_samples = adata.obs.loc[is_perio, 'sample'].unique().tolist()
print(perio_samples)

In [None]:
import os
import pandas as pd

# Directory containing the subfolders
base_dir = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Neighbors/"

# Ligand-receptor pairs of interest
ligand_receptor_pairs = [
    ('ANPEP', 'SELE'),  ('ANXA1', 'EGFR'),  ('ANXA1', 'FPR1'),  ('APOA5', 'LPL'),   ('C3', 'CD19'),   ('C3', 'ITGB2'),  ('C3', 'VSIG4'),
    ('CAMP', 'EGFR'),  ('CCL19', 'CCR7'),  ('CCL5', 'ACKR1'),  ('CCL5', 'SDC1'),  ('CCN1', 'TLR2'),  ('CCN1', 'ITGB5'),  ('CCN1', 'TLR4'),
    ('CCN1', 'ITGB2'), ('CD274', 'PDCD1'), ('CD28', 'CD86'), ('CD28', 'CD247'), ('CD28', 'CD4'), ('CD34', 'SELL'), ('CD34', 'SELE'),
    ('CD70', 'CD27'), ('CD86', 'CD28'), ('CD86', 'CTLA4'),  ('CFH', 'CFB'),  ('CFH', 'SELL'),  ('COL16A1', 'ITGA1'),  ('COL17A1', 'ITGA1'),
    ('COL5A2', 'SDC1'), ('COL5A2', 'CD93'), ('COL5A2', 'ITGA1'), ('COL8A1', 'ITGA1'), ('CSF3', 'CSF3R'), ('CSF3', 'CSF2RA'),  ('CXCL1', 'ACKR1'),
    ('CXCL10', 'ACKR1'), ('CXCL12', 'CXCR4'), ('CXCL12', 'CD4'), ('CXCL14', 'CXCR4'), ('CXCL2', 'ACKR1'), ('CXCL5', 'ACKR1'),  ('CXCL6', 'ACKR1'),
    ('CXCL9', 'ACKR1'), ('EDN1', 'EDNRB'), ('EDN1', 'ADGRL4'), ('EDN1', 'EGFR'), ('GDF15', 'ERBB2'), ('HLA-DMB', 'CD4'),  ('HLA-DQA2', 'LAG3'),
    ('HLA-DQA2', 'CD4'), ('HLA-DQB2', 'LAG3'), ('HLA-DRB5', 'LAG3'), ('HLA-DRB5', 'CD4'), ('ICAM1', 'EGFR'), ('ICAM1', 'IL2RA'),  ('ICAM1', 'ITGB2'),
    ('IGF1', 'ITGB4'),  ('IL1A', 'IL1R2'), ('IL1RN', 'IL1R2'), ('IL23A', 'IL23R'), ('IL23A', 'LILRB2'), ('IL33', 'IL1RL1'),  ('ITGB2', 'ICAM1'),
    ('ITGB2', 'THY1'),  ('KNG1', 'ITGB2'),  ('KNG1', 'CD93'),  ('LAMC2', 'ITGB4'),  ('LPL', 'SDC1'),  ('MMRN2', 'CD93'),  ('MMRN2', 'CLEC14A'),
    ('PECAM1', 'PECAM1'),  ('POSTN', 'ITGB5'),  ('PRG4', 'TLR2'),  ('PRG4', 'TLR4'),  ('PTN', 'SDC1'),  ('PTPRC', 'MRC1'),  ('PTPRC', 'CD247'),
    ('PTPRC', 'CD4'),  ('RETN', 'TLR4'),  ('S100A12', 'TLR4'),  ('SAA1', 'FPR1'),  ('SAA1', 'TLR2'),  ('SLAMF7', 'SLAMF7'),  ('SLPI', 'CD4'),
    ('TFF2', 'CXCR4'),  ('THBS2', 'SDC1'), ('THY1', 'ITGB2'), ('TNC', 'SDC1'),  ('TNC', 'EGFR'),  ('VCAN', 'EGFR'),  ('VCAN', 'SELL'),  ('VCAN', 'TLR2')
]

# Define your custom sample order
custom_sample_order = [
    's1r2', 's1r6', 's2r1_HV203', 's2r1_HV207', 's2r7_a', 's2r7_b', 's2r9_a', 's2r9_b', 
    'HV171A', 'HV191A_A', 'HV191A_B', 'HV205A', 'HV205B', 'HV220A_A', 'HV220A_B',
    's1r1_a', 's1r1_b', 's1r3', 's1r5', 's2r2_HV137', 's2r2_HV184', 's2r3', 's2r4_a', 's2r4_b', 
    's2r5', 's2r8_HV185', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211', 'HV137A', 'HV140A_A', 
    'HV140A_B', 'HV140A_C', 'HV140A_D', 'HV160C_A', 'HV160C_B', 'HV192A_A', 'HV192A_B', 'HV192A_C', 'HV198B_A', 'HV198B_B', 
    'HV219AB_A', 'HV219AB_B', 'HV219AS_A', 'HV219AS_B', 'HV219AS_C'
]

# Initialize a results DataFrame with ligand-receptor combinations as the index
index_pairs = [f"{ligand}-{receptor}" for ligand, receptor in ligand_receptor_pairs]
results = pd.DataFrame(index=index_pairs)

# A ligand needs at least this many positive cells in a sample for its values to be reported
min_ligand_cells = 100

# Marker for "no value"; a float NaN keeps the result columns numeric
missing_value = float('nan')

# Collect sample data in a dictionary for sorting later
sample_data_dict = {}

for subfolder_name in os.listdir(base_dir):
    subfolder_path = os.path.join(base_dir, subfolder_name)

    if not os.path.isdir(subfolder_path):
        continue

    # Find the CSV files in the subfolder; sorted so the choice below is deterministic
    csv_files = sorted(f for f in os.listdir(subfolder_path) if f.endswith(".csv"))
    if not csv_files:
        continue

    sample_csv = os.path.join(subfolder_path, csv_files[0])  # Use the first CSV found

    # Read the CSV file; the first column becomes the row index
    # (presumably 'Ligand Name', as rows are looked up by ligand below — verify against the file)
    data = pd.read_csv(sample_csv, index_col=0)

    # Skip samples in which no ligand reaches the minimum cell count
    if not (data['Ligand Cell Count'].max() >= min_ligand_cells):
        continue

    # Prepare data for the current sample
    sample_data = {}

    for ligand, receptor in ligand_receptor_pairs:
        if ligand not in data.index or receptor not in data.columns:
            # Ligand row or receptor column absent from this sample's table.
            # (Looking up the cell count of an absent ligand previously raised KeyError.)
            count = missing_value
        elif data.loc[ligand, 'Ligand Cell Count'] < min_ligand_cells:
            # Too few ligand-positive cells in this sample
            count = missing_value
        else:
            count = data.loc[ligand, receptor]

        sample_data[f"{ligand}-{receptor}"] = count

    sample_data_dict[subfolder_name] = sample_data

# Add the data in the custom order
for sample_name in custom_sample_order:
    if sample_name in sample_data_dict:
        results[sample_name] = pd.Series(sample_data_dict[sample_name])

# Save the output to a CSV file (the folder is created if it does not exist yet)
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Ligand-Receptor/Ligand_Neighbors-per-sample/Ligand-neighbors-per-sample.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results.to_csv(output_path, index=True)
print(f"Data saved successfully to {output_path}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Reset to default Matplotlib style
plt.style.use("default")

# Coerce every cell to a number (anything non-numeric becomes NaN) and flag the missing cells
results = results.apply(pd.to_numeric, errors='coerce')
nan_mask = results.isna()

plt.figure(figsize=(30, 20))

# Masked (NaN) cells are not drawn by the heatmap, so the axes background shows through them.
# Painting the background light grey makes those cells match the 'NaN' legend entry below.
plt.gca().set_facecolor('lightgrey')

sns.heatmap(
    results,
    cmap="YlGnBu",
    annot=True,
    fmt=".2f",
    vmax=20,  # values above 20 share the darkest color
    cbar_kws={'label': 'Interaction Count'},
    mask=nan_mask,
    linewidths=0.5,
    linecolor="white"
)

# Legend entry explaining the light-grey (missing) cells
grey_patch = plt.Line2D([0], [0], marker='o', color='w', label='NaN',
                         markerfacecolor='lightgrey', markersize=10)
plt.legend(handles=[grey_patch], loc='upper right')

plt.title("Ligand-Receptor Interaction Heatmap")
plt.xlabel("Samples")
plt.ylabel("Ligand-Receptor Pairs")
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

In [None]:
# Healthy samples
healthy_samples = [
      's1r2', 's1r6', 's2r1_HV203', 's2r1_HV207', 's2r7_a', 's2r7_b', 's2r9_a', 's2r9_b', 
    'HV171A', 'HV191A_A', 'HV191A_B', 'HV205A', 'HV205B', 'HV220A_A', 'HV220A_B',  
]

# Perio samples
perio_samples = [
     's1r1_a', 's1r1_b', 's1r3', 's1r5', 's2r2_HV137', 's2r2_HV184', 's2r3', 's2r4_a', 's2r4_b', 
    's2r5', 's2r8_HV185', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211', 'HV137A', 'HV140A_A', 
    'HV140A_B', 'HV140A_C', 'HV140A_D', 'HV160C_A', 'HV160C_B', 'HV192A_A', 'HV192A_B', 'HV192A_C', 'HV198B_A', 'HV198B_B', 
    'HV219AB_A', 'HV219AB_B', 'HV219AS_A', 'HV219AS_B', 'HV219AS_C'  # Replace with your actual sample names
]

# Output the lists
print("Healthy Samples:", healthy_samples)
print("Perio Samples:", perio_samples)


In [None]:
# Split the columns of `results` (one column per sample) into the two groups.
# Only samples that are actually present as columns are selected, so a listed sample that
# was left out when `results` was built does not raise KeyError.
healthy_data = results[[name for name in healthy_samples if name in results.columns]]
perio_data = results[[name for name in perio_samples if name in results.columns]]


In [None]:
from scipy import stats
import pandas as pd
from statsmodels.stats.multitest import multipletests

# Healthy samples
healthy_samples = [
    's1r2', 's1r6', 's2r1_HV203', 's2r1_HV207', 's2r7_a', 's2r7_b', 's2r9_a', 's2r9_b', 
    'HV171A', 'HV191A_A', 'HV191A_B', 'HV205A', 'HV205B', 'HV220A_A', 'HV220A_B',  
]

# Perio samples
perio_samples = [
    's1r1_a', 's1r1_b', 's1r3', 's1r5', 's2r2_HV137', 's2r2_HV184', 's2r3', 's2r4_a', 's2r4_b', 
    's2r5', 's2r8_HV185', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211', 'HV137A', 'HV140A_A', 
    'HV140A_B', 'HV140A_C', 'HV140A_D', 'HV160C_A', 'HV160C_B', 'HV192A_A', 'HV192A_B', 'HV192A_C', 'HV198B_A', 'HV198B_B', 
    'HV219AB_A', 'HV219AB_B', 'HV219AS_A', 'HV219AS_B', 'HV219AS_C'
]

# `results` holds one row per ligand-receptor pair and one column per sample.
# Coerce locally to numbers (unparseable entries -> NaN) so this cell gives the same
# answer whether or not the heatmap cell has already converted `results`.
healthy_data = results[healthy_samples].apply(pd.to_numeric, errors='coerce')
perio_data = results[perio_samples].apply(pd.to_numeric, errors='coerce')

# NOTE(review): several sample names look like sections of the same donor
# (e.g. HV140A_A ... HV140A_D); the t-test below treats every column as an
# independent observation - confirm that is acceptable for this comparison.

# One record per ligand-receptor pair
stats_results = []  # Named to avoid clashing with the `results` DataFrame

for index in results.index:  # 'index' is the ligand-receptor pair
    healthy_expression = healthy_data.loc[index]
    perio_expression = perio_data.loc[index]

    # nan_policy='omit' drops missing samples; the default ('propagate') would turn
    # the whole test into NaN as soon as a single sample is missing.
    t_stat, p_value = stats.ttest_ind(healthy_expression, perio_expression, nan_policy='omit')

    stats_results.append({
        'Ligand-Receptor Pair': index,
        'T-statistic': float(t_stat),
        'P-value': float(p_value)
    })

# Explicit columns keep the frame well-formed even when `results` has no rows
stats_results_df = pd.DataFrame(
    stats_results, columns=['Ligand-Receptor Pair', 'T-statistic', 'P-value']
)

# Bonferroni correction over the pairs that could actually be tested. Pairs whose
# p-value is still NaN (e.g. too few observations) stay NaN and do not inflate the
# number of comparisons.
testable = stats_results_df['P-value'].notna()
stats_results_df['Corrected P-value'] = float('nan')
if testable.any():
    stats_results_df.loc[testable, 'Corrected P-value'] = multipletests(
        stats_results_df.loc[testable, 'P-value'], method='bonferroni'
    )[1]

# Print the results DataFrame
print(stats_results_df)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One bar per ligand-receptor pair, bar height = value of the 'Corrected P-value' column
fig, ax = plt.subplots(figsize=(20, 8))
sns.barplot(data=stats_results_df, x='Ligand-Receptor Pair', y='Corrected P-value', ax=ax)
# Pair names are long, so turn the x tick labels vertical
ax.tick_params(axis='x', labelrotation=90)
ax.set_title('P-values for Ligand-Receptor Pairs: Healthy vs Perio')
plt.show()

In [None]:
# Plain-text dump of the `results` table for a quick visual check
print(results)

In [None]:
# Show which columns `ligand_table_df` (defined in an earlier cell) provides
print(ligand_table_df.columns)

In [None]:
# Load the combined object used by the cells below, which read its 'assay', 'Lvl4' and
# 'citeSeq_to_Xenium_label' obs columns. NOTE(review): the file name suggests a
# Harmony-integrated Xenium + CITE-seq dataset - confirm. The spaces in the directory
# name are part of the path on disk.
merged = sc.read_h5ad('/data/vasileiosionat2/Xenium/Integration/NEW_CITE_ seq_files_ 24.12.23/Xenium_citeSeq_harmony.h5ad')

In [None]:
# Distinct 'Lvl4' labels present in merged.obs
merged.obs['Lvl4'].unique().tolist()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Keep only the Xenium cells; .copy() turns the view into a real object so that
# editing .obs below is safe and warning-free.
xen_obj = merged[merged.obs['assay'] == 'Xenium'].copy()

# Plain strings for Lvl4 so grouping only yields labels that actually occur
xen_obj.obs['Lvl4'] = xen_obj.obs['Lvl4'].astype('str')

# Cross-tabulate: rows = Lvl4 (Xenium annotation), columns = transferred citeSeq label
celltype_counts = pd.DataFrame(xen_obj.obs.groupby(['Lvl4', 'citeSeq_to_Xenium_label']).size()).unstack()
celltype_counts.columns = celltype_counts.columns.droplevel()
celltype_counts.index.name = 'Xenium cell type'
celltype_counts.columns.name = 'Predicted citeSeq cell type'
# After the transpose: rows = predicted citeSeq label, columns = Xenium cell type
celltype_counts = celltype_counts.T

# Turn every row into fractions of its own total; rows/cells without counts become 0.
# This uses ALL categories, so rows of the filtered table below need not sum to 1.
celltype_counts = celltype_counts.div(celltype_counts.sum(axis=1), axis=0).fillna(0)

# Labels shown in the figure (selection and order); the normalisation above is untouched
lvl4_clusters_to_plot = ['B', 'Pl.1', 'Pl.2', 'PB', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi', 'T_proximity_to_Fib', 'T.IE',
                'Mac', 'Mast', 'Mono', 'Mac.Neut.Mix', 'cDC1', 'Lang', 'cDC2', 'mregDC', 'pDC']
citeseq_clusters_to_plot = ['B Atypical', 'B Memory 1', 'B Memory 2', 'B Naive', 'Plasma 1', 'Plasma 2', 
                   'Plasma 3', 'Plasmablast', 'CD4 IL17A+ FOXP3+', 'CD4 T naive', 'CD4 TCM', 'CD4 TFH',
                  'CD4 TFreg', 'CD4 TH17', 'CD4 Treg', 'CD8 T', 'CD8 T IKZF2+', 'CD8 T TNF+', 'CD8 T naive', 'CD8 TRM', 'CD16+ NK', 'NK', 'T gd',
                  'ILC', 'MAIT', 'Macrophage', 'Mast', 'Monocyte', 'Monocyte/Macrophage', 'Neutrophil', 'cDC1', 'cDC2', 'mregDC', 'pDC']

# A label that never occurs in the data would make .loc raise KeyError. Report such
# labels and plot them as all-zero rows/columns so the figure layout stays fixed.
missing_citeseq = [c for c in citeseq_clusters_to_plot if c not in celltype_counts.index]
missing_lvl4 = [c for c in lvl4_clusters_to_plot if c not in celltype_counts.columns]
if missing_citeseq or missing_lvl4:
    print(f"Labels absent from the data (shown as 0): citeSeq={missing_citeseq}, Lvl4={missing_lvl4}")

# Filter for plotting only
filtered_counts = celltype_counts.reindex(
    index=citeseq_clusters_to_plot, columns=lvl4_clusters_to_plot, fill_value=0
)

# Plot heatmap for selected clusters (no cell annotations, so no number format is needed)
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(filtered_counts, cmap='YlGnBu', cbar=True, ax=ax)
ax.set_title("Xenium vs Predicted scRNAseq Heatmap (Selected Clusters)")
# Save the plot as an SVG file
fig.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_Citeseq_clusters.svg', format='svg')
plt.show()

In [None]:
# Names of all per-cell annotation columns in adata.obs
adata.obs.columns.unique().tolist()

In [None]:
# Distinct labels in the 'niche_cc14' column (used as cluster_key further down)
adata.obs['niche_cc14'].unique().tolist()

In [None]:
# Distinct labels in the 'Lvl4' column (used as cluster_key further down)
adata.obs['Lvl4'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in light grey ('#e1e1e1').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#e1e1e1'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, 'yellow', '#7F00FF', 'red', 'magenta']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=3, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Niche map of sample s2r3, coloured by the 'niche_knn50k10_merged' annotation
sample_names = ['s2r3']
lvl1_cluster_key = 'niche_knn50k10_merged'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/niches.svg'

generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, semi-transparent cluster-coloured scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in light grey ('#e1e1e1').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees and every point is drawn with
    alpha=0.1. Colours are kept in a local array and nothing is written back
    to ``adata``.
    """
    background_color = '#e1e1e1'
    alpha_value = 0.1  # Transparency level (adjust as needed)
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, 'yellow', '#7F00FF', 'red']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(
                new_x_coords[mask], new_y_coords[mask],
                c=color, s=8, alpha=alpha_value, label=color
            )

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Niche map of sample s2r3, coloured by the 'niche_cc14' annotation
sample_names = ['s2r3']
lvl1_cluster_key = 'niche_cc14'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/niches_faint.svg'

generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in light grey ('#e1e1e1').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#e1e1e1'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, '#FFFDD0', 'purple', '#F88379']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Niche map of sample s2r3, coloured by 'niche_cc14' with a pastel palette.
# NOTE(review): this writes to niches_faint.svg, the same file as the preceding
# alpha=0.1 cell, so running both keeps only this version - confirm that is intended.
sample_names = ['s2r3']
lvl1_cluster_key = 'niche_cc14'
lvl1_cluster_colors = {
    'Lymphoid': '#F88379',
    'Plasma': 'purple',
    'Crevicular': '#FFFDD0',
}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/niches_faint.svg'

generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in grey ('#808080').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#808080'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, '#0000FF', 'red', 'magenta', '#39e75f', 'cyan']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Sample s2r3 coloured by selected 'Lvl4' labels; both plasma labels share one colour
sample_names = ['s2r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {
    'B': 'magenta',
    'pDC': 'cyan',
    'PB': 'red',
    'Pl.1': '#0000FF',
    'Pl.2': '#0000FF',
    'Fib.2': '#39e75f',
}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_B-Pl.svg'

generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in grey ('#808080').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#808080'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, 'yellow', 'purple', '#008000', 'red', 'cyan']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Sample s2r3 coloured by four selected 'Lvl4' labels
sample_names = ['s2r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {
    'Treg': 'red',
    'mregDC': 'cyan',
    'Tc': 'yellow',
    'Th_proximity_to_B': 'purple',
}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_T-Fib.svg'

generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
# Names of all per-cell annotation columns in merged.obs
merged.obs.columns.unique().tolist()

In [None]:
# Distinct labels in the 'citeSeq_to_Xenium_label' column (used as cluster_key further down)
merged.obs['citeSeq_to_Xenium_label'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``. All data is read from this argument; no global
        object is consulted.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in grey ('#808080').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#808080'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, 'yellow', 'purple', 'red', 'orange', 'cyan', 'magenta']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Sample s2r3 coloured by transferred CITE-seq T-cell / DC labels
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Citeseq_T-Fib.svg'
lvl1_cluster_colors = {'CD4 TFreg' : 'orange', 'mregDC' : 'cyan', 'CD4 IL17A+ FOXP3+': 'purple',  'CD4 T naive': 'purple','CD4 Treg': 'red', 'CD4 TCM': 'purple', 'CD8 T': 'yellow', 'CD8 T IKZF2+': 'yellow', 'CD8 T naive': 'yellow',
                       'CD8 T TNF+': 'yellow', 'CD8 TRM': 'yellow' , 'CD4 TFH': 'magenta'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# The data is read from `merged`, so it is passed explicitly as the first argument
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``. All data is read from this argument; no global
        object is consulted.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in light grey ('#e1e1e1').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#e1e1e1'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, '#39e75f']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Sample s2r3 with only the transferred 'Fibroblast 2' label highlighted
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Fib.2.svg'
lvl1_cluster_colors = {'Fibroblast 2' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# The data is read from `merged`, so it is passed explicitly as the first argument
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated, cluster-coloured spatial scatter plot (SVG) per sample.

    Parameters
    ----------
    adata : object with a pandas ``.obs`` table
        ``.obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and ``cluster_key``. All data is read from this argument; no global
        object is consulted.
    sample_names : str or iterable of str
        Values of ``adata.obs['sample']`` to plot; one figure each.
    cluster_key : str
        ``.obs`` column holding the cluster label of every cell.
    cluster_colors : dict
        Cluster label -> matplotlib colour. Cells whose label is not a key are
        drawn in light grey ('#e1e1e1').
    output_path : str
        Target SVG. Used as-is for a single sample; for several samples
        ``_<sample>`` is inserted before the extension so the figures do not
        overwrite one another.

    Notes
    -----
    Coordinates are rotated by 90 degrees. Colours are kept in a local array and
    nothing is written back to ``adata``.
    """
    background_color = '#e1e1e1'
    # Back-to-front drawing order: colours later in the list are painted on top.
    color_order = [background_color, '#39e75f']
    # A colour used in cluster_colors but missing above would never be drawn; add it last.
    color_order += [c for c in dict.fromkeys(cluster_colors.values()) if c not in color_order]

    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # A bare string would otherwise be iterated character by character
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    path_root, path_ext = os.path.splitext(output_path)
    obs = adata.obs

    for sample_name in sample_names:
        sample_obs = obs[obs['sample'] == sample_name]
        if sample_obs.empty:
            print(f"No cells found for sample {sample_name!r}; skipping.")
            continue

        # Rotate the (x, y) centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so its shape follows the data extent (shorter side = 6 in)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Colour per cell; astype(object) keeps map/fillna working for categorical columns
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color).to_numpy()
        )

        for color in color_order:
            mask = point_colors == color
            ax.scatter(new_x_coords[mask], new_y_coords[mask], c=color, s=8, label=color)

        # Bare image: no grid, ticks or frame
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 um, assuming centroids are in um - TODO confirm),
        # placed 1% in from the left edge and 30% up from the bottom of the data extent.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        target_path = output_path if len(sample_names) == 1 else f"{path_root}_{sample_name}{path_ext}"
        fig.savefig(target_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Sample s2r3 with only the transferred 'CD4 TFH' label highlighted
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/TFH.svg'
lvl1_cluster_colors = {'CD4 TFH' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# The data is read from `merged`, so it is passed explicitly as the first argument
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/TFReg.svg'
lvl1_cluster_colors = {'CD4 TFreg' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/mregDC.svg'
lvl1_cluster_colors = {'mregDC' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Plasma.svg'
lvl1_cluster_colors = {'Plasma 1' : '#39e75f', 'Plasma 2' : '#39e75f', 'Plasma 3' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/PB.svg'
lvl1_cluster_colors = {'Plasmablast' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'

# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Bnaive.svg'
lvl1_cluster_colors = {'B Naive' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'
# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Bmem.svg'
lvl1_cluster_colors = {'B Memory 1' : '#39e75f', 'B Memory 2' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'
# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Batyp.svg'
lvl1_cluster_colors = {'B Atypical' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'
# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/pDC.svg'
lvl1_cluster_colors = {'pDC' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'
# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
# List the distinct values of the 'citeSeq_to_Xenium_label' column in merged.obs
# (handy for looking up the exact label strings to highlight in the plots)
merged.obs['citeSeq_to_Xenium_label'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Save one rotated spatial scatter plot per sample with selected clusters highlighted.

    Cells whose ``cluster_key`` label is a key of ``cluster_colors`` are drawn in
    the mapped colour on top of all other cells, which are drawn in light grey.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``.obs`` has 'sample', 'x_centroid', 'y_centroid' and
        ``cluster_key`` columns. Only ``.obs`` is read; nothing is written back.
    sample_names : list of str
        Values of ``adata.obs['sample']`` to plot, one figure each.
    cluster_key : str
        Name of the ``.obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> matplotlib colour.
    output_path : str
        SVG file to write. When several samples are given, the sample name is
        inserted before the extension so the figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells.
    """
    background_color = '#e1e1e1'

    # Rotate the centroids by 90 degrees
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then each highlight colour once so it ends up on top
    draw_order = [background_color] + [
        color for color in dict.fromkeys(cluster_colors.values()) if color != background_color
    ]

    for sample_name in sample_names:
        # Use the object passed by the caller rather than a notebook global
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        new_x_coords, new_y_coords = rotation_matrix @ np.vstack(
            (sample_obs['x_centroid'].values, sample_obs['y_centroid'].values)
        )

        # Figure follows the data's aspect ratio; its shorter side is 6 inches
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour, computed locally (object dtype so unmapped labels become NaN -> grey)
        point_colors = (
            sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)
        )
        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(x=new_x_coords[mask], y=new_y_coords[mask], c=color, s=8, label=color)

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar 200 coordinate units long (200 μm if centroids are in μm —
        # NOTE(review): confirm units), near the left edge at 30% of the plot height
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        sample_output_path = output_path
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            sample_output_path = f"{stem}_{sample_name}{ext}"

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(sample_output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Example usage
sample_names = ['s2r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/cMyel.svg'
lvl1_cluster_colors = {'Cycling myeloid' : '#39e75f'}
lvl1_cluster_key = 'citeSeq_to_Xenium_label'
# `merged` is passed explicitly: the plot is drawn from merged.obs, which carries this label column
generate_spatial_plot(merged, sample_names, lvl1_cluster_key, lvl1_cluster_colors, output_path)


In [None]:
# List the distinct values of the 'citeSeq_to_Xenium_label' column in merged.obs
# NOTE(review): the same expression is evaluated in an earlier cell; this one looks redundant
merged.obs['citeSeq_to_Xenium_label'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Keep only the Xenium cells. Take an explicit copy so the dtype change below
# modifies an independent object instead of writing into a view of `merged`.
xen_obj = merged[merged.obs['assay'] == 'Xenium'].copy()
xen_obj.obs['Lvl4'] = xen_obj.obs['Lvl4'].astype('str')

# Count cells for every (Xenium Lvl4 label, predicted CITE-seq label) pair
celltype_counts = pd.DataFrame(
    xen_obj.obs.groupby(['Lvl4', 'citeSeq_to_Xenium_label']).size()
).unstack()

# Tidy the axes, then transpose: rows = predicted CITE-seq type, columns = Xenium Lvl4 type
celltype_counts.columns = celltype_counts.columns.droplevel()
celltype_counts.index.name = 'Xenium cell type'
celltype_counts.columns.name = 'Predicted CITE-seq cell type'
celltype_counts = celltype_counts.T

# Normalize each row (CITE-seq type) by its total so rows sum to 1
celltype_counts = celltype_counts.div(celltype_counts.sum(axis=1), axis=0)
# Order the Xenium columns by the CITE-seq type in which they peak; missing values become 0
celltype_counts = celltype_counts.loc[:, celltype_counts.idxmax(axis=0).sort_values().index].fillna(0)

# Total normalized frequency per Xenium column
column_sums = celltype_counts.abs().sum(axis=0)

# Keep Xenium columns whose summed frequency reaches the 0.1 threshold
columns_to_keep = column_sums[column_sums >= 0.1].index
celltype_counts2 = celltype_counts[columns_to_keep]

# CITE-seq clusters to show on the y-axis (rows keep the order of the table index)
desired_cite_clusters = ['CD4 TFreg', 'mregDC', 'CD4 TFH', 'B Naive', 'B Memory 1', 'B Memory 2', 'Plasma 1', 'Plasma 2', 'Plasma 3', 
                         'Plasmablast', 'pDC', 'B Atypical']
celltype_counts2 = celltype_counts2.loc[celltype_counts2.index.intersection(desired_cite_clusters)]

# Drop Xenium Lvl4 columns whose name starts with any of these prefixes
exclude_patterns = ('VEC', 'Ep', 'T.', 'T_', 'F', 'Tc', 'L', 'Tr', 'NK', 'SM', 'M', 'cD')
columns_to_exclude = [col for col in celltype_counts2.columns if col.startswith(exclude_patterns)]
celltype_counts2 = celltype_counts2.drop(columns=columns_to_exclude)

# Plot heatmap on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(celltype_counts2, cmap='YlGnBu', annot=False, fmt=".2f", linewidths=.5, 
            cbar_kws={'label': 'Co-occurrence Frequency'}, ax=ax)
ax.set_title("Filtered Co-occurrence Frequencies Between Xenium and Predicted CITE-seq Clusters")
ax.set_xlabel("Xenium Cell Types (Lvl4)")
ax.set_ylabel("Predicted CITE-seq Cell Types")
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
fig.tight_layout()
# Save the plot
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/citeseq-to-xen.svg'
fig.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Th_proximity_to_B' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Th_proximity_to_B' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/ThB.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Th_proximity_to_B' and 'Th' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Th_proximity_to_B' : 'black', 'Th' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Th.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Tc' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Tc' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Tc.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'B' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'B' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/B.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'mregDC' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'mregDC' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/mregDC.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Pl.1' and 'Pl.2' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Pl.1' : 'black', 'Pl.2' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Pl.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'PB' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'PB' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/PB.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Fib.2' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Fib.2' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Fib2.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'pDC' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'pDC' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/pDC.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot the cells of each sample as a rotated spatial scatter and save it as SVG.

    Cell centroids are rotated by 90 degrees, coloured via ``cluster_colors``
    and drawn together with a 200-unit scale bar on a blank white canvas.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
        ``obs`` must contain ``'sample'``, ``'x_centroid'``, ``'y_centroid'``
        and the column named by ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per entry.
    cluster_key : str
        Name of the ``obs`` column holding the cluster labels.
    cluster_colors : dict
        Maps cluster label -> colour string. Labels missing from the mapping
        are drawn in light grey. Colours are drawn in the order they first
        appear in the mapping, on top of the grey background cells.
    output_path : str or path-like
        Destination SVG file. Used unchanged for a single sample; when several
        samples are given, ``_<sample_name>`` is appended to the file stem so
        the figures do not overwrite one another.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata.obs``.
    """
    import os  # local import so the function does not depend on another cell's imports

    background_color = '#e1e1e1'

    # 90-degree rotation matrix applied to the (x, y) centroids
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order: grey background first, then every requested colour once,
    # so highlighted clusters always end up on top of the background cells.
    draw_order = [background_color]
    for color in cluster_colors.values():
        if color not in draw_order:
            draw_order.append(color)

    sample_names = list(sample_names)
    path_stem, path_ext = os.path.splitext(output_path)

    for sample_name in sample_names:
        # Only the obs table is needed, so slice it directly instead of
        # creating (and then writing into) an AnnData view.
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample '{sample_name}' in adata.obs['sample'].")

        # Rotate the centroids
        coords = np.vstack((sample_obs['x_centroid'].to_numpy(), sample_obs['y_centroid'].to_numpy()))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure to the data extent; fall back to a square figure
        # when one extent is zero (single cell / collinear cells).
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; casting to object first keeps map/fillna safe for
        # categorical label columns. Unmapped labels become grey.
        point_colors = sample_obs[cluster_key].astype(object).map(cluster_colors).fillna(background_color)

        for color in draw_order:
            mask = (point_colors == color).to_numpy()
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Scale bar of 200 coordinate units (200 µm if centroids are in microns - TODO confirm);
        # the rotation preserves distances, so the length stays valid.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # One file per sample; a single-sample call keeps the exact path given.
        if len(sample_names) == 1:
            sample_path = output_path
        else:
            sample_path = f"{path_stem}_{sample_name}{path_ext}"
        fig.savefig(sample_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7 panel: highlight 'Treg' cells (black) in sample s1r3.
# Note: despite the lvl1_ prefix, the annotation column used is 'Lvl4'.
sample_names = ['s1r3']
lvl1_cluster_key = 'Lvl4'
lvl1_cluster_colors = {'Treg' : 'black'}
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Treg.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot rotated cell centroids of each sample, coloured by an ``obs`` column.

    For every sample the centroids in ``obs['x_centroid']`` / ``obs['y_centroid']``
    are rotated by 90 degrees. Cells whose ``cluster_key`` value is a key of
    ``cluster_colors`` are drawn in the mapped colour on top of all other cells,
    which are drawn light grey. A 200-unit scale bar is added (200 µm if the
    centroids are in µm — TODO confirm units) and the figure is saved as SVG.

    Parameters
    ----------
    adata : AnnData
        Must provide ``obs`` columns ``'sample'``, ``'x_centroid'``,
        ``'y_centroid'`` and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per sample.
    cluster_key : str
        ``obs`` column holding the category of each cell.
    cluster_colors : dict
        Category -> matplotlib colour. Categories missing from the dict are grey.
    output_path : str
        Destination of the SVG. With more than one sample, ``_<sample>`` is
        inserted before the extension so figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata``.
    """
    background_color = '#e1e1e1'

    # Rotation applied to every centroid before plotting
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Back-to-front drawing order: grey background first, highlighted groups on
    # top. Colours requested in `cluster_colors` that are not part of the
    # default order are appended so that they are never silently left out.
    color_order = [background_color, 'yellow', 'red', '#7F00FF']
    for requested_color in cluster_colors.values():
        if requested_color not in color_order:
            color_order.append(requested_color)

    sample_names = list(sample_names)
    for sample_name in sample_names:
        # Only `obs` is needed, so subset the DataFrame instead of the AnnData
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if len(sample_obs) == 0:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Rotate the (2, n_cells) coordinate matrix
        coords = np.vstack((sample_obs['x_centroid'].values, sample_obs['y_centroid'].values))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure after the data extent; fall back to a square figure
        # when the extent is degenerate (e.g. a single cell)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; kept local instead of being written into `obs`.
        # Casting to object first keeps `fillna` valid for categorical columns.
        cell_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        for color in color_order:
            mask = cell_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar: 200 data units long, starting 1 % in from the left edge
        # and placed at 30 % of the vertical extent
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            save_path = f"{stem}_{sample_name}{ext}"
        else:
            save_path = output_path

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(save_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 7: sample 's1r3' coloured by the 'niche_cc14' annotation
lvl1_cluster_key = 'niche_cc14'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
}
sample_names = ['s1r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/niches.svg'
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
# Keep only the cells whose 'status.3' annotation is exactly "perio"
adata_perio = adata[adata.obs['status.3'].eq("perio")]

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Preview rotated cell centroids of each sample, coloured by an ``obs`` column.

    For every sample the centroids in ``obs['x_centroid']`` / ``obs['y_centroid']``
    are rotated by 90 degrees. Cells whose ``cluster_key`` value is a key of
    ``cluster_colors`` are drawn in the mapped colour on top of all other cells,
    which are drawn light grey. A 200-unit scale bar is added (200 µm if the
    centroids are in µm — TODO confirm units). The figure is only shown; the
    ``savefig`` call is commented out in this variant.

    Parameters
    ----------
    adata : AnnData
        Object to plot. It is now actually used; the previous version ignored
        this argument and always read the global ``adata_perio``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per sample.
    cluster_key : str
        ``obs`` column holding the category of each cell.
    cluster_colors : dict
        Category -> matplotlib colour. Categories missing from the dict are grey.
    output_path : str
        Kept for signature compatibility; unused while saving is disabled.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata``.
    """
    background_color = '#e1e1e1'

    # Rotation applied to every centroid before plotting
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Back-to-front drawing order: grey background first, highlighted groups on
    # top. Colours requested in `cluster_colors` that are not part of the
    # default order are appended so that they are never silently left out.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta']
    for requested_color in cluster_colors.values():
        if requested_color not in color_order:
            color_order.append(requested_color)

    for sample_name in sample_names:
        # Only `obs` is needed, so subset the DataFrame instead of the AnnData
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if len(sample_obs) == 0:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Rotate the (2, n_cells) coordinate matrix
        coords = np.vstack((sample_obs['x_centroid'].values, sample_obs['y_centroid'].values))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure after the data extent; fall back to a square figure
        # when the extent is degenerate (e.g. a single cell)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; kept local instead of being written into `obs`.
        # Casting to object first keeps `fillna` valid for categorical columns.
        cell_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        for color in color_order:
            mask = cell_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=8,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar: 200 data units long, starting 1 % in from the left edge
        # and placed at 30 % of the vertical extent
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # Saving is disabled in this preview variant:
        #plt.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Preview: periodontitis niches ('perio_niche_knn50k10') on sample 's1r3'
lvl1_cluster_key = 'perio_niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
}
sample_names = ['s1r3']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/niches.svg'
generate_spatial_plot(
    adata=adata_perio,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot rotated cell centroids of each sample, coloured by an ``obs`` column.

    For every sample the centroids in ``obs['x_centroid']`` / ``obs['y_centroid']``
    are rotated by 90 degrees. Cells whose ``cluster_key`` value is a key of
    ``cluster_colors`` are drawn in the mapped colour on top of all other cells,
    which are drawn light grey. A 200-unit scale bar is added (200 µm if the
    centroids are in µm — TODO confirm units) and the figure is saved as SVG.

    Parameters
    ----------
    adata : AnnData
        Object to plot. It is now actually used; the previous version ignored
        this argument and always read the global ``adata_perio``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure per sample.
    cluster_key : str
        ``obs`` column holding the category of each cell.
    cluster_colors : dict
        Category -> matplotlib colour. Categories missing from the dict are grey.
    output_path : str
        Destination of the SVG. With more than one sample, ``_<sample>`` is
        inserted before the extension so figures do not overwrite each other.

    Raises
    ------
    ValueError
        If a requested sample has no cells in ``adata``.
    """
    background_color = '#e1e1e1'

    # Rotation applied to every centroid before plotting
    rotation_rad = np.deg2rad(90)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Back-to-front drawing order: grey background first, highlighted groups on
    # top. Colours requested in `cluster_colors` that are not part of the
    # default order are appended so that they are never silently left out.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta']
    for requested_color in cluster_colors.values():
        if requested_color not in color_order:
            color_order.append(requested_color)

    sample_names = list(sample_names)
    for sample_name in sample_names:
        # Only `obs` is needed, so subset the DataFrame instead of the AnnData
        sample_obs = adata.obs[adata.obs['sample'] == sample_name]
        if len(sample_obs) == 0:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Rotate the (2, n_cells) coordinate matrix
        coords = np.vstack((sample_obs['x_centroid'].values, sample_obs['y_centroid'].values))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure after the data extent; fall back to a square figure
        # when the extent is degenerate (e.g. a single cell)
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour; kept local instead of being written into `obs`.
        # Casting to object first keeps `fillna` valid for categorical columns.
        cell_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        for color in color_order:
            mask = cell_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=10,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar: 200 data units long, starting 1 % in from the left edge
        # and placed at 30 % of the vertical extent
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_start + scale_bar_length],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        # One file per sample when several samples are requested
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            save_path = f"{stem}_{sample_name}{ext}"
        else:
            save_path = output_path

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(save_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Figure 5: periodontitis niches ('perio_niche_knn50k10') on sample 's2r2_HV184'
lvl1_cluster_key = 'perio_niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
}
sample_names = ['s2r2_HV184']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_niches.svg'
generate_spatial_plot(
    adata=adata_perio,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
# Inspect the periodontitis niche annotation.
# NOTE(review): `df` is not created in this cell; it has to come from another
# cell (e.g. one of the heatmap cells that build it from an AnnData `obs`) —
# verify this still works on a fresh kernel with Run All.
print(df['perio_niche_knn50k10'])

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the periodontitis subset
df = pd.DataFrame(adata_perio.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Cell counts per (cluster, niche) pair: rows = Lvl4 clusters, columns = niches
niche_groups = df.groupby(['Lvl4', 'niche_knn50k10_merged']).size().unstack(fill_value=0)

# Clusters (y-axis) and niches (x-axis) to show, in display order
cluster_order = ['B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'Mac', 'MyoF']
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']  # Niches to plot

# Fraction of each cluster that lies in each niche. The denominator sums over
# ALL niches, not only the plotted ones, so a plotted row need not sum to 1.
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Keep only the niches to plot (raises KeyError if one is missing) ...
niche_groups_relative_filtered = niche_groups_relative[niche_order]

# ... and put rows/columns in display order (clusters absent from the data become NaN rows)
niche_groups_relative_filtered = niche_groups_relative_filtered.reindex(index=cluster_order, columns=niche_order)

# Heatmap: niches on the x-axis, clusters on the y-axis
plt.figure(figsize=(4, 5))
sns.heatmap(
    niche_groups_relative_filtered,
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05, linecolor='black',
    vmax=0.5,
    vmin=0.05,
    xticklabels=niche_order,
    yticklabels=cluster_order
)
plt.title('Relative Abundance of Cell Clusters Across Niches')
plt.xlabel('Niche')
plt.ylabel('Cell Cluster')
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Save output (a stray "." that made this cell a SyntaxError was removed here)
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.pdf'
fig = plt.gcf()
plt.savefig(output_path, dpi=300, format='pdf', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Start from matplotlib's defaults, then store text as Type 42 (TrueType)
# fonts in PDF / PostScript exports
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Per-cell metadata of the healthy subset
df = pd.DataFrame(adata_healthy.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Count table: one row per Lvl4 cluster, one column per niche
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Display order of the heatmap rows (clusters) and columns (niches)
cluster_order = [
    'B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono', 'MyoF',
]
niche_order = ['Epi-CT', 'Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']

# Per-cluster fractions; the row total covers every niche, plotted or not
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Select the plotted niches, then impose the row / column order
niche_groups_relative_filtered = (
    niche_groups_relative[niche_order]
    .reindex(index=cluster_order, columns=niche_order)
)

# Heatmap with niches along x and clusters along y
plt.figure(figsize=(4, 5))
sns.heatmap(
    niche_groups_relative_filtered,
    vmin=0.05,
    vmax=0.5,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Relative Frequency'},
    xticklabels=niche_order,
    yticklabels=cluster_order,
)
plt.gca().set(
    title='Relative Abundance of Cell Clusters Across Niches',
    xlabel='Niche',
    ylabel='Cell Cluster',
)
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Write the figure to disk, then display it
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_healthy_clusters_niches_suppl.pdf'
fig = plt.gcf()
plt.savefig(output_path, facecolor=fig.get_facecolor(), bbox_inches='tight', format='pdf', dpi=300)
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Start from matplotlib's defaults, then store text as Type 42 (TrueType)
# fonts in PDF / PostScript exports
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Per-cell metadata of the periodontitis subset
df = pd.DataFrame(adata_perio.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Count table: one row per Lvl4 cluster, one column per niche
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Display order of the heatmap rows (clusters) and columns (niches)
cluster_order = [
    'B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono', 'MyoF',
]
niche_order = ['Epi-CT', 'Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']

# Per-cluster fractions; the row total covers every niche, plotted or not
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Select the plotted niches, then impose the row / column order
niche_groups_relative_filtered = (
    niche_groups_relative[niche_order]
    .reindex(index=cluster_order, columns=niche_order)
)

# Heatmap with niches along x and clusters along y
plt.figure(figsize=(4, 5))
sns.heatmap(
    niche_groups_relative_filtered,
    vmin=0.05,
    vmax=0.5,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Relative Frequency'},
    xticklabels=niche_order,
    yticklabels=cluster_order,
)
plt.gca().set(
    title='Relative Abundance of Cell Clusters Across Niches',
    xlabel='Niche',
    ylabel='Cell Cluster',
)
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Write the figure to disk, then display it
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches_suppl.pdf'
fig = plt.gcf()
plt.savefig(output_path, facecolor=fig.get_facecolor(), bbox_inches='tight', format='pdf', dpi=300)
plt.show()


In [None]:
import pandas as pd

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the healthy subset
df = pd.DataFrame(adata_healthy.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Raw cell counts: rows = Lvl4 clusters, columns = niches
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Candidate clusters / niches; names absent from the count table are dropped
# by the intersections below instead of raising
selected_clusters = [
    'B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono', 'MyoF',
]
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']
niche_groups_filtered = niche_groups.loc[
    niche_groups.index.intersection(selected_clusters),
    niche_groups.columns.intersection(selected_niches),
]

# Subset that is actually drawn, in display order. Every name must be present
# in `niche_groups_filtered`; otherwise the selection below raises KeyError.
cluster_order = [
    'B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'MyoF',
    'Mac', 'Mast',
]
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Raw-count heatmap: niches on the y-axis, clusters on the x-axis
plt.figure(figsize=(8, 3))
sns.heatmap(
    niche_groups_filtered.loc[cluster_order, niche_order].T,
    vmax=5000,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Cell Count'},
    xticklabels=cluster_order,
    yticklabels=niche_order,
)
plt.gca().set(
    title='Cell Type Distribution in Niches',
    xlabel='Cell Cluster',
    ylabel='Niche',
)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# NOTE(review): the data comes from `adata_healthy` but the file name says
# "perio" — confirm which is intended before re-enabling the save below.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.svg'
fig = plt.gcf()
#plt.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()


In [None]:
# Keep only the cells whose 'status.3' annotation is exactly "healthy"
adata_healthy = adata[adata.obs['status.3'].eq("healthy")]

In [None]:
# List the distinct niche labels stored in 'niche_knn50k10_merged'
adata.obs['niche_knn50k10_merged'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the healthy subset
df = pd.DataFrame(adata_healthy.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Count table: one row per Lvl4 cluster, one column per niche
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Display order of the heatmap rows (clusters) and columns (niches)
cluster_order = ['B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'Mac', 'MyoF']
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Per-cluster fractions; the row total covers every niche, plotted or not
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Select the plotted niches, then impose the row / column order
niche_groups_relative_filtered = (
    niche_groups_relative[niche_order]
    .reindex(index=cluster_order, columns=niche_order)
)

# Heatmap with niches along x and clusters along y
plt.figure(figsize=(4, 5))
sns.heatmap(
    niche_groups_relative_filtered,
    vmin=0.05,
    vmax=0.5,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Relative Frequency'},
    xticklabels=niche_order,
    yticklabels=cluster_order,
)
plt.gca().set(
    title='Relative Abundance of Cell Clusters Across Niches',
    xlabel='Niche',
    ylabel='Cell Cluster',
)
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Write the figure to disk, then display it
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_healthy_clusters_niches.pdf'
fig = plt.gcf()
plt.savefig(output_path, facecolor=fig.get_facecolor(), bbox_inches='tight', format='pdf', dpi=300)
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np  # You need this for np.log1p

# Per-cell metadata of the healthy subset
df = pd.DataFrame(adata_healthy.obs)
df['niche_knn50k10_merged'] = df['niche_knn50k10_merged'].astype('category')

# Cell counts with cell types (Lvl4) as rows and niches as columns
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl4']).size().unstack(fill_value=0).T

# Niches to keep, in display order (a niche absent from the data becomes a NaN column)
custom_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT','Fibrous CT']
niche_groups = niche_groups.reindex(columns=custom_order)

# Cell types to keep, in display order. Despite the variable name they end up
# on the x-axis after the transpose below. Raises KeyError if one is missing.
y_axis_clusters = ['B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'Mac', 'MyoF']
niche_groups_filtered = niche_groups.loc[y_axis_clusters]

# Transpose: niches become rows (y-axis), cell types become columns (x-axis)
niche_groups_flipped = niche_groups_filtered.T

# log(count + 1), applied to the whole table at once (replaces the deprecated
# element-wise DataFrame.applymap)
niche_groups_log = np.log1p(niche_groups_flipped)

# Counts labelled on the colorbar; the first and last value also define the
# colour limits so that every tick lies inside the displayed range
count_ticks = [50, 500, 1000]

# Plot heatmap
plt.figure(figsize=(7, 5))
sns.heatmap(
    niche_groups_log,
    vmax=np.log1p(count_ticks[-1]),
    vmin=np.log1p(count_ticks[0]),
    cmap='plasma',
    cbar_kws={'label': 'log(Count + 1)'},
    linewidths=0.05,
    linecolor='black'
)

# Axis labels follow the plotted matrix: columns are cell types, rows are niches
plt.title('Cell Type Distribution in Niches')
plt.xlabel('Cell Type')
plt.ylabel('Niche')
plt.xticks(rotation=90, ha='right')
plt.yticks(rotation=0)

# Put the colorbar ticks at log1p(count) but label them with the raw counts
cbar = plt.gca().collections[0].colorbar
log_ticks = [np.log1p(count) for count in count_ticks]
cbar.set_ticks(log_ticks)
cbar.ax.set_yticklabels([f'{count:,}' for count in count_ticks])

plt.tight_layout()
# NOTE(review): this path points to Figure2 / "IBEX_LV3" although the data is
# the Xenium healthy subset — confirm before re-enabling the save below.
output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/IBEX_LV3_Clusters_Niches_Log.pdf"
#plt.savefig(output_path, format='pdf')
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the healthy subset
df = pd.DataFrame(adata_healthy.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Raw cell counts: rows = Lvl4 clusters, columns = niches
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Candidate clusters / niches; names absent from the count table are dropped
# by the intersections below instead of raising
selected_clusters = [
    'B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono', 'MyoF',
]
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']
niche_groups_filtered = niche_groups.loc[
    niche_groups.index.intersection(selected_clusters),
    niche_groups.columns.intersection(selected_niches),
]

# Subset that is actually drawn, in display order. Every name must be present
# in `niche_groups_filtered`; otherwise the selection below raises KeyError.
cluster_order = [
    'B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'MyoF',
    'Mac', 'Mast',
]
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Raw-count heatmap: niches on the y-axis, clusters on the x-axis
plt.figure(figsize=(8, 3))
sns.heatmap(
    niche_groups_filtered.loc[cluster_order, niche_order].T,
    vmax=700,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Cell Count'},
    xticklabels=cluster_order,
    yticklabels=niche_order,
)
plt.gca().set(
    title='Cell Type Distribution in Niches',
    xlabel='Cell Cluster',
    ylabel='Niche',
)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# NOTE(review): the data comes from `adata_healthy` but the file name says
# "perio" — confirm which is intended before re-enabling the save below.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.svg'
fig = plt.gcf()
#plt.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()


In [None]:
# Re-read the processed Xenium object from disk. This rebinds `adata`, so any
# in-memory changes made to the previous object are no longer reachable through it.
adata = anndata.read_h5ad('/data/vasileiosionat2/Xenium/Drake_outputs/ccProcessed.h5ad')

In [None]:
# Display the per-cell metadata table of the freshly loaded object
adata.obs

In [None]:
# Load the Harmony-integrated Xenium + CITE-seq object (per its file name);
# later cells select its Xenium part via obs['assay'] == 'Xenium'.
merged = sc.read_h5ad('/data/vasileiosionat2/Xenium/Integration_2025_only_stromal/Modified_citeseq_files/Xenium_citeSeq_total_harmony.h5ad')

In [None]:
# Display the per-cell metadata table of the integrated object
merged.obs

In [None]:
import anndata as ad

# Copy the niche annotations from `adata` onto the matching cells of `merged`,
# one sample at a time. Requires `adata` and `merged` to be loaded already.
# Collects the per-sample subsets of `merged` after annotation
merged_samples = []

# Samples are taken from `adata`; samples that exist only in `merged` are
# therefore not part of the final `combined` object.
samples = adata.obs['sample'].unique()

for sample in samples:
    # Subset both `adata` and `merged` to the current sample
    adata_sample_subset = adata[adata.obs['sample'] == sample]
    merged_sample_subset = merged[merged.obs['sample'] == sample]

    # Reorder both subsets by sorted obs index. This fixes the row order of the
    # result; the column assignment below is a pandas Series assignment and is
    # presumably aligned on the obs index anyway — verify.
    adata_sample_subset = adata_sample_subset[adata_sample_subset.obs.sort_index().index]
    merged_sample_subset = merged_sample_subset[merged_sample_subset.obs.sort_index().index]

    # Transfer the niche columns from `adata` to `merged`.
    # NOTE(review): assumes obs names are shared between the two objects; cells
    # of `merged` without a counterpart in `adata` would end up as NaN — confirm.
    merged_sample_subset.obs['niche_knn50k10'] = adata_sample_subset.obs['niche_knn50k10']
    merged_sample_subset.obs['niche_knn50k10_merged'] = adata_sample_subset.obs['niche_knn50k10_merged']

    # Keep the annotated subset for the final concatenation
    merged_samples.append(merged_sample_subset)

# Concatenate all annotated subsets into one AnnData object (outer join)
combined = merged_samples[0].concatenate(*merged_samples[1:], join='outer')

# `combined` now holds the transferred niche columns for every sample found in `adata`

In [None]:
# Independent copy of the cells whose 'status.3' contains the substring 'perio'
# (missing values count as no match).
# NOTE(review): this is a substring match, whereas `adata_perio` is built with an
# exact `== "perio"` comparison — confirm both select the same cells.
combined_perio  = combined[combined.obs['status.3'].str.contains('perio', na=False)].copy()

In [None]:
# List the distinct niche labels present in the periodontitis subset
combined_perio.obs['niche_knn50k10_merged'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the periodontitis subset of the combined object
df = pd.DataFrame(combined_perio.obs)
df['final_label_citeSeq'] = df['final_label_citeSeq'].astype('category')

# Count table: one row per CITE-seq label, one column per niche
niche_groups = (
    df.groupby(['final_label_citeSeq', 'niche_knn50k10_merged'])
    .size()
    .unstack(fill_value=0)
)

# Candidate clusters / niches; names absent from the table are ignored by the
# intersections below
selected_clusters = [
    'B Atypical', 'B Memory 1', 'B Memory 2', 'B Naive', 'Plasma 1', 'Plasma 2',
    'Plasma 3', 'Plasmablast', 'CD4 TFH',
    'CD4 TFreg', 'mregDC', 'pDC', 'Fibroblast 1', 'Fibroblast 2',
    'Endothelial 2', 'Endothelial 1', 'Endothelial 3', 'Endothelial 4', 'Endothelial 5',
]
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Raw counts restricted to the candidates (not used by the plot in this cell)
niche_groups_filtered = niche_groups.loc[
    niche_groups.index.intersection(selected_clusters),
    niche_groups.columns.intersection(selected_niches),
]

# Per-cluster fractions; the row total covers every niche, plotted or not
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Display order of rows (clusters) and columns (niches)
cluster_order = [
    'B Atypical', 'B Memory 1', 'B Memory 2', 'B Naive', 'CD4 TFH', 'CD4 TFreg',
    'mregDC', 'Plasmablast', 'Plasma 1', 'Plasma 2',
    'Plasma 3', 'Fibroblast 2',
]
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Restrict the fractions to the candidates, then reindex into display order
# (names missing after the restriction become NaN rows / columns)
niche_groups_relative_filtered = niche_groups_relative.loc[
    niche_groups_relative.index.intersection(selected_clusters),
    niche_groups_relative.columns.intersection(selected_niches),
].reindex(index=cluster_order, columns=niche_order)

# Heatmap with niches along x and clusters along y
plt.figure(figsize=(2.8, 4))
sns.heatmap(
    niche_groups_relative_filtered,
    vmax=0.6,
    cmap='plasma',
    linewidths=0.05,
    linecolor='black',
    cbar_kws={'label': 'Relative Frequency'},
    xticklabels=niche_order,
    yticklabels=cluster_order,
)
plt.gca().set(
    title='Cell Type Distribution in Niches - Periodontitis',
    xlabel='Niche',
    ylabel='Cell Cluster',
)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Write the figure to disk, then display it
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Citeseq_perio_clusters_niches.svg'
fig = plt.gcf()
plt.savefig(output_path, facecolor=fig.get_facecolor(), bbox_inches='tight', format='svg', dpi=300)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Xenium cells of the integrated object, as an independent copy so that the
# column assignment below does not write into a view
xen_obj = merged[merged.obs['assay'] == 'Xenium'].copy()

# Treat the Xenium annotation as plain strings
xen_obj.obs['Lvl4'] = xen_obj.obs['Lvl4'].astype('str')

# Cross-tabulate all cells (no filtering yet):
# rows = predicted CITE-seq label, columns = Xenium Lvl4 cell type
celltype_counts = (
    xen_obj.obs
    .groupby(['Lvl4', 'citeSeq_to_Xenium_label'])
    .size()
    .unstack()
    .rename_axis(index='Xenium cell type', columns='Predicted citeSeq cell type')
    .T
)

# Turn each row into fractions of its own total; empty combinations become 0
celltype_counts = celltype_counts.div(celltype_counts.sum(axis=1), axis=0).fillna(0)

# Labels shown in the figure (the normalisation above used every label)
citeseq_clusters_to_plot = [
    'B Atypical', 'B Memory 1', 'B Memory 2', 'B Naive', 'Plasmablast', 'Plasma 1', 'Plasma 2',
    'Plasma 3', 'CD4 TFH', 'CD4 TFreg', 'mregDC', 'Fibroblast 2',
]
lvl4_clusters_to_plot = [
    'B', 'PB', 'Pl.1', 'Pl.2', 'Th', 'Th_proximity_to_B', 'mregDC',
    'Fib.1', 'Fib.2', 'Fib.5',
]

# Selection for plotting only; raises KeyError if a requested label is absent
filtered_counts = celltype_counts.loc[citeseq_clusters_to_plot, lvl4_clusters_to_plot]

# Heatmap of the selected labels (`fmt` only takes effect if annotations are enabled)
plt.figure(figsize=(7, 4))
sns.heatmap(
    filtered_counts,
    cmap='plasma',
    fmt=".2f",
    cbar=True,
    linewidths=0.04,
    linecolor='black',
)
plt.title("Xenium vs Predicted scRNAseq Heatmap (Selected Clusters)")
# Save the figure as a PDF file, then display it
plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure7/Xenium_Citeseq_clusters.pdf', format='pdf')
plt.show()

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt

# Font type 42 embeds TrueType fonts in PDF/PS output so text stays editable
# in vector editors such as Illustrator.
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Lvl4 clusters to show, in display order.
clusters_of_interest = ['B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2', 'Mac', 'MyoF']

# Genes to show, in display order.
correct_order = [
    'BANK1', 'HLA-DMB',
    'TRAC', 'CD4',
    'LAMP3', 'CCR7',
    'MZB1',
    'MDM2',
    'STAT3',
    'UBE2C', 'MKI67', 'VCAN', 'CXCL12', 'CCL19', 'C3',
    'CD14', 'CD163', 'STEAP4', 'ACTA2',
]

# Restrict to the selected clusters and make Lvl4 an ordered categorical so the
# plot follows `clusters_of_interest` instead of alphabetical order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Build the dot plot in one chain: construct, swap the axes, then restyle
# (grey colormap, no dot outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
        figsize=(5, 5),
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_Lvl4_TLS_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = ['NK', 'Th', 'Th_proximity_to_B', 'T.IL7Rhi', 'Tc', 'Treg', 'T.IE', 'T_proximity_to_Fib']

# Genes to show, in display order.
# NOTE(review): 'IL7R' is listed twice — confirm the duplicate is intended.
correct_order = [
    'NKG7', 'GNLY', 'DUSP2',
    'KLRD1', 'PRF1', 'IL7R',
    'CD2', 'TRAC', 'CD4',
    'PTPRC', 'NLRC5', 'CD3E',
    'CD69', 'SLAMF1', 'CD28', 'CD83', 'LAMP3', 'CCR7', 'IL7R',
    'MZB1', 'TENT5C', 'CCL5',
    'GZMK', 'GZMA', 'CD8A',
    'CTLA4', 'FOXP3', 'IL2RA',
    'KRT5', 'KRT19', 'C1R', 'VCAN',
]

# NOTE(review): this cell reads `adata_copy` while the sibling dot plot cells read
# `adata`; `adata_copy` must already exist in the kernel — confirm where it is created.
in_selected_clusters = adata_copy.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata_copy[in_selected_clusters].copy()

# Ordered categorical so groups follow `clusters_of_interest`.
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Saving is disabled in this cell; only the target path is defined.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Tcell_Lvl4_dotplot.pdf'
#dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = ['NK', 'Th', 'Th_proximity_to_B', 'T.IL7Rhi', 'Tc', 'Treg', 'T.IE', 'T_proximity_to_Fib']

# Genes to show, in display order.
correct_order = [
    'NKG7', 'GNLY', 'DUSP2',
    'KLRD1', 'PRF1', 'IL7R',
    'CD2', 'TRAC', 'CD4',
    'PTPRC', 'NLRC5', 'CD3E',
    'CD69', 'SLAMF1', 'CD28',
    'MZB1', 'TENT5C', 'CCL5',
    'GZMK', 'GZMA', 'CD8A',
    'CTLA4', 'FOXP3', 'IL2RA',
    'KRT5', 'KRT19', 'C1R', 'VCAN',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Tcell_Lvl4_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)


In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = ['B', 'Pl.2', 'Pl.1', 'PB']

# Genes to show, in display order.
correct_order = [
    'MS4A1', 'BANK1', 'HLA-DMB',
    'PTPRC', 'IRF8', 'CD19',
    'SELL', 'CD83', 'CXCR4', 'CD79A', 'MZB1',
    'IGHA1', 'TENT5C', 'SEC11C', 'DERL3', 'SLAMF7', 'MDM2',
    'TNFRSF17', 'ICAM1',
    'MEF2C', 'TCF4', 'CD27',
    'CYTIP', 'BCL2L11', 'STAT3',
    'UBE2C', 'MKI67',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Bcell_Lvl4_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = ['Lang', 'Mac', 'cDC1', 'Mono', 'cDC2', 'Mast', 'mregDC', 'pDC']

# Genes to show, in display order.
correct_order = [
    'CD207', 'HLA-DQB2', 'FCER1A', 'CD1A', 'HLA-DMB', 'HLA-DQA2', 'CD14',
    'CD163', 'MS4A4A', 'MS4A6A', 'MPEG1', 'FGL2', 'CLEC7A', 'ANPEP', 'CSF2RA', 'FCN1', 'VCAN', 'CCR2',
    'ITGB2', 'CLEC10A', 'CD1C', 'CPA3',
    'MS4A2', 'KIT', 'IL1RL1', 'CD83', 'LAMP3', 'CCR7', 'IL7R', 'IRF8', 'IL3RA', 'GZMB', 'MZB1',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Myeloid_Lvl4_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = [
    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5',
    'MyoF', 'SMC', 'VEC.1', 'VEC.2', 'VEC.3', 'VEC.4', 'VEC.5',
    'LEC',
]

# Genes to show, in display order.
# NOTE(review): 'APCDD1' is listed twice — confirm the duplicate is intended.
correct_order = [
    'COL16A1', 'VCAN', 'CXCL12', 'C1R', 'PRRX1', 'C3', 'CFB', 'MZB1',
    'IGHA1', 'PDGFRA', 'APCDD1', 'LTBP2', 'TBX3', 'ITGB5', 'BMP4', 'PDGFRB', 'STEAP4', 'ACTA2',
    'MYH11', 'MYLK', 'VWF', 'CD34', 'PECAM1', 'SELE', 'ACKR1', 'ICAM1',
    'DNASE1L3', 'EDNRB', 'APCDD1', 'TCIM', 'EDN1', 'SOX17', 'BTNL9',
    'MMRN1', 'PROX1', 'PDPN',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Stromal_Lvl4_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order (stromal set plus 'Neur').
clusters_of_interest = [
    'Neur', 'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5',
    'MyoF', 'SMC', 'VEC.1', 'VEC.2', 'VEC.3', 'VEC.4', 'VEC.5',
    'LEC',
]

# Genes to show, in display order.
correct_order = [
    'PCSK2', 'ITGB4', 'PMP22', 'SEMA3C', 'DST',
    'COL16A1', 'VCAN', 'CXCL12', 'C1R', 'PRRX1', 'C3', 'CFB', 'MZB1',
    'IGHA1', 'PDGFRA', 'APCDD1', 'LTBP2', 'TBX3', 'ITGB5', 'BMP4', 'PDGFRB', 'STEAP4', 'ACTA2', 'TFPI',
    'SH2D3C', 'MYH11', 'MYLK', 'VWF', 'CD34', 'PECAM1', 'CLEC14A', 'MCF2L', 'SELE', 'ACKR1', 'ICAM1',
    'DNASE1L3', 'EDNRB', 'KRT5', 'TCIM', 'GNG11', 'EDN1', 'SOX17', 'BTNL9',
    'MMRN1', 'PROX1', 'PDPN', 'ANGPT2',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Saving is disabled in this cell. The path equals the one used by the stromal
# dot plot cell without 'Neur', so enabling the save would overwrite that file.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Stromal_Lvl4_dotplot.pdf'
#dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
# Quick inspection of which expression matrices are attached to `adata`
# besides the main one, before choosing what the dot plots should read.
print(adata.raw)  # Show the `.raw` attribute (where raw counts would be stored, if any)
print(adata.layers.keys())  # List layer names, e.g. to spot a 'normalized' or 'log' layer

In [None]:
import scanpy as sc

# Lvl4 clusters to show, in display order.
clusters_of_interest = [
    'Ep.B.1', 'Ep.B.2', 'Ep.prol', 'Ep.B-PB', 'Ep.PB.1', 'Ep.PB.2', 'Ep.PB.3', 'Ep.Sp', 'Ep.K',
    'Ep.Cr.Sp.1', 'Ep.Cr.Sp.2', 'Ep.Cr.B', 'Ep.Cr.PB',
]

# Genes to show, in display order.
correct_order = [
    'KRT5', 'EHF', 'CLEC7A', 'COL17A1', 'DST', 'ITGB4', 'CXCL14', 'C1R', 'IL1R2',
    'CAVIN1', 'PDPN', 'BASP1', 'SLC26A2', 'MYC', 'CFH', 'PTN',
    'MKI67', 'CDK1', 'AQP3', 'KRT19', 'FGFBP1', 'S100A16', 'LY6D', 'SDC1',
    'CYP4B1', 'MAMDC2', 'SERPINB3', 'NOD2', 'SLPI', 'IL1RN', 'ANXA1', 'ERBB2',
    'CNFN', 'IL36G', 'SERPINB2', 'C15orf48', 'IL36A',
    'ODAM', 'ODAPH', 'LAMC2', 'CXCL1', 'CXCL2', 'CXCL6', 'IL1A',
    'DNASE1L3', 'SAA1', 'SAA2', 'CFHR3',
]

# Restrict to the selected clusters; ordered categorical keeps the listed order.
in_selected_clusters = adata.obs['Lvl4'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters].copy()
lvl4_dtype = pd.CategoricalDtype(categories=clusters_of_interest, ordered=True)
adata_subset.obs['Lvl4'] = adata_subset.obs['Lvl4'].astype(lvl4_dtype)

# Construct, swap axes, then restyle (grey colormap, no outlines, no grid).
dotplot = (
    sc.pl.DotPlot(
        adata_subset,
        var_names=correct_order,
        groupby='Lvl4',
        standard_scale='var',   # scale each gene across the groups
        vmin=0.2,
        vmax=0.8,
    )
    .swap_axes(swap_axes=True)
    .style(
        cmap="Greys",
        dot_edge_color=None,
        dot_edge_lw=0,
        grid=False,
        dot_min=0.1,
        dot_max=0.8,
    )
)

dotplot.show()

# Save with a transparent background; the target file name ends in .pdf.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_Epi_Lvl4_dotplot.pdf'
dotplot.savefig(output_path, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
import pandas as pd

# Region-area table as exported upstream (one row per sample/region is assumed
# by the logic below — TODO confirm against the CSV).
df = pd.read_csv("/data/vasileiosionat2/Xenium/Xenium_area_calculation/expandedArea_cellLevel_metadata_HP_VT.csv")

# One extra row per sample holding the sum of its Region_Area values,
# labelled with Region == "Total".
total_area = (
    df.groupby("sample", as_index=False)["Region_Area"]
    .sum()
    .assign(Region="Total")
)

# Stack the "Total" rows under the original rows.
df = pd.concat([df, total_area], ignore_index=True)

# Within each sample, list Epi first, then CT, then Total. Any other region
# name maps to NaN and therefore sorts after these three.
region_order = {"Epi": 1, "CT": 2, "Total": 3}
df = (
    df.assign(Region_order=df["Region"].map(region_order))
    .sort_values(["sample", "Region_order"])
    .drop(columns="Region_order")
)

# Write the extended table next to the input file.
df.to_csv("/data/vasileiosionat2/Xenium/Xenium_area_calculation/expandedArea_cellLevel_metadata_HP_VT_new.csv", index=False)

print(df)


In [None]:
import pandas as pd

# Area table that already contains the per-sample "Total" rows.
df = pd.read_csv("/data/vasileiosionat2/Xenium/Xenium_area_calculation/expandedArea_cellLevel_metadata_HP_VT_new.csv")

# Split the table into three tables, one per value of the Region column.
region_labels = df['Region']
Total_df = df[region_labels == 'Total']
Epi_df = df[region_labels == 'Epi']
CT_df = df[region_labels == 'CT']

# One CSV per region; these files feed the cells-per-area calculations.
Total_df.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Total_area_merged_calculations.csv', index=False)
Epi_df.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Epi_area_merged_calculations.csv', index=False)
CT_df.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_CT_area_merged_calculations.csv', index=False)

for region_table in (Total_df, Epi_df, CT_df):
    print(region_table)

In [None]:
# Start the 'PatientID_Atlas' column as a copy of the 'pt.id' column; the
# following cells rewrite its values to the labels used in the figures.
adata.obs['PatientID_Atlas']=adata.obs['pt.id']

In [None]:
# Relabel the 'HV###' identifiers in 'PatientID_Atlas' as 'HC-<n>' / 'CP-<n>'.
# The position of an ID in each list determines its number (first entry -> 1).
hc_source_ids = ['HV171', 'HV180', 'HV189', 'HV191', 'HV203', 'HV207', 'HV220', 'HV187', 'HV205']
cp_source_ids = ['HV137', 'HV140', 'HV154', 'HV160', 'HV181', 'HV196', 'HV204', 'HV214', 'HV219',
                 'HV184', 'HV185', 'HV188', 'HV192', 'HV198', 'HV211']

replacement_dict = {hv_id: f'HC-{n}' for n, hv_id in enumerate(hc_source_ids, start=1)}
replacement_dict.update({hv_id: f'CP-{n}' for n, hv_id in enumerate(cp_source_ids, start=1)})

# Work on plain strings, then swap every value that appears as a key in the
# mapping; values not in the mapping are left as they are.
adata.obs['PatientID_Atlas'] = (
    adata.obs['PatientID_Atlas']
    .astype(str)
    .replace(replacement_dict)
)

# Show the resulting labels to confirm the relabelling.
print(adata.obs['PatientID_Atlas'].unique())


In [None]:
# Shorten the 'PatientID_Atlas' labels: 'HC-<n>' -> 'H-<n>' for n = 1..9 and
# 'CP-<n>' -> 'P-<n>' for n = 1..15.
replacement_dict = {f'HC-{n}': f'H-{n}' for n in range(1, 10)}
replacement_dict.update({f'CP-{n}': f'P-{n}' for n in range(1, 16)})

# Work on plain strings, then swap every value that appears as a key in the
# mapping; values not in the mapping are left as they are.
adata.obs['PatientID_Atlas'] = (
    adata.obs['PatientID_Atlas']
    .astype(str)
    .replace(replacement_dict)
)

# Show the resulting labels to confirm the relabelling.
print(adata.obs['PatientID_Atlas'].unique())

In [None]:
# List the distinct patient labels after relabelling.
# The original line called `adata.obsunique()`, which is not an attribute of the
# object and raises AttributeError; a column selection was missing.
# NOTE(review): 'PatientID_Atlas' is assumed to be the intended column because the
# preceding cells edit it — confirm.
adata.obs['PatientID_Atlas'].unique().tolist()

In [None]:
# List the distinct values of the 'sample' column (useful when choosing which
# sample names to put in an exclusion list).
adata.obs['sample'].unique().tolist()

In [None]:
# Cell counts per sample for each Lvl1 annotation, with a fixed list of samples
# removed before saving.
# NOTE(review): the original header mentions excluding HV160D, HV171B and HV140A2,
# which does not match the names in `exclude_samples` below — confirm which is current.
annotation = "Lvl1"
loc = "region"
status = "status.3"
sample_id = "sample"
patient_id = "PatientID_Atlas"
meta = adata.obs

key_columns = [patient_id, sample_id, annotation, status, loc]

# Distinct (patient, sample, annotation, status, region) combinations, as strings.
unique_combinations = meta[key_columns].drop_duplicates().astype(str)

# Count cells once per combination with a single groupby.
# The previous loop counted cells by (annotation, sample) only, yet ran once per
# region; an annotation present in several regions of a sample was therefore
# added several times when summed below. Counting per combination (region
# included) makes the per-sample sum equal the real number of cells.
# Rows without an annotation or sample are left out, as the loop skipped them.
counted_keys = meta.dropna(subset=[annotation, sample_id])[key_columns].astype(str)
result_table = (
    counted_keys
    .groupby(key_columns)
    .size()
    .reset_index(name='total.cells')
    .rename(columns={
        sample_id: 'sample',
        patient_id: 'patient',
        status: 'status',
        loc: 'loc',
        annotation: 'annotation',
    })
    [['sample', 'patient', 'status', 'loc', 'annotation', 'total.cells']]
    .sort_values(by=['annotation', 'status'])
    .reset_index(drop=True)
)

# Total cells per patient / sample / annotation / status (regions summed).
grouped = result_table.groupby(['patient', 'sample', 'annotation', 'status']).agg(
    total_cells_pt=('total.cells', 'sum')
).reset_index()

# Drop the samples that must not enter the downstream statistics.
exclude_samples = ['s2r9_a', 's2r9_b', 'HV140A_C', 'HV140A_D', 's2r4_a', 's2r4_b', 's2r5', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211',
                   'HV192A_A', 'HV192A_B', 'HV192A_C']
grouped_filtered = grouped[~grouped['sample'].isin(exclude_samples)]

# Save the filtered table; the row index is written too, as before, so the file
# layout read by later cells is unchanged.
grouped_filtered.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_filtered.csv')

# Show the filtered table, then the unfiltered one.
print(grouped_filtered)
print(grouped)

In [None]:
import pandas as pd

# Per-sample Lvl1 cell counts and per-sample total areas.
df1 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_filtered.csv')
df2 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Total_area_merged_calculations.csv')

# Attach each sample's area to its count rows; samples without an area row keep NaN.
df_merged = df1.merge(df2[['sample', 'Region_Area']], on='sample', how='left')

# Rescale the area and derive the density per section.
# NOTE(review): the divisor is 1000, while the original comment spoke of dividing by
# 1,000,000 to reach mm² — the unit of Region_Area is not visible here; confirm the factor.
df_merged = df_merged.assign(
    Region_Area_mm2=lambda table: table['Region_Area'] / 1000,
    cells_per_mm2=lambda table: table['total_cells_pt'] / table['Region_Area_mm2'],
)
df_merged.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_per_area(Section).csv')

# Preview of the result.
print(df_merged.head())

In [None]:
import pandas as pd

# Per-sample Lvl1 cell counts and per-sample total areas.
df1 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_filtered.csv')
df2 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Total_area_merged_calculations.csv')

# Attach each sample's area to its count rows.
df_merged2 = pd.merge(df1, df2[['sample', 'Region_Area']], on='sample', how='left')

# Rescale the area.
# NOTE(review): the divisor is 1000; the unit of Region_Area is not visible here —
# confirm this factor really yields mm².
df_merged2['Region_Area_mm2'] = df_merged2['Region_Area'] / 1000

# Area per patient: every sample of the patient counted exactly once.
# Summing the area over the (sample, annotation) rows, as done before, left out
# the area of any sample that has no cells of a given annotation, so densities
# of different annotations were computed over different areas.
patient_area = (
    df_merged2
    .drop_duplicates(subset=['patient', 'status', 'sample'])
    .groupby(['patient', 'status'], as_index=False)[['Region_Area', 'Region_Area_mm2']]
    .sum()
)

# Cells per patient / annotation / status, summed over the patient's samples.
patient_cells = df_merged2.groupby(['patient', 'annotation', 'status'], as_index=False)['total_cells_pt'].sum()

# Combine counts and areas (same column layout as before) and compute the density.
df_merged2 = patient_cells.merge(patient_area, on=['patient', 'status'], how='left')
df_merged2['cells_per_mm2'] = df_merged2['total_cells_pt'] / df_merged2['Region_Area_mm2']

# Save the patient-level table.
df_merged2.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_per_area(Patient).csv')

# Display the patient-level table.
df_merged2

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Patient-level Lvl1 densities produced by the cells-per-area step.
df = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl1_total_cells_per_area(Patient).csv')

# Stacking order of the Lvl1 annotations and the colour of each.
anticipated_order = ['Other', 'Epithelial', 'Immune', 'Fibroblast', 'Vascular']
colors = {
    'Other': '#e5e5e5',
    'Epithelial': '#FFEA00',
    'Immune': '#008000',
    'Fibroblast': '#0000FF',
    'Vascular': '#990F0FFF',
}

# One row per patient, one column per annotation, values = cells_per_mm2.
df_pivot = df.pivot_table(index='patient', columns='annotation', values='cells_per_mm2', aggfunc='sum')

# Fix the column (stack) order; raises KeyError if an annotation is missing.
df_pivot = df_pivot[anticipated_order]

# Fix the patient order on the x-axis; raises KeyError if a patient is missing.
manual_order = ['H-1', 'H-2', 'H-4', 'H-5', 'H-6', 'H-7', 'H-8', 'H-9',
                'P-1', 'P-2', 'P-3', 'P-4', 'P-6', 'P-7', 'P-8', 'P-9', 'P-10', 'P-11', 'P-14']
df_pivot = df_pivot.loc[manual_order]

# Draw on one explicit figure. Previously an empty figure was opened with
# plt.figure() and never used, because DataFrame.plot(figsize=...) created its own.
fig, ax = plt.subplots(figsize=(20, 10))
df_pivot.plot(
    kind='bar',
    stacked=True,
    ax=ax,
    color=[colors[cluster] for cluster in anticipated_order],
    width=0.9,
)

# Axes cosmetics: white background, integer tick labels, only left/bottom spines.
ax.set_facecolor('white')
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{int(x)}'))
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_color('black')

ax.set_xlabel('Patient ID')
# The bars show the cells_per_mm2 column, so the axis is labelled as a density
# (it was labelled 'Total Cells' before).
ax.set_ylabel('Cells per mm²')
plt.xticks(fontsize=20, rotation=45, ha='right')
plt.yticks(fontsize=20)

# Legend built from round markers in the annotation colours, placed outside the axes.
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[label], markersize=10)
                         for label in anticipated_order]
ax.legend(sorted_legend_handles, anticipated_order, loc='upper left', bbox_to_anchor=(1, 1), frameon=False)

# Tighten the layout, save as PDF and display.
fig.tight_layout()
fig.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure1/Xenium_Lvl1_barplot_per_patient(cells_per_area).pdf', format='pdf')
plt.show()

In [None]:
# Cell counts per sample for each Lvl2.5 annotation, with a fixed list of samples
# removed before saving.
# NOTE(review): the original header mentions excluding HV160D, HV171B and HV140A2,
# which does not match the names in `exclude_samples` below — confirm which is current.
annotation = "Lvl2.5"
loc = "region"
status = "status.3"
sample_id = "sample"
patient_id = "PatientID_Atlas"
meta = adata.obs

key_columns = [patient_id, sample_id, annotation, status, loc]

# Distinct (patient, sample, annotation, status, region) combinations, as strings.
unique_combinations = meta[key_columns].drop_duplicates().astype(str)

# Count cells once per combination with a single groupby.
# The previous loop counted cells by (annotation, sample) only, yet ran once per
# region; an annotation present in several regions of a sample was therefore
# added several times when summed below. Counting per combination (region
# included) makes the per-sample sum equal the real number of cells.
# Rows without an annotation or sample are left out, as the loop skipped them.
counted_keys = meta.dropna(subset=[annotation, sample_id])[key_columns].astype(str)
result_table = (
    counted_keys
    .groupby(key_columns)
    .size()
    .reset_index(name='total.cells')
    .rename(columns={
        sample_id: 'sample',
        patient_id: 'patient',
        status: 'status',
        loc: 'loc',
        annotation: 'annotation',
    })
    [['sample', 'patient', 'status', 'loc', 'annotation', 'total.cells']]
    .sort_values(by=['annotation', 'status'])
    .reset_index(drop=True)
)

# Total cells per patient / sample / annotation / status (regions summed).
grouped = result_table.groupby(['patient', 'sample', 'annotation', 'status']).agg(
    total_cells_pt=('total.cells', 'sum')
).reset_index()

# Drop the samples that must not enter the downstream statistics.
exclude_samples = ['s2r9_a', 's2r9_b', 'HV140A_C', 'HV140A_D', 's2r4_a', 's2r4_b', 's2r5', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211',
                   'HV192A_A', 'HV192A_B', 'HV192A_C']
grouped_filtered = grouped[~grouped['sample'].isin(exclude_samples)]

# Save the filtered table; the row index is written too, as before, so the file
# layout read by later cells is unchanged.
grouped_filtered.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_filtered.csv')

# Show the filtered table, then the unfiltered one.
print(grouped_filtered)
print(grouped)

In [None]:
import pandas as pd

# Per-sample Lvl2.5 cell counts and per-sample total areas.
df1 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_filtered.csv')
df2 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Total_area_merged_calculations.csv')

# Attach each sample's area to its count rows; samples without an area row keep NaN.
df_merged = df1.merge(df2[['sample', 'Region_Area']], on='sample', how='left')

# Rescale the area and derive the density per section.
# NOTE(review): the divisor is 1000, while the original comment spoke of dividing by
# 1,000,000 to reach mm² — the unit of Region_Area is not visible here; confirm the factor.
df_merged = df_merged.assign(
    Region_Area_mm2=lambda table: table['Region_Area'] / 1000,
    cells_per_mm2=lambda table: table['total_cells_pt'] / table['Region_Area_mm2'],
)
df_merged.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_per_area(Section).csv')

# Preview of the result.
print(df_merged.head())

In [None]:
import pandas as pd

# Inputs: filtered per-sample cell totals and the per-sample region areas
df1 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_filtered.csv')
df2 = pd.read_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Total_area_merged_calculations.csv')

# Left join on 'sample': every row of df1 is kept and gains the sample's Region_Area
df_merged2 = df1.merge(df2[['sample', 'Region_Area']], how='left', on='sample')

# NOTE(review): divisor is 1000; the sibling per-section cell's comment mentions
# 1,000,000 for a mm² conversion. Unit of Region_Area is not visible -- confirm.
df_merged2 = df_merged2.assign(Region_Area_mm2=df_merged2['Region_Area'] / 1000)

# Collapse samples: one row per (patient, annotation, status) holding the summed
# cell totals and the summed areas of the rows that were merged
df_merged2 = (
    df_merged2
    .groupby(['patient', 'annotation', 'status'])
    .agg(
        total_cells_pt=('total_cells_pt', 'sum'),
        Region_Area=('Region_Area', 'sum'),
        Region_Area_mm2=('Region_Area_mm2', 'sum'),
    )
    .reset_index()
)

# Density from the pooled totals
df_merged2['cells_per_mm2'] = df_merged2['total_cells_pt'] / df_merged2['Region_Area_mm2']

# Persist the per-patient table
df_merged2.to_csv('/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_per_area(Patient).csv')

# Display the aggregated table as the cell output
df_merged2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests

# Load the per-patient cell-density table
file_path = '/data/vasileiosionat2/Xenium/Xenium_area_calculation/Xenium_Lvl2.5_total_cells_per_area(Patient).csv'  # Update with your actual file path
df = pd.read_csv(file_path)  # Adjust delimiter if necessary

# Keep only the columns used below ('patient' identifies the individual points)
df = df[['annotation', 'status', 'cells_per_mm2', 'patient']]

# One panel per annotation
annotations = df['annotation'].unique()

# Fixed number of rows; enough columns to hold every annotation
n_rows = 4
n_cols = (len(annotations) + n_rows - 1) // n_rows

fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.5, n_rows * 2))
axes = axes.flatten()  # Flatten the axes array to easily index

# Raw p-values, one per annotation, collected for multiple-testing correction
p_values = []

# Category order on the x axis (the same for every panel)
order = ["healthy", "perio"]

for idx, annotation in enumerate(annotations):
    df_annotation = df[df['annotation'] == annotation]

    # Welch's t-test (unequal variances) between the two status groups
    healthy_data = df_annotation[df_annotation['status'] == 'healthy']['cells_per_mm2']
    perio_data = df_annotation[df_annotation['status'] == 'perio']['cells_per_mm2']
    t_stat, p_val = stats.ttest_ind(healthy_data, perio_data, equal_var=False)
    p_values.append(p_val)

    # Box per status group; outlier markers are hidden because every point is
    # drawn by the strip plot below
    sns.boxplot(data=df_annotation, x="status", y="cells_per_mm2",
                palette={"healthy": "#006A8E", "perio": "#B1283A"}, width=0.7,
                fliersize=0, dodge=True, order=order, ax=axes[idx])

    # Individual data points (jittered for visibility)
    sns.stripplot(data=df_annotation, x="status", y="cells_per_mm2",
                  palette={"healthy": "black", "perio": "black"}, dodge=True,
                  jitter=True, alpha=0.7, marker="o", edgecolor="black",
                  linewidth=0.1, order=order, ax=axes[idx])

    # Panel cosmetics. No legend is requested: nothing drawn above carries a
    # label, so a legend would be empty and only trigger a warning.
    axes[idx].set_xlabel("")
    axes[idx].set_ylabel("Cells per mm²")
    axes[idx].set_title(f"{annotation}")
    axes[idx].grid(axis="y", linestyle="--", alpha=0.7)

# Hide grid slots that have no annotation assigned to them
for ax in axes[len(annotations):]:
    ax.set_visible(False)

# Benjamini-Hochberg (FDR) correction across annotations. Tests that could not
# be computed (NaN, e.g. a status group with too few values) are left out of the
# correction; otherwise a single NaN would turn every adjusted value into NaN.
raw_p = np.asarray(p_values, dtype=float)
corrected_p_values = np.full(raw_p.shape, np.nan)
testable = np.isfinite(raw_p)
if testable.any():
    corrected_p_values[testable] = multipletests(raw_p[testable], method='fdr_bh')[1]

# Write the adjusted p-value at the top of each panel
for idx, annotation in enumerate(annotations):
    p_val = corrected_p_values[idx]
    p_label = f'P = {p_val:.3f}' if np.isfinite(p_val) else 'P = n/a'
    axes[idx].text(0.5, 0.95, p_label, transform=axes[idx].transAxes, ha='center', va='top', fontsize=10, color='black')

# Adjust layout to ensure labels and titles fit
plt.tight_layout()
# NOTE(review): the file name contains an unmatched ')' before '.pdf'; it is left
# unchanged in case other steps refer to this exact path.
plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Xenium_Lvl2.5_total_clusters_per_area).pdf', format='pdf')
# Show the figure
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Work on the cell-level annotation table
df = pd.DataFrame(adata.obs)
# NOTE(review): this column is not used anywhere in this cell; depending on pandas
# copy semantics it may also appear in adata.obs -- confirm whether a later cell
# relies on it before removing the line.
df['spatial_expression_cluster'] = df['KNN50_K10'].astype('category')

# Count cells per (niche, cell type): rows are niches, columns are Lvl4 cell types
niche_groups = df.groupby(['niche_knn50k10', 'Lvl4']).size().unstack(fill_value=0)

# Cell-type labels to leave out of the plot: anything containing 'Mix' or starting
# with 'Oth.', 'Neuro' or 'Unclea'. na=False treats missing labels as "no match"
# so the boolean mask never contains NaN.
lvl4_labels = df['Lvl4'].str
is_excluded_label = (
    lvl4_labels.contains('Mix', na=False)
    | lvl4_labels.startswith('Oth.', na=False)
    | lvl4_labels.startswith('Neuro', na=False)
    | lvl4_labels.startswith('Unclea', na=False)
)
clusters_to_remove = df.loc[is_excluded_label, 'Lvl4'].unique()

# Drop those cell types (columns); labels that are not present are ignored
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')

# Heatmap with the colour scale capped at 4000 cells
plt.figure(figsize=(25, 10))  # Wide figure to accommodate the x-axis labels
sns.heatmap(niche_groups_filtered, cmap='inferno', cbar_kws={'label': 'Count', 'ticks': [0, 1000, 2000, 4000], 'format': '%.0f'}, linewidths=0.05, linecolor='black', vmax=4000)
plt.title('Cell Type Distribution in Niches')
# Columns of the table (x axis) are cell types and rows (y axis) are niches; the
# two axis labels were previously swapped.
plt.xlabel('Cell Type')
plt.ylabel('Niche Cluster')
plt.xticks(rotation=45, ha='right')  # Adjust rotation and alignment of xticks
plt.yticks(rotation=0)

# Relabel the colour-bar ticks; the top tick reads '>4,000' because of the vmax cap
cbar = plt.gca().collections[0].colorbar
cbar.set_ticks([0, 1000, 2000, 4000])
cbar.ax.set_yticklabels(['0', '1,000', '2,000', '>4,000'])

plt.tight_layout()  # Ensures all elements fit within the figure area
plt.show()


In [None]:
# Show the distinct values of adata.obs['niche_knn50k10'] as a plain list (cell output)
adata.obs['niche_knn50k10'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): this rebinds the name `sm`, which the first cell of the notebook
# binds to the top-level `scimap` package; cells run after this one see
# `scimap.plotting` under that name.
import scimap.plotting as sm
import matplotlib.ticker as mtick
from matplotlib.lines import Line2D

# List of SampleIDs to exclude
excluded_sample_ids = ['s2r9_a', 's2r9_b', 'HV140A_C', 'HV140A_D', 's2r4_a', 's2r4_b', 's2r5', 's2r8_HV188_a', 's2r8_HV188_b', 's2r8_HV211',
                       'HV192A_A', 'HV192A_B', 'HV192A_C']

# Independent copy of the AnnData object without the excluded samples
adata_subset = adata[~adata.obs['sample'].isin(excluded_sample_ids)].copy()

# Create a new figure with custom dimensions
# NOTE(review): stacked_barplot below receives its own figsize; whether it draws
# into this figure or opens a new one depends on scimap -- confirm this call is needed.
plt.figure(figsize=(50, 6))

# Stacking order of the niche categories
anticipated_order = ['Spinous', 'Keratin', 'Crevicular', 'Fib CT 1', 'Fib CT 2', 'Epi-CT 1', 'Epi-CT 2', 'Lymphoid', 'Plasma-Fib CT', 'Plasma']

# Make the niche column categorical with exactly that category order
# (reorder_categories raises if the existing categories differ from the list)
adata_subset.obs['niche_knn50k10'] = adata_subset.obs['niche_knn50k10'].astype('category')
adata_subset.obs['niche_knn50k10'] = adata_subset.obs['niche_knn50k10'].cat.reorder_categories(anticipated_order)

# Sort the cells by niche by re-indexing the whole AnnData object. Assigning a
# sorted DataFrame to `.obs` would reorder only the annotation rows and leave
# them misaligned with the expression matrix.
row_order = np.argsort(adata_subset.obs['niche_knn50k10'].cat.codes.to_numpy(), kind='stable')
adata_subset = adata_subset[row_order].copy()

# Colour assigned to each niche category
colors = {
    'Fib CT 1': 'red',
    'Fib CT 2': 'blue',
    'Epi-CT 1': '#00FFFF',
    'Epi-CT 2': 'orange',
    'Plasma-Fib CT': '#CF9FFF',
    'Plasma': 'magenta',
    'Spinous': '#5D3FD3',
    'Keratin': '#0096FF',
    'Crevicular': '#800080',
    'Lymphoid': 'yellow',
}

# Stacked bar plot: one bar per 'status.3' value, split by niche, as percentages
plot = sm.stacked_barplot(adata_subset, x_axis='status.3', y_axis='niche_knn50k10',
                          order_yaxis=anticipated_order, method='percent', plot_tool='matplotlib', color=colors, figsize=(2, 6))

# Get the current axes and set its background color to white
plt.gca().set_facecolor('white')

# Format y-axis ticks as percentages
plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

# Get the x-axis limits
x_min, x_max = plt.xlim()

# Dashed horizontal guide lines at 0, 25, 50, 75 and 100 %
plt.hlines(y=[0, 0.25, 0.5, 0.75, 1], xmin=x_min, xmax=x_max, linestyle='--', colors='grey')

# Set y-axis tick labels to match the percentages
plt.yticks([0, 0.25, 0.5, 0.75, 1], ['0%', '25%', '50%', '75%', '100%'])

# Hide the top/right borders and draw the remaining ones in black
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['bottom'].set_color('black')
plt.gca().spines['left'].set_color('black')

# Extend the lower y limit slightly so the bars do not sit on the axis line
y_min, y_max = plt.ylim()
plt.ylim(y_min - 0.03, y_max)

# Remove the x-axis title
plt.xlabel('')

# Increase font size of the tick labels
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

# Rotate x-axis labels diagonally
plt.xticks(rotation=45, ha='right')

# Legend entries as coloured circles, listed in reverse stacking order. The labels
# are materialised as a list: a bare reversed() iterator can be consumed only once.
sorted_legend_handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[category], markersize=10)
                         for category in reversed(anticipated_order)]
sorted_legend_labels = list(reversed(anticipated_order))

# Place the legend outside the plot area, to the right
legend = plt.legend(sorted_legend_handles, sorted_legend_labels, loc='upper left', bbox_to_anchor=(1, 1), markerscale=1, frameon=False, borderpad=2)

# Save the plot as a PDF file (currently disabled)
#plt.savefig('/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure4/Xenium_niche_stacked_barplot.pdf', format='pdf')

# Display the plot
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 'HV205B'
cluster_key = 'niche_knn50k10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 'HV192A_C'
cluster_key = 'niche_knn50k10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 'HV205B'
cluster_key = 'Lvl3'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
# Show the distinct column names of adata.obs as a plain list (cell output)
adata.obs.columns.unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 'HV205B'
cluster_key = 'KNN10_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 's2r1_HV207'
cluster_key = 'KNN10_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree when this cell is reused for another sample or column.
sample_name = 's2r2_HV184'
cluster_key = 'KNN10_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample to draw and the adata.obs column whose labels are highlighted, one label
# per panel. Declared once so the subset filter, the masks and the figure title
# always agree (the title was previously hard-coded to a different sample).
sample_name = 's1r2'
cluster_key = 'niche_cc14'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Fail early with a readable message instead of an opaque subplot-grid error
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample {sample_name!r}")

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Labels present in this sample, sorted; each one gets its own panel
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Grid layout: fixed number of rows, as many columns as needed
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D array so panels can be addressed by a single index

# Rotate the centroid coordinates by rotation_angle degrees (2-D rotation formula)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell that does not carry the current label (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(new_x_coords[mask_other.to_numpy()], new_y_coords[mask_other.to_numpy()],
               c='#C0C0C0', s=3)

    # Foreground layer: cells carrying the current label (red, larger markers)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(new_x_coords[mask_cluster.to_numpy()], new_y_coords[mask_cluster.to_numpy()],
               c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Write the label near the bottom edge of the panel
    ax.text(0.5, 0.02, f'{cluster}', horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, color='black', fontsize=20, weight='bold')

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.1, hspace=0.05)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and annotation column shown in this figure. Both are named once here so
# that the subsetting, the per-panel masks and the figure title cannot drift
# apart (the title previously still named a different sample).
sample_name = 'HV205B'
cluster_key = 'Lvl4'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Get unique clusters
unique_clusters = adata_sample.obs[cluster_key].unique()

# Reorder clusters (modify this if you have a custom order)
ordered_clusters = sorted(unique_clusters)

# Define number of rows and columns for subplots
num_rows = 12
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))  # Calculate columns dynamically

# Create the figure
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')

# Flatten the axes array for easier indexing
axes = axes.flatten()

# Get original coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']

# Define rotation angle in degrees
rotation_angle = 45  # Modify if needed

# Apply rotation transformation (counter-clockwise rotation about the origin)
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Determine aspect ratio
# NOTE(review): passing x_range / y_range to ax.set_aspect() gives roughly square
# panels rather than equal x/y scaling; use ax.set_aspect('equal') if true tissue
# proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster: the cluster in red on top of all other cells in grey
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # Set white background
    ax.set_facecolor('white')

    # Remove subplot outline
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Scatter plot for all other clusters (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(
        new_x_coords[mask_other.to_numpy()],
        new_y_coords[mask_other.to_numpy()],
        c='#C0C0C0',
        s=3
    )

    # Scatter plot for the current cluster (red)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(
        new_x_coords[mask_cluster.to_numpy()],
        new_y_coords[mask_cluster.to_numpy()],
        c='red',
        s=9
    )

    # Set aspect ratio
    ax.set_aspect(aspect_ratio)

    # Add cluster name near the bottom edge of each panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Add title (built from the sample actually plotted)
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and annotation column shown in this figure. Both are named once here so
# that the subsetting, the per-panel masks and the figure title cannot drift
# apart (the title previously still named a different sample).
sample_name = 's2r3'
cluster_key = 'niche_cc14'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Get unique clusters
unique_clusters = adata_sample.obs[cluster_key].unique()

# Reorder clusters (modify this if you have a custom order)
ordered_clusters = sorted(unique_clusters)

# Define number of rows and columns for subplots
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))  # Calculate columns dynamically

# Create the figure
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')

# Flatten the axes array for easier indexing
axes = axes.flatten()

# Get original coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']

# Define rotation angle in degrees
rotation_angle = 45  # Modify if needed

# Apply rotation transformation (counter-clockwise rotation about the origin)
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Determine aspect ratio
# NOTE(review): passing x_range / y_range to ax.set_aspect() gives roughly square
# panels rather than equal x/y scaling; use ax.set_aspect('equal') if true tissue
# proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster: the cluster in red on top of all other cells in grey
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # Set white background
    ax.set_facecolor('white')

    # Remove subplot outline
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Scatter plot for all other clusters (gray)
    mask_other = adata_sample.obs[cluster_key] != cluster
    ax.scatter(
        new_x_coords[mask_other.to_numpy()],
        new_y_coords[mask_other.to_numpy()],
        c='#C0C0C0',
        s=3
    )

    # Scatter plot for the current cluster (red)
    mask_cluster = adata_sample.obs[cluster_key] == cluster
    ax.scatter(
        new_x_coords[mask_cluster.to_numpy()],
        new_y_coords[mask_cluster.to_numpy()],
        c='red',
        s=9
    )

    # Set aspect ratio
    ax.set_aspect(aspect_ratio)

    # Add cluster name near the bottom edge of each panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Add title (built from the sample actually plotted)
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure
plt.show()
plt.close(fig)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of `adata_perio` (must have been created by an earlier cell)
df = pd.DataFrame(adata_perio.obs)
df['spatial_expression_cluster'] = df['niche_cc14'].astype('category')

# Count cells per (niche, cell type): rows are niches, columns are Lvl5 cell types
niche_groups = df.groupby(['niche_cc14', 'Lvl5']).size().unstack(fill_value=0)

# Cells whose Lvl5 label marks a mixed / other / neuronal / unclear population.
# na=False makes missing labels count as "no match" instead of producing NaN
# entries that cannot be used as a boolean mask.
lvl5_str = df['Lvl5'].str
is_excluded = (
    lvl5_str.contains('Mix', na=False)
    | lvl5_str.startswith('Oth.', na=False)
    | lvl5_str.startswith('Neuro', na=False)
    | lvl5_str.startswith('Unclea', na=False)
)

# Get the unique clusters to be removed
clusters_to_remove = df.loc[is_excluded, 'Lvl5'].unique()

# Drop those cell-type columns (errors='ignore': labels without a column are skipped)
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')

# Plot heatmap with enhanced colormap and custom vmax
plt.figure(figsize=(25, 10))  # Wide figure to accommodate the x-axis labels
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 1000, 2000, 4000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=5000,
)
plt.title('Cell Type Distribution in Niches')
# The heatmap draws DataFrame columns along x and the index along y, so the
# cell types (columns) label the x-axis and the niches (index) the y-axis.
plt.xlabel('Cell Type')
plt.ylabel('Niche Cluster')
plt.xticks(rotation=45, ha='right')  # Adjust rotation and alignment of xticks
plt.yticks(rotation=0)

# Customize the color bar tick labels
# NOTE(review): the top tick is labelled '>4,000' while the colour scale only
# saturates at vmax=5000 — confirm which cut-off is intended.
cbar = plt.gca().collections[0].colorbar
cbar.set_ticks([0, 1000, 2000, 4000])
cbar.ax.set_yticklabels(['0', '1,000', '2,000', '>4,000'])  # Set custom tick labels

plt.tight_layout()  # Ensures all elements fit within the figure area
plt.show()


In [None]:
# Peek at the first rows of both annotation columns, then list the distinct
# labels found in each of them.
print(adata.obs[['KNN10_K10', 'Lvl4']].head())
for column in ('KNN10_K10', 'Lvl4'):
    print(adata.obs[column].unique())

In [None]:
# Number of cells per value of the 'status.3' column
adata.obs['status.3'].value_counts()

In [None]:
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=20)  # Computing the neighborhood graph
sc.tl.umap(adata)  # Build a UMAP to visualize the neighbourhood graph
sc.pl.umap(adata, color='Lvl4', cmap='vlag', use_raw=False)  # View the clustering

# This import had been fused onto the end of the comment above (two cells were
# merged), so it never executed; restored on its own line.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def generate_spatial_plot(adata, sample_names, cluster_key, output_path, crop_coords=None,
                          highlight_prefix="Ep"):
    """Scatter-plot cell centroids per sample, colouring clusters whose label starts with a prefix.

    Clusters of `cluster_key` whose (string) label starts with `highlight_prefix`
    each get their own "husl" colour; all other cells are drawn in light grey.
    One figure per sample is shown and saved.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid' and `cluster_key`, and supporting `adata[mask]` subsetting.
    sample_names : sequence of values matched against `adata.obs['sample']`.
    cluster_key : str
        Column holding the cluster labels.
    output_path : str
        File the figure is saved to. When several samples are requested, the
        sample name is appended to the file name (before the extension) so the
        figures do not overwrite each other.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; only cells inside this box (bounds
        inclusive) are plotted.
    highlight_prefix : str, optional
        Label prefix of the clusters to colour (default "Ep").

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    sample_names = list(sample_names)
    neutral_color = "#E5E5E5"  # grey used for every non-highlighted cluster

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        # Crop if coordinates are provided
        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values
        cluster_labels = subset_adata.obs[cluster_key].astype(str)

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # One colour per highlighted cluster; anything missing from the mapping
        # falls back to grey when the per-cell colours are looked up below.
        unique_clusters = sorted(cluster_labels.unique())
        epi_clusters = [c for c in unique_clusters if c.startswith(highlight_prefix)]
        epi_colors = sns.color_palette("husl", len(epi_clusters))
        cluster_colors = dict(zip(epi_clusters, epi_colors))
        colors = [cluster_colors.get(c, neutral_color) for c in cluster_labels]

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        ax.scatter(x_coords, y_coords, c=colors, s=4.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # With several samples, tag the file name so figures do not overwrite each other
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(output_path)
            figure_path = f"{stem}_{sample_name}{ext}"
        else:
            figure_path = output_path
        fig.savefig(figure_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

# Example usage:
# crop_coords is unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)
sample_names = ['HV205B']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/HV205B_Epi.pdf'

# Generate plot, colouring by the 'Lvl4' column
generate_spatial_plot(adata, sample_names, 'Lvl4', output_path, crop_coords)


In [None]:
# Distinct 'Lvl2' labels (the values the group filters in the plotting cells match against)
adata.obs['Lvl2'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None,
                          highlight_groups=("Oral Epi", "Crevicular Epi")):
    """Scatter-plot cell centroids per sample, colouring the clusters of selected groups.

    Every `lvl4_key` cluster found in cells whose `lvl2_key` value is in
    `highlight_groups` gets its own "turbo" colour; clusters found outside those
    groups are grey (grey wins if a cluster occurs in both). One figure per
    sample is shown and saved, then `generate_legend_plot` writes the legend.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid', `lvl2_key` and `lvl4_key`, and supporting `adata[mask]`.
    sample_names : sequence of values matched against `adata.obs['sample']`.
    lvl2_key, lvl4_key : str
        Columns holding the coarse groups and the clusters to colour.
    output_path : str
        Template with one positional `{}` placeholder, filled with "Main" for
        the scatter plot and "Legend" for the legend. With several samples the
        sample name is appended to the "Main" file name to avoid overwriting.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; bounds are inclusive.
    highlight_groups : iterable of str, optional
        `lvl2_key` values whose clusters are coloured.

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    sample_names = list(sample_names)
    in_highlight = adata.obs[lvl2_key].isin(list(highlight_groups))

    # lvl4 clusters seen inside / outside the highlighted lvl2 groups
    highlighted_clusters = sorted(adata.obs.loc[in_highlight, lvl4_key].unique())
    other_clusters = sorted(adata.obs.loc[~in_highlight, lvl4_key].unique())

    # One "turbo" colour per highlighted cluster. plt.get_cmap is used because
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Everything else is grey; assigned last so grey overrides shared labels
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    main_path = output_path.format("Main")

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour looked up from the lvl4 label; unmapped labels become grey
        colors = subset_adata.obs[lvl4_key].map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # With several samples, tag the file name so figures do not overwrite each other
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(main_path)
            figure_path = f"{stem}_{sample_name}{ext}"
        else:
            figure_path = main_path
        fig.savefig(figure_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone legend figure.

    Every entry of `cluster_colors` whose colour differs from `other_color`
    gets its own swatch and label; all remaining entries are represented by a
    single bold 'Other' swatch. The figure is saved to `legend_output_path`,
    shown, and closed.
    """
    highlighted = {name: shade for name, shade in cluster_colors.items() if shade != other_color}
    n_highlighted = len(highlighted)

    # Figure height grows with the number of rows; one extra row for 'Other'
    fig, ax = plt.subplots(figsize=(1, n_highlighted * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_highlighted + 1)

    # (label, colour, extra text options) per row, with 'Other' as the last row
    rows = [(name, shade, {}) for name, shade in highlighted.items()]
    rows.append(("Other", other_color, {"fontweight": "bold"}))

    for row, (label, shade, text_options) in enumerate(rows):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, color=shade, ec="black"))
        ax.text(1.1, row + 0.4, label, va="center", fontsize=8, **text_options)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Example usage
sample_names = ['HV205B']
# The {} placeholder is filled with "Main" and "Legend" by generate_spatial_plot
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Epi_HV205B_{}.pdf'
# Crop box, unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(adata, sample_names, 'Lvl2', 'Lvl4', output_path, crop_coords)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None,
                          highlight_groups=("Vascular", "Fibroblast")):
    """Scatter-plot cell centroids per sample, colouring the clusters of selected groups.

    Every `lvl4_key` cluster found in cells whose `lvl2_key` value is in
    `highlight_groups` gets its own "turbo" colour; clusters found outside those
    groups are grey (grey wins if a cluster occurs in both). One figure per
    sample is shown (saving the main figure is disabled in this version), then
    `generate_legend_plot` writes the legend.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid', `lvl2_key` and `lvl4_key`, and supporting `adata[mask]`.
    sample_names : iterable of values matched against `adata.obs['sample']`.
    lvl2_key, lvl4_key : str
        Columns holding the coarse groups and the clusters to colour.
    output_path : str
        Template with one positional `{}` placeholder; here it is only filled
        with "Legend" for the legend file.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; bounds are inclusive.
    highlight_groups : iterable of str, optional
        `lvl2_key` values whose clusters are coloured.

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    in_highlight = adata.obs[lvl2_key].isin(list(highlight_groups))

    # lvl4 clusters seen inside / outside the highlighted lvl2 groups
    highlighted_clusters = sorted(adata.obs.loc[in_highlight, lvl4_key].unique())
    other_clusters = sorted(adata.obs.loc[~in_highlight, lvl4_key].unique())

    # One "turbo" colour per highlighted cluster. plt.get_cmap is used because
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Everything else is grey; assigned last so grey overrides shared labels
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour looked up from the lvl4 label; unmapped labels become grey
        colors = subset_adata.obs[lvl4_key].map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # Saving the main figure was disabled in this cell; re-enable if needed:
        #plt.savefig(output_path.format("Main"), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone legend figure.

    Every entry of `cluster_colors` whose colour differs from `other_color`
    gets its own swatch and label; all remaining entries are represented by a
    single bold 'Other' swatch. The figure is saved to `legend_output_path`,
    shown, and closed.
    """
    highlighted = {name: shade for name, shade in cluster_colors.items() if shade != other_color}
    n_highlighted = len(highlighted)

    # Figure height grows with the number of rows; one extra row for 'Other'
    fig, ax = plt.subplots(figsize=(1, n_highlighted * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_highlighted + 1)

    # (label, colour, extra text options) per row, with 'Other' as the last row
    rows = [(name, shade, {}) for name, shade in highlighted.items()]
    rows.append(("Other", other_color, {"fontweight": "bold"}))

    for row, (label, shade, text_options) in enumerate(rows):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, color=shade, ec="black"))
        ax.text(1.1, row + 0.4, label, va="center", fontsize=8, **text_options)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Example usage
sample_names = ['HV205B']
# The {} placeholder is filled with "Legend" by generate_spatial_plot; the
# "Main" savefig call is commented out in this cell's function definition.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Str_HV205B_{}.pdf'
# Crop box, unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(adata, sample_names, 'Lvl2', 'Lvl4', output_path, crop_coords)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None,
                          highlight_groups=("T/NK", "B/Plasma")):
    """Scatter-plot cell centroids per sample, colouring the clusters of selected groups.

    Every `lvl4_key` cluster found in cells whose `lvl2_key` value is in
    `highlight_groups` gets its own "turbo" colour (a cluster named 'B' is
    forced to dark blue); clusters found outside those groups are grey (grey
    wins if a cluster occurs in both). One figure per sample is shown and
    saved, then `generate_legend_plot` writes the legend.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid', `lvl2_key` and `lvl4_key`, and supporting `adata[mask]`.
    sample_names : sequence of values matched against `adata.obs['sample']`.
    lvl2_key, lvl4_key : str
        Columns holding the coarse groups and the clusters to colour.
    output_path : str
        Template with one positional `{}` placeholder, filled with "Main" for
        the scatter plot and "Legend" for the legend. With several samples the
        sample name is appended to the "Main" file name to avoid overwriting.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; bounds are inclusive.
    highlight_groups : iterable of str, optional
        `lvl2_key` values whose clusters are coloured.

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    sample_names = list(sample_names)
    in_highlight = adata.obs[lvl2_key].isin(list(highlight_groups))

    # lvl4 clusters seen inside / outside the highlighted lvl2 groups
    highlighted_clusters = sorted(adata.obs.loc[in_highlight, lvl4_key].unique())
    other_clusters = sorted(adata.obs.loc[~in_highlight, lvl4_key].unique())

    # One "turbo" colour per highlighted cluster. plt.get_cmap is used because
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Give the 'B' cluster a fixed dark-blue colour instead of its turbo colour
    if 'B' in cluster_colors:
        cluster_colors['B'] = '#00008B'

    # Everything else is grey; assigned last so grey overrides shared labels
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    main_path = output_path.format("Main")

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour looked up from the lvl4 label; unmapped labels become grey
        colors = subset_adata.obs[lvl4_key].map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # With several samples, tag the file name so figures do not overwrite each other
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(main_path)
            figure_path = f"{stem}_{sample_name}{ext}"
        else:
            figure_path = main_path
        fig.savefig(figure_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone legend figure.

    Every entry of `cluster_colors` whose colour differs from `other_color`
    gets its own swatch and label; all remaining entries are represented by a
    single bold 'Other' swatch. The figure is saved to `legend_output_path`,
    shown, and closed.
    """
    highlighted = {name: shade for name, shade in cluster_colors.items() if shade != other_color}
    n_highlighted = len(highlighted)

    # Figure height grows with the number of rows; one extra row for 'Other'
    fig, ax = plt.subplots(figsize=(1, n_highlighted * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_highlighted + 1)

    # (label, colour, extra text options) per row, with 'Other' as the last row
    rows = [(name, shade, {}) for name, shade in highlighted.items()]
    rows.append(("Other", other_color, {"fontweight": "bold"}))

    for row, (label, shade, text_options) in enumerate(rows):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, color=shade, ec="black"))
        ax.text(1.1, row + 0.4, label, va="center", fontsize=8, **text_options)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Example usage
sample_names = ['HV205B']
# The {} placeholder is filled with "Main" and "Legend" by generate_spatial_plot
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lymphoid_HV205B_{}.pdf'
# Crop box, unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(adata, sample_names, 'Lvl2', 'Lvl4', output_path, crop_coords)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None,
                          highlight_groups=("Myeloid",)):
    """Scatter-plot cell centroids per sample, colouring the clusters of selected groups.

    Every `lvl4_key` cluster found in cells whose `lvl2_key` value is in
    `highlight_groups` gets its own "turbo" colour; clusters found outside those
    groups are grey (grey wins if a cluster occurs in both). One figure per
    sample is shown and saved, then `generate_legend_plot` writes the legend.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid', `lvl2_key` and `lvl4_key`, and supporting `adata[mask]`.
    sample_names : sequence of values matched against `adata.obs['sample']`.
    lvl2_key, lvl4_key : str
        Columns holding the coarse groups and the clusters to colour.
    output_path : str
        Template with one positional `{}` placeholder, filled with "Main" for
        the scatter plot and "Legend" for the legend. With several samples the
        sample name is appended to the "Main" file name to avoid overwriting.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; bounds are inclusive.
    highlight_groups : iterable of str, optional
        `lvl2_key` values whose clusters are coloured.

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    sample_names = list(sample_names)
    in_highlight = adata.obs[lvl2_key].isin(list(highlight_groups))

    # lvl4 clusters seen inside / outside the highlighted lvl2 groups
    highlighted_clusters = sorted(adata.obs.loc[in_highlight, lvl4_key].unique())
    other_clusters = sorted(adata.obs.loc[~in_highlight, lvl4_key].unique())

    # One "turbo" colour per highlighted cluster. plt.get_cmap is used because
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Everything else is grey; assigned last so grey overrides shared labels
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    main_path = output_path.format("Main")

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour looked up from the lvl4 label; unmapped labels become grey
        colors = subset_adata.obs[lvl4_key].map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # With several samples, tag the file name so figures do not overwrite each other
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(main_path)
            figure_path = f"{stem}_{sample_name}{ext}"
        else:
            figure_path = main_path
        fig.savefig(figure_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone legend figure.

    Every entry of `cluster_colors` whose colour differs from `other_color`
    gets its own swatch and label; all remaining entries are represented by a
    single bold 'Other' swatch. The figure is saved to `legend_output_path`,
    shown, and closed.
    """
    highlighted = {name: shade for name, shade in cluster_colors.items() if shade != other_color}
    n_highlighted = len(highlighted)

    # Figure height grows with the number of rows; one extra row for 'Other'
    fig, ax = plt.subplots(figsize=(1, n_highlighted * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_highlighted + 1)

    # (label, colour, extra text options) per row, with 'Other' as the last row
    rows = [(name, shade, {}) for name, shade in highlighted.items()]
    rows.append(("Other", other_color, {"fontweight": "bold"}))

    for row, (label, shade, text_options) in enumerate(rows):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, color=shade, ec="black"))
        ax.text(1.1, row + 0.4, label, va="center", fontsize=8, **text_options)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Example usage
sample_names = ['HV205B']
# The {} placeholder is filled with "Main" and "Legend" by generate_spatial_plot
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Myeloid_HV205B_{}.pdf'
# Crop box, unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(adata, sample_names, 'Lvl2', 'Lvl4', output_path, crop_coords)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None,
                          highlight_groups=("Myeloid",)):
    """Scatter-plot cell centroids per sample, colouring the clusters of selected groups.

    Every `lvl4_key` cluster found in cells whose `lvl2_key` value is in
    `highlight_groups` gets its own "turbo" colour; clusters found outside those
    groups are grey (grey wins if a cluster occurs in both). One figure per
    sample is shown and saved, then `generate_legend_plot` writes the legend.

    Parameters
    ----------
    adata : object exposing `.obs` with the columns 'sample', 'x_centroid',
        'y_centroid', `lvl2_key` and `lvl4_key`, and supporting `adata[mask]`.
    sample_names : sequence of values matched against `adata.obs['sample']`.
    lvl2_key, lvl4_key : str
        Columns holding the coarse groups and the clusters to colour.
    output_path : str
        Template with one positional `{}` placeholder, filled with "Main" for
        the scatter plot and "Legend" for the legend. With several samples the
        sample name is appended to the "Main" file name to avoid overwriting.
    crop_coords : tuple, optional
        `(x_min, x_max, y_min, y_max)`; bounds are inclusive.
    highlight_groups : iterable of str, optional
        `lvl2_key` values whose clusters are coloured.

    Raises
    ------
    ValueError
        If a sample has no cells left after filtering/cropping.
    """
    sample_names = list(sample_names)
    in_highlight = adata.obs[lvl2_key].isin(list(highlight_groups))

    # lvl4 clusters seen inside / outside the highlighted lvl2 groups
    highlighted_clusters = sorted(adata.obs.loc[in_highlight, lvl4_key].unique())
    other_clusters = sorted(adata.obs.loc[~in_highlight, lvl4_key].unique())

    # One "turbo" colour per highlighted cluster. plt.get_cmap is used because
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Everything else is grey; assigned last so grey overrides shared labels
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    main_path = output_path.format("Main")

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            xs = subset_adata.obs['x_centroid'].values
            ys = subset_adata.obs['y_centroid'].values
            crop_mask = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max)
            subset_adata = subset_adata[crop_mask]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        # Fail with a readable message instead of numpy's "zero-size array" error
        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                "check the sample name and crop_coords."
            )

        # Size the figure from the data extent (short side fixed at 6 inches);
        # fall back to a square when an extent is zero to avoid dividing by zero.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if x_range > 0 and y_range > 0:
            aspect_ratio = x_range / y_range
            figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)
        else:
            figsize = (6, 6)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Per-cell colour looked up from the lvl4 label; unmapped labels become grey
        colors = subset_adata.obs[lvl4_key].map(cluster_colors).fillna(other_color).values
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        # Bare canvas: no grid, ticks or frame, limits tight around the data
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())
        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        # With several samples, tag the file name so figures do not overwrite each other
        if len(sample_names) > 1:
            stem, ext = os.path.splitext(main_path)
            figure_path = f"{stem}_{sample_name}{ext}"
        else:
            figure_path = main_path
        fig.savefig(figure_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone legend figure.

    Every entry of `cluster_colors` whose colour differs from `other_color`
    gets its own swatch and label; all remaining entries are represented by a
    single bold 'Other' swatch. The figure is saved to `legend_output_path`,
    shown, and closed.
    """
    highlighted = {name: shade for name, shade in cluster_colors.items() if shade != other_color}
    n_highlighted = len(highlighted)

    # Figure height grows with the number of rows; one extra row for 'Other'
    fig, ax = plt.subplots(figsize=(1, n_highlighted * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_highlighted + 1)

    # (label, colour, extra text options) per row, with 'Other' as the last row
    rows = [(name, shade, {}) for name, shade in highlighted.items()]
    rows.append(("Other", other_color, {"fontweight": "bold"}))

    for row, (label, shade, text_options) in enumerate(rows):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, color=shade, ec="black"))
        ax.text(1.1, row + 0.4, label, va="center", fontsize=8, **text_options)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Example usage
# NOTE(review): this call uses the same Myeloid_HV205B_{} output template as the
# preceding Myeloid cell, so it writes to the same file names — confirm this
# repeated cell is still needed.
sample_names = ['HV205B']
# The {} placeholder is filled with "Main" and "Legend" by generate_spatial_plot
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Myeloid_HV205B_{}.pdf'
# Crop box, unpacked by generate_spatial_plot as (x_min, x_max, y_min, y_max)
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(adata, sample_names, 'Lvl2', 'Lvl4', output_path, crop_coords)



In [None]:
# Distinct 'Lvl1' labels present in the dataset
adata.obs['Lvl1'].unique().tolist()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Scatter-plot cell centroids per sample, coloured by cluster, and save the figures.

    Clusters in ``adata.obs[lvl4_key]`` whose ``adata.obs[lvl2_key]`` value is one of
    'Immune', 'Vascular', 'Fibroblast' or 'Epithelial' each receive a distinct
    "turbo" colour, assigned in alphabetical order of cluster name (cluster 'B' is
    pinned to '#00008B'). Every other cluster is drawn in grey. After the
    per-sample figures, a standalone legend is produced with
    ``generate_legend_plot`` (expected to be defined in the same cell).

    Parameters
    ----------
    adata : AnnData-like
        Must expose ``obs`` with the columns 'sample', 'x_centroid', 'y_centroid',
        ``lvl2_key`` and ``lvl4_key``, and support boolean-mask indexing.
    sample_names : str or iterable of str
        Values of ``obs['sample']`` to plot; a bare string is treated as one sample.
    lvl2_key : str
        obs column holding the coarse annotation that selects the highlighted
        clusters. The name is historical; callers in this notebook pass 'Lvl1'.
    lvl4_key : str
        obs column holding the fine-grained cluster labels that are coloured.
    output_path : str
        Template with one ``{}`` placeholder. It is filled with "Main" for the map
        (``"Main_<sample>"`` when several samples are requested, so figures no
        longer overwrite each other) and "Legend" for the colour key.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies inside
        these inclusive bounds are drawn.

    Raises
    ------
    ValueError
        If a sample has no cells after filtering/cropping, or the remaining cells
        span zero width or height (the figure size would be undefined).
    """
    highlighted_categories = ['Immune', 'Vascular', 'Fibroblast', 'Epithelial']
    is_highlighted = adata.obs[lvl2_key].isin(highlighted_categories)

    # dropna() keeps sorted() from choking on a mix of strings and NaN labels.
    highlighted_clusters = sorted(adata.obs.loc[is_highlighted, lvl4_key].dropna().unique())
    other_clusters = sorted(adata.obs.loc[~is_highlighted, lvl4_key].dropna().unique())

    # plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Pin cluster 'B' to dark blue instead of its colormap slot.
    if 'B' in cluster_colors:
        cluster_colors['B'] = '#00008B'

    # Everything outside the highlighted categories is drawn in grey.
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # A bare string would otherwise be iterated character by character.
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            x_all = subset_adata.obs['x_centroid'].values
            y_all = subset_adata.obs['y_centroid'].values
            inside_crop = (
                (x_all >= x_min) & (x_all <= x_max) &
                (y_all >= y_min) & (y_all <= y_max)
            )
            subset_adata = subset_adata[inside_crop]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                f"check the sample name and crop_coords={crop_coords!r}."
            )

        # Size the figure so its longer side scales with the data's aspect ratio.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if not (x_range > 0 and y_range > 0):
            raise ValueError(
                f"Cells of sample {sample_name!r} span zero width or height; "
                "cannot derive a figure size."
            )
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Plain dict lookup works for object and categorical label columns alike;
        # labels without an entry (including NaN) fall back to grey.
        colors = [cluster_colors.get(label, other_color) for label in subset_adata.obs[lvl4_key]]
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # A single sample keeps the historical "Main" file name.
        figure_label = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(figure_label), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone colour-key figure for a spatial cluster plot.

    Every entry of ``cluster_colors`` whose colour differs from ``other_color``
    gets its own swatch and label; all remaining entries are collapsed into a
    single bold "Other" swatch. Swatches are stacked bottom-up in the dict's
    iteration order, so "Other" ends up at the top of the figure.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster name -> matplotlib colour spec.
    other_color : str
        Colour that marks non-highlighted clusters (also used for "Other").
    legend_output_path : str
        File path handed to ``savefig``.
    """
    highlighted = {name: color for name, color in cluster_colors.items() if color != other_color}
    n_rows = len(highlighted)

    # Figure height grows with the number of swatches.
    fig, ax = plt.subplots(figsize=(1, n_rows * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_rows + 1)  # one extra row for 'Other'

    # Patch's `color` kwarg overrides BOTH face and edge colour (and warns when
    # combined with `ec`), so the black outline has to be requested through
    # facecolor/edgecolor instead.
    for row, (name, color) in enumerate(highlighted.items()):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, row + 0.4, name, va="center", fontsize=8)

    # Catch-all swatch for everything drawn in `other_color`.
    ax.add_patch(mpatches.Rectangle((0, n_rows), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_rows + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Render the Lvl4 map and its legend for a cropped window of sample HV205B.
sample_names = ['HV205B']
# '{}' is replaced by a figure label such as "Main" or "Legend".
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lvl4_HV205B_{}.pdf'
# Crop window given as (x_min, x_max, y_min, y_max) in centroid coordinates.
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Scatter-plot cell centroids per sample, coloured by cluster, and save the figures.

    Clusters in ``adata.obs[lvl4_key]`` are grouped by their ``adata.obs[lvl2_key]``
    category in the fixed order Epithelial, Fibroblast, Vascular, Immune
    (alphabetical within each group) and receive consecutive "turbo" colours in
    that order; cluster 'B' is pinned to '#400000'. Clusters from any other
    category are drawn in grey. After the per-sample figures, a standalone legend
    is produced with ``generate_legend_plot`` (expected to be defined in the same
    cell).

    Parameters
    ----------
    adata : AnnData-like
        Must expose ``obs`` with the columns 'sample', 'x_centroid', 'y_centroid',
        ``lvl2_key`` and ``lvl4_key``, and support boolean-mask indexing.
    sample_names : str or iterable of str
        Values of ``obs['sample']`` to plot; a bare string is treated as one sample.
    lvl2_key : str
        obs column holding the coarse annotation used for grouping. The name is
        historical; callers in this notebook pass 'Lvl1'.
    lvl4_key : str
        obs column holding the fine-grained cluster labels that are coloured.
    output_path : str
        Template with one ``{}`` placeholder. It is filled with "Main" for the map
        (``"Main_<sample>"`` when several samples are requested, so figures no
        longer overwrite each other) and "Legend" for the colour key.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies inside
        these inclusive bounds are drawn.

    Raises
    ------
    ValueError
        If a sample has no cells after filtering/cropping, or the remaining cells
        span zero width or height (the figure size would be undefined).
    """
    # Preferred ordering of the coarse categories; it drives the colour order.
    category_order = ['Epithelial', 'Fibroblast', 'Vascular', 'Immune']

    # dropna() keeps sorted() from choking on a mix of strings and NaN labels.
    sorted_clusters = []
    for category in category_order:
        in_category = adata.obs[lvl2_key] == category
        sorted_clusters.extend(sorted(adata.obs.loc[in_category, lvl4_key].dropna().unique()))

    is_listed = adata.obs[lvl2_key].isin(category_order)
    other_clusters = sorted(adata.obs.loc[~is_listed, lvl4_key].dropna().unique())

    # plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("turbo", len(sorted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(sorted_clusters)}

    # Pin cluster 'B' to a fixed colour. '#400000' is a dark maroon.
    # NOTE(review): other cells in this notebook use '#00008B' (dark blue) for 'B' — confirm which is intended.
    if 'B' in cluster_colors:
        cluster_colors['B'] = '#400000'

    # Everything outside the listed categories is drawn in grey.
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # A bare string would otherwise be iterated character by character.
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            x_all = subset_adata.obs['x_centroid'].values
            y_all = subset_adata.obs['y_centroid'].values
            inside_crop = (
                (x_all >= x_min) & (x_all <= x_max) &
                (y_all >= y_min) & (y_all <= y_max)
            )
            subset_adata = subset_adata[inside_crop]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                f"check the sample name and crop_coords={crop_coords!r}."
            )

        # Size the figure so its longer side scales with the data's aspect ratio.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if not (x_range > 0 and y_range > 0):
            raise ValueError(
                f"Cells of sample {sample_name!r} span zero width or height; "
                "cannot derive a figure size."
            )
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Plain dict lookup works for object and categorical label columns alike;
        # labels without an entry (including NaN) fall back to grey.
        colors = [cluster_colors.get(label, other_color) for label in subset_adata.obs[lvl4_key]]
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # A single sample keeps the historical "Main" file name.
        figure_label = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(figure_label), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone colour-key figure for a spatial cluster plot.

    Every entry of ``cluster_colors`` whose colour differs from ``other_color``
    gets its own swatch and label; all remaining entries are collapsed into a
    single bold "Other" swatch. Swatches are stacked bottom-up in the dict's
    iteration order, so "Other" ends up at the top of the figure.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster name -> matplotlib colour spec.
    other_color : str
        Colour that marks non-selected clusters (also used for "Other").
    legend_output_path : str
        File path handed to ``savefig``.
    """
    selected_clusters = {name: color for name, color in cluster_colors.items() if color != other_color}
    n_rows = len(selected_clusters)

    # Figure height grows with the number of swatches.
    fig, ax = plt.subplots(figsize=(1, n_rows * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_rows + 1)  # one extra row for 'Other'

    # Patch's `color` kwarg overrides BOTH face and edge colour (and warns when
    # combined with `ec`), so the black outline has to be requested through
    # facecolor/edgecolor instead.
    for row, (name, color) in enumerate(selected_clusters.items()):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, row + 0.4, name, va="center", fontsize=8)

    # Catch-all swatch for everything drawn in `other_color`.
    ax.add_patch(mpatches.Rectangle((0, n_rows), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_rows + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Render the category-ordered Lvl4 map and its legend for a cropped window of sample HV205B.
sample_names = ['HV205B']
# '{}' is replaced by a figure label such as "Main" or "Legend".
# NOTE(review): another cell in this notebook writes to this same path, so the files overwrite each other.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lvl4_HV205B_{}.pdf'
# Crop window given as (x_min, x_max, y_min, y_max) in centroid coordinates.
crop_coords = (2200, 3618, 180, 2074)

generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Scatter-plot cell centroids per sample, coloured by cluster, and save the figures.

    Clusters in ``adata.obs[lvl4_key]`` whose ``adata.obs[lvl2_key]`` value is one of
    'Immune', 'Vascular', 'Fibroblast' or 'Epithelial' each receive a distinct
    "turbo" colour, assigned in alphabetical order of cluster name (cluster 'B' is
    pinned to '#00008B'). Every other cluster is drawn in grey. After the
    per-sample figures, a standalone legend is produced with
    ``generate_legend_plot`` (expected to be defined in the same cell).

    Parameters
    ----------
    adata : AnnData-like
        Must expose ``obs`` with the columns 'sample', 'x_centroid', 'y_centroid',
        ``lvl2_key`` and ``lvl4_key``, and support boolean-mask indexing.
    sample_names : str or iterable of str
        Values of ``obs['sample']`` to plot; a bare string is treated as one sample.
    lvl2_key : str
        obs column holding the coarse annotation that selects the highlighted
        clusters. The name is historical; callers in this notebook pass 'Lvl1'.
    lvl4_key : str
        obs column holding the fine-grained cluster labels that are coloured.
    output_path : str
        Template with one ``{}`` placeholder. It is filled with "Main" for the map
        (``"Main_<sample>"`` when several samples are requested, so figures no
        longer overwrite each other) and "Legend" for the colour key.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies inside
        these inclusive bounds are drawn.

    Raises
    ------
    ValueError
        If a sample has no cells after filtering/cropping, or the remaining cells
        span zero width or height (the figure size would be undefined).
    """
    highlighted_categories = ['Immune', 'Vascular', 'Fibroblast', 'Epithelial']
    is_highlighted = adata.obs[lvl2_key].isin(highlighted_categories)

    # dropna() keeps sorted() from choking on a mix of strings and NaN labels.
    highlighted_clusters = sorted(adata.obs.loc[is_highlighted, lvl4_key].dropna().unique())
    other_clusters = sorted(adata.obs.loc[~is_highlighted, lvl4_key].dropna().unique())

    # plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("turbo", len(highlighted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(highlighted_clusters)}

    # Pin cluster 'B' to dark blue instead of its colormap slot.
    if 'B' in cluster_colors:
        cluster_colors['B'] = '#00008B'

    # Everything outside the highlighted categories is drawn in grey.
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # A bare string would otherwise be iterated character by character.
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            x_all = subset_adata.obs['x_centroid'].values
            y_all = subset_adata.obs['y_centroid'].values
            inside_crop = (
                (x_all >= x_min) & (x_all <= x_max) &
                (y_all >= y_min) & (y_all <= y_max)
            )
            subset_adata = subset_adata[inside_crop]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                f"check the sample name and crop_coords={crop_coords!r}."
            )

        # Size the figure so its longer side scales with the data's aspect ratio.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if not (x_range > 0 and y_range > 0):
            raise ValueError(
                f"Cells of sample {sample_name!r} span zero width or height; "
                "cannot derive a figure size."
            )
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Plain dict lookup works for object and categorical label columns alike;
        # labels without an entry (including NaN) fall back to grey.
        colors = [cluster_colors.get(label, other_color) for label in subset_adata.obs[lvl4_key]]
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # A single sample keeps the historical "Main" file name.
        figure_label = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(figure_label), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone colour-key figure for a spatial cluster plot.

    Every entry of ``cluster_colors`` whose colour differs from ``other_color``
    gets its own swatch and label; all remaining entries are collapsed into a
    single bold "Other" swatch. Swatches are stacked bottom-up in the dict's
    iteration order, so "Other" ends up at the top of the figure.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster name -> matplotlib colour spec.
    other_color : str
        Colour that marks non-highlighted clusters (also used for "Other").
    legend_output_path : str
        File path handed to ``savefig``.
    """
    highlighted = {name: color for name, color in cluster_colors.items() if color != other_color}
    n_rows = len(highlighted)

    # Figure height grows with the number of swatches.
    fig, ax = plt.subplots(figsize=(1, n_rows * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_rows + 1)  # one extra row for 'Other'

    # Patch's `color` kwarg overrides BOTH face and edge colour (and warns when
    # combined with `ec`), so the black outline has to be requested through
    # facecolor/edgecolor instead.
    for row, (name, color) in enumerate(highlighted.items()):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, row + 0.4, name, va="center", fontsize=8)

    # Catch-all swatch for everything drawn in `other_color`.
    ax.add_patch(mpatches.Rectangle((0, n_rows), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_rows + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Render the Lvl4 map and its legend for a cropped window of sample 's1r3'.
sample_names = ['s1r3']
# '{}' is replaced by a figure label such as "Main" or "Legend".
# NOTE(review): the file is labelled HV214s while the sample id is 's1r3' — confirm they are the same section.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lvl4_HV214s_{}.pdf'
# Crop window given as (x_min, x_max, y_min, y_max) in centroid coordinates.
crop_coords = (2832, 4250, 480, 2374)

generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)


In [None]:
# Step 1: Select the cells annotated with the chosen Lvl2 label.
# The label lives in one variable so the filter and the printed message cannot
# drift apart (the message used to say "B/Plasma" while the filter selected "T/NK").
lvl2_label = "T/NK"
lvl2_cells = adata.obs[adata.obs['Lvl2'] == lvl2_label]

# Step 2: Collect the distinct Lvl4 clusters found among those cells.
lvl4_clusters = lvl2_cells['Lvl4'].unique()

# Report the clusters under the label that was actually used for filtering.
print(f"Unique Lvl4 clusters for {lvl2_label}:", lvl4_clusters)

# Historical variable names, kept in case other cells still read them.
b_plasma_cells = lvl2_cells
b_plasma_clusters = lvl4_clusters


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

def generate_spatial_plot(adata, sample_names, lvl2_key, lvl4_key, output_path, crop_coords=None):
    """Scatter-plot cell centroids per sample, coloured by cluster, and save the figures.

    Clusters in ``adata.obs[lvl4_key]`` are grouped by their ``adata.obs[lvl2_key]``
    category in the fixed order Epithelial, Fibroblast, Vascular, Immune
    (alphabetical within each group) and receive consecutive "turbo" colours in
    that order; cluster 'B' is pinned to '#400000'. Clusters from any other
    category are drawn in grey. After the per-sample figures, a standalone legend
    is produced with ``generate_legend_plot`` (expected to be defined in the same
    cell).

    Parameters
    ----------
    adata : AnnData-like
        Must expose ``obs`` with the columns 'sample', 'x_centroid', 'y_centroid',
        ``lvl2_key`` and ``lvl4_key``, and support boolean-mask indexing.
    sample_names : str or iterable of str
        Values of ``obs['sample']`` to plot; a bare string is treated as one sample.
    lvl2_key : str
        obs column holding the coarse annotation used for grouping. The name is
        historical; callers in this notebook pass 'Lvl1'.
    lvl4_key : str
        obs column holding the fine-grained cluster labels that are coloured.
    output_path : str
        Template with one ``{}`` placeholder. It is filled with "Main" for the map
        (``"Main_<sample>"`` when several samples are requested, so figures no
        longer overwrite each other) and "Legend" for the colour key.
    crop_coords : tuple, optional
        ``(x_min, x_max, y_min, y_max)``; only cells whose centroid lies inside
        these inclusive bounds are drawn.

    Raises
    ------
    ValueError
        If a sample has no cells after filtering/cropping, or the remaining cells
        span zero width or height (the figure size would be undefined).
    """
    # Preferred ordering of the coarse categories; it drives the colour order.
    category_order = ['Epithelial', 'Fibroblast', 'Vascular', 'Immune']

    # dropna() keeps sorted() from choking on a mix of strings and NaN labels.
    sorted_clusters = []
    for category in category_order:
        in_category = adata.obs[lvl2_key] == category
        sorted_clusters.extend(sorted(adata.obs.loc[in_category, lvl4_key].dropna().unique()))

    is_listed = adata.obs[lvl2_key].isin(category_order)
    other_clusters = sorted(adata.obs.loc[~is_listed, lvl4_key].dropna().unique())

    # plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments.
    cmap = plt.get_cmap("turbo", len(sorted_clusters))
    cluster_colors = {cluster: cmap(i) for i, cluster in enumerate(sorted_clusters)}

    # Pin cluster 'B' to a fixed colour. '#400000' is a dark maroon.
    # NOTE(review): other cells in this notebook use '#00008B' (dark blue) for 'B' — confirm which is intended.
    if 'B' in cluster_colors:
        cluster_colors['B'] = '#400000'

    # Everything outside the listed categories is drawn in grey.
    other_color = "#A0A0A0"
    for cluster in other_clusters:
        cluster_colors[cluster] = other_color

    # A bare string would otherwise be iterated character by character.
    sample_names = [sample_names] if isinstance(sample_names, str) else list(sample_names)
    several_samples = len(sample_names) > 1

    for sample_name in sample_names:
        subset_adata = adata[adata.obs['sample'] == sample_name]

        if crop_coords:
            x_min, x_max, y_min, y_max = crop_coords
            x_all = subset_adata.obs['x_centroid'].values
            y_all = subset_adata.obs['y_centroid'].values
            inside_crop = (
                (x_all >= x_min) & (x_all <= x_max) &
                (y_all >= y_min) & (y_all <= y_max)
            )
            subset_adata = subset_adata[inside_crop]

        x_coords = subset_adata.obs['x_centroid'].values
        y_coords = subset_adata.obs['y_centroid'].values

        if x_coords.size == 0:
            raise ValueError(
                f"No cells to plot for sample {sample_name!r}; "
                f"check the sample name and crop_coords={crop_coords!r}."
            )

        # Size the figure so its longer side scales with the data's aspect ratio.
        x_range = x_coords.max() - x_coords.min()
        y_range = y_coords.max() - y_coords.min()
        if not (x_range > 0 and y_range > 0):
            raise ValueError(
                f"Cells of sample {sample_name!r} span zero width or height; "
                "cannot derive a figure size."
            )
        aspect_ratio = x_range / y_range
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Plain dict lookup works for object and categorical label columns alike;
        # labels without an entry (including NaN) fall back to grey.
        colors = [cluster_colors.get(label, other_color) for label in subset_adata.obs[lvl4_key]]
        ax.scatter(x_coords, y_coords, c=colors, s=8.5, zorder=2)

        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_coords.min(), x_coords.max())
        ax.set_ylim(y_coords.min(), y_coords.max())

        for spine in ax.spines.values():
            spine.set_visible(False)

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        # A single sample keeps the historical "Main" file name.
        figure_label = f"Main_{sample_name}" if several_samples else "Main"
        fig.savefig(output_path.format(figure_label), dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

    # Generate a separate legend plot
    generate_legend_plot(cluster_colors, other_color, output_path.format("Legend"))

def generate_legend_plot(cluster_colors, other_color, legend_output_path):
    """Draw and save a standalone colour-key figure for a spatial cluster plot.

    Every entry of ``cluster_colors`` whose colour differs from ``other_color``
    gets its own swatch and label; all remaining entries are collapsed into a
    single bold "Other" swatch. Swatches are stacked bottom-up in the dict's
    iteration order, so "Other" ends up at the top of the figure.

    Parameters
    ----------
    cluster_colors : dict
        Maps cluster name -> matplotlib colour spec.
    other_color : str
        Colour that marks non-selected clusters (also used for "Other").
    legend_output_path : str
        File path handed to ``savefig``.
    """
    selected_clusters = {name: color for name, color in cluster_colors.items() if color != other_color}
    n_rows = len(selected_clusters)

    # Figure height grows with the number of swatches.
    fig, ax = plt.subplots(figsize=(1, n_rows * 0.3 + 0.6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, n_rows + 1)  # one extra row for 'Other'

    # Patch's `color` kwarg overrides BOTH face and edge colour (and warns when
    # combined with `ec`), so the black outline has to be requested through
    # facecolor/edgecolor instead.
    for row, (name, color) in enumerate(selected_clusters.items()):
        ax.add_patch(mpatches.Rectangle((0, row), 1, 0.8, facecolor=color, edgecolor="black"))
        ax.text(1.1, row + 0.4, name, va="center", fontsize=8)

    # Catch-all swatch for everything drawn in `other_color`.
    ax.add_patch(mpatches.Rectangle((0, n_rows), 1, 0.8, facecolor=other_color, edgecolor="black"))
    ax.text(1.1, n_rows + 0.4, "Other", va="center", fontsize=8, fontweight="bold")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis("off")

    fig.savefig(legend_output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

# Render the category-ordered Lvl4 map and its legend for a cropped window of sample 's1r3'.
sample_names = ['s1r3']
# '{}' is replaced by a figure label such as "Main" or "Legend".
# NOTE(review): another cell in this notebook writes to this same path, and the file is
# labelled HV214s while the sample id is 's1r3' — confirm both are intended.
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure2/Lvl4_HV214s_{}.pdf'
# Crop window given as (x_min, x_max, y_min, y_max) in centroid coordinates.
crop_coords = (2832, 4250, 480, 2374)

generate_spatial_plot(
    adata,
    sample_names,
    lvl2_key='Lvl1',
    lvl4_key='Lvl4',
    output_path=output_path,
    crop_coords=crop_coords,
)

In [None]:
# Show the distinct values of the 'Lvl5' annotation column as a plain list.
# NOTE(review): `merged` is not created in the nearby cells — confirm it is defined earlier in the notebook.
merged.obs['Lvl5'].unique().tolist()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Work on a copy of the per-cell metadata: pd.DataFrame(merged.obs) may share
# data with merged.obs, so the column cast below could otherwise leak back into it.
df = merged.obs.copy()
df['Lvl5'] = df['Lvl5'].astype('category')

# Count cells per (cell cluster, niche) pair: rows are clusters, columns are niches.
niche_groups = df.groupby(['final_label_citeSeq', 'niche_cc14']).size().unstack(fill_value=0)

# Flag cells whose cluster label contains 'Mix' or starts with 'Neuro'.
# na=False treats missing labels as "no match"; without it the mask contains NA
# and cannot be used for boolean indexing.
is_excluded_cell = (
    df['final_label_citeSeq'].str.contains('Mix', na=False)
    | df['final_label_citeSeq'].str.startswith('Neuro', na=False)
)

# Distinct cluster labels to drop from the heatmap.
clusters_to_remove = df.loc[is_excluded_cell, 'final_label_citeSeq'].unique()

# Drop those clusters from the count table.
niche_groups_filtered = niche_groups[~niche_groups.index.isin(clusters_to_remove)]

# Normalise each row so it holds that cluster's relative frequency across niches.
niche_groups_relative = niche_groups_filtered.div(niche_groups_filtered.sum(axis=1), axis=0)

# Draw the heatmap on an explicit figure/axes; the tall figure leaves room for the row labels.
fig, ax = plt.subplots(figsize=(16, 20))
sns.heatmap(
    niche_groups_relative,
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05,
    linecolor='black',
    ax=ax,
)
ax.set_title('Relative Cell Type Distribution in Niches')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')  # slanted, right-aligned niche labels
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()  # keep all labels inside the figure area
plt.show()