In [None]:
import scimap as sm
import pandas as pd
import sys
import os
import scanpy as sc
import seaborn as sns; sns.set(color_codes=True)
import anndata
# Load the preprocessed AnnData object that every later cell works on.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
adata = anndata.read_h5ad('/data/vasileiosionat2/Xenium/Drake_outputs/ccProcessed.h5ad')

In [None]:
# Inspect the observation-level metadata table of the loaded object
adata.obs

In [None]:
# List the column names available in adata.obs
print(adata.obs.columns)

In [None]:
# Show what adata.raw currently holds, before it is overwritten in the next cell
print(adata.raw)

In [None]:
# Store the current state of `adata` as `adata.raw`. `spatial_expression(..., use_raw=True)`
# below reads its expression matrix from `adata.raw.X`.
# NOTE(review): with log=True that matrix is additionally passed through log1p, so
# confirm that `adata.X` holds untransformed values at this point.
adata.raw = adata

In [None]:
from sklearn.neighbors import BallTree
import pandas as pd
import numpy as np
import scipy.sparse
import anndata
import pathlib

def spatial_expression(adata,
                       x_coordinate='x_centroid',
                       y_coordinate='y_centroid',
                       z_coordinate=None,
                       method='knn',
                       knn=30,
                       imageid='sample',
                       use_raw=True,
                       log=True,
                       subset=None,
                       label='spatial_expression',
                       verbose=True,
                       output_dir=None):
    """
    Compute a distance-weighted neighbourhood ("spatial lag") expression matrix.

    For every image (group of cells sharing a value in `adata.obs[imageid]`) the
    nearest neighbours of each cell are found with a BallTree. Each neighbour is
    weighted by its normalised distance (closest neighbour highest, farthest
    neighbour zero, weights summing to one) and the weighted expression of the
    neighbours is stored per cell in `adata.uns[label]`.

    Parameters:
    - adata: AnnData object, or a path (str) to an `.h5ad` file to load.
    - x_coordinate, y_coordinate: columns of `adata.obs` holding the coordinates.
    - z_coordinate: optional third coordinate column; ignored when None.
    - method: neighbourhood definition; only 'knn' is implemented.
    - knn: number of neighbours queried per cell. The query includes the cell
      itself, which is dropped afterwards. Capped at the number of cells of the
      image.
    - imageid: column of `adata.obs` that separates the images.
    - use_raw: take expression from `adata.raw.X` (True) or from `adata.X` (False).
    - log: apply log1p to the raw matrix; only used when `use_raw` is True.
    - subset: if given, only the image with this `imageid` value is processed.
      Rows of all other cells are NaN in the result, because missing values are
      filled before the result is reindexed to all cells.
    - label: key under which the result DataFrame is stored in `adata.uns`.
    - verbose: print progress messages.
    - output_dir: if given, the AnnData object is written into this directory
      and nothing is returned.

    Returns:
    - The modified AnnData object when `output_dir` is None, otherwise None.

    Raises:
    - ValueError: unsupported `method`, an image without cells, or
      `use_raw=False` on data whose first row is not scaled below 1.
    """
    # Fail early: any other method would leave the neighbour arrays undefined.
    if method != 'knn':
        raise ValueError(f"Unsupported method '{method}'; only 'knn' is implemented")

    # Load the AnnData object and decide on the output file name
    if isinstance(adata, str):
        imid = str(adata.rsplit('/', 1)[-1])
        adata = anndata.read_h5ad(adata)
    else:
        # No source file name is available for in-memory objects, so fall back
        # to the label (previously this case raised a NameError on write).
        imid = f'{label}.h5ad'

    # Error checks
    if use_raw is False:
        first_row = adata.X[0]
        if scipy.sparse.issparse(first_row):
            first_row = first_row.toarray()
        if not np.all(np.asarray(first_row) < 1):
            raise ValueError('Please run `sm.pp.rescale` first if you wish to use `use_raw = False`')

    def spatial_expression_internal(adata_subset):
        """Return the spatial-lag DataFrame (cells x genes) for one image."""
        if adata_subset.n_obs == 0:
            raise ValueError(f"No cells found for an image in adata.obs['{imageid}']")

        # Coordinate matrix used for the neighbour search
        coord_columns = [x_coordinate, y_coordinate]
        if z_coordinate is not None:
            if verbose:
                print("Including Z-axis")
            coord_columns.append(z_coordinate)
        coords = adata_subset.obs[coord_columns].to_numpy(dtype=float)
        n_cells = coords.shape[0]

        # Identify neighbourhoods using KNN; BallTree rejects k > number of points
        if verbose:
            print(f"Identifying the {knn} nearest neighbors for every cell")
        tree = BallTree(coords, leaf_size=2)
        dist, ind = tree.query(coords, k=min(knn, n_cells), return_distance=True)

        # Build the row-normalised weight matrix
        d = scipy.sparse.lil_matrix((n_cells, n_cells))
        for row, (columns, values) in enumerate(zip(ind, dist)):
            # Drop self-distance element
            keep = columns != row
            columns = columns[keep]
            values = values[keep]
            if len(values) == 0:
                continue  # isolated cell: its row stays all-zero
            if len(values) == 1:
                values = np.array([1.0])
            else:
                spread = values.max() - values.min()
                if spread > 0:
                    # Closest neighbour -> 1, farthest -> 0, then scale to sum 1
                    values = (values.max() - values) / spread
                    values = values / values.sum()
                else:
                    # All neighbours equidistant: equal weights instead of 0/0 = NaN
                    values = np.full(len(values), 1.0 / len(values))
            d[row, columns] = values

        # Convert to CSR sparse matrix for efficient multiplication
        wn_matrix_sparse = d.tocsr()

        # Pick the expression matrix and the matching variable names
        if use_raw:
            expression = adata_subset.raw.X
            var_names = adata_subset.raw.var_names
            if log:
                if scipy.sparse.issparse(expression):
                    expression = expression.tocsr().log1p()
                else:
                    expression = np.log1p(expression)
        else:
            expression = adata_subset.X
            var_names = adata_subset.var_names

        # The product is sparse or dense depending on the expression matrix
        spatial_lag = wn_matrix_sparse.dot(expression)
        if scipy.sparse.issparse(spatial_lag):
            spatial_lag = spatial_lag.toarray()

        return pd.DataFrame(np.asarray(spatial_lag),
                            columns=var_names,
                            index=adata_subset.obs.index)

    # Subset a particular image if needed
    if subset is not None:
        adata_list = [adata[adata.obs[imageid] == subset]]
    else:
        adata_list = [adata[adata.obs[imageid] == i] for i in adata.obs[imageid].unique()]

    # Apply function to all images and merge the results into a single dataframe
    all_data = [spatial_expression_internal(image) for image in adata_list]
    result = pd.concat(all_data, join='outer')

    # Fill gaps, then bring the rows into the order of the cells in adata
    result = result.fillna(0)
    result = result.reindex(adata.obs.index)

    # Add to AnnData object
    adata.uns[label] = result

    # Save data if requested
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        adata.write(output_dir / imid)
    else:
        # Return the modified AnnData object
        return adata

In [None]:
# Assuming 'adata' is an AnnData object with spatial coordinates and gene expression data

# Calculate spatial expression with knn=100 (the query includes the cell itself, which is dropped)
adata = spatial_expression(adata, x_coordinate='x_centroid', y_coordinate='y_centroid',
                            method='knn', knn=100, use_raw=True, label='expression_knn_100')


In [None]:
# Assuming 'adata' is an AnnData object with spatial coordinates and gene expression data

# Calculate spatial expression with knn=50 (the query includes the cell itself, which is dropped)
adata = spatial_expression(adata, x_coordinate='x_centroid', y_coordinate='y_centroid',
                            method='knn', knn=50, use_raw=True, label='expression_knn_50')


In [None]:
# Assuming 'adata' is an AnnData object with spatial coordinates and gene expression data

# Calculate spatial expression with knn=30 (the query includes the cell itself, which is dropped)
adata = spatial_expression(adata, x_coordinate='x_centroid', y_coordinate='y_centroid',
                            method='knn', knn=30, use_raw=True, label='expression_knn_30')


In [None]:
# Assuming 'adata' is an AnnData object with spatial coordinates and gene expression data

# Calculate spatial expression with knn=10 (the query includes the cell itself, which is dropped)
adata = spatial_expression(adata, x_coordinate='x_centroid', y_coordinate='y_centroid',
                            method='knn', knn=10, use_raw=True, label='expression_knn_10')


In [None]:
from sklearn.cluster import KMeans
import pathlib

def cluster_spatial_expression(adata, label='spatial_expression', n_clusters=10, output_dir=None,
                               prefix='KNN100'):
    """
    Cluster the spatial expression results in the AnnData object using KMeans.

    Parameters:
    - adata: AnnData object containing the spatial expression results.
    - label: The key to fetch spatial expression data from `adata.uns` (default 'spatial_expression').
    - n_clusters: Number of clusters for KMeans.
    - output_dir: Directory to save the clustered AnnData object (optional).
    - prefix: Prefix of the result name; the labels are stored in
      `adata.obs[f'{prefix}_K{n_clusters}']` (default 'KNN100').

    Returns:
    - adata: Modified AnnData object with added clustering information.

    Raises:
    - ValueError: if `label` is not a key of `adata.uns`.
    """
    # Ensure the spatial expression data exists
    if label not in adata.uns:
        raise ValueError(f"Label '{label}' not found in adata.uns")

    # Fetch the spatial expression DataFrame
    spatial_expr_df = adata.uns[label]

    # A previous run leaves its labels in a 'cluster' column of this frame; exclude
    # it so that old cluster assignments are never fed to KMeans as a feature.
    features = spatial_expr_df.drop(columns=['cluster'], errors='ignore')

    # KMeans clustering (fixed seed for reproducible labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = kmeans.fit_predict(features)

    # Keep the latest labels next to the expression values, as before
    spatial_expr_df['cluster'] = cluster_ids

    # Dynamically generate cluster label name
    cluster_label = f'{prefix}_K{n_clusters}'

    # Add the cluster labels to adata.obs with dynamic name.
    # Assigned by position: assumes the rows of adata.uns[label] follow the order of adata.obs.
    adata.obs[cluster_label] = cluster_ids

    # Save the AnnData object if output_dir is provided
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        # Save the file with a name indicating the prefix and number of clusters
        output_file = output_dir / f"{cluster_label}.h5ad"
        adata.write(output_file)
        print(f"Saved clustered AnnData object to {output_file}")

    return adata

In [None]:
# 2. Cluster the spatial expression data with KMeans (the only algorithm cluster_spatial_expression implements)
# NOTE(review): n_clusters=12 stores the labels in adata.obs['KNN100_K12'], but the plotting
# cells further down read 'KNN100_K10' -- confirm which number of clusters is intended.
adata = cluster_spatial_expression(adata, label='expression_knn_100', n_clusters=12)

In [None]:
from sklearn.cluster import KMeans
import pathlib

def cluster_spatial_expression(adata, label='spatial_expression', n_clusters=10, output_dir=None,
                               prefix='KNN50'):
    """
    Cluster the spatial expression results in the AnnData object using KMeans.

    Parameters:
    - adata: AnnData object containing the spatial expression results.
    - label: The key to fetch spatial expression data from `adata.uns` (default 'spatial_expression').
    - n_clusters: Number of clusters for KMeans.
    - output_dir: Directory to save the clustered AnnData object (optional).
    - prefix: Prefix of the result name; the labels are stored in
      `adata.obs[f'{prefix}_K{n_clusters}']` (default 'KNN50').

    Returns:
    - adata: Modified AnnData object with added clustering information.

    Raises:
    - ValueError: if `label` is not a key of `adata.uns`.
    """
    # Ensure the spatial expression data exists
    if label not in adata.uns:
        raise ValueError(f"Label '{label}' not found in adata.uns")

    # Fetch the spatial expression DataFrame
    spatial_expr_df = adata.uns[label]

    # A previous run leaves its labels in a 'cluster' column of this frame; exclude
    # it so that old cluster assignments are never fed to KMeans as a feature.
    features = spatial_expr_df.drop(columns=['cluster'], errors='ignore')

    # KMeans clustering (fixed seed for reproducible labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = kmeans.fit_predict(features)

    # Keep the latest labels next to the expression values, as before
    spatial_expr_df['cluster'] = cluster_ids

    # Dynamically generate cluster label name
    cluster_label = f'{prefix}_K{n_clusters}'

    # Add the cluster labels to adata.obs with dynamic name.
    # Assigned by position: assumes the rows of adata.uns[label] follow the order of adata.obs.
    adata.obs[cluster_label] = cluster_ids

    # Save the AnnData object if output_dir is provided
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        # Save the file with a name indicating the prefix and number of clusters
        output_file = output_dir / f"{cluster_label}.h5ad"
        adata.write(output_file)
        print(f"Saved clustered AnnData object to {output_file}")

    return adata

In [None]:
# 2. Cluster the spatial expression data with KMeans (the only algorithm cluster_spatial_expression implements)
adata = cluster_spatial_expression(adata, label='expression_knn_50', n_clusters=10)

In [None]:
from sklearn.cluster import KMeans
import pathlib

def cluster_spatial_expression(adata, label='spatial_expression', n_clusters=10, output_dir=None,
                               prefix='KNN30'):
    """
    Cluster the spatial expression results in the AnnData object using KMeans.

    Parameters:
    - adata: AnnData object containing the spatial expression results.
    - label: The key to fetch spatial expression data from `adata.uns` (default 'spatial_expression').
    - n_clusters: Number of clusters for KMeans.
    - output_dir: Directory to save the clustered AnnData object (optional).
    - prefix: Prefix of the result name; the labels are stored in
      `adata.obs[f'{prefix}_K{n_clusters}']` (default 'KNN30').

    Returns:
    - adata: Modified AnnData object with added clustering information.

    Raises:
    - ValueError: if `label` is not a key of `adata.uns`.
    """
    # Ensure the spatial expression data exists
    if label not in adata.uns:
        raise ValueError(f"Label '{label}' not found in adata.uns")

    # Fetch the spatial expression DataFrame
    spatial_expr_df = adata.uns[label]

    # A previous run leaves its labels in a 'cluster' column of this frame; exclude
    # it so that old cluster assignments are never fed to KMeans as a feature.
    features = spatial_expr_df.drop(columns=['cluster'], errors='ignore')

    # KMeans clustering (fixed seed for reproducible labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = kmeans.fit_predict(features)

    # Keep the latest labels next to the expression values, as before
    spatial_expr_df['cluster'] = cluster_ids

    # Dynamically generate cluster label name
    cluster_label = f'{prefix}_K{n_clusters}'

    # Add the cluster labels to adata.obs with dynamic name.
    # Assigned by position: assumes the rows of adata.uns[label] follow the order of adata.obs.
    adata.obs[cluster_label] = cluster_ids

    # Save the AnnData object if output_dir is provided
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        # Save the file with a name indicating the prefix and number of clusters
        output_file = output_dir / f"{cluster_label}.h5ad"
        adata.write(output_file)
        print(f"Saved clustered AnnData object to {output_file}")

    return adata

In [None]:
# 2. Cluster the spatial expression data with KMeans (the only algorithm cluster_spatial_expression implements)
adata = cluster_spatial_expression(adata, label='expression_knn_30', n_clusters=10)

In [None]:
from sklearn.cluster import KMeans
import pathlib

def cluster_spatial_expression(adata, label='spatial_expression', n_clusters=10, output_dir=None,
                               prefix='KNN10'):
    """
    Cluster the spatial expression results in the AnnData object using KMeans.

    Parameters:
    - adata: AnnData object containing the spatial expression results.
    - label: The key to fetch spatial expression data from `adata.uns` (default 'spatial_expression').
    - n_clusters: Number of clusters for KMeans.
    - output_dir: Directory to save the clustered AnnData object (optional).
    - prefix: Prefix of the result name; the labels are stored in
      `adata.obs[f'{prefix}_K{n_clusters}']` (default 'KNN10').

    Returns:
    - adata: Modified AnnData object with added clustering information.

    Raises:
    - ValueError: if `label` is not a key of `adata.uns`.
    """
    # Ensure the spatial expression data exists
    if label not in adata.uns:
        raise ValueError(f"Label '{label}' not found in adata.uns")

    # Fetch the spatial expression DataFrame
    spatial_expr_df = adata.uns[label]

    # A previous run leaves its labels in a 'cluster' column of this frame; exclude
    # it so that old cluster assignments are never fed to KMeans as a feature.
    features = spatial_expr_df.drop(columns=['cluster'], errors='ignore')

    # KMeans clustering (fixed seed for reproducible labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = kmeans.fit_predict(features)

    # Keep the latest labels next to the expression values, as before
    spatial_expr_df['cluster'] = cluster_ids

    # Dynamically generate cluster label name
    cluster_label = f'{prefix}_K{n_clusters}'

    # Add the cluster labels to adata.obs with dynamic name.
    # Assigned by position: assumes the rows of adata.uns[label] follow the order of adata.obs.
    adata.obs[cluster_label] = cluster_ids

    # Save the AnnData object if output_dir is provided
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        # Save the file with a name indicating the prefix and number of clusters
        output_file = output_dir / f"{cluster_label}.h5ad"
        adata.write(output_file)
        print(f"Saved clustered AnnData object to {output_file}")

    return adata

In [None]:
# 2. Cluster the spatial expression data with KMeans (the only algorithm cluster_spatial_expression implements)
adata = cluster_spatial_expression(adata, label='expression_knn_10', n_clusters=10)

In [None]:
# Inspect adata.obs again; each clustering call above adds one label column to it
adata.obs

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 's2r2_HV184'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 's2r1_HV207'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted (it was hard-coded to another sample)
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 's1r2'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted (it was hard-coded to another sample)
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 'HV205B'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted (it was hard-coded to another sample)
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 'HV171A'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted (it was hard-coded to another sample)
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one panel per cluster for a single sample: the cluster of interest in red
# on top of all remaining cells of that sample in gray.
sample_id = 's1r3'
cluster_col = 'KNN100_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]

# Ensure required columns exist and the sample is not empty, so that problems are
# reported here instead of as an obscure matplotlib error further down
required_columns = [cluster_col, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")
if adata_sample.n_obs == 0:
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Clusters present in this sample, in sorted order (modify this for a custom order)
unique_clusters = adata_sample.obs[cluster_col].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns follows from the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# squeeze=False always returns a 2-D array of axes, so flatten() works for any grid
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45), squeeze=False)
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroid coordinates by a fixed angle
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to set_aspect below
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    mask_cluster = (adata_sample.obs[cluster_col] == cluster).to_numpy()
    mask_other = ~mask_cluster

    # Background: every cell outside the current cluster (gray)
    ax.scatter(new_x_coords[mask_other], new_y_coords[mask_other], c='#C0C0C0', s=3)

    # Foreground: cells of the current cluster (red), drawn last so they stay on top
    ax.scatter(new_x_coords[mask_cluster], new_y_coords[mask_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name at the bottom of each panel
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Turn off unused subplots
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted (it was hard-coded to another sample)
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure and release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's2r3'          # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K10'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
# 2. Cluster the spatial expression data into 12 clusters.
# `cluster_spatial_expression` is defined in an earlier cell; `adata` is rebound to its return value.
adata = cluster_spatial_expression(
    adata,
    label='expression_knn_100',
    n_clusters=12,
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's2r2_HV184'    # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist. The check uses the same key that is plotted
# below (previously a different column name was validated than was read).
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)

In [None]:
# 2. Cluster the spatial expression data into 10 clusters (KMeans or DBSCAN).
# `cluster_spatial_expression` is defined in an earlier cell; `adata` is rebound to its return value.
adata = cluster_spatial_expression(
    adata,
    label='expression_knn_100',
    n_clusters=10,
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's1r3'          # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's2r3'          # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's2r1_HV207'    # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
# NOTE(review): the preceding cell plots the same sample with the same cluster
# column; this cell looks like a leftover duplicate and could be deleted.
sample_id = 's2r1_HV207'    # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 's1r2'          # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K10'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 'HV205B'        # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K10'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Spatial overview of one sample: one panel per cluster, with that cluster's
# cells in red drawn over all remaining cells of the sample in gray.
# Relies on `adata` created by the earlier cells of this notebook.
sample_id = 'HV171A'        # value of adata.obs['sample'] to plot
cluster_key = 'KNN100_K12'  # adata.obs column holding the cluster labels

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Fail with a clear message instead of an opaque plt.subplots error below.
    raise ValueError(f"No cells found for sample '{sample_id}'")

# Ensure required columns exist (the same key is validated and used below)
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Sorted cluster labels (replace with an explicit list for a custom order)
cluster_labels = adata_sample.obs[cluster_key]
ordered_clusters = sorted(cluster_labels.unique())

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure and flatten the axes grid so panels can be indexed 0..n-1
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()

# Rotate the centroids with the standard 2-D rotation matrix
rotation_angle = 45  # degrees; modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid'].to_numpy()
y_coords = adata_sample.obs['y_centroid'].to_numpy()
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, used by set_aspect below.
# NOTE(review): a numeric aspect rescales y units relative to x units, so with
# this ratio each panel comes out roughly square rather than geometry-preserving;
# use ax.set_aspect('equal') if true tissue proportions are wanted.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White panel without an outline
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    in_cluster = (cluster_labels == cluster).to_numpy()

    # Background cells first so the highlighted cluster is drawn on top
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels of the grid that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

fig.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title is derived from sample_id so it always names the sample actually plotted
fig.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of cell counts per (niche cluster, cell type), built from adata.obs.
niche_key = 'KNN100_K10'  # adata.obs column with the niche cluster labels
cell_type_key = 'Lvl4'    # adata.obs column with the cell-type annotation

df = pd.DataFrame(adata.obs)
# NOTE(review): this column is not used by the plot below. It is kept because,
# depending on the pandas version, `df` may share storage with adata.obs and a
# later cell reads adata.obs['spatial_expression_cluster'] -- confirm and remove.
df['spatial_expression_cluster'] = df[niche_key].astype('category')

# Aggregate counts: rows are niche clusters, columns are cell types
niche_groups = df.groupby([niche_key, cell_type_key]).size().unstack(fill_value=0)

# Select the cell-type columns to hide. The filter runs on the plotted columns
# themselves, so the labels being dropped always come from `cell_type_key`
# (previously labels of a different annotation level were passed to drop()).
cell_types = niche_groups.columns.astype(str)
clusters_to_remove = niche_groups.columns[
    cell_types.str.contains('Mix')
    | cell_types.str.startswith('Oth.')
    | cell_types.str.startswith('Neuro')
    | cell_types.str.startswith('Unclea')
]

# Filter out these cell types from the plot
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove)

# Plot heatmap; counts above vmax saturate at the top colour
fig, ax = plt.subplots(figsize=(25, 10))  # wide figure to accommodate the x-axis labels
sns.heatmap(
    niche_groups_filtered,
    ax=ax,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 2000, 4000, 8000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=8000,
)
ax.set_title('Cell Type Distribution in Niches')
# Columns of the matrix are cell types and rows are niche clusters
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the colour bar so the top tick reflects the vmax clipping
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 2000, 4000, 8000])
cbar.ax.set_yticklabels(['0', '2,000', '4,000', '>8,000'])

fig.tight_layout()  # Ensures all elements fit within the figure area
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of cell counts per (niche cluster, cell type), built from adata.obs.
niche_key = 'KNN100_K12'  # adata.obs column with the niche cluster labels
cell_type_key = 'Lvl4'    # adata.obs column with the cell-type annotation

df = pd.DataFrame(adata.obs)
# NOTE(review): this column is not used by the plot below. It is kept because,
# depending on the pandas version, `df` may share storage with adata.obs and a
# later cell reads adata.obs['spatial_expression_cluster'] -- confirm and remove.
df['spatial_expression_cluster'] = df[niche_key].astype('category')

# Aggregate counts: rows are niche clusters, columns are cell types
niche_groups = df.groupby([niche_key, cell_type_key]).size().unstack(fill_value=0)

# Select the cell-type columns to hide. The filter runs on the plotted columns
# themselves, so the labels being dropped always come from `cell_type_key`
# (previously labels of a different annotation level were passed to drop()).
cell_types = niche_groups.columns.astype(str)
clusters_to_remove = niche_groups.columns[
    cell_types.str.contains('Mix')
    | cell_types.str.startswith('Oth.')
    | cell_types.str.startswith('Neuro')
    | cell_types.str.startswith('Unclea')
]

# Filter out these cell types from the plot
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove)

# Plot heatmap; counts above vmax saturate at the top colour
fig, ax = plt.subplots(figsize=(25, 10))  # wide figure to accommodate the x-axis labels
sns.heatmap(
    niche_groups_filtered,
    ax=ax,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 2000, 4000, 8000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=8000,
)
ax.set_title('Cell Type Distribution in Niches')
# Columns of the matrix are cell types and rows are niche clusters
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the colour bar so the top tick reflects the vmax clipping
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 2000, 4000, 8000])
cbar.ax.set_yticklabels(['0', '2,000', '4,000', '>8,000'])

fig.tight_layout()  # Ensures all elements fit within the figure area
plt.show()

In [None]:
# Display the 'names' entry of the rank_genes_groups results stored on adata_subset.
# NOTE(review): `adata_subset` is not assigned in the neighbouring cells; confirm it
# exists (with rank_genes_groups already computed) when the notebook is run top to bottom.
adata_subset.uns['rank_genes_groups']['names']

In [None]:
# Print the shape of the rank_genes_groups 'names' array stored on adata_subset.
# NOTE(review): depends on `adata_subset` from an earlier cell -- confirm it is defined.
print(adata_subset.uns['rank_genes_groups']['names'].shape)

In [None]:
import scanpy as sc

# Store the cluster labels as plain strings so they are handled as text labels
cluster_key = 'spatial_expression_cluster'
adata.obs[cluster_key] = adata.obs[cluster_key].astype(str)

In [None]:

# Log-transform the data only if it has not been done yet. scanpy records the
# transform under adata.uns['log1p'], so this guard keeps the cell idempotent:
# re-running it no longer applies log1p a second time.
if 'log1p' not in adata.uns:
    sc.pp.log1p(adata)

# Perform rank-based differential expression between clusters (Wilcoxon test on adata.X)
sc.tl.rank_genes_groups(adata, 'spatial_expression_cluster', method='wilcoxon', use_raw=False)

# Extract the top genes per cluster
rank_genes = adata.uns['rank_genes_groups']
top_n_genes = 20  # number of top-ranked genes kept per cluster

# 'names' is a structured array with one field per tested cluster; iterating its
# field names avoids depending on the dtype of the obs column (which an earlier
# cell casts to str, so `.cat` is only available after scanpy re-categorises it).
top_genes_per_cluster = {
    cluster: rank_genes['names'][cluster][:top_n_genes]
    for cluster in rank_genes['names'].dtype.names
}


In [None]:
# Flatten the per-cluster gene lists into one list. A gene can be top-ranked in
# several clusters; dict.fromkeys keeps only its first occurrence (order preserved)
# so the dot plot does not receive duplicate variable names.
top_genes = list(dict.fromkeys(
    gene for genes in top_genes_per_cluster.values() for gene in genes
))

# Dot plot of the top genes (rows, because swap_axes=True) across clusters.
# Only `cmap` is passed; the previously duplicated `color_map` keyword was redundant.
sc.pl.dotplot(adata, var_names=top_genes, groupby='spatial_expression_cluster',
              swap_axes=True, cmap='viridis')

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Only the niche assignment and the cell-type annotation are needed. Selecting
# the two columns yields a new frame and nothing below assigns into it, so
# adata.obs is left untouched by this cell.
df = adata.obs[['KNN10_K15', 'Lvl5']]

# Count cells per (niche, cell type): rows are KNN10_K15 niches, columns are Lvl5 cell types
niche_groups = df.groupby(['KNN10_K15', 'Lvl5']).size().unstack(fill_value=0)

# Per-cell flag for Lvl5 labels that should not be shown:
# anything containing 'Mix', or starting with 'Oth.', 'Neuro' or 'Unclea'
clusters_to_remove = (
    df['Lvl5'].str.contains('Mix')
    | df['Lvl5'].str.startswith('Oth.')
    | df['Lvl5'].str.startswith('Neuro')
    | df['Lvl5'].str.startswith('Unclea')
)

# Reduce the per-cell flag to the distinct labels to drop
clusters_to_remove = df.loc[clusters_to_remove, 'Lvl5'].unique()

# Remove those cell-type columns; errors='ignore' tolerates labels absent from the table
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')

# Wide figure so the cell-type labels along the x-axis stay readable
fig, ax = plt.subplots(figsize=(25, 10))
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 2000, 4000, 8000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=8000,  # counts above 8000 share the top colour
    ax=ax,
)
ax.set_title('Cell Type Distribution in Niches')
# seaborn draws DataFrame columns along x and the index along y:
# columns are cell types, rows are niches.
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the colour bar so the top tick reflects the vmax clipping
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 2000, 4000, 8000])
cbar.ax.set_yticklabels(['0', '2,000', '4,000', '>8,000'])

fig.tight_layout()
plt.show()



In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell annotation table; Lvl4 is cast to a categorical before counting
df = pd.DataFrame(adata.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Cell counts per (Lvl4 cell type, KNN100_K10 niche): rows are cell types, columns are niches
niche_groups = df.groupby(['Lvl4', 'KNN100_K10']).size().unstack(fill_value=0)

# Per-cell flag for Lvl4 labels containing 'Oth' or starting with 'Neuro' ...
has_oth = df['Lvl4'].str.contains('Oth')
starts_neuro = df['Lvl4'].str.startswith('Neuro')
clusters_to_remove = has_oth | starts_neuro

# ... reduced to the distinct labels to exclude
clusters_to_remove = df.loc[clusters_to_remove, 'Lvl4'].unique()

# Keep only the rows whose cell type is not excluded
keep_rows = ~niche_groups.index.isin(clusters_to_remove)
niche_groups_filtered = niche_groups[keep_rows]

# Normalise every row by its own total, so each cell type's values across niches sum to 1
row_totals = niche_groups_filtered.sum(axis=1)
niche_groups_relative = niche_groups_filtered.div(row_totals, axis=0)

# Tall figure: one row per cell type
fig, ax = plt.subplots(figsize=(8, 14))
sns.heatmap(
    niche_groups_relative,
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05,
    linecolor='black',
    ax=ax,
)
ax.set_title('Relative Cell Type Distribution in Niches')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()



In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell annotation table; Lvl4 is cast to a categorical before counting
df = pd.DataFrame(adata.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Cell counts per (Lvl4 cell type, KNN100_K12 niche): rows are cell types, columns are niches
niche_groups = df.groupby(['Lvl4', 'KNN100_K12']).size().unstack(fill_value=0)

# Per-cell flag for Lvl4 labels containing 'Oth' or starting with 'Neuro' ...
has_oth = df['Lvl4'].str.contains('Oth')
starts_neuro = df['Lvl4'].str.startswith('Neuro')
clusters_to_remove = has_oth | starts_neuro

# ... reduced to the distinct labels to exclude
clusters_to_remove = df.loc[clusters_to_remove, 'Lvl4'].unique()

# Keep only the rows whose cell type is not excluded
keep_rows = ~niche_groups.index.isin(clusters_to_remove)
niche_groups_filtered = niche_groups[keep_rows]

# Normalise every row by its own total, so each cell type's values across niches sum to 1
row_totals = niche_groups_filtered.sum(axis=1)
niche_groups_relative = niche_groups_filtered.div(row_totals, axis=0)

# Tall figure: one row per cell type
fig, ax = plt.subplots(figsize=(8, 14))
sns.heatmap(
    niche_groups_relative,
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05,
    linecolor='black',
    ax=ax,
)
ax.set_title('Relative Cell Type Distribution in Niches')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()



In [None]:
# Display the per-cell annotation table (adata.obs) to check which columns are present.
adata.obs

In [None]:
# Persist the current state of `adata` to disk.
# NOTE(review): this is the same path the notebook reads `adata` from in its first
# cell, so the input file is overwritten in place with whatever has been done to
# `adata` since loading (including the in-place sc.pp.log1p earlier in the
# notebook). Confirm this is intended, or write to a new file name.
adata.write_h5ad("/data/vasileiosionat2/Xenium/Drake_outputs/ccProcessed.h5ad")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r2_HV184'
cluster_key = 'KNN100_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r2_HV184'
cluster_key = 'KNN100_K12'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell annotation table; Lvl4 is cast to a categorical before counting
df = pd.DataFrame(adata.obs)
df['Lvl4'] = df['Lvl4'].astype('category')

# Cell counts per (Lvl4 cell type, KNN100_K12 niche): rows are cell types, columns are niches
niche_groups = df.groupby(['Lvl4', 'KNN100_K12']).size().unstack(fill_value=0)

# Per-cell flag for Lvl4 labels containing 'Oth' or starting with 'Neuro' ...
has_oth = df['Lvl4'].str.contains('Oth')
starts_neuro = df['Lvl4'].str.startswith('Neuro')
clusters_to_remove = has_oth | starts_neuro

# ... reduced to the distinct labels to exclude
clusters_to_remove = df.loc[clusters_to_remove, 'Lvl4'].unique()

# Keep only the rows whose cell type is not excluded
keep_rows = ~niche_groups.index.isin(clusters_to_remove)
niche_groups_filtered = niche_groups[keep_rows]

# Normalise every row by its own total, so each cell type's values across niches sum to 1
row_totals = niche_groups_filtered.sum(axis=1)
niche_groups_relative = niche_groups_filtered.div(row_totals, axis=0)

# Tall figure: one row per cell type
fig, ax = plt.subplots(figsize=(8, 14))
sns.heatmap(
    niche_groups_relative,
    cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05,
    linecolor='black',
    ax=ax,
)
ax.set_title('Relative Cell Type Distribution in Niches')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r2_HV184'
cluster_key = 'KNN50_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r3'
cluster_key = 'KNN50_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r1_HV207'
cluster_key = 'KNN50_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 'HV205B'
cluster_key = 'KNN50_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's1r2'
cluster_key = 'KNN50_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's1r2'
cluster_key = 'KNN30_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and niche column shown in this figure. Every selection, check and label
# below is derived from these two values, so they cannot drift apart.
sample_id = 's2r3'
cluster_key = 'KNN30_K10'

# Restrict to the cells of the requested sample
adata_sample = adata[adata.obs['sample'] == sample_id]
if adata_sample.n_obs == 0:
    # Without this check an unknown sample name only surfaces later, as an
    # unrelated-looking error from plt.subplots being asked for zero columns.
    raise ValueError(f"No cells found for sample {sample_id!r}")

# Fail early if a column this plot reads is absent
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# One panel per cluster present in this sample, in sorted label order
cluster_labels = adata_sample.obs[cluster_key]
unique_clusters = cluster_labels.unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the column count grows with the number of clusters
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')
axes = axes.flatten()  # 1-D view so panels can be paired with clusters in order

# Standard 2-D rotation of the centroids about the origin by rotation_angle degrees
rotation_angle = 45  # Modify if needed
angle = np.deg2rad(rotation_angle)
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, handed to set_aspect below.
# NOTE(review): this is not 'equal' scaling; it rescales y relative to x —
# confirm the resulting distortion is intended.
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

for ax, cluster in zip(axes, ordered_clusters):
    # Membership mask computed once per panel and reused for both layers
    in_cluster = (cluster_labels == cluster).to_numpy()

    # White panel without a frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Background layer: every cell outside the current cluster, in gray
    ax.scatter(new_x_coords[~in_cluster], new_y_coords[~in_cluster], c='#C0C0C0', s=3)

    # Foreground layer: cells of the current cluster, larger and in red
    ax.scatter(new_x_coords[in_cluster], new_y_coords[in_cluster], c='red', s=9)

    ax.set_aspect(aspect_ratio)

    # Cluster name centred near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # No grid, ticks or tick labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the grid positions that have no cluster to show
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_id}', color='black', fontsize=20, weight='bold', y=1.02)

plt.show()
plt.close(fig)


In [None]:
# Independent copy of the cells whose 'status.3' annotation contains 'perio';
# cells with a missing status are excluded (na=False).
adata_perio = adata[
    adata.obs['status.3'].str.contains('perio', na=False)
].copy()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the 'perio' subset. Work on a copy so the helper column
# added below can never leak back into adata_perio.obs.
df = adata_perio.obs.copy()
df['spatial_expression_cluster'] = df['KNN30_K10'].astype('category')

# Count cells per (niche cluster, cell type). After unstack() the rows are the
# KNN30_K10 clusters and the columns are the Lvl4 cell types.
# observed=False keeps the current pandas behavior for categorical keys explicit.
niche_groups = df.groupby(['KNN30_K10', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Flag the cell types that should not be shown (mixed / other / neuro / unclear).
# na=False makes cells without an Lvl4 label count as "keep" instead of
# producing NaN entries in the boolean mask.
is_excluded = (
    df['Lvl4'].str.contains('Mix', na=False)
    | df['Lvl4'].str.startswith('Oth.', na=False)
    | df['Lvl4'].str.startswith('Neuro', na=False)
    | df['Lvl4'].str.startswith('Unclea', na=False)
)

# Unique cell-type names to drop from the heatmap columns
clusters_to_remove = df.loc[is_excluded, 'Lvl4'].unique()

# Remove those cell types; names that are not columns are ignored
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')

# Heatmap of raw counts; the color scale saturates at 8000 cells
fig, ax = plt.subplots(figsize=(25, 10))  # wide enough for the cell-type tick labels
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 2000, 4000, 8000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=8000,
    ax=ax,
)
ax.set_title('Cell Type Distribution in Niches')
# Columns (x-axis) hold the Lvl4 cell types and rows (y-axis) the niche
# clusters, so the axis labels follow that orientation.
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the color bar so the top tick shows that values are clipped at vmax
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 2000, 4000, 8000])
cbar.ax.set_yticklabels(['0', '2,000', '4,000', '>8,000'])

fig.tight_layout()  # keep all elements inside the figure area
plt.show()


In [None]:
# Keep only the cells whose 'status.3' annotation contains the substring
# 'healthy'; cells with a missing annotation are excluded (na=False).
# .copy() turns the selection into an independent object instead of a view.
adata_health  = adata[adata.obs['status.3'].str.contains('healthy', na=False)].copy()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the 'healthy' subset. Work on a copy so the helper
# column added below can never leak back into adata_health.obs.
df = adata_health.obs.copy()
df['spatial_expression_cluster'] = df['KNN30_K10'].astype('category')

# Count cells per (niche cluster, cell type). After unstack() the rows are the
# KNN30_K10 clusters and the columns are the Lvl4 cell types.
# observed=False keeps the current pandas behavior for categorical keys explicit.
niche_groups = df.groupby(['KNN30_K10', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Flag the cell types that should not be shown (mixed / other / neuro / unclear).
# na=False makes cells without an Lvl4 label count as "keep" instead of
# producing NaN entries in the boolean mask.
is_excluded = (
    df['Lvl4'].str.contains('Mix', na=False)
    | df['Lvl4'].str.startswith('Oth.', na=False)
    | df['Lvl4'].str.startswith('Neuro', na=False)
    | df['Lvl4'].str.startswith('Unclea', na=False)
)

# Unique cell-type names to drop from the heatmap columns
clusters_to_remove = df.loc[is_excluded, 'Lvl4'].unique()

# Remove those cell types; names that are not columns are ignored
niche_groups_filtered = niche_groups.drop(columns=clusters_to_remove, errors='ignore')

# Heatmap of raw counts; the color scale saturates at 4000 cells
fig, ax = plt.subplots(figsize=(25, 10))  # wide enough for the cell-type tick labels
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 1000, 2000, 4000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=4000,
    ax=ax,
)
ax.set_title('Cell Type Distribution in Niches')
# Columns (x-axis) hold the Lvl4 cell types and rows (y-axis) the niche
# clusters, so the axis labels follow that orientation.
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the color bar so the top tick shows that values are clipped at vmax
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 1000, 2000, 4000])
cbar.ax.set_yticklabels(['0', '1,000', '2,000', '>4,000'])

fig.tight_layout()  # keep all elements inside the figure area
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and clustering column shown in this figure. The filter, the column
# check and the figure title below are all derived from these two values so
# they cannot drift apart.
sample_name = 's2r1_HV207'
cluster_key = 'KNN50_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Clusters present in this sample, in sorted order (one panel per cluster)
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns grows with the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')

# Flatten the axes array so panels can be addressed by a single index
axes = axes.flatten()

# Original cell centroid coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']

# Rotate the tissue by this angle (degrees) before plotting
rotation_angle = 45  # Modify if needed

# Standard 2D rotation of every (x, y) pair
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to
# ax.set_aspect() for every panel
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster: the cluster in red on top of all other cells in grey
cluster_labels = adata_sample.obs[cluster_key]
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Boolean masks are computed once per cluster and reused for x and y
    mask_cluster = (cluster_labels == cluster).to_numpy()
    mask_other = ~mask_cluster

    # All other clusters (grey), drawn first so they stay in the background
    ax.scatter(
        new_x_coords[mask_other],
        new_y_coords[mask_other],
        c='#C0C0C0',
        s=3
    )

    # The current cluster (red), drawn on top with larger markers
    ax.scatter(
        new_x_coords[mask_cluster],
        new_y_coords[mask_cluster],
        c='red',
        s=9
    )

    # Set aspect ratio
    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample and clustering column shown in this figure. The filter, the column
# check and the figure title below are all derived from these two values so
# they cannot drift apart.
sample_name = 's2r2_HV184'
cluster_key = 'KNN50_K10'

# Filter the data for the specific sample
adata_sample = adata[adata.obs['sample'] == sample_name]

# Ensure required columns exist
required_columns = [cluster_key, 'x_centroid', 'y_centroid']
missing_cols = [col for col in required_columns if col not in adata_sample.obs]
if missing_cols:
    raise KeyError(f"Missing columns in adata_sample.obs: {missing_cols}")

# Clusters present in this sample, in sorted order (one panel per cluster)
unique_clusters = adata_sample.obs[cluster_key].unique()
ordered_clusters = sorted(unique_clusters)

# Fixed number of rows; the number of columns grows with the cluster count
num_rows = 6
num_cols = int(np.ceil(len(ordered_clusters) / num_rows))

# Create the figure
fig, axes = plt.subplots(num_rows, num_cols, figsize=(46, 45))
fig.patch.set_facecolor('white')

# Flatten the axes array so panels can be addressed by a single index
axes = axes.flatten()

# Original cell centroid coordinates
x_coords = adata_sample.obs['x_centroid']
y_coords = adata_sample.obs['y_centroid']

# Rotate the tissue by this angle (degrees) before plotting
rotation_angle = 45  # Modify if needed

# Standard 2D rotation of every (x, y) pair
angle = np.deg2rad(rotation_angle)
new_x_coords = x_coords * np.cos(angle) - y_coords * np.sin(angle)
new_y_coords = x_coords * np.sin(angle) + y_coords * np.cos(angle)

# Ratio of the rotated x extent to the rotated y extent, passed to
# ax.set_aspect() for every panel
x_range = new_x_coords.max() - new_x_coords.min()
y_range = new_y_coords.max() - new_y_coords.min()
aspect_ratio = x_range / y_range

# One panel per cluster: the cluster in red on top of all other cells in grey
cluster_labels = adata_sample.obs[cluster_key]
for idx, cluster in enumerate(ordered_clusters):
    ax = axes[idx]

    # White background, no frame
    ax.set_facecolor('white')
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Boolean masks are computed once per cluster and reused for x and y
    mask_cluster = (cluster_labels == cluster).to_numpy()
    mask_other = ~mask_cluster

    # All other clusters (grey), drawn first so they stay in the background
    ax.scatter(
        new_x_coords[mask_other],
        new_y_coords[mask_other],
        c='#C0C0C0',
        s=3
    )

    # The current cluster (red), drawn on top with larger markers
    ax.scatter(
        new_x_coords[mask_cluster],
        new_y_coords[mask_cluster],
        c='red',
        s=9
    )

    # Set aspect ratio
    ax.set_aspect(aspect_ratio)

    # Cluster name near the bottom edge of the panel (axes coordinates)
    ax.text(
        0.5, 0.02, f'{cluster}',
        horizontalalignment='center',
        verticalalignment='center',
        transform=ax.transAxes,
        color='black', fontsize=20, weight='bold'
    )

    # Remove grid, ticks, and labels
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])

# Hide the panels that have no cluster assigned
for ax in axes[len(ordered_clusters):]:
    ax.set_visible(False)

# Adjust subplot spacing
plt.subplots_adjust(
    left=0.05, right=0.95, top=0.95, bottom=0.05,
    wspace=0.1, hspace=0.05
)

# Title names the sample that is actually plotted
plt.suptitle(f'All Clusters in {sample_name}', color='black', fontsize=20, weight='bold', y=1.02)

# Show the figure, then release it
plt.show()
plt.close(fig)


In [None]:
# Start the named-niche column as a copy of the KNN50_K10 cluster assignments,
# leaving the original 'KNN50_K10' column untouched.
adata.obs['niche_knn50k10'] = adata.obs['KNN50_K10'].copy()

In [None]:
# Human-readable niche name for each cluster id stored in 'niche_knn50k10'
replacement_dict = {
    '1': 'Epi-CT 1',
    '5': 'Plasma',
    '0': 'Fib CT 1',
    '9': 'Keratin',
    '3': 'Fib CT 2',
    '7': 'Crevicular',
    '8': 'Plasma-Fib CT',
    '6': 'Spinous',
    '4': 'Epi-CT 2',
    '2': 'Lymphoid',
}

# Cast the ids to strings so they match the dict keys, then store them as a
# categorical column.
niche_ids = adata.obs['niche_knn50k10'].astype(str).astype('category')

# Rename the categories instead of calling Series.replace(): replacing values
# of a categorical column in a way that changes its categories is deprecated
# in pandas 2.2. Categories missing from the dict are passed through unchanged,
# so re-running this cell after the names were applied is a no-op.
adata.obs['niche_knn50k10'] = niche_ids.cat.rename_categories(replacement_dict)
print(adata.obs['niche_knn50k10'].unique().tolist())

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot each sample's cells in rotated coordinates, colored by label, and save as SVG.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'sample'``,
        ``'x_centroid'``, ``'y_centroid'`` and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure is produced per sample.
    cluster_key : str
        ``obs`` column whose label selects the color of each cell.
    cluster_colors : dict
        Maps labels of ``cluster_key`` to matplotlib color strings. Cells
        whose label is not listed are drawn in light grey (``'#e1e1e1'``).
    output_path : str
        Destination of the SVG file. The same path is used for every sample,
        so with several samples only the last figure remains on disk.

    Raises
    ------
    ValueError
        If a sample name matches no cells.
    """
    background_color = '#e1e1e1'

    # Coordinates are rotated by 90 degrees before plotting
    rotation_angle = 90
    rotation_rad = np.deg2rad(rotation_angle)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order (later entries are painted on top): grey background first,
    # then the established palette, then any further color used in
    # ``cluster_colors`` so that no mapped label is silently left undrawn.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta']
    for color in cluster_colors.values():
        if color not in color_order:
            color_order.append(color)

    for sample_name in sample_names:
        # Only the obs table is needed, so select its rows directly instead of
        # creating (and later writing into) an AnnData view.
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Apply the rotation to all (x, y) pairs at once
        coords = np.vstack((
            sample_obs['x_centroid'].to_numpy(),
            sample_obs['y_centroid'].to_numpy(),
        ))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so that its longer side is scaled by the data extent;
        # fall back to a square figure when an extent is zero.
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Color of every cell; labels without an entry become grey. Casting to
        # object first keeps fillna() from failing on categorical columns.
        point_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        # One scatter call per color, in draw order
        for color in color_order:
            mask = point_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=10,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 μm if the centroids are in
        # micrometres), placed near the left edge at 30% of the height.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Plot sample s2r2_HV184 with four niches highlighted and save it as
# Xenium_perio_niches.svg; every other niche falls back to grey.
sample_names = ['s2r2_HV184']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_niches.svg'
lvl1_cluster_key = 'niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
}
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot each sample's cells in rotated coordinates, colored by label, and save as SVG.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'sample'``,
        ``'x_centroid'``, ``'y_centroid'`` and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure is produced per sample.
    cluster_key : str
        ``obs`` column whose label selects the color of each cell.
    cluster_colors : dict
        Maps labels of ``cluster_key`` to matplotlib color strings. Cells
        whose label is not listed are drawn in light grey (``'#e1e1e1'``).
    output_path : str
        Destination of the SVG file. The same path is used for every sample,
        so with several samples only the last figure remains on disk.

    Raises
    ------
    ValueError
        If a sample name matches no cells.
    """
    background_color = '#e1e1e1'

    # Coordinates are rotated by 90 degrees before plotting
    rotation_angle = 90
    rotation_rad = np.deg2rad(rotation_angle)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order (later entries are painted on top): grey background first,
    # then the established palette, then any further color used in
    # ``cluster_colors`` so that no mapped label is silently left undrawn.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta', '#800000']
    for color in cluster_colors.values():
        if color not in color_order:
            color_order.append(color)

    for sample_name in sample_names:
        # Only the obs table is needed, so select its rows directly instead of
        # creating (and later writing into) an AnnData view.
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Apply the rotation to all (x, y) pairs at once
        coords = np.vstack((
            sample_obs['x_centroid'].to_numpy(),
            sample_obs['y_centroid'].to_numpy(),
        ))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so that its longer side is scaled by the data extent;
        # fall back to a square figure when an extent is zero.
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Color of every cell; labels without an entry become grey. Casting to
        # object first keeps fillna() from failing on categorical columns.
        point_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        # One scatter call per color, in draw order
        for color in color_order:
            mask = point_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=10,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 μm if the centroids are in
        # micrometres), placed near the left edge at 30% of the height.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Plot sample s2r2_HV184 with the four highlighted niches plus both 'Fib CT'
# niches (sharing one color) and save it as Xenium_perio_niches.svg.
sample_names = ['s2r2_HV184']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_niches.svg'
lvl1_cluster_key = 'niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
    'Fib CT 1': '#800000',
    'Fib CT 2': '#800000',
}
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot each sample's cells in rotated coordinates, colored by label, and save as SVG.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'sample'``,
        ``'x_centroid'``, ``'y_centroid'`` and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure is produced per sample.
    cluster_key : str
        ``obs`` column whose label selects the color of each cell.
    cluster_colors : dict
        Maps labels of ``cluster_key`` to matplotlib color strings. Cells
        whose label is not listed are drawn in light grey (``'#e1e1e1'``).
    output_path : str
        Destination of the SVG file. The same path is used for every sample,
        so with several samples only the last figure remains on disk.

    Raises
    ------
    ValueError
        If a sample name matches no cells.
    """
    background_color = '#e1e1e1'

    # Coordinates are rotated by 90 degrees before plotting
    rotation_angle = 90
    rotation_rad = np.deg2rad(rotation_angle)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order (later entries are painted on top): grey background first,
    # then the established palette, then any further color used in
    # ``cluster_colors`` so that no mapped label is silently left undrawn.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta']
    for color in cluster_colors.values():
        if color not in color_order:
            color_order.append(color)

    for sample_name in sample_names:
        # Only the obs table is needed, so select its rows directly instead of
        # creating (and later writing into) an AnnData view.
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Apply the rotation to all (x, y) pairs at once
        coords = np.vstack((
            sample_obs['x_centroid'].to_numpy(),
            sample_obs['y_centroid'].to_numpy(),
        ))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so that its longer side is scaled by the data extent;
        # fall back to a square figure when an extent is zero.
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Color of every cell; labels without an entry become grey. Casting to
        # object first keeps fillna() from failing on categorical columns.
        point_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        # One scatter call per color, in draw order
        for color in color_order:
            mask = point_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=10,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 μm if the centroids are in
        # micrometres), placed near the left edge at 30% of the height.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Plot sample s2r1_HV207 with four niches highlighted and save it as
# Xenium_healthy_niches.svg; every other niche falls back to grey.
sample_names = ['s2r1_HV207']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_healthy_niches.svg'
lvl1_cluster_key = 'niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
}
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_spatial_plot(adata, sample_names, cluster_key, cluster_colors, output_path):
    """Plot each sample's cells in rotated coordinates, colored by label, and save as SVG.

    Parameters
    ----------
    adata : AnnData-like
        Object whose ``obs`` table has the columns ``'sample'``,
        ``'x_centroid'``, ``'y_centroid'`` and ``cluster_key``.
    sample_names : iterable of str
        Values of ``obs['sample']`` to plot; one figure is produced per sample.
    cluster_key : str
        ``obs`` column whose label selects the color of each cell.
    cluster_colors : dict
        Maps labels of ``cluster_key`` to matplotlib color strings. Cells
        whose label is not listed are drawn in light grey (``'#e1e1e1'``).
    output_path : str
        Destination of the SVG file. The same path is used for every sample,
        so with several samples only the last figure remains on disk.

    Raises
    ------
    ValueError
        If a sample name matches no cells.
    """
    background_color = '#e1e1e1'

    # Coordinates are rotated by 90 degrees before plotting
    rotation_angle = 90
    rotation_rad = np.deg2rad(rotation_angle)
    rotation_matrix = np.array([
        [np.cos(rotation_rad), -np.sin(rotation_rad)],
        [np.sin(rotation_rad), np.cos(rotation_rad)]
    ])

    # Draw order (later entries are painted on top): grey background first,
    # then the established palette, then any further color used in
    # ``cluster_colors`` so that no mapped label is silently left undrawn.
    color_order = [background_color, 'yellow', 'red', '#7F00FF', 'magenta', '#800000']
    for color in cluster_colors.values():
        if color not in color_order:
            color_order.append(color)

    for sample_name in sample_names:
        # Only the obs table is needed, so select its rows directly instead of
        # creating (and later writing into) an AnnData view.
        sample_obs = adata.obs.loc[adata.obs['sample'] == sample_name]
        if sample_obs.empty:
            raise ValueError(f"No cells found for sample {sample_name!r}")

        # Apply the rotation to all (x, y) pairs at once
        coords = np.vstack((
            sample_obs['x_centroid'].to_numpy(),
            sample_obs['y_centroid'].to_numpy(),
        ))
        new_x_coords, new_y_coords = rotation_matrix @ coords

        # Size the figure so that its longer side is scaled by the data extent;
        # fall back to a square figure when an extent is zero.
        x_range = new_x_coords.max() - new_x_coords.min()
        y_range = new_y_coords.max() - new_y_coords.min()
        aspect_ratio = x_range / y_range if x_range > 0 and y_range > 0 else 1.0
        figsize = (6 * aspect_ratio, 6) if aspect_ratio > 1 else (6, 6 / aspect_ratio)

        fig, ax = plt.subplots(figsize=figsize)
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')

        # Color of every cell; labels without an entry become grey. Casting to
        # object first keeps fillna() from failing on categorical columns.
        point_colors = (
            sample_obs[cluster_key]
            .astype(object)
            .map(cluster_colors)
            .fillna(background_color)
            .to_numpy()
        )

        # One scatter call per color, in draw order
        for color in color_order:
            mask = point_colors == color
            ax.scatter(
                x=new_x_coords[mask],
                y=new_y_coords[mask],
                c=color,
                s=10,
                label=color
            )

        # Remove grids, ticks, and spines
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_visible(False)

        # Scale bar of 200 coordinate units (200 μm if the centroids are in
        # micrometres), placed near the left edge at 30% of the height.
        scale_bar_length = 200
        scale_bar_x_start = new_x_coords.min() + 0.01 * x_range
        scale_bar_x_end = scale_bar_x_start + scale_bar_length
        scale_bar_y = new_y_coords.min() + 0.3 * y_range
        ax.plot(
            [scale_bar_x_start, scale_bar_x_end],
            [scale_bar_y, scale_bar_y],
            color='black',
            linewidth=8
        )

        fig.set_dpi(300)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
        plt.show()
        plt.close(fig)

# Plot sample s2r1_HV207 with the four highlighted niches plus both 'Fib CT'
# niches (sharing one color) and save it as Xenium_healthy_niches.svg.
sample_names = ['s2r1_HV207']
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_healthy_niches.svg'
lvl1_cluster_key = 'niche_knn50k10'
lvl1_cluster_colors = {
    'Lymphoid': 'red',
    'Plasma': '#7F00FF',
    'Crevicular': 'yellow',
    'Plasma-Fib CT': 'magenta',
    'Fib CT 1': '#800000',
    'Fib CT 2': '#800000',
}
generate_spatial_plot(
    adata=adata,
    sample_names=sample_names,
    cluster_key=lvl1_cluster_key,
    cluster_colors=lvl1_cluster_colors,
    output_path=output_path,
)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the 'perio' subset. Work on a copy so the dtype change
# below does not alter adata_perio.obs.
df = adata_perio.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Count cells per (cell type, niche): rows are Lvl4 cell types, columns niches.
# NOTE(review): 'perio_niche_knn50k10' is not created in the cells shown here;
# presumably it already exists in adata_perio.obs - confirm.
niche_groups = df.groupby(['Lvl4', 'perio_niche_knn50k10'], observed=False).size().unstack(fill_value=0)

# Cell types and niches that may appear in the figure
selected_clusters = ['B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
                    'T_proximity_to_Fib', 'cDC1','cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
                    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono']
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']  # Replace with actual niche names

# Raw counts restricted to the selected labels that actually exist
niche_groups_filtered = niche_groups.loc[
    niche_groups.index.intersection(selected_clusters),
    niche_groups.columns.intersection(selected_niches)
]

# Fraction of each cell type found in each niche. The denominator is the sum
# over ALL niches, so the fractions of the selected niches need not add up to 1.
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Relative frequencies restricted to the selected labels that actually exist
niche_groups_relative_filtered = niche_groups_relative.loc[
    niche_groups_relative.index.intersection(selected_clusters),
    niche_groups_relative.columns.intersection(selected_niches)
]

# Display order of the cell types. 'Fib.2' appears twice in the list; dict.fromkeys
# keeps only its first position so the heatmap does not show the column twice.
cluster_order = list(dict.fromkeys(
    ['B', 'Th', 'Th_proximity_to_B', 'mregDC', 'Pl.1', 'Pl.2', 'PB', 'Fib.2',
     'Treg', 'Tc', 'NK', 'T.IL7Rhi', 'cDC1','cDC2', 'pDC',
     'Mac', 'Mast', 'Mono', 'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5']
))

# Display order of the niches. Only niches listed in selected_niches survive
# the filter above; asking for 'Fib CT 1' here raised a KeyError. To show it,
# add it to both selected_niches and this list.
niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Order rows/columns with reindex (labels absent from the data become empty
# cells instead of raising), then transpose: niches on y, cell types on x.
heatmap_data = niche_groups_relative_filtered.reindex(index=cluster_order, columns=niche_order).T

# Plot heatmap with niches on y-axis and clusters on x-axis
plt.figure(figsize=(8, 3))
sns.heatmap(
    heatmap_data, cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05, linecolor='black',
    vmax=0.7,
    vmin=0.1,
    xticklabels=cluster_order,
    yticklabels=niche_order
)
plt.title('Relative Cell Type Distribution in Niches')
plt.xlabel('Cell Cluster')
plt.ylabel('Niche')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()  # Prevents clipping
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.svg'
fig = plt.gcf()  # Get current figure
plt.savefig(output_path, dpi=300, format='svg', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()

In [None]:
# Start the merged-niche column as a copy of the named niches, leaving the
# original 'niche_knn50k10' column untouched.
adata.obs['niche_knn50k10_merged'] = adata.obs['niche_knn50k10'].copy()

In [None]:
# Niche names that are collapsed into one merged niche each
replacement_dict = {
    'Epi-CT 1': 'Epi-CT',
    'Epi-CT 2': 'Epi-CT',
    'Fib CT 2': 'Fibrous CT',
    'Fib CT 1': 'Fibrous CT',
}

# Merge on plain strings and convert to categorical afterwards. Calling
# Series.replace() on a categorical column in a way that changes its
# categories (as this merge does) is deprecated in pandas 2.2.
merged_labels = adata.obs['niche_knn50k10_merged'].astype(str).replace(replacement_dict)
adata.obs['niche_knn50k10_merged'] = pd.Categorical(merged_labels)
print(adata.obs['niche_knn50k10_merged'].unique().tolist())

In [None]:
# Persist the annotated object, including the obs columns added in this notebook.
# NOTE(review): this is the same path the notebook reads adata from in its
# first cell, so the input file is overwritten in place - confirm this is intended.
adata.write_h5ad("/data/vasileiosionat2/Xenium/Drake_outputs/ccProcessed.h5ad")

In [None]:
# Re-create the 'perio' subset from the current adata so that it contains the
# obs columns added to adata since the subset was last taken (for example
# 'niche_knn50k10_merged'). Cells with a missing 'status.3' are excluded.
adata_perio  = adata[adata.obs['status.3'].str.contains('perio', na=False)].copy()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the 'perio' subset. Work on a copy so the helper column
# added below can never leak back into adata_perio.obs.
df = adata_perio.obs.copy()
df['spatial_expression_cluster'] = df['niche_knn50k10_merged'].astype('category')

# Niches (rows) and cell types (columns) to display, in display order
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']  # Replace with actual niche names
selected_clusters = ['B', 'Th_proximity_to_B',  'Tc',
                    'Pl.1', 'Pl.2', 'PB',
                    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast']  # Replace with actual cluster names

# Count cells per (niche, cell type). After unstack() the rows are the merged
# niches and the columns are the Lvl4 cell types.
# observed=False keeps the current pandas behavior for categorical keys explicit.
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Keep only the selected niches and cell types (raises KeyError if one is absent)
niche_groups_filtered = niche_groups.loc[selected_niches, selected_clusters]

# Heatmap of raw counts; the color scale saturates at 6000 cells
fig, ax = plt.subplots(figsize=(25, 10))  # wide enough for the cell-type tick labels
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 2000, 4000, 6000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=6000,
    ax=ax,
)

ax.set_title('Cell Type Distribution in Selected Niches')
# Columns (x-axis) hold the Lvl4 cell types and rows (y-axis) the niches, so
# the axis labels follow that orientation.
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the color bar so the top tick shows that values are clipped at vmax
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 2000, 4000, 6000])
cbar.ax.set_yticklabels(['0', '2,000', '4,000', '>6,000'])

fig.tight_layout()  # keep all elements inside the figure area
plt.show()

In [None]:
# Re-create the 'healthy' subset from the current adata so that it contains the
# obs columns added to adata since the subset was last taken (for example
# 'niche_knn50k10_merged'). Cells with a missing 'status.3' are excluded.
adata_health  = adata[adata.obs['status.3'].str.contains('healthy', na=False)].copy()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Per-cell metadata of the 'healthy' subset. Work on a copy so the helper
# column added below can never leak back into adata_health.obs.
df = adata_health.obs.copy()
df['spatial_expression_cluster'] = df['niche_knn50k10_merged'].astype('category')

# Niches (rows) and cell types (columns) to display, in display order
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT', 'Fibrous CT']  # Replace with actual niche names
selected_clusters = ['B', 'Th_proximity_to_B',  'Tc',
                    'Pl.1', 'Pl.2', 'PB',
                    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast']  # Replace with actual cluster names

# Count cells per (niche, cell type). After unstack() the rows are the merged
# niches and the columns are the Lvl4 cell types.
# observed=False keeps the current pandas behavior for categorical keys explicit.
niche_groups = df.groupby(['niche_knn50k10_merged', 'Lvl4'], observed=False).size().unstack(fill_value=0)

# Keep only the selected niches and cell types (raises KeyError if one is absent)
niche_groups_filtered = niche_groups.loc[selected_niches, selected_clusters]

# Heatmap of raw counts; the color scale saturates at 3000 cells
fig, ax = plt.subplots(figsize=(25, 10))  # wide enough for the cell-type tick labels
sns.heatmap(
    niche_groups_filtered,
    cmap='inferno',
    cbar_kws={'label': 'Count', 'ticks': [0, 1000, 2000, 3000], 'format': '%.0f'},
    linewidths=0.05,
    linecolor='black',
    vmax=3000,
    ax=ax,
)

ax.set_title('Cell Type Distribution in Selected Niches')
# Columns (x-axis) hold the Lvl4 cell types and rows (y-axis) the niches, so
# the axis labels follow that orientation.
ax.set_xlabel('Cell Type')
ax.set_ylabel('Niche Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)

# Relabel the color bar so the top tick shows that values are clipped at vmax
cbar = ax.collections[0].colorbar
cbar.set_ticks([0, 1000, 2000, 3000])
cbar.ax.set_yticklabels(['0', '1,000', '2,000', '>3,000'])

fig.tight_layout()  # keep all elements inside the figure area
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the fraction of each Lvl4 cluster's cells that falls into each selected
# niche, for the periodontitis samples; the figure is also written to a PDF.

# Independent copy of the cell annotations, so the dtype change below cannot write
# back into `adata_perio.obs`.
df = adata_perio.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Count cells per (cluster, niche): rows = Lvl4 clusters, columns = niches.
# observed=False is spelled out so categorical levels without any cell are kept as
# zero counts regardless of the default of the installed pandas version.
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'], observed=False)
      .size()
      .unstack(fill_value=0)
)

# Clusters and niches eligible for display
selected_clusters = ['B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
                    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
                    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono']
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Relative frequency = count / total cells of that cluster across ALL niches (the
# total is taken before any niche filtering), so the displayed rows need not sum to 1.
# A cluster with zero cells yields NaN here (0 / 0).
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Keep only the selected labels that actually exist in the table
niche_groups_relative_filtered = niche_groups_relative.loc[
    niche_groups_relative.index.intersection(selected_clusters),
    niche_groups_relative.columns.intersection(selected_niches)
]

# Final row/column order of the figure.
# NOTE(review): 'T_proximity_to_Fib' is listed in selected_clusters but not here, so
# the reindex below drops it from the plot — confirm this omission is intended.
cluster_order = ['B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi', 'mregDC', 'Pl.1', 'Pl.2', 'PB',
                 'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5',
                 'cDC1', 'cDC2', 'pDC', 'Mac', 'Mast', 'Mono']

niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# reindex never raises on an absent label: such rows/columns are inserted as NaN
niche_groups_relative_filtered = niche_groups_relative_filtered.reindex(index=cluster_order, columns=niche_order)

# Plot heatmap with clusters on y-axis and niches on x-axis (small panel, size in inches)
fig, ax = plt.subplots(figsize=(1.5, 4))
sns.heatmap(
    niche_groups_relative_filtered, cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05, linecolor='black',
    vmax=0.7,  # colour scale saturates at a relative frequency of 0.7
    xticklabels=niche_order,  # Niches on x-axis
    yticklabels=cluster_order,  # Clusters on y-axis
    ax=ax,
)
ax.set_title('Cell Type Distribution in Niches - Periodontitis')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()

# Save figure (before plt.show(), while the figure is still open)
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_perio_clusters_niches.pdf'
fig.savefig(output_path, dpi=300, format='pdf', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Heatmap of the fraction of each Lvl4 cluster's cells that falls into each selected
# niche, for the healthy samples; the figure is also written to a PDF.

# Independent copy of the cell annotations, so the dtype change below cannot write
# back into `adata_health.obs`.
df = adata_health.obs.copy()
df['Lvl4'] = df['Lvl4'].astype('category')

# Count cells per (cluster, niche): rows = Lvl4 clusters, columns = niches.
# observed=False is spelled out so categorical levels without any cell are kept as
# zero counts regardless of the default of the installed pandas version.
niche_groups = (
    df.groupby(['Lvl4', 'niche_knn50k10_merged'], observed=False)
      .size()
      .unstack(fill_value=0)
)

# Clusters and niches eligible for display
selected_clusters = ['B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi',
                    'T_proximity_to_Fib', 'cDC1', 'cDC2', 'mregDC', 'pDC', 'Pl.1', 'Pl.2', 'PB',
                    'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5', 'Mac', 'Mast', 'Mono']
selected_niches = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# Relative frequency = count / total cells of that cluster across ALL niches (the
# total is taken before any niche filtering), so the displayed rows need not sum to 1.
# A cluster with zero cells yields NaN here (0 / 0).
niche_groups_total = niche_groups.sum(axis=1)
niche_groups_relative = niche_groups.div(niche_groups_total, axis=0)

# Keep only the selected labels that actually exist in the table
niche_groups_relative_filtered = niche_groups_relative.loc[
    niche_groups_relative.index.intersection(selected_clusters),
    niche_groups_relative.columns.intersection(selected_niches)
]

# Final row/column order of the figure.
# NOTE(review): 'T_proximity_to_Fib' is listed in selected_clusters but not here, so
# the reindex below drops it from the plot — confirm this omission is intended.
cluster_order = ['B', 'Th', 'Th_proximity_to_B', 'Treg', 'Tc', 'NK', 'T.IL7Rhi', 'mregDC', 'Pl.1', 'Pl.2', 'PB',
                 'Fib.1', 'Fib.2', 'Fib.3', 'Fib.4', 'Fib.5',
                 'cDC1', 'cDC2', 'pDC', 'Mac', 'Mast', 'Mono']

niche_order = ['Lymphoid', 'Plasma', 'Plasma-Fib CT']

# reindex never raises on an absent label: such rows/columns are inserted as NaN
niche_groups_relative_filtered = niche_groups_relative_filtered.reindex(index=cluster_order, columns=niche_order)

# Plot heatmap with clusters on y-axis and niches on x-axis (small panel, size in inches)
fig, ax = plt.subplots(figsize=(1.5, 4))
sns.heatmap(
    niche_groups_relative_filtered, cmap='plasma',
    cbar_kws={'label': 'Relative Frequency'},
    linewidths=0.05, linecolor='black',
    vmax=0.7,  # colour scale saturates at a relative frequency of 0.7
    xticklabels=niche_order,  # Niches on x-axis
    yticklabels=cluster_order,  # Clusters on y-axis
    ax=ax,
)
ax.set_title('Cell Type Distribution in Niches - Health')
ax.set_xlabel('Niche')
ax.set_ylabel('Cell Cluster')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()

# Save figure (before plt.show(), while the figure is still open)
output_path = '/data/vasileiosionat2/IBEX_FINAL/Scimap/Outputs/Figures_paper/Figure5/Xenium_healthy_clusters_niches.pdf'
fig.savefig(output_path, dpi=300, format='pdf', bbox_inches='tight', facecolor=fig.get_facecolor())
plt.show()
