In [1]:
#pip install shapely

In [2]:
import os
import numpy as np
import pandas as pd

from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc

import anndata as ad

from shapely.geometry import MultiPoint, Point
from shapely.ops import unary_union


In [3]:
!pwd

path1 = './output_dataobjects'
path2 = './figures'

os.makedirs(path1, exist_ok=True)
os.makedirs(path2, exist_ok=True)

/data/projects/zeba/MY_PROJECTS/240130_Xenium_ANCA_SLE_GBM/xenium-cgn/notebooks_zs/02_PeriglomDefinition


In [4]:
def define_glom_ID(adata, col_glom_num):
    #using the column with glom numbers "col_glom_num", creating 'glom_ID' which will give a unique ID to each glomerulus

    adata.obs['glom_ID'] = adata.obs[col_glom_num].astype(str)+ "_" + adata.obs['Slide_ID'].astype(str)+ "_" + adata.obs['Disease'].astype(str) + "_" + adata.obs['sample'].astype(str)

    adata.obs.glom_ID = adata.obs.glom_ID.astype('category')
    
    return adata

In [5]:
def define_expanded_polygons(Slide,NonGlomTag):
    #the first argument should be adata subsetted for each slide separately.
    #second argument is the string that labels cells which fall outside of glom annotation
    
    # dictionary to store the expanded polygons for each 'glom_ID'
    expanded_polygons = {}
    
    # To only take real gloms, filter out rows where 'glom_ID' does not start with the second argument
    Slide_filtered = Slide.obs[~Slide.obs['glom_ID'].str.startswith(NonGlomTag)]

    for glom_ID, group in Slide_filtered.groupby('glom_ID'):
        # Create a MultiPoint object from the x and y coordinates of the cells
        points = MultiPoint(list(zip(group['x'], group['y'])))
        
        # Create the convex polygon to get the simplest polygon that contains all the points
        polygon = points.convex_hull
        
        # Buffer the polygon to create the expanded region
        expanded_polygon = polygon.buffer(100)  # 100 µm expansion
        
        # Store the expanded polygon for this group
        expanded_polygons[glom_ID] = expanded_polygon
        
    return expanded_polygons

In [6]:
def define_polygon_flags(Slide,expanded_polygons):   
    # empty column for storing the polygon keys
    Slide.obs['polygon_flags'] = ''
    
    for index, row in tqdm(Slide.obs.iterrows()):
        point = Point(row['x'], row['y'])  # Create a Point object for the current cell
        
        keys_found = []  # List to store keys of polygons in which the current point lies
        
        # Check this point against each polygon
        for key, polygon in expanded_polygons.items():
            if polygon.contains(point):
                keys_found.append(key)
        
        # Join all found keys with commas and assign to the DataFrame
        Slide.obs.at[index, 'polygon_flags'] = ','.join(keys_found)
        
    return Slide

In [7]:
def create_glom_periglom_cols(Slide,col_glom_num, NonGlomTag):
    # Create column 'is_in_polygon' , 'is_in_glom' and "is_in_periglom"
    Slide.obs['is_in_polygon'] = Slide.obs['polygon_flags'].astype(bool) & Slide.obs['polygon_flags'].notna()
    Slide.obs['is_in_polygon'] = Slide.obs['is_in_polygon'].fillna(False) # Replace NaNs in 'is_in_polygon' with False explicitly
    
    Slide.obs['is_in_glom'] = ~Slide.obs[col_glom_num].str.contains(NonGlomTag)
    
    Slide.obs['is_in_periglom'] = (Slide.obs['is_in_polygon'] == True) & (Slide.obs['is_in_glom'] == False)
    
    return Slide

### Dataobject 


In [8]:
#This is the dataobject with nichePCA based Glom annotation Shared by Behnam On 02 Sep 24 : 
# https://github.com/imsb-uke/xenium-cgn/blob/main/notebooks/06_domain_identification/README.md
#which points to : epyc/Behnam/xenium-cgn/notebooks/06_domain_identification/adata/adata_nichepca_with_domain_tuned_per_sample_v1.h5ad

adata = sc.read_h5ad("/data/projects/Behnam/xenium-cgn/notebooks/06_domain_identification/adata/adata_nichepca_with_domain_tuned_per_sample_v1.h5ad")


In [9]:
adata=define_glom_ID(adata,"nichepca_glom_no")

In [10]:
adata.obs.glom_ID.unique()

['Other_0011695_SLE_X40', '0_0011695_SLE_X40', '1_0011695_SLE_X40', '2_0011695_SLE_X40', 'Other_0011695_SLE_X39', ..., '9_0011216_ANCA_X17', '10_0011216_ANCA_X17', '11_0011216_ANCA_X17', '12_0011216_ANCA_X17', '13_0011216_ANCA_X17']
Length: 833
Categories (833, object): ['0_0011216_ANCA_X17', '0_0011216_ANCA_X18', '0_0011216_ANCA_X19', '0_0011216_ANCA_X20', ..., 'Other_0018775_ANCA_X63', 'Other_0018775_Cntrl_X57', 'Other_0018775_SLE_X61', 'Other_0018775_SLE_X62']

In [11]:
len(adata.obs.glom_ID.unique())-63

770

In [12]:
adata.obs.Slide_ID.unique()

['0011695', '0018775', '0011707', '0011287', '0011762', '0011284', '0011546', '0011216']
Categories (8, object): ['0011216', '0011284', '0011287', '0011546', '0011695', '0011707', '0011762', '0018775']

In [13]:
Slide1_0011762=adata[adata.obs['Slide_ID']=='0011762',:]

Slide1_0011762_expanded_polygons = define_expanded_polygons(Slide1_0011762, "Other")
#Slide1_0011762_expanded_polygons

Slide1_0011762 = define_polygon_flags(Slide1_0011762,Slide1_0011762_expanded_polygons)

Slide1_0011762 = create_glom_periglom_cols(Slide1_0011762,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide1_0011762.write('./output_dataobjects/Slide1_0011762_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
303811it [04:11, 1210.37it/s]


In [14]:
Slide3_0011546=adata[adata.obs['Slide_ID']=='0011546',:]

Slide3_0011546_expanded_polygons = define_expanded_polygons(Slide3_0011546, "Other")
#Slide3_0011546_expanded_polygons

Slide3_0011546 = define_polygon_flags(Slide3_0011546,Slide3_0011546_expanded_polygons)

Slide3_0011546 = create_glom_periglom_cols(Slide3_0011546,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide3_0011546.write('./output_dataobjects/Slide3_0011546_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
407812it [08:22, 811.71it/s]


In [15]:
Slide4_0011216=adata[adata.obs['Slide_ID']=='0011216',:]

Slide4_0011216_expanded_polygons = define_expanded_polygons(Slide4_0011216, "Other")
#Slide4_0011216_expanded_polygons

Slide4_0011216 = define_polygon_flags(Slide4_0011216,Slide4_0011216_expanded_polygons)

Slide4_0011216 = create_glom_periglom_cols(Slide4_0011216,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide4_0011216.write('./output_dataobjects/Slide4_0011216_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
502183it [08:41, 963.20it/s] 


In [16]:
Slide5_0011707=adata[adata.obs['Slide_ID']=='0011707',:]

Slide5_0011707_expanded_polygons = define_expanded_polygons(Slide5_0011707, "Other")
#Slide5_0011707_expanded_polygons

Slide5_0011707 = define_polygon_flags(Slide5_0011707,Slide5_0011707_expanded_polygons)

Slide5_0011707 = create_glom_periglom_cols(Slide5_0011707,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide5_0011707.write('./output_dataobjects/Slide5_0011707_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
396654it [06:39, 993.97it/s] 


In [17]:
Slide6_0011695=adata[adata.obs['Slide_ID']=='0011695',:]

Slide6_0011695_expanded_polygons = define_expanded_polygons(Slide6_0011695, "Other")
#Slide6_0011695_expanded_polygons

Slide6_0011695 = define_polygon_flags(Slide6_0011695,Slide6_0011695_expanded_polygons)

Slide6_0011695 = create_glom_periglom_cols(Slide6_0011695,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide6_0011695.write('./output_dataobjects/Slide6_0011695_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
428899it [06:36, 1080.74it/s]


In [None]:
Slide7_0011284=adata[adata.obs['Slide_ID']=='0011284',:]

Slide7_0011284_expanded_polygons = define_expanded_polygons(Slide7_0011284, "Other")
#Slide7_0011284_expanded_polygons

Slide7_0011284 = define_polygon_flags(Slide7_0011284,Slide7_0011284_expanded_polygons)

Slide7_0011284 = create_glom_periglom_cols(Slide7_0011284,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide7_0011284.write('./output_dataobjects/Slide7_0011284_PeriglomDefined.h5ad')

  for glom_ID, group in Slide_filtered.groupby('glom_ID'):
  Slide.obs['polygon_flags'] = ''
69183it [01:10, 960.80it/s] 

In [None]:
Slide8_0011287=adata[adata.obs['Slide_ID']=='0011287',:]

Slide8_0011287_expanded_polygons = define_expanded_polygons(Slide8_0011287, "Other")
#Slide8_0011287_expanded_polygons

Slide8_0011287 = define_polygon_flags(Slide8_0011287,Slide8_0011287_expanded_polygons)

Slide8_0011287 = create_glom_periglom_cols(Slide8_0011287,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide8_0011287.write('./output_dataobjects/Slide8_0011287_PeriglomDefined.h5ad')

203937it [03:12, 1043.89it/s]

In [None]:
Slide9_0018775=adata[adata.obs['Slide_ID']=='0018775',:]

Slide9_0018775_expanded_polygons = define_expanded_polygons(Slide9_0018775, "Other")
#Slide9_0018775_expanded_polygons

Slide9_0018775 = define_polygon_flags(Slide9_0018775,Slide9_0018775_expanded_polygons)

Slide9_0018775 = create_glom_periglom_cols(Slide9_0018775,col_glom_num="nichepca_glom_no", NonGlomTag="Other")

Slide9_0018775.write('./output_dataobjects/Slide9_0018775_PeriglomDefined.h5ad')