In [1]:
#Loading libraries
import scimap as sm
import anndata as ad
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from joblib import Parallel, delayed
import scipy
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
import multiprocessing as mp
import seaborn as sns
from bokeh.plotting import figure, output_file, show 
from bokeh.palettes import Category10, Category20, Category20b, Category20
import umap as um

In [2]:
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
import multiprocessing as mp
import seaborn as sns
from bokeh.plotting import figure, output_file, show 
from bokeh.palettes import Category10, Category20, Category20b, Category20
import umap as um

In [3]:
## Function to count the neighbours of every cell, found either by knn or within a radius.
# It returns the actual number of neighbouring cells of each cell type, instead of the
# fraction of neighbours of each cell type.
def spatial_count_internal (adata_subset,x_coordinate,y_coordinate,phenotype,method,radius,knn,
                                subset,label,imageid):
    """Count, for every cell, how many of its spatial neighbours carry each phenotype.

    Parameters
    ----------
    adata_subset : object
        Only ``adata_subset.obs`` is read; it is indexed by column name like a
        DataFrame with one row per cell.
    x_coordinate, y_coordinate : str
        Columns of ``adata_subset.obs`` holding the cell coordinates.
    phenotype : str
        Column of ``adata_subset.obs`` holding the cell-type label.
    method : {'knn', 'radius'}
        How neighbours are selected.
    radius : number
        Search radius passed to ``BallTree.query_radius`` (``method='radius'`` only).
    knn : int
        ``k`` passed to ``BallTree.query`` (``method='knn'`` only). Column 0 of the
        query result is dropped as the cell itself, which assumes every cell is
        returned as its own first hit.
    subset, label, imageid
        Not used in this function; accepted so the caller can forward its arguments.

    Returns
    -------
    pandas.DataFrame
        Raw (un-normalised) neighbour counts. Rows are the cells that have at least
        one neighbour (index named ``'neighbourhood'``), columns are the phenotypes
        observed among neighbours (named ``'neighbour_phenotype'``). Cells without
        any neighbour have no row.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'knn'`` nor ``'radius'``.
    """
    # Fail early with a clear message instead of an UnboundLocalError further down.
    if method not in ('knn', 'radius'):
        raise ValueError("method must be 'knn' or 'radius', got " + repr(method))

    # Create a DataFrame with the necessary information
    data = pd.DataFrame({'x': adata_subset.obs[x_coordinate], 'y': adata_subset.obs[y_coordinate], 'phenotype': adata_subset.obs[phenotype]})
    coords = data[['x','y']]

    # Identify neighbourhoods based on the method used
    if method == 'knn':
        # a) KNN method
        print("Identifying the " + str(knn) + " nearest neighbours for every cell")
        tree = BallTree(coords, leaf_size= 2)
        ind = tree.query(coords, k=knn, return_distance= False)
        neighbours = pd.DataFrame(ind.tolist(), index = data.index) # neighbour DF
        neighbours = neighbours.drop(0, axis=1) # Remove self neighbour
    else:
        # b) Local radius method
        print("Identifying neighbours within " + str(radius) + " pixels of every cell")
        kdt = BallTree(coords, metric='euclidean')
        ind = kdt.query_radius(coords, r=radius, return_distance=False)
        # Row i holds positional indices of all hits for cell i; drop i itself.
        without_self = [hits[hits != i] for i, hits in enumerate(ind)]
        # Ragged rows are padded with NaN by the DataFrame constructor.
        neighbours = pd.DataFrame(without_self, index = data.index) # neighbour DF

    # Map positional neighbour index -> phenotype label
    phenomap = dict(zip(range(len(ind)), data['phenotype']))

    # Loop through (all functionized methods were very slow)
    for col in neighbours.columns:
        neighbours[col] = neighbours[col].dropna().map(phenomap, na_action='ignore')

    # Collapse all the neighbours into one long (cell, neighbour phenotype) table.
    # The explicit dropna() removes the NaN padding regardless of whether the
    # installed pandas drops it inside stack() or not.
    stacked = neighbours.stack().dropna()
    if stacked.empty:
        # No cell has any neighbour: return an empty table with the usual axis names
        # rather than relying on how groupby/unstack treat empty input.
        return pd.DataFrame(index=data.index[:0].rename('neighbourhood'),
                            columns=pd.Index([], name='neighbour_phenotype'), dtype=float)

    n = pd.DataFrame({'neighbourhood': stacked.index.get_level_values(0),
                      'neighbour_phenotype': stacked.to_numpy()})

    # Raw counts per (cell, neighbour phenotype). The row-wise normalisation
    # k.div(k.sum(axis=1), axis=0) is deliberately NOT applied here.
    k = n.groupby(['neighbourhood','neighbour_phenotype']).size().unstack().fillna(0)

    # return the neighbour occurrence counts
    return k
    
def spatial_count2 (adata,
                   x_coordinate='X_centroid',
                   y_coordinate='Y_centroid',
                   phenotype='phenotype',
                   method='radius',
                   radius=30,knn=10,
                   imageid='imageid',
                   subset=None,
                   label='spatial_count'):
    """Compute raw neighbour counts per cell, image by image, and store them on ``adata``.

    Parameters
    ----------
    adata : object
        Presumably an AnnData object (confirm against caller); this function only
        uses ``adata.obs``, ``adata.uns`` and boolean-mask indexing ``adata[mask]``.
    x_coordinate, y_coordinate, phenotype, method, radius, knn
        Forwarded unchanged to ``spatial_count_internal``.
    imageid : str
        Column of ``adata.obs`` that identifies the image each cell belongs to.
        Neighbours are searched within one image at a time.
    subset : optional
        If given, only cells whose ``imageid`` value equals ``subset`` are analysed.
    label : str
        Key under which the result table is stored in ``adata.uns``.

    Returns
    -------
    The same ``adata`` object, with ``adata.uns[label]`` set to a DataFrame indexed
    like ``adata.obs``. Analysed cells with no neighbour get 0 in every column;
    cells belonging to images that were not analysed (``subset`` given) are NaN.
    """
    # Decide which images to process
    if subset is not None:
        image_ids = [subset]
    else:
        image_ids = adata.obs[imageid].unique()

    # Run the per-image neighbour count and remember which cells were analysed
    per_image = []
    analysed_cells = []
    for image_id in image_ids:
        adata_subset = adata[adata.obs[imageid] == image_id]
        analysed_cells.extend(adata_subset.obs.index)
        per_image.append(spatial_count_internal(adata_subset=adata_subset,x_coordinate=x_coordinate,
                                                y_coordinate=y_coordinate,phenotype=phenotype,
                                                method=method,radius=radius,knn=knn,
                                                imageid=imageid,subset=subset,label=label))

    # Merge all the results into a single dataframe
    result = pd.concat(per_image, join='outer')

    # Cells without any neighbour have no row in the per-image tables. Re-insert them
    # BEFORE filling, so they end up as zero counts rather than NaN; the fill also
    # covers phenotypes that never occurred in a given image.
    result = result.reindex(pd.Index(analysed_cells)).fillna(0)

    # Align to the full cell index; cells of images that were not analysed stay NaN.
    result = result.reindex(adata.obs.index)

    # Add to adata
    adata.uns[label] = result

    # Return
    return adata

# Reading input data

In [4]:
# Load the per-cell CSV table into `adata` via scimap.
# Every optional argument is spelled out explicitly, one per line.
adata = sm.pp.mcmicro_to_scimap(
    "D:/users/fperez/NKI_TMAs_AF/Tables/All_cells_scimap-format_columns-reduced_20231129.csv",
    remove_dna=False,
    remove_string_from_name=None,
    log=False,
    drop_markers=None,
    random_sample=None,
    unique_CellId=False,
    CellId='CellId',
    split='Area',
    custom_imageid=None,
    min_cells=None,
    output_dir=None,
)

Loading All_cells_scimap-format_columns-reduced_20231129.csv


In [5]:
# List the distinct cell-type labels present in the 'GlobalCellType' column.
adata.obs['GlobalCellType'].unique()

array(['Stromal', 'Cancer', 'Other.MY', 'CD8.T.cells', 'Other',
       'CD4.T.cells', 'CD31.stromal', 'T.regs', 'CD207.MY', 'CD15.MY',
       'CD163.MP', 'CD11c.MY', 'CD68.MP', 'B.cells', 'Other.immune'],
      dtype=object)

# Counting number of neighboring cells by celltype

In [8]:
# Neighbour counts within a radius of 30 px (20 microns) around every cell.
# The result table is stored under adata.uns['spatial_count_30'].
adata = spatial_count2(
    adata,
    x_coordinate='X_position',
    y_coordinate='Y_position',
    phenotype='GlobalCellType',
    method='radius',
    radius=30,
    knn=None,
    subset=None,
    label='spatial_count_30',
)

# Disabled alternative: scimap's own sm.tl.spatial_count using the 10 nearest neighbours.
# (The original note here mentioned a 46 px / 30 micron radius, which does not match
# the knn call below.)
# adata = sm.tl.spatial_count(adata, x_coordinate='X_position', y_coordinate='Y_position',
#                             phenotype='GlobalCellType', method='knn', knn=10,
#                             subset=None, label='spatial_knn_10')


Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell


In [9]:
# Inspect the neighbour-count table (rows: cells, columns: neighbour cell types).
adata.uns['spatial_count_30']

neighbour_phenotype,CD11c.MY,CD15.MY,CD163.MP,CD207.MY,CD31.stromal,CD4.T.cells,CD68.MP,CD8.T.cells,Cancer,Other,Other.MY,Stromal,T.regs,B.cells,Other.immune
CellId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TMA_18_810_core1_c1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
TMA_18_810_core1_c2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
TMA_18_810_core1_c3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0
TMA_18_810_core1_c4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
TMA_18_810_core1_c5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TMA_46_325_core9_c4640,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.0,0.0
TMA_46_325_core9_c4641,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
TMA_46_325_core9_c4642,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
TMA_46_325_core9_c4643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0


In [10]:
# Save the neighbour-count table for the 30 px radius to CSV.
# NOTE: these are raw counts per neighbour cell type, not proportions.
kclust = adata.uns['spatial_count_30']
kclust.to_csv("D:/users/fperez/NKI_TMAs_AF/Tables/Spatial-cell-counts_radious30px_20240410.csv")
# Disabled: export of the knn-10 result (only exists if the knn alternative was run).
# kclust = adata.uns['spatial_knn_10']
# kclust.to_csv("D:/users/fperez/NKI_TMAs_AF/Tables/Spatial-cell-proportions_knn10.csv")