In [2]:
#Loading libraries
import scimap as sm
import anndata as ad
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from joblib import Parallel, delayed
import scipy
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
import multiprocessing as mp
import seaborn as sns
from bokeh.plotting import figure, output_file, show 
from bokeh.palettes import Category10, Category20, Category20b, Category20
import umap as um

# Declaring functions

In [3]:
## Function to list the neighbours of every cell, found either by knn or within a radius.
# It returns one row per (cell, neighbour) pair, carrying the neighbour's phenotype and CellId,
# instead of the fraction of neighbours of each cell type.
def get_neighbours_internal (adata_subset,x_coordinate,y_coordinate,phenotype,method,radius,knn,
                                subset,label,imageid,CellId='CellId'):
    """
    Build a long table of (centre cell, neighbour cell) pairs for one image.

    Parameters
    ----------
    adata_subset : object exposing a pandas DataFrame as ``.obs`` (one row per cell).
    x_coordinate, y_coordinate : names of the ``.obs`` columns holding the cell coordinates.
    phenotype : name of the ``.obs`` column holding the cell type.
    method : 'knn' or 'radius'.
    radius : search radius, used when ``method == 'radius'``.
    knn : ``k`` passed to the nearest-neighbour query, used when ``method == 'knn'``.
        The first hit of every query is dropped on the assumption that it is the
        cell itself, so every cell ends up with ``knn - 1`` neighbours.
    subset, label, imageid : accepted for signature compatibility; not used here.
    CellId : name of the ``.obs`` column holding the cell identifiers.

    Returns
    -------
    pandas.DataFrame indexed by the centre cell (the ``.obs`` index, repeated once
    per neighbour) with columns ``neighbour_phenotype``, ``neighbourhood`` (a copy
    of the index) and ``Neighbour_CellId``. Cells without neighbours have no rows.

    Raises
    ------
    ValueError
        If ``method`` is neither 'knn' nor 'radius'.
    """
    obs = adata_subset.obs
    result_columns = ['neighbour_phenotype', 'neighbourhood', 'Neighbour_CellId']

    # Create a DataFrame with the necessary information
    data = pd.DataFrame({'x': obs[x_coordinate], 'y': obs[y_coordinate], 'phenotype': obs[phenotype]})

    if method not in ('knn', 'radius'):
        raise ValueError("method must be 'knn' or 'radius', got " + repr(method))

    # No cells to search: return an empty table with the usual columns
    if data.empty:
        return pd.DataFrame(columns=result_columns, index=data.index)

    coords = data[['x','y']]

    # Identify neighbourhoods based on the method used.
    # neighbour_pos[i] holds the row positions (within this image) of the neighbours of cell i.
    if method == 'knn':
        # a) KNN method
        print("Identifying the " + str(knn) + " nearest neighbours for every cell")
        tree = BallTree(coords, leaf_size= 2)
        ind = tree.query(coords, k=knn, return_distance= False)
        neighbour_pos = [np.asarray(row[1:]) for row in ind] # Remove self neighbour
    else:
        # b) Local radius method
        print("Identifying neighbours within " + str(radius) + " pixels of every cell")
        kdt = BallTree(coords, metric='euclidean')
        ind = kdt.query_radius(coords, r=radius, return_distance=False)
        neighbour_pos = [hits[hits != i] for i, hits in enumerate(ind)] # Remove self neighbour

    # Flatten to one entry per (centre, neighbour) pair, keeping the per-cell query order
    pairs_per_cell = [len(hits) for hits in neighbour_pos]
    flat_pos = np.concatenate(neighbour_pos).astype(np.intp)
    centre_ids = data.index[np.repeat(np.arange(len(data)), pairs_per_cell)]

    # Look up phenotype and CellId of every neighbour by position
    n = pd.DataFrame({'neighbour_phenotype': data['phenotype'].to_numpy()[flat_pos],
                      'neighbourhood': centre_ids.to_numpy(),
                      'Neighbour_CellId': obs[CellId].to_numpy()[flat_pos]},
                     index=centre_ids, columns=result_columns)

    # return the long table of neighbour pairs
    return n

def get_neighbours_pheno (adata,
                   x_coordinate='X_centroid',
                   y_coordinate='Y_centroid',
                   phenotype='phenotype',
                   method='radius',
                   radius=30,knn=10,
                   imageid='imageid',
                   subset=None,
                   label='spatial_count'):
    """
    Run get_neighbours_internal image by image and stack the results.

    When ``subset`` is given, only the cells whose ``adata.obs[imageid]`` equals
    ``subset`` are processed; otherwise every distinct value of that column is
    processed in order of first appearance. All remaining arguments are forwarded
    unchanged to get_neighbours_internal.

    Returns the per-image results concatenated row-wise with pandas.concat
    (join='outer').
    """
    # Decide which images have to be processed
    if subset is None:
        image_ids = adata.obs[imageid].unique()
    else:
        image_ids = [subset]

    # Neighbour search is done independently inside each image
    per_image = []
    for image in image_ids:
        image_adata = adata[adata.obs[imageid] == image]
        per_image.append(
            get_neighbours_internal(adata_subset=image_adata,
                                    x_coordinate=x_coordinate,
                                    y_coordinate=y_coordinate,
                                    phenotype=phenotype,
                                    method=method, radius=radius, knn=knn,
                                    imageid=imageid, subset=subset, label=label)
        )

    # Merge all the results into a single dataframe
    return pd.concat(per_image, join='outer')
        
        

# Auxiliary function to get the neighbours of a single cell, looked up by its cell ID
def get_neighbors (adata, cellID_centroid, x_coordinate='X_position', y_coordinate='Y_position', radius=30,
              imageid="imageid", CellId="CellId"):
    """
    Return the CellIds of all cells lying within ``radius`` of a given centre cell.

    The centre cell is looked up by ``cellID_centroid`` in ``adata.obs.index``; the
    search is restricted to cells sharing its ``imageid`` value. The result is a
    numpy array taken from the ``CellId`` column. The centre cell is not filtered
    out, so its own CellId is expected to be part of the result.

    By default the radius is 30px which corresponds to about 20 microns
    (1px = 0.65 microns).
    """
    all_obs = adata.obs

    # Find the image the centre cell belongs to and keep only that image
    centre_rows = all_obs.loc[all_obs.index == cellID_centroid]
    centre_image = centre_rows[imageid].tolist()[0]
    image_obs = adata[all_obs[imageid] == centre_image].obs

    coords = pd.DataFrame({'x': image_obs[x_coordinate], 'y': image_obs[y_coordinate]})[['x', 'y']]

    # Row position of the centre cell inside the image subset
    centre_pos = np.where(image_obs.index == cellID_centroid)[0].tolist()[0]

    # Radius query around the centre cell; translate the hit positions to CellIds
    tree = BallTree(coords, metric='euclidean')
    hits = tree.query_radius(coords.iloc[[centre_pos]], r=radius, return_distance=False)
    return image_obs.iloc[hits[0]][CellId].to_numpy()



# Reading input data

In [4]:
# Load the per-cell table as a plain DataFrame; it is used below to threshold MHCII on cancer cells.
# NOTE(review): absolute, machine-specific path; the scimap loading cell further down reads this same file again.
table = pd.read_csv("D:/users/fperez/NKI_TMAs_AF/Tables/All_cells_subtype_scimap-format_all_channels_20240416.csv")

# Identify the cancer cells with high MHCII

In [16]:
# Identifying the MHCII+ and MHCII- cancer cells.
# The cut point is median + 1 standard deviation of log2(MHCII), computed on cancer cells only.
# Cells whose log2(MHCII) equals the cut point exactly fall in neither group.
# NOTE(review): this cell carries execution count 16 while its neighbours are 4 and 7,
# i.e. it was last run out of order; it only needs `table` from the cell above.

is_cancer = table['GlobalCellType'] == 'Cancer'
table_cancer = table[is_cancer]

log2_MHCII = np.log2(table_cancer['MHCII'])
cut_point = np.median(log2_MHCII) + np.std(log2_MHCII)

above_cut = log2_MHCII > cut_point
below_cut = log2_MHCII < cut_point

postive_MHCII_cancer = table_cancer[above_cut]
postive_MHCII_cancer_ids = postive_MHCII_cancer['CellId'].tolist()

negative_MHCII_cancer = table_cancer[below_cut]
negative_MHCII_cancer_ids = negative_MHCII_cancer['CellId'].tolist()

# Reading data for scimap

In [7]:
# Build the AnnData object from the same CSV that was read into `table` above.
# Judging from the adata.obs output below, the columns from 'Area' onwards end up in .obs
# (split='Area') and the obs index equals the CellId column (unique_CellId=False);
# presumably the columns before 'Area' are the marker intensities - confirm against the scimap docs.
# log=False: the intensities are passed without the loader's log option.
adata = sm.pp.mcmicro_to_scimap("D:/users/fperez/NKI_TMAs_AF/Tables/All_cells_subtype_scimap-format_all_channels_20240416.csv",
                                remove_dna=True, remove_string_from_name=None, log=False, drop_markers=None,
                                random_sample=None, unique_CellId=False, CellId='CellId', split='Area',
                                custom_imageid=None, min_cells=None, output_dir=None)

Loading All_cells_subtype_scimap-format_all_channels_20240416.csv


In [8]:
# Inspect the per-cell metadata (coordinates, cell types, CellId, imageid) loaded into the AnnData object
adata.obs

Unnamed: 0_level_0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Solidity,Perimeter,Roundness,X_position,Y_position,GlobalCellType,SubCelltype,CellId,imageid
CellId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TMA_18_810_core1_c1,58,9.087063,8.435341,0.371881,1.000000,24.836,1.181611,1078.500000,271.344828,Stromal,StromalClus2,TMA_18_810_core1_c1,TMA_18_810_core1
TMA_18_810_core1_c2,142,14.514077,12.610012,0.495141,0.986111,39.896,1.121087,1050.669014,274.873239,Stromal,StromalClus2,TMA_18_810_core1_c2,TMA_18_810_core1
TMA_18_810_core1_c3,267,23.084022,15.760832,0.730643,0.930314,62.093,0.870234,964.737828,280.842697,Cancer,CancerClus3,TMA_18_810_core1_c3,TMA_18_810_core1
TMA_18_810_core1_c4,207,20.860397,12.843806,0.787978,0.990431,50.962,1.001584,1036.420290,283.555556,Stromal,StromalClus1,TMA_18_810_core1_c4,TMA_18_810_core1
TMA_18_810_core1_c5,214,18.065880,15.236705,0.537291,0.972727,51.152,1.027776,1078.121495,282.752336,Stromal,StromalClus1,TMA_18_810_core1_c5,TMA_18_810_core1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
TMA_46_325_core9_c4640,157,15.471980,13.503890,0.488084,0.940120,44.618,0.991036,1348.427000,2178.401000,Stromal,StromalClus1,TMA_46_325_core9_c4640,TMA_46_325_core9
TMA_46_325_core9_c4641,198,19.042740,13.543120,0.702994,0.985075,50.300,0.983420,1583.682000,2182.323000,Stromal,StromalClus1,TMA_46_325_core9_c4641,TMA_46_325_core9
TMA_46_325_core9_c4642,274,25.259750,14.143750,0.828538,0.961403,61.605,0.907253,1391.843000,2182.040000,Stromal,StromalClus1,TMA_46_325_core9_c4642,TMA_46_325_core9
TMA_46_325_core9_c4643,241,22.866130,14.323350,0.779502,0.912879,60.621,0.824102,1434.913000,2182.199000,Stromal,StromalClus1,TMA_46_325_core9_c4643,TMA_46_325_core9


# Getting a full list of all neighbours within a 30 px radius

In [9]:
# One row per (cell, neighbour) pair for every image, using a 30 px radius.
# The coordinate and phenotype columns are passed explicitly because the function defaults
# ('X_centroid', 'Y_centroid', 'phenotype') do not match this table's column names.
# knn is ignored by the radius method.
neighbours_ids = get_neighbours_pheno(adata, x_coordinate='X_position', y_coordinate='Y_position',
                            phenotype='GlobalCellType', method='radius', radius=30,
                            subset=None, knn=None)

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell


In [10]:
# Inspect the pair table: the index and 'neighbourhood' hold the centre cell, 'Neighbour_CellId' the neighbour
neighbours_ids

Unnamed: 0_level_0,neighbour_phenotype,neighbourhood,Neighbour_CellId
CellId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TMA_18_810_core1_c1,Stromal,TMA_18_810_core1_c1,TMA_18_810_core1_c19
TMA_18_810_core1_c1,Stromal,TMA_18_810_core1_c1,TMA_18_810_core1_c5
TMA_18_810_core1_c1,Stromal,TMA_18_810_core1_c1,TMA_18_810_core1_c2
TMA_18_810_core1_c2,Stromal,TMA_18_810_core1_c2,TMA_18_810_core1_c17
TMA_18_810_core1_c2,Stromal,TMA_18_810_core1_c2,TMA_18_810_core1_c1
...,...,...,...
TMA_46_325_core9_c4643,Stromal,TMA_46_325_core9_c4643,TMA_46_325_core9_c4623
TMA_46_325_core9_c4643,Stromal,TMA_46_325_core9_c4643,TMA_46_325_core9_c4635
TMA_46_325_core9_c4644,Stromal,TMA_46_325_core9_c4644,TMA_46_325_core9_c4624
TMA_46_325_core9_c4644,Stromal,TMA_46_325_core9_c4644,TMA_46_325_core9_c4630


# Getting the neighbours of selected cells

In [18]:
# Neighbours of MHCII+ and MHCII- cancer cells.
# neighbours_ids is indexed by the centre cell, so selecting on the index keeps
# every (centre, neighbour) row whose centre is one of the chosen cancer cells.
centre_is_MHCIIpos = neighbours_ids.index.isin(postive_MHCII_cancer_ids)
centre_is_MHCIIneg = neighbours_ids.index.isin(negative_MHCII_cancer_ids)

MHCIIpos_cancer_neighboors = neighbours_ids[centre_is_MHCIIpos]
MHCIIneg_cancer_neighboors = neighbours_ids[centre_is_MHCIIneg]

In [20]:
# Keep only the neighbour rows whose neighbour is neither a cancer nor a stromal cell.
# NOTE(review): "immune" here means "not Cancer and not Stromal"; any other GlobalCellType
# label present in the data is kept as well - confirm that is intended.
non_immune_types = ['Cancer', 'Stromal']

pos_is_immune = ~MHCIIpos_cancer_neighboors['neighbour_phenotype'].isin(non_immune_types)
MHCIIpos_cancer_immune_neighbours = MHCIIpos_cancer_neighboors.loc[pos_is_immune]

neg_is_immune = ~MHCIIneg_cancer_neighboors['neighbour_phenotype'].isin(non_immune_types)
MHCIIneg_cancer_immune_neighbours = MHCIIneg_cancer_neighboors.loc[neg_is_immune]

In [26]:
# NOTE(review): the index of the neighbour table holds the CENTRE cell ids (the MHCII+ cancer cells),
# repeated once per neighbour row, so this view selects cancer cells with duplicates rather than
# their immune neighbours. If the neighbours were intended, select on the 'Neighbour_CellId'
# column instead - confirm intent.
adata[MHCIIpos_cancer_immune_neighbours.index]

View of AnnData object with n_obs × n_vars = 359684 × 33
    obs: 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Eccentricity', 'Solidity', 'Perimeter', 'Roundness', 'X_position', 'Y_position', 'GlobalCellType', 'SubCelltype', 'CellId', 'imageid'
    uns: 'all_markers'

In [35]:
# Export the (cancer cell, immune neighbour) pair tables, one file per MHCII group.
# The centre cell id is written as the CSV index column.
# NOTE(review): absolute, machine-specific output paths; the target directory must already exist.
MHCIIneg_cancer_immune_neighbours.to_csv("D:/users/fperez/NKI_TMAs_AF/Analysis_results/05_ML_validations/Neighborhoods/Cancer_MHCIIneg_immune_neighbours.csv")
MHCIIpos_cancer_immune_neighbours.to_csv("D:/users/fperez/NKI_TMAs_AF/Analysis_results/05_ML_validations/Neighborhoods/Cancer_MHCIIpos_immune_neighbours.csv")