In [1]:
#Loading libraries
import scimap as sm
import pandas as pd
import numpy as np
import os
from re import search
from os import listdir

from sklearn.neighbors import BallTree

Running SCIMAP  2.0.5



IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [2]:
##Function to count the number of knn cells or cells in a radious
#It returns the actual number of cells of each cell_type, instead of tehe fraction of neighboors of each cell_type
def spatial_count_internal (adata_subset,x_coordinate,y_coordinate,phenotype,method,radius,knn,
                                subset,label,imageid):
        # Create a DataFrame with the necessary inforamtion
        data = pd.DataFrame({'x': adata_subset.obs[x_coordinate], 'y': adata_subset.obs[y_coordinate], 'phenotype': adata_subset.obs[phenotype]})

        # Identify neighbourhoods based on the method used
        # a) KNN method
        if method == 'knn':
            print("Identifying the " + str(knn) + " nearest neighbours for every cell")
            tree = BallTree(data[['x','y']], leaf_size= 2)
            ind = tree.query(data[['x','y']], k=knn, return_distance= False)
            neighbours = pd.DataFrame(ind.tolist(), index = data.index) # neighbour DF
            neighbours.drop(0, axis=1, inplace=True) # Remove self neighbour

        # b) Local radius method
        if method == 'radius':
            print("Identifying neighbours within " + str(radius) + " pixels of every cell")
            kdt = BallTree(data[['x','y']], metric='euclidean') 
            ind = kdt.query_radius(data[['x','y']], r=radius, return_distance=False)
            for i in range(0, len(ind)): ind[i] = np.delete(ind[i], np.argwhere(ind[i] == i))#remove self
            neighbours = pd.DataFrame(ind.tolist(), index = data.index) # neighbour DF

        # Map phenotype
        phenomap = dict(zip(list(range(len(ind))), data['phenotype'])) # Used for mapping

        # Loop through (all functionized methods were very slow)
        for i in neighbours.columns:
            neighbours[i] = neighbours[i].dropna().map(phenomap, na_action='ignore')

        # Drop NA
        #n_dropped = neighbours.dropna(how='all')

        # Collapse all the neighbours into a single column
        n = pd.DataFrame(neighbours.stack(), columns = ["neighbour_phenotype"])
        n.index = n.index.get_level_values(0) # Drop the multi index
        n = pd.DataFrame(n)
        n['order'] = list(range(len(n)))

        # Merge with real phenotype
        n_m = n.merge(data['phenotype'], how='inner', left_index=True, right_index=True)
        n_m['neighbourhood'] = n_m.index
        n = n_m.sort_values(by=['order'])

        # Normalize based on total cell count
        k = n.groupby(['neighbourhood','neighbour_phenotype']).size().unstack().fillna(0)
        #k = k.div(k.sum(axis=1), axis=0)

        # return the normalized neighbour occurance count
        return k
    
def spatial_count2 (adata,
                   x_coordinate='X_centroid',
                   y_coordinate='Y_centroid',
                   phenotype='phenotype',
                   method='radius',
                   radius=30,knn=10,
                   imageid='imageid',
                   subset=None,
                   label='spatial_count'):
    
    # Subset a particular image if needed
    if subset is not None:
        adata_list = [adata[adata.obs[imageid] == subset]]
    else:
        adata_list = [adata[adata.obs[imageid] == i] for i in adata.obs[imageid].unique()]

    # Apply function to all images and create a master dataframe
    # Create lamda function 
    r_spatial_count_internal = lambda x: spatial_count_internal(adata_subset=x,x_coordinate=x_coordinate,
                                                   y_coordinate=y_coordinate,phenotype=phenotype,
                                                   method=method,radius=radius,knn=knn,
                                                   imageid=imageid,subset=subset,label=label) 
    all_data = list(map(r_spatial_count_internal, adata_list)) # Apply function 


    # Merge all the results into a single dataframe    
    result = []
    for i in range(len(all_data)):
        result.append(all_data[i])
    result = pd.concat(result, join='outer')  

    # Reindex the cells
    result = result.fillna(0)
    result = result.reindex(adata.obs.index)

    # Add to adata
    adata.uns[label] = result

    # Return        
    return adata

# Reading input data

In [14]:
def get_file_list(folder): 
    files = [os.path.join(folder, f) for f in listdir(folder) if search('_annotated.csv', f)]
    return files

In [15]:
folder = 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/'
image_path = get_file_list(folder)

In [16]:
image_path

['D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S050_iAdn_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S057_iOme_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S065_iOme_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S072_iOme_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S073_iOme1_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S081_iOme_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S083_iOme1_logic_table8_raw_tribus_annotated.csv',
 'D:/Projects/NKI_TMAs/Whole_slide_validation/tribus_analysis/output_data/S084_iOme2_logic_table8_raw_tribus_annotated.csv',
 'D:/

In [17]:
output_path = "D:/Projects/NKI_TMAs/Whole_slide_validation/scimap_analysis/output_data"

In [19]:
adata = sm.pp.mcmicro_to_scimap(image_path, 
                                remove_dna=False, remove_string_from_name=None, log=False, drop_markers=None,
                                random_sample=None, unique_CellId=True, CellId='CellID', split='X_centroid',
                                custom_imageid=None, min_cells=None, output_dir=None)

Loading S050_iAdn_logic_table8_raw_tribus_annotated.csv
Loading S057_iOme_logic_table8_raw_tribus_annotated.csv
Loading S065_iOme_logic_table8_raw_tribus_annotated.csv
Loading S072_iOme_logic_table8_raw_tribus_annotated.csv
Loading S073_iOme1_logic_table8_raw_tribus_annotated.csv
Loading S081_iOme_logic_table8_raw_tribus_annotated.csv
Loading S083_iOme1_logic_table8_raw_tribus_annotated.csv
Loading S084_iOme2_logic_table8_raw_tribus_annotated.csv
Loading S091_iOme1_logic_table8_raw_tribus_annotated.csv
Loading S098_iOme_logic_table8_raw_tribus_annotated.csv
Loading S100_iOme_raw_scimap_annotated.csv
Loading S107_iOme_logic_table8_raw_tribus_annotated.csv
Loading S110_iTubR_raw_scimap_annotated.csv
Loading S112_iOme_logic_table8_raw_tribus_annotated.csv
Loading S113_iOme_logic_table8_raw_tribus_annotated.csv
Loading S118_iOme_logic_table8_raw_tribus_annotated.csv
Loading S121_iOme_logic_table8_raw_tribus_annotated.csv
Loading S123_iOme_logic_table8_raw_tribus_annotated.csv
Loading S130_


X converted to numpy array with dtype float64



In [20]:
adata.obs['final_label'].unique()

array(['Cancer', 'Stromal', 'CD4.T.cell', 'CD8.T.cell', 'IBA1.Myeloid',
       'CD11c.Myeloid', 'undefined_Global', 'Unknown'], dtype=object)

# Counting number of neighboring cells by celltype

In [21]:
#For radious of 30px (20microns)
adata = spatial_count2(adata, x_coordinate='X_centroid', y_coordinate='Y_centroid',
                            phenotype='final_label', method='radius', radius=30,
                            subset=None, label='spatial_count_30', knn=None)

# #For radious of 46px (30microns)
# adata = sm.tl.spatial_count(adata, x_coordinate='X_position', y_coordinate='Y_position',
#                             phenotype='GlobalCellType', method='knn', knn=10,
#                             subset=None, label='spatial_knn_10')


Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours within 30 pixels of every cell
Identifying neighbours withi

In [22]:
adata.uns['spatial_count_30']

neighbour_phenotype,CD11c.Myeloid,CD4.T.cell,CD8.T.cell,Cancer,IBA1.Myeloid,Stromal,undefined_Global,Unknown
S050_iAdn_logic_table8_raw_tribus_annotated_1,,,,,,,,
S050_iAdn_logic_table8_raw_tribus_annotated_2,,,,,,,,
S050_iAdn_logic_table8_raw_tribus_annotated_3,,,,,,,,
S050_iAdn_logic_table8_raw_tribus_annotated_4,,,,,,,,
S050_iAdn_logic_table8_raw_tribus_annotated_5,,,,,,,,
...,...,...,...,...,...,...,...,...
S268_iOme_logic_table8_raw_tribus_annotated_855274,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0
S268_iOme_logic_table8_raw_tribus_annotated_855275,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
S268_iOme_logic_table8_raw_tribus_annotated_855276,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
S268_iOme_logic_table8_raw_tribus_annotated_855277,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0


In [23]:
#Saving spatial interaction proportions
kclust = adata.uns['spatial_count_30']
kclust.to_csv("{}/Spatial-cell-proportions_radious30px_new_fuction.csv".format(output_path))
# kclust = adata.uns['spatial_knn_10']
# kclust.to_csv("D:/users/fperez/NKI_TMAs_AF/Tables/Spatial-cell-proportions_knn10.csv")