# Import libraries and setup

In [None]:
# Import libraries we may need
import anndata
import numpy as np
import pandas as pd
import seaborn as sb
import scanpy as sc
import dill
import colorcet as cc
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, Normalize
from pathlib import Path

#My module for importing IMC data into AnnData format
import mikeimc as mimc
import mikeimc_v2 as mikeimc_v2

#ATHENA
from spatialOmics import SpatialOmics
import athena as sh


In [None]:
# Set up output figure settings
# NOTE(review): sc.set_figure_params() further down may reset matplotlib rcParams
# (including figure.figsize), in which case this value is overridden - TODO confirm.
plt.rcParams['figure.figsize']=(64,64) # default figure size in inches; increase the size here to rescale figures

# Set up scanpy settings
sc.settings.verbosity = 3
sc.set_figure_params(dpi=100, dpi_save=300) # dpi for displayed figures, dpi_save for figures written to disk
#sc.logging.print_versions()

In [None]:
# Load the session: restores the notebook globals previously written to
# 'DC_ATHENA.db' with dill.dump_session.
# NOTE(review): this unpickles arbitrary objects - only load session files you created yourself.
dill.load_session('DC_ATHENA.db')

In [None]:
#save the session
dill.dump_session('DC_ATHENA.db')

# Transfer data into ATHENA format ('so' object)

In [None]:
adata_subset3 = anndata.read_h5ad('adata_subset3')

In [None]:
adata_subset3

In [None]:
ad = adata_subset3

In [None]:
ad

Convert some columns to category
This will save headaches in later stages - makes sure that all categorical variables in obs are stored as categories.
This will need changing depending on what extra columns you added from your 'observations' in the cell table, or information added in the dictionary.csv

In [None]:
# This will list out the columns you may want to now convert...
ad.obs.columns.tolist()

In [None]:
# Columns of ad.obs that hold categorical annotations - edit this list to
# match the categorical obs listed by the previous cell.
categorical_columns = ['ROI','TMAID', 'CaseID', 'Region', 'Patient', 'Group', 'Diagnosis']

# Cast each of them to pandas' 'category' dtype, one column at a time.
for column in categorical_columns:
    ad.obs[column] = ad.obs[column].astype('category')

Add a cell_id - numbers each cell within its sample (restarting at 1 for each sample; 0 is left free because it marks the background in the cell masks)

In [None]:
# Per-ROI cell counter: Master_Index shifted so that the first cell of every ROI gets id 1.
# The previous version rebuilt the same boolean ROI mask three times per ROI and created
# the column through partial .loc writes (which leaves it as float); a single
# groupby-transform gives the same numbers in one pass and keeps the dtype of Master_Index.
# Assumes Master_Index is numeric and increases by 1 per cell within an ROI - TODO confirm.
roi_first_index = ad.obs.groupby('ROI', observed=True)['Master_Index'].transform('min')
ad.obs['cell_id'] = ad.obs['Master_Index'] - roi_first_index + 1


Convert the group names into ids - numbers for each group. 
This is required for some of athenas plotting functions and to plot in mode='mask'
Here, we just convert each group name into a number.

In [None]:
# (new id column, existing label column) pairs: every distinct label gets an
# integer group number via groupby(...).ngroup().
id_from_label = [
    ('hierarchy_id', 'hierarchy'),
    ('cell_type_id', 'cell_type'),
    ('cell_cluster_id', 'pheno_cluster'),
]
for id_column, label_column in id_from_label:
    ad.obs[id_column] = ad.obs.groupby(label_column).ngroup()

Make sure the columns are set to 'category' type, to enable correct plotting.

Change this with the adata.obs column names that are categorical

In [None]:
ad.obs.columns

In [None]:
# Cast the grouping columns (including the freshly created *_id columns) to 'category'.
# Assign with ad.obs[col] = ... rather than ad.obs.loc[:, col] = ...: a full-column .loc
# assignment writes the values into the existing column and keeps its old dtype on
# pandas >= 2.0, so the integer *_id columns would never become categorical and the
# later `.cat.categories` look-ups would raise AttributeError.
for i in ['ROI', 'Patient', 'Group','hierarchy_id', 'cell_type_id','cell_cluster_id']:
    ad.obs[i] = ad.obs[i].astype('category')

Construct the SpatialOmics object

We need to create a new data frame that contains the 'sample level' metadata; these are currently stored in adata.obs

Change the first line with the adata.obs that define ROI-level information, for example what patient (in this case 'Case') the ROI came from

In [None]:
# Sample/ROI-level annotations carried in ad.obs (one value per ROI, repeated on every cell)
roi_level_columns = ['ROI','TMAID', 'CaseID', 'Region', 'Patient', 'Group', 'Diagnosis']

# Keep the first occurrence of each distinct row and index the table by ROI,
# which is the unique ID for each region.
spl = ad.obs[roi_level_columns].drop_duplicates().set_index('ROI')

In [None]:
spl

Map the cell mask file path for each ROI

Here, you need to setup the mask files in a folder called 'masks', and create a new .csv file that points each ROI to its correct mask file
If you use the Bodenmiller pipeline, the masks are created in the 'CP_Output' folder. Unfortunately they all have quite confusing names that won't match your ROI names. You can find out which mask matches which ROI by looking at the Image.csv file in the same folder as the outputted masks. There is one row per analysed image (per ROI); use the columns 'Metadata_description' and 'Metadata_acname' to work out which ROI each row corresponds to, then look at 'URL_cellmask' to get the file name. I then created a subfolder called 'masks' and copied the masks into it. If you look at the 'mask_dict.csv' example here, you can see how I laid out the dictionary file that points each ROI to its relevant mask file.

In [None]:
# Open the mask dictionary: a CSV with one row per ROI and the columns
# 'ROI' and 'mask_path'; turn it into a {ROI: mask_path} lookup.
map_file = pd.read_csv('COVID_mask_dict.csv').set_index('ROI')['mask_path'].to_dict()

# Map into the spl dataframe (one mask path per ROI, in index order)
spl['cell_mask_file'] = spl.index.map(map_file)

# Fail early with a readable message: an ROI missing from the CSV would otherwise
# get a NaN path and only blow up later, when the mask is loaded.
rois_without_mask = spl.index[spl['cell_mask_file'].isna()].tolist()
if rois_without_mask:
    raise ValueError(f"No mask_path in 'COVID_mask_dict.csv' for ROI(s): {rois_without_mask}")

In [None]:
spl.loc[:, 'cell_mask_file']

Create the SpatialOmics instance and add in the sample data

In [None]:
so = SpatialOmics()
so.spl = spl

In [None]:
so

Add in cell mask

In [None]:
# Fill the SpatialOmics instance ROI by ROI: expression matrix (so.X), cell
# annotations (so.obs) and the segmentation mask (so.masks).
for r in so.spl.index:
    # Plain boolean array so the same selector works for both ad.obs and ad.X
    in_roi = (ad.obs.ROI == r).to_numpy()

    # set_index returns a new frame - no in-place edit of a slice of ad.obs
    roi_obs = ad.obs[in_roi].set_index('cell_id')
    so.obs[r] = roi_obs
    so.X[r] = pd.DataFrame(ad.X[in_roi], columns=ad.var.index, index=roi_obs.index)

    # this is how you can add masks to the spatial omics instance
    # please use `to_store=False` as this prevents writing the file to disk which is still experimental
    # Read the path from so.spl rather than the notebook-level `spl`: a later cell
    # re-uses the name `spl` as a loop variable, which would break a re-run of this cell.
    cell_mask_file = so.spl.loc[r, 'cell_mask_file']

    # first argument is the sample name
    # second argument is the KEY in so.masks[KEY] under which the mask is stored
    # third argument the file name
    so.add_mask(r, 'cellmasks', cell_mask_file, to_store=False)
    cell_mask = so.masks[r]['cellmasks'].astype(int)  # should be int

    # process segmentation masks and remove masks that do not represent a cell:
    # every label without a matching cell_id in so.obs[r] becomes background (0).
    # One vectorised np.isin replaces the former per-id loop over the whole image;
    # the local name also no longer collides with matplotlib's `cm` used further down.
    is_known_cell = np.isin(cell_mask, roi_obs.index.to_numpy())
    cell_mask[~is_known_cell] = 0
    so.masks[r]['cellmasks'] = cell_mask

Check that the number of cell IDs matches the number of cell masks

In [None]:
# Sanity check: every ROI should have exactly as many non-background mask labels
# as rows in so.obs. Only mismatches are reported (the comparison used to be
# inverted, so the message was printed for the ROIs that were actually fine).
# NOTE: the loop variable is kept as `spl` because later cells index so.obs[spl];
# be aware that it rebinds the `spl` sample table to the last ROI name.
for spl in so.spl.index:
    ids = np.unique(so.masks[spl]['cellmasks'])
    ids = ids[ids != 0]  # drop the background identifier
    if len(so.obs[spl]) != len(ids):
        print(f'Sample: {spl} has {len(ids)} cell identifiers in cellmasks but {len(so.obs[spl])} cells in so.obs[spl]')

In [None]:
so

In [None]:
so.spl

In [None]:
so.obs

In [None]:
so.obs[spl]['pheno_cluster']

In [None]:
so.obs[spl]['cell_cluster_id']

In [None]:
so.masks

# Colour maps

Create colour maps dictionaries and labels

This will create dictionaries that pair up the id numbers to the actual names

In [None]:
# Pair every id category with the label category at the same position.
def _id_to_label(id_column, label_column):
    """Return {id: label} built from the category lists of two ad.obs columns."""
    ids = ad.obs[id_column].cat.categories
    labels = ad.obs[label_column].cat.categories
    return dict(zip(ids, labels))

hierarchy_dict = _id_to_label('hierarchy_id', 'hierarchy')
cell_type_dict = _id_to_label('cell_type_id', 'cell_type')
cell_cluster_dict = _id_to_label('cell_cluster_id', 'pheno_cluster')

# Register the lookups under so.uns['cmap_labels'], keyed by the id column name
so.uns['cmap_labels'].update({
    'hierarchy_id': hierarchy_dict,
    'cell_type_id': cell_type_dict,
    'cell_cluster_id': cell_cluster_dict,
})

Assign colour map
This will assign colours in the order they appear in the dictionaries created above.

In [None]:
import colorcet as cc
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm

# Specify different colour maps here
colourmap = cc.glasbey_category10

# The number of categories is read from the first sample in so.spl
first_sample = so.spl.index[0]

for i in ['hierarchy_id','cell_type_id','cell_cluster_id']:
    # Take as many colours from the palette as the id column has categories
    n_categories = len(so.obs[first_sample][i].cat.categories)
    cmap = ListedColormap(colourmap[:n_categories])
    print(i)
    display(cmap)
    so.uns['cmaps'].update({i: cmap})

Duplicate colour maps in format friendly for matplotlib and scanpy etc

In [None]:
# {label: colour} versions of the colour maps: the n-th label of each
# dictionary is paired with the n-th colour of the matching ListedColormap.
def _label_colours(label_dict, cmap_key):
    """Map each value of label_dict, in order, to the colour at the same position."""
    colours = so.uns['cmaps'][cmap_key].colors
    return {label: colours[position] for position, label in enumerate(label_dict.values())}

hierarchy_colours = _label_colours(hierarchy_dict, 'hierarchy_id')
cell_type_colours = _label_colours(cell_type_dict, 'cell_type_id')
cell_cluster_colours = _label_colours(cell_cluster_dict, 'cell_cluster_id')

Alternative method - use only if you dont want to use the colors set above

In [None]:
# One colour per distinct hierarchy value, taken from the glasbey_warm palette
metacluster = ad.obs.hierarchy.unique()
cmap = cc.glasbey_warm[:len(metacluster)]
cmap = ListedColormap(cmap)

#hierarchy_id colormap
so.uns['cmaps']['hierarchy_id'] = cmap

#hierarchy colormap
# NOTE(review): these five colours/labels are hard-coded; confirm they match the
# hierarchy groups actually present in ad.obs.
cmap = ['white', 'darkgreen', 'gold', 'steelblue', 'darkred']
cmap_labels = {0: 'background', 1: 'Myeloid',  2: 'Lymphoid', 3: 'Stromal', 4: 'Vascular'}
cmap = ListedColormap(cmap)

so.uns['cmaps'].update({'hierarchy': cmap})
so.uns['cmap_labels'].update({'hierarchy': cmap_labels})

# Unique (hierarchy, hierarchy_id) pairs -> {hierarchy_id: hierarchy}.
# The de-duplicated frame is now actually kept; before, its result was discarded
# and the dictionary was built from one row per cell.
df = ad.obs[['hierarchy', 'hierarchy_id']].drop_duplicates()
so.uns['cmap_labels']['hierarchy_id'] = df.set_index('hierarchy_id').hierarchy.to_dict()

In [None]:
# set up colormaps for cell_type_id - ATHENA's tutorial color coding
#cmap_paper = np.array([[255, 255, 255], [10, 141, 66], [62, 181, 86], [203, 224, 87],  # 0 1 2 3
                       #[84, 108, 47], [180, 212, 50], [23, 101, 54],  # 4 5 6
                       #[248, 232, 13], [1, 247, 252], [190, 252, 252],  # 7 8 9
                       #[150, 210, 225], [151, 254, 255], [0, 255, 250],  # 10 11 12
                       #[154, 244, 253], [19, 76, 144], [0, 2, 251],  # 13 14 15
                      #[147, 213, 198], [67, 140, 114], [238, 70, 71],  # 16 17 18
                       #[80, 45, 143], [135, 76, 158]]) #19, 20

In [None]:
# set up colormaps for cell_type_id - following my own color coding from previous graphs
# One RGB triple (0-255) per row; the trailing comments give the row positions.
# NOTE(review): this array holds 27 colours (positions 0-26), while the cell_cluster_id
# label dictionary defined further down lists 28 clusters (0-27) - one colour looks
# to be missing. Also, [240,185,141] appears at positions 0 and 11 and [184,108,185]
# at positions 8 and 21, so those clusters share a colour. Confirm both are intended.
cmap_paper = np.array([[240,185,141], [17,198,56], [141,213,147], [133,149,225],  #0 #1 #2 #3
                       [181,187,227], [211,63,106], [244,204,204],  #4 #5 #6
                       [255,217,102], [184,108,185], [73,0,239],  #7 #8 #9
                       [89,89,89], [240,185,141], [163,129,239],  #10 #11 #12
                       [156,222,214], [185,135,125], [0,112,192],  #13 #14 #15
                       [224,123,145], [74,111,227], [255,125,168], #16 #17 #18
                       [15,207,192], [214,188,192], [184,108,185],  #19 #20 #21
                       [247,31,15], [0,176,240],  #22 #23
                       [250,128,0], [51,102,0], [165,63,2]]) #24 #25 #26

In [None]:
so.uns['cmaps'].update({'cell_cluster_id': ListedColormap(cmap_paper / 255)})

In [None]:
# define labels for cell_cluster_id
#cell_cluster_id follows alphabetical order - see order in the NE plot for example
cmap_labels = {0: 'AT2 cell',
               1: 'Activated Endothelial cell',
               2: 'Alveolar Macrophage',
               3: 'Apoptotic Interstitial Macrophage',
               4: 'B cell',
               5: 'CD4 T cell',
               6: 'CD4 Treg cell',
               7: 'CD66bHigh Neutrophil',
               8: 'CD66bLow Neutrophil',
               9: 'CD8 T cell',
               10: 'Classical Monocyte',
               11: 'EM CD4 T cell',
               12: 'EM CD8 T cell',
               13: 'Endothelial cell',
               14: 'Epithelial cell',
               15: 'Fibroblast',
               16: 'GranzB+ Endothelial cell',
               17: 'Interstitial Macrophage', 
               18: 'Proliferative CD4 T cell',
               19: 'Proliferative Endothelial cell',
               20: 'Proliferative Epithelial cell',
               21: 'Proliferative Fibroblast',
               22: 'Proliferative Interstitial Macrophage',
               23: 'RBC',
               24: 'Smooth Muscle cell',
               25: 'Virus-infected AT2 cell',
               26: 'Virus-infected Alveolar Macrophage',
               27: 'Virus-infected Epithelial cell'}

In [None]:
so.uns['cmap_labels'].update({'cell_cluster_id': cmap_labels})

In [None]:
so.uns['cmap_labels']

In [None]:
so.obs

In [None]:
so.uns['cmaps'].update({'default': cm.plasma})

# Explore SpatialOmics object

In [None]:
print(so.spl.columns.values) #see all available sample annotations
so.spl.head(3) 

In [None]:
spl = 'MP41-ROI1' #for one specific sample
print(so.obs[spl].columns.values) #see all available sample annotations
so.obs[spl].head(3) 

In [None]:
print(so.masks[spl].keys())

# Graph construction

This will construct the different spatial connectivity graphs, these define which cells are considered neighbours for later analyses.

Use your extracted cell locations to build a {radius, knn} graph (it does not use the cellmasks)

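Note: several cells below use the variable `r` to pick a single ROI. Set `r` explicitly to the ROI you want to inspect; otherwise it keeps the value left over from the last loop that ran (usually the last ROI in the SO object), or the cell fails if `r` has not been defined yet.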

Setup

In [None]:
#This select which samples to use - all by default
samples = so.spl.index 

# Import the default graph settings
from athena.graph_builder.constants import GRAPH_BUILDER_DEFAULT_PARAMS

Extract centroids from masks

We can define where cells are in two ways 
1- either from the centre of the mask or 
2- from the cell locations extracted from the cell profiler pipeline.
In theory, they should be almost identical, and in previous tests they are.

In [None]:
print(so.masks[r].keys())

In [None]:
#Loading bar functionality for long tasks
from tqdm import tqdm

# Extract cell centroids across all samples
for r in tqdm(so.obs.keys()):
    sh.pp.extract_centroids(so, r, mask_key='cellmasks')

In [None]:
# print results
print(so.obs[r])

KNN graph

k nearest neighbors - change 'number_neighbours' to define number of nearest neighbors to identify per cell. Default is 6
The graph key will be knn_[number_neighbours]

In [None]:
import copy

number_neighbours = 4

#kNN graph, without cellmasks, using predetermined X, Y locs
# Work on a deep copy: editing GRAPH_BUILDER_DEFAULT_PARAMS['knn'] directly would change
# ATHENA's shared defaults for every later graph built in this kernel.
config = copy.deepcopy(GRAPH_BUILDER_DEFAULT_PARAMS['knn'])
config['builder_params']['n_neighbors'] = number_neighbours # set parameter k
for spl in tqdm(samples):
    # the graph is stored under the key 'knn_<number_neighbours>'
    sh.graph.build_graph(so, spl,
                         key_added='knn_'+str(number_neighbours),
                         builder_type='knn', mask_key=None, coordinate_keys=('X_loc', 'Y_loc'), config=config)

Radius graph

Defines a cells neighbours using a radius (in um) from the cell. Default is 20
The graph key will be radius_[radius]

In [None]:
import copy

radius = 20

# radius graph without cellmasks, using my predetermined X, Y locs
# Work on a deep copy: editing GRAPH_BUILDER_DEFAULT_PARAMS['radius'] directly would change
# ATHENA's shared defaults for every later graph built in this kernel.
config = copy.deepcopy(GRAPH_BUILDER_DEFAULT_PARAMS['radius'])
config['builder_params']['radius'] = radius
for spl in tqdm(samples):
    # the graph is stored under the key 'radius_<radius>'
    sh.graph.build_graph(so, spl,
                         key_added='radius_'+str(radius),
                         builder_type='radius', mask_key=None, coordinate_keys=('X_loc', 'Y_loc'), config=config)

In [None]:
so

Contact graph (with multiprocessing)

Defines neighbours using direct cell-cell contact using cell mask

In [None]:
#For multiprocessing
from functools import partial
from multiprocessing import Pool

# Use parallel processing?
parallel = True

# This select which samples to use - all by default
samples = so.spl.index.tolist() #so.spl.index

# Number of cores to use for parallel processing
num_cores = 4

# Run the parallel processing
if parallel:

    # Bind `so` as first argument so the pool only has to pass the sample name
    contact_graph_partial = partial(mikeimc_v2.contact_graph, so)

    with Pool(processes=num_cores) as pool:
        # imap yields results in input order as they finish, so the progress bar
        # tracks completed samples (wrapping the *input* of pool.map in tqdm only
        # showed how fast the sample list was consumed).
        data = list(tqdm(pool.imap(contact_graph_partial, samples), total=len(samples)))
        print('Complete')

    # Copy the graphs computed in the workers back into the original object
    for spl,grph in zip(samples,data):

        if spl in so.G:
            so.G[spl].update({'contact': grph})
        else:
            so.G[spl] = {'contact':grph}

    print('Added back into original SO!')

else:

    for spl in samples:
        try:
            sh.graph.build_graph(so, spl, builder_type='contact', mask_key='cellmasks')
        # Only ordinary errors are reported and skipped. KeyboardInterrupt is no longer
        # caught, so interrupting the kernel stops the whole loop instead of just
        # skipping the current sample.
        except Exception as err:
            print("An exception occurred in calculating contact graph for " + spl)
            print(f"Unexpected {err=}, {type(err)=}")

# Save spatial mapping of pops without cell masks

In [None]:
so

In [None]:
# Side-by-side view of the kNN and radius graphs for one ROI, coloured by cell cluster.
r = '1507_2_C'
obs = 'cell_cluster_id'
fig, axs = plt.subplots(1, 2, figsize=(15, 8), dpi=300)
for ax, graph_key in zip(axs.flat, ['knn_4', 'radius_20']):
    # Each panel is drawn once: the former inner `bgd` loop never used its variable
    # and simply drew the same plot twice onto the same axis.
    sh.pl.spatial(so, r, obs, node_size=10, edges=True, graph_key=graph_key, coordinate_keys=['X_loc', 'Y_loc'], ax=ax)
    ax.set_title(graph_key)
fig.tight_layout()
#fig.show()
fig.savefig(str(r)+'.png')

In [None]:
# Side-by-side view of the kNN and radius graphs for one ROI, coloured by cell cluster.
r = '1507_2_A'
obs = 'cell_cluster_id'
fig, axs = plt.subplots(1, 2, figsize=(15, 8), dpi=300)
for ax, graph_key in zip(axs.flat, ['knn_4', 'radius_20']):
    # Each panel is drawn once: the former inner `bgd` loop never used its variable
    # and simply drew the same plot twice onto the same axis.
    sh.pl.spatial(so, r, obs, node_size=10, edges=True, graph_key=graph_key, coordinate_keys=['X_loc', 'Y_loc'], ax=ax)
    ax.set_title(graph_key)
fig.tight_layout()
#fig.show()
fig.savefig(str(r)+'.png')

In [None]:
# Disabled experiment ("it did not work"): plotting every sample onto the same pair of axes.
# The whole cell is commented out, including the set_title line that had been left
# active and silently re-titled the last axis of the previous figure.
#fig, axs = plt.subplots(1, 2, figsize=(15, 8), dpi=300)
#for ax, graph_key in zip(axs.flat, ['knn_4', 'radius_20']):
#    for obs in ['cell_cluster_id']:
#        for bgd in ['black','white']:
#            for i in tqdm(samples):
#                sh.pl.spatial(so, i, obs, node_size=5, edges=True, graph_key=graph_key, coordinate_keys=['X_loc', 'Y_loc'], ax=ax)
#                ax.set_title(graph_key)
#fig.tight_layout()
#fig.show()

# Plot protein intensity or single-cell annotations

In [None]:
# 2x3 overview of one ROI: plain cell positions (None), three annotation levels
# and two marker intensities - one attribute per panel.
r = 'MP41-ROI1'
fig, axs = plt.subplots(2, 3, figsize=(15, 6), dpi=300)
attributes = [None, 'cell_type_id', 'hierarchy_id', 'cell_cluster_id', 'IgD', 'CD45RA']
# Each plot gets its own axis from `axs`; previously all six calls were sent to a
# leftover `ax` from an earlier cell and the new 2x3 grid stayed empty.
for ax, attribute in zip(axs.flat, attributes):
    sh.pl.spatial(so, r, attribute, ax=ax)


In [None]:
# Marker intensity painted onto the cell masks of one ROI, one panel per marker
r = 'MP41-ROI1'
so.uns['cmaps'].update({'default': cm.plasma})
fig, axs = plt.subplots(1, 3, figsize=(30, 3), dpi=100)
for panel, marker in zip(axs.flat, ['CD45RA', 'CD19', 'IgD']):
    sh.pl.spatial(so, r, marker, mode='mask', ax=panel, background_color='black')

# Heterogeneity quantification

# Sample-level scores - Information-theoretic scores

Sample-level scores estimate a single heterogeneity score for the whole tumor, saved in so.spl. Although they ignore the spatial topology and cell-cell interactions, they describe the heterogeneity attributed to the number of cell types present and their frequencies. Below we compute some of the included metrics across all samples.

Richness: The most basic and intuitive heterogeneity score is richness S, which simply counts the number of observed cell subpopulations within a tumor sample, independently of their relative abundance. This score is equivalent to tumor clonality, commonly employed in genetic heterogeneity studies [3] to quantify the number of distinct clones in a tumor.

Shannon index: The Shannon index H takes into consideration not only the number of cell subpopulations S present, but also their relative proportions. In other words, it measures how uncertain we are when guessing the phenotype of a randomly observed cell from a tumor sample: the more even the cell proportions, the more uncertain our prediction. Shannon entropy increases with richness and evenness, and reaches its maximal value when the cell subpopulation distribution is uniform. The higher the Shannon index, the higher the heterogeneity.

Simpson index: Similarly, the Simpson index considers both richness and relative abundance. The Simpson index describes the probability of sampling the same phenotype twice from the tumor. In contrast to the Shannon index, the Simpson index decreases with increasing diversity. Furthermore, the Simpson index is sensitive to the abundance of the more dominant phenotype and can be regarded as a measure of dominance concentration.

Rao’s quadratic entropy: The indices and quantiﬁcation methods discussed so far consider both richness and relative abundance of phenotypes, but ignore the similarity of these phenotypes, i.e., how close they are in phenotypic space. Rao’s quadratic entropy [22] accounts for that.

Calculate richness, shannon and quadratic entropy (with multiprocessing)

In [None]:
# compute metrics at a sample level for all samples - this will take some time
# NOTE(review): with local=False the scores are sample-wide; graph_key presumably has
# no influence on them - confirm against the ATHENA documentation.
for s in tqdm(samples):
    sh.metrics.richness(so, s, 'cell_cluster_id', local=False, graph_key='knn_4')
    sh.metrics.shannon(so, s, 'cell_cluster_id', local=False, graph_key='knn_4')
    sh.metrics.quadratic_entropy(so, s, 'cell_cluster_id', local=False, graph_key='knn_4', metric='cosine')

# sample-level values are saved in so.spl (one row per ROI), not in so.obs -
# the cells below read 'richness_cell_cluster_id' etc. from so.spl
so.spl.columns

In [None]:
# compute metrics at sample level for all samples - this will take some time
# NOTE(review): with local=False the scores are sample-wide; graph_key presumably has
# no influence on them, in which case this cell repeats the knn_4 cell above - confirm.
for s in tqdm(samples):
    sh.metrics.richness(so, s, 'cell_cluster_id', local=False, graph_key='radius_20')
    sh.metrics.shannon(so, s, 'cell_cluster_id', local=False, graph_key='radius_20')
    sh.metrics.quadratic_entropy(so, s, 'cell_cluster_id', local=False, graph_key='radius_20', metric='cosine')

# sample-level values are saved in so.spl (one row per ROI), not in so.obs -
# the cells below read 'richness_cell_cluster_id' etc. from so.spl
so.spl.columns

In [None]:
# compute cell counts
# The lists are assigned to so.spl by position, so they are built by walking
# so.spl.index itself; iterating so.obs.keys() only gave the right rows as long as
# the dictionary happened to be filled in the same order as the sample table.
so.spl['cell_count'] = [len(so.obs[s]) for s in so.spl.index]
so.spl['Myeloid_cell_count'] = [np.sum(so.obs[s].hierarchy == 'Myeloid') for s in so.spl.index]

In [None]:
so.spl

In [None]:
# estimated values are saved in so.spl    
so.spl[['cell_count', 'richness_cell_cluster_id', 'shannon_cell_cluster_id', 'quadratic_cell_cluster_id']]

In [None]:
df = so.spl[['cell_count', 'richness_cell_cluster_id', 'shannon_cell_cluster_id', 'quadratic_cell_cluster_id']]
compression_opts = dict(method='zip',
                         archive_name='COVID_Malawi_Heterogeneity_Index_Sample_level_scores.csv') 
df.to_csv('COVID_Malawi_Heterogeneity_Index_Sample_level_scores.csv.zip', index=True, compression=compression_opts)

In [None]:
# Read the scores back from the zip archive written by the previous cell (pandas infers
# the compression from the '.zip' suffix). The bare '.csv' name is only the member name
# inside the archive and never exists on disk unless the archive is unpacked by hand.
df1 = pd.read_csv('COVID_Malawi_Heterogeneity_Index_Sample_level_scores.csv.zip')

In [None]:
df1

In [None]:
# One bar chart per sample-level score, ROI on the x axis
score_columns = ['richness_cell_cluster_id', 'shannon_cell_cluster_id', 'quadratic_cell_cluster_id']
for score in score_columns:
    bars = df1.plot.bar(x='ROI', y=score, color = 'darkmagenta', stacked=False, figsize=(32, 8), rot=90)
    bars.legend(bbox_to_anchor=(1, 1))

#figsize=(16, 4),

# Cell-level scores

In their original definition, all entropic scores described above do not take the spatial component into account when calculated on a whole tumor level. For this reason, ATHENA implements two flavors of these scores: a global flavor, in which the metrics are computed at a whole sample level using only the phenotype distribution, and a local flavor, in which the scores are computed at a single-cell level, using also the graph structure. Specifically, when computing local scores, ATHENA iterates over all cells, and for each cell, computes the local entropy within its neighborhood. In this way, highly diverse regions where cells from multiple different phenotypes coexist can be highlighted, and, instead of computing a single entropy value as for the global flavor, a distribution of entropic values is returned.

Cell-level scores quantify heterogeneity in a spatial manner, accounting for local effects, and return a value per single cell, saved in so.obs. To apply these scores to the data we use again .metrics but this time with local=True. Since these scores heavily depend on the tumor topology, the graph type and occasionally additional parameters also need to be provided.

In [None]:
from tqdm import tqdm
all_samples = so.spl.index.values

In [None]:
# Cell-level (local=True) heterogeneity scores on the knn_4 graph, for every sample.
# This will take some time.
knn_local = dict(local=True, graph_key='knn_4')
for s in tqdm(all_samples):
    sh.metrics.richness(so, s, 'cell_cluster_id', **knn_local)
    sh.metrics.shannon(so, s, 'cell_cluster_id', **knn_local)
    sh.metrics.quadratic_entropy(so, s, 'cell_cluster_id', metric='cosine', **knn_local)

# estimated values are saved in so.obs
so.obs[spl].columns

In [None]:
# Cell-level (local=True) heterogeneity scores on the radius_20 graph, for every sample.
# This will take some time.
radius_local = dict(local=True, graph_key='radius_20')
for s in tqdm(all_samples):
    sh.metrics.richness(so, s, 'cell_cluster_id', **radius_local)
    sh.metrics.shannon(so, s, 'cell_cluster_id', **radius_local)
    sh.metrics.quadratic_entropy(so, s, 'cell_cluster_id', metric='cosine', **radius_local)

# estimated values are saved in so.obs
so.obs[spl].columns

In [None]:
# Distribution of the local quadratic entropy (radius_20 graph), one histogram per sample
score_column = 'quadratic_cell_cluster_id_radius_20'
n_cols = 8
# At least the original 4 rows; more are added when there are over 32 samples, where
# the fixed 4x8 grid used to raise an error.
n_rows = max(4, -(-len(all_samples) // n_cols))

fig = plt.figure(figsize=(60, 30))
for i,s in enumerate(all_samples):
    plt.subplot(n_rows, n_cols, i+1)
    g=sb.histplot(so.obs[s][score_column], stat='probability', color = 'darkmagenta')
    # ': ' separates the sample name from the text (they used to run together)
    g.set_title(s + ': median quadratic entropy = ' + str(round(so.obs[s][score_column].median(),3)))
    plt.ylim([0,0.5])
    plt.xlim([0,1])

In [None]:
# if needed, we can retrieve selected single-cell score values 
df2 = so.obs[spl].loc[:,['richness_cell_cluster_id_knn_4', 'shannon_cell_cluster_id_knn_4', 'quadratic_cell_cluster_id_knn_4', 
                  'richness_cell_cluster_id_radius_20', 'shannon_cell_cluster_id_radius_20', 'quadratic_cell_cluster_id_radius_20']].head(500)

In [None]:
compression_opts = dict(method='zip',
                         archive_name='COVID_Malawi_Heterogeneity_Index_Cell_level_scores.csv') 
df2.to_csv('COVID_Malawi_Heterogeneity_Index_Cell_level_scores.csv.zip', index=True, compression=compression_opts)

The results can be plotted again using the .pl.spatial submodule and passing the attribute we want to visualize. For example, let’s observe the spatial heterogeneity of a random sample using three different metrics in the cell below. While local richness highlights tumor neighborhoods with multiple cell phenotypes present, local Shannon also takes into consideration the proportions of these phenotypes. Finally, local quadratic entropy additionally takes into consideration the similarity between these phenotypes using the single-cell proteomic data stored in .X. Notice how, in the last subplot, only regions where cell phenotypes with very different profiles (e.g., tumor - immune - stromal) are highlighted.

In [None]:
# Colour map used for continuous values
so.uns['cmaps']['default'] = cm.plasma

# Local richness / Shannon / quadratic entropy of one ROI on the knn_4 graph
r = '1507_2_C'
knn_score_columns = ['richness_cell_cluster_id_knn_4', 'shannon_cell_cluster_id_knn_4', 'quadratic_cell_cluster_id_knn_4']
fig, axs = plt.subplots(1, 3, figsize=(25, 12), dpi=300)
for ax, obs in zip(axs.flat, knn_score_columns):
    sh.pl.spatial(so, r, obs, node_size=40, edges=True, graph_key='knn_4',
                  coordinate_keys=['X_loc', 'Y_loc'], cbar_title=False,
                  background_color='white', ax=ax)
    ax.set_title(obs)
fig.tight_layout()
#fig.show()
plt.savefig(f'{r}_knn.png')

In [None]:
# Colour map used for continuous values
so.uns['cmaps']['default'] = cm.plasma

# Local richness / Shannon / quadratic entropy of one ROI on the radius_20 graph
r = '1507_2_C'
radius_score_columns = ['richness_cell_cluster_id_radius_20', 'shannon_cell_cluster_id_radius_20', 'quadratic_cell_cluster_id_radius_20']
fig, axs = plt.subplots(1, 3, figsize=(25, 12), dpi=300)
for ax, obs in zip(axs.flat, radius_score_columns):
    sh.pl.spatial(so, r, obs, node_size=40, edges=True, graph_key='radius_20',
                  coordinate_keys=['X_loc', 'Y_loc'], cbar_title=False,
                  background_color='white', ax=ax)
    ax.set_title(obs)
fig.tight_layout()
#fig.show()
plt.savefig(f'{r}_radius.png')

# Cell type interactions

mode: it can be classical, histocat or proportion

Classic: In the classic ﬂavor of the neighborhood analysis score, the average number of interactions between cells of the same phenotype is computed.

HistoCAT: In the histoCAT ﬂavor, the global average of interaction score is only computed across cells that actually show this interaction.

Proportional: Finally, the proportional ﬂavor uses interaction frequencies instead of counts, i.e., divides the counts of pairwise interactions by all interactions a given cell has. We propose this approach as an alternative that normalizes the scores with respect to varying cell density. In contrast to the classic or the histoCAT ﬂavor, the proportional ﬂavor is not inﬂuenced by the number of cells in a sample and the score is bounded in the range of [0,1].

graph_key can be knn, radius or contact

prediction_type can be observation, pvalue or diff. In addition to a p-value, we propose to compute the difference between the observed proportion of interactions $\text{obs}_{ij}$ between cell types $i \to j$ and the randomised proportion of interactions $\text{rand}_{ij}$. This difference can be asymmetrical ($i \to j \neq j \to i$), is bounded in the range of [-1,1], and might be more suited as input for certain machine learning models.

Calculate all potential interaction metrics (with multiprocessing)

In [None]:
# Interaction scores between cell clusters, computed on the radius_20 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='radius_20')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'cell_cluster_id', **interaction_settings)

In [None]:
# Interaction scores between cell types, computed on the radius_20 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='radius_20')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'cell_type_id', **interaction_settings)

In [None]:
# Interaction scores between hierarchy groups, computed on the radius_20 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='radius_20')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'hierarchy_id', **interaction_settings)

In [None]:
# Interaction scores between cell clusters, computed on the knn_4 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='knn_4')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'cell_cluster_id', **interaction_settings)

In [None]:
# Interaction scores between cell types, computed on the knn_4 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='knn_4')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'cell_type_id', **interaction_settings)

In [None]:
# Interaction scores between hierarchy groups, computed on the knn_4 graph
import logging

# set logger to logging.INFO if you want to see more progress information
logging.getLogger().setLevel(logging.ERROR)

# this will take some time...
interaction_settings = dict(mode='proportion', prediction_type='diff', graph_key='knn_4')
for sample in tqdm(all_samples):
    sh.neigh.interactions(so, sample, 'hierarchy_id', **interaction_settings)

# Interaction graphs

Interaction heatmap example

In [None]:
so.uns

In [None]:
so.spl.index

In [None]:
# Samples to combine for the interaction summary, one list per group.
# (Original note: the SAMPLE_NAMES must be in so.masks.keys().)
CM2 = ['MP42-ROI1','MP42-ROI2','MP42-ROI3','MP61-ROI1','MP61-ROI2','MP61-ROI3','MP69-ROI1','MP69-ROI2',
       'MP69-ROI3','PM78-ROI1','PM78-ROI2','PM78-ROI3','PM83-ROI1','PM83-ROI2','PM83-ROI3',
       'PM102-ROI1','PM102-ROI2','PM102-ROI3']

Non_CM = ['MP41-ROI1','MP41-ROI2','MP41-ROI3','MP65-ROI1','MP65-ROI2','MP65-ROI3',
          'PM88-ROI1','PM88-ROI2','PM88-ROI3']

# The two lists used to be overwritten with so.spl.index right after being defined,
# which made both "groups" identical (all samples). Keep the curated groups instead,
# restricted to the samples that actually exist in the SpatialOmics instance, and
# report any listed sample that is missing rather than failing later.
available_samples = set(so.spl.index)
missing_samples = [name for name in CM2 + Non_CM if name not in available_samples]
if missing_samples:
    print('Samples not found in so.spl.index (ignored):', missing_samples)

CM2 = [name for name in CM2 if name in available_samples]
Non_CM = [name for name in Non_CM if name in available_samples]

# To summarise every sample instead of one group, pass list(so.spl.index)
# to the summary function directly.


In [None]:
# Display the samples in the CM2 list
CM2

In [None]:
# Display the samples in the Non_CM list
Non_CM

In [None]:
# Summary of the cell_type_id interactions (radius_20 graph) over the samples in CM2,
# aggregated with the mean and drawn with the 'bwr' colour map.
mikeimc_v2.interactions_summary(
    so,
    CM2,
    'cell_type_id_proportion_diff_radius_20',
    population_dictionary=so.uns['cmap_labels']['cell_type_id'],
    aggregate_function='mean',
    calc_ttest_p_value=None,
    cmap='bwr',
)

Interaction bar graphs - Cell type broad

In [None]:
# Display the label dictionary stored for cell_cluster_id
so.uns['cmap_labels']['cell_cluster_id']

In [None]:
# Bar graphs of the interaction between each individual population (x axis) and
# each broad target group (one panel per group). One figure is produced and saved
# for every combination of graph type (KNN / radius) and variable ('diff' / 'score').
samples = list(so.spl.index)

# Populations that may appear on the x axis
all_pops=['Activated B cell', 'CD4 T cell', 'CD68+CD163+VISTA+ Macrophage',
         'CD68+CD163+VISTA- Macrophage', 'CD8 T cell', 'Classical Monocyte','Dendritic cell', 'Effector CD4 T cell', 'Effector CD8 T cell',
         'Endothelial cell', 'Iba1+VISTA- Macrophage', 'Intermediate Monocyte', 'Memory CD4 T cell', 'NK cell', 'Naive B cell',
         'Neutrophil', 'Non-classical Monocyte', 'RBC', 'Smooth Muscle cell']

# Broad group of every population; handed to interactions_table as `remap`
remap_dict={'Activated B cell':'Lymphoid',
            'CD4 T cell':'Lymphoid',
            'CD68+CD163+VISTA+ Macrophage':'Myeloid',
            'CD68+CD163+VISTA- Macrophage':'Myeloid',
            'CD8 T cell':'Lymphoid',
            'Classical Monocyte':'Myeloid',
            'Dendritic cell':'Myeloid',
            'Effector CD4 T cell':'Lymphoid',
            'Effector CD8 T cell': 'Lymphoid',
            'Endothelial cell': 'Vascular',
            'Iba1+VISTA- Macrophage': 'Myeloid',
            'Intermediate Monocyte': 'Myeloid',
            'Memory CD4 T cell': 'Lymphoid',
            'NK cell': 'Lymphoid',
            'Naive B cell': 'Lymphoid',
            'Neutrophil': 'Myeloid',
            'Non-classical Monocyte':'Myeloid',
            'RBC': 'Vascular',
            'Smooth Muscle cell': 'Vascular'}

titles = ['KNN', 'Radius']

graphs = ['cell_type_id_proportion_diff_knn_4',
          'cell_type_id_proportion_diff_radius_20']

# One panel per broad group. The names must be values of remap_dict: the previous
# 'Lymphocytes' entry never matched the remapped 'Lymphoid' label, so that panel
# was always empty.
target_groups = ['Vascular', 'Lymphoid', 'Myeloid']

# Make sure the output folder exists before saving into it
output_dir = Path('figures/interaction_bargraphs')
output_dir.mkdir(parents=True, exist_ok=True)

for b,k in zip(titles, graphs):

    for t in ['diff','score']:
        variable=t

        summary = mikeimc_v2.interactions_table(so,
                            samples_list=samples,
                            interaction_reference=k,
                            var=variable,
                            population_dictionary=so.uns['cmap_labels']['cell_type_id'],
                            mode='individual',
                            remap=remap_dict,
                            remap_agg='sum')

        # Exactly one axis per target group (no empty trailing panels)
        fig, axs = plt.subplots(1, len(target_groups), figsize=(8, 3), dpi=300)
        fig.suptitle(b, fontsize=16)
        fig.tight_layout()
        plt.subplots_adjust(wspace = 1)

        for count,i in enumerate(target_groups):

            data = summary.loc[summary.target_label==i,:]
            # .copy() so the columns can be modified without touching `summary`
            data = data.loc[data.source_label.isin(all_pops),:].copy()

            # Drop categories that were filtered out above so they are not drawn as
            # empty bars. Columns that are not categorical are left untouched
            # (this replaces the former bare `except` around an in-place call).
            for col in ['source_label', 'target_label']:
                if isinstance(data[col].dtype, pd.CategoricalDtype):
                    data[col] = data[col].cat.remove_unused_categories()

            # Bars ordered by increasing mean of the plotted variable
            bar_order = (data.groupby('source_label', observed=True)[variable]
                             .mean()
                             .sort_values()
                             .index
                             .tolist())

            sb.barplot(data = data,
                       x = "source_label",
                       y = variable,
                       ci=68,
                       ax=axs[count],
                       palette=cell_type_colours,
                       order=bar_order
                      )
            #axs[count].set_yscale("log")
            axs[count].set_title(i)
            axs[count].tick_params(axis='x', labelrotation = 90, labelsize=8)
            axs[count].tick_params(axis='y', labelsize=8)
            axs[count].set_ylabel(t, fontsize=12)

        # Save inside the loops: one file per (variable, graph) combination.
        # Previously this ran once after both loops and only kept the last figure.
        fig.savefig(output_dir / (t+'_'+k+'.svg'), bbox_inches='tight')



UMAP clustering of interactions - "cell_type_id_proportion_diff_radius_20"

In [None]:
# UMAP of the per-sample interaction scores (cell_type_id, radius_20 graph),
# with each point annotated by its ROI.
samples = list(so.spl.index)
mikeimc_v2.interactions_summary_UMAP(
    so,
    samples,
    'cell_type_id_proportion_diff_radius_20',
    var='score',
    category_columns=['ROI', 'Patient', 'Group'],
    annotate='ROI',
    dim_red='UMAP',
)

In [None]:
# Plot the cell_cluster_id interaction heatmap of every ROI and save one PNG per ROI.
from matplotlib.colors import ListedColormap, Normalize

# Shared colour scale so the heatmaps are comparable between ROIs
norm = Normalize(-.1, .1)

for s in tqdm(all_samples):
    # One figure per sample. The file name uses the loop variable: the previous
    # version saved once after the loop, using a name (`r`) that this cell never defines.
    fig, ax = plt.subplots(figsize=(15, 6), dpi=100)
    sh.pl.interactions(so, s, 'cell_cluster_id', mode='proportion', prediction_type='diff', graph_key='radius_20',
                       norm=norm, ax=ax)
    fig.savefig(str(s)+'_interactions_radius.png')

In [None]:
# Display the label dictionaries stored in so.uns['cmap_labels']
so.uns['cmap_labels']

In [None]:
# Colour map for cell_type_id, following the colour coding of earlier graphs.
# One RGB triplet (0-255) per row; the row position is the id the colour is meant for.
cmap_paper2 = np.array([
    [250, 128, 0],    # 0
    [15, 207, 192],   # 1
    [165, 63, 2],     # 2
    [240, 185, 141],  # 3
    [141, 213, 147],  # 4
    [255, 217, 102],  # 5
    [163, 129, 239],  # 6
    [247, 31, 15],    # 7
    [0, 0, 0],        # 8
])
# Scale to the 0-1 range expected by ListedColormap before storing it
so.uns['cmaps'].update({'cell_type_id': ListedColormap(cmap_paper2 / 255)})

In [None]:
# Colour map that highlights two cell types: list position 1 is blue, position 7 is
# orange and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'blue', 'lightgrey', 'lightgrey', 
        'lightgrey', 'lightgrey', 'lightgrey', 'orange', 'lightgrey']

# Replaces the cell_type_id colour map stored in so.uns['cmaps']
cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

In [None]:
# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[1].loc[7]['diff'] # 'diff' at index 1 then 7 (presumably source id 1, target id 7 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

In [None]:
# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types.
# Uses `ind` computed in the previous cell; the 6 x 4 grid provides 24 axes.
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 2 is blue, position 7 is
# orange and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'lightgrey', 'blue', 'lightgrey', 
        'lightgrey', 'lightgrey', 'lightgrey', 'orange', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})


# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[2].loc[7]['diff'] # 'diff' at index 2 then 7 (presumably source id 2, target id 7 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 3 is blue, position 7 is
# orange and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'lightgrey', 'lightgrey', 'blue', 
        'lightgrey', 'lightgrey', 'lightgrey', 'orange', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[3].loc[7]['diff'] # 'diff' at index 3 then 7 (presumably source id 3, target id 7 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 5 is blue, position 7 is
# orange and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'lightgrey', 'lightgrey', 'lightgrey', 
        'lightgrey', 'blue', 'lightgrey', 'orange', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[5].loc[7]['diff'] # 'diff' at index 5 then 7 (presumably source id 5, target id 7 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 6 is blue, position 7 is
# orange and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'lightgrey', 'lightgrey', 'lightgrey', 
        'lightgrey', 'lightgrey', 'blue', 'orange', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[6].loc[7]['diff'] # 'diff' at index 6 then 7 (presumably source id 6, target id 7 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 7 is orange, position 8 is
# blue and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['lightgrey', 'lightgrey', 'lightgrey', 'lightgrey', 
        'lightgrey', 'lightgrey', 'lightgrey' , 'orange', 'blue']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[7].loc[8]['diff'] # 'diff' at index 7 then 8 (presumably source id 7, target id 8 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 0 is orange, position 1 is
# blue and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['orange', 'blue', 'lightgrey', 'lightgrey', 
        'lightgrey', 'lightgrey', 'lightgrey' , 'lightgrey', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[1].loc[0]['diff'] # 'diff' at index 1 then 0 (presumably source id 1, target id 0 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

In [None]:
# Colour map that highlights two cell types: list position 0 is orange, position 3 is
# blue and every other position is light grey.
# (presumably the list position corresponds to the cell_type_id — TODO confirm)
cmap = ['orange', 'lightgrey', 'lightgrey', 'blue', 
        'lightgrey', 'lightgrey', 'lightgrey' , 'lightgrey', 'lightgrey']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

# Use the interaction score between two cell types to sort the samples by increasing attraction
mixing_score=[]
for s in all_samples:
    interaction_res = so.uns[s]['interactions']['cell_type_id_proportion_diff_radius_20'] # get interaction results
    diff = interaction_res.loc[3].loc[0]['diff'] # 'diff' at index 3 then 0 (presumably source id 3, target id 0 — TODO confirm level order)
    mixing_score.append(diff)

# Positions in all_samples, ordered from the lowest to the highest score
ind=np.argsort(mixing_score)

# Plot every ROI, ordered according to the strength of the interaction between the two selected cell types
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind):
    sh.pl.spatial(so, all_samples[s], 'cell_type_id', node_size=0.5, edges=True, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

# Cell infiltration score - sample level

More sophisticated heterogeneity scores additionally consider cell-cell interactions by exploiting the cell-cell graph, where nodes encode cells, edges encode interactions, and each node is associated with a label that encodes the cell’s phenotype.

The infiltration score included in the .neigh submodule quantifies the degree of tumor-immune mixing (as defined in the paper by Keren, L. et al.). It quantifies the degree to which a certain cell phenotype has penetrated among cells of another type. As a consequence, the implementation of the score does not explicitly limit its application to immune-to-tumor infiltration, but is very flexible and allows the user to define any pairwise interaction, e.g., a specific immune subtype to the whole tumor, or even non-immune types of infiltration, should this be of interest.
ATHENA implements two flavors of infiltration: a global one that returns an estimate at the whole-sample level, and a local one, where for each cell i the infiltration is computed on the sub-graph containing only the immediate neighbors of i. Let us compute it across all patients.
The score is a ratio between the counts of two types of interaction (infiltration = count of interaction 1 / count of interaction 2).

Cell infiltration score at hierarchy level (sample analysis) - metaclusters

In [None]:
# Samples used for the infiltration analysis, three ROIs per case
samples2 = [
    'MP42-ROI1', 'MP42-ROI2', 'MP42-ROI3',
    'MP61-ROI1', 'MP61-ROI2', 'MP61-ROI3',
    'MP69-ROI1', 'MP69-ROI2', 'MP69-ROI3',
    'PM78-ROI1', 'PM78-ROI2', 'PM78-ROI3',
    'PM83-ROI1', 'PM83-ROI2', 'PM83-ROI3',
    'PM102-ROI1', 'PM102-ROI2', 'PM102-ROI3',
    'MP41-ROI1', 'MP41-ROI2', 'MP41-ROI3',
    'MP65-ROI1', 'MP65-ROI2', 'MP65-ROI3',
]

In [None]:
# Display the samples selected for the infiltration analysis
samples2

In [None]:
# Sample-level infiltration of 'Dendritic cells' into 'B cells' on the radius_20 graph.
#
# Definition of infiltration
# infiltration = count_of_interaction1 / count_of_interaction2
#
# The score is read back from the 'infiltration' column of so.spl, so a later
# infiltration run with other settings replaces the values shown here.

# compute infiltration on the sample level
for s in tqdm(samples2):
    # The column handed to the score ('cell_type') must be categorical. The previous
    # check looked at the 'hierarchy' column of a single sample instead.
    assert isinstance(so.obs[s]['cell_type'].dtype, pd.CategoricalDtype), s
    sh.neigh.infiltration(so, s, 'cell_type',
                          interaction1=('Dendritic cells', 'B cells'),
                          interaction2=('B cells', 'B cells'),
                          graph_key='radius_20',
                          local=False)
so.spl.loc[samples2, 'infiltration']

In [None]:
# Display the label dictionary stored for cell_type_id
so.uns['cmap_labels']['cell_type_id']

In [None]:
# Sort samples by increasing infiltration.
# The order must be computed on the same list that is indexed in the plotting loop
# below (samples2); it used to be computed on all_samples, which only gives the right
# panels if both lists happen to hold the same samples in the same order.
ind1=np.argsort(so.spl.loc[samples2].infiltration.values)

# Colour map that highlights two cell types: list position 3 is blue, position 7 is
# orange and every other position is white.
cmap = ['white', 'white', 'white', 'blue',
        'white', 'white', 'white', 'orange', 'white']

cmap2 = ListedColormap(cmap)
so.uns['cmaps'].update({'cell_type_id': cmap2})

#cmap_labels = {0: 'B cells', 1: 'CD 4 T cells',  2: 'CD 8 T cells', 3: 'Dendritics cells', 4: 'Endothelial cells',
               #5: 'Macrophages', 6: 'NK cells', 7: 'RBCs', 8: 'Smooth Muscle Cells'}
#so.uns['cmap_labels'].update({'cell_type_id': cmap_labels})

# One panel per sample, from the lowest to the highest infiltration (6 x 4 = 24 axes)
fig, axs = plt.subplots(6, 4, figsize=(25, 12), dpi=300)
for i,s in enumerate(ind1):
    sh.pl.spatial(so, samples2[s], 'cell_type_id', node_size=0.5, edges=False, graph_key='radius_20', coordinate_keys=['X_loc', 'Y_loc'], ax=axs.flat[i])

Cell infiltration score at cell type level (sample analysis) - metaclusters

In [None]:
# Sample-level infiltration over all_samples, using the cell_type column.
# Only the second sample of so.spl.index is inspected by the check below.
spl = so.spl.index[1]
obs = so.obs[spl]

# make sure that the column you want to use is categorical
assert isinstance(obs.cell_type.dtype, pd.CategoricalDtype)

# Definition of infiltration
# infiltration = count_of_interaction1 / count_of_interaction2

# compute infiltration on the sample level
# NOTE(review): the labels 'Fibroblast' / 'Epithelial' and the graph key 'radius_1' are not
# used anywhere else in this part of the notebook (other cells use labels such as
# 'B cells' and the 'radius_20' / 'knn_4' graphs) — confirm they exist in this dataset.
for s in tqdm(all_samples):
    sh.neigh.infiltration(so, s, 'cell_type', 
                          interaction1=('Fibroblast', 'Epithelial'),
                          interaction2=('Epithelial', 'Epithelial'),
                          graph_key='radius_1',
                          local=False)
# Show the resulting sample-level scores
so.spl.loc[all_samples, 'infiltration']

In [None]:
# Sort samples by increasing infiltration.
# The order is computed on all_samples because that is the list indexed in the
# plotting loop below (it used to be computed on samples2).
ind1=np.argsort(so.spl.loc[all_samples].infiltration.values)

# Colour list for the hierarchy masks, one colour per hierarchy id.
# `cmap` and `cmap2` were swapped here before: the raw list was stored in so.uns and
# the ListedColormap was built from whatever `cmap` an earlier cell had left behind.
cmap = ['white', 'lightgrey', 'darkgreen', 'lightgrey', 'darkred']
#cmap_labels = {0: 'background', 1: 'Myeloid',  2: 'Lymphoid', 3: 'Vascular', 4: 'Stromal'}
cmap2 = ListedColormap(cmap)
# Stored under 'hierarchy_id', the attribute plotted below. Writing this five-colour
# map to 'cell_type_id' would also have clobbered the nine-colour cell type map.
so.uns['cmaps'].update({'hierarchy_id': cmap2})
#so.uns['cmap_labels'].update({'hierarchy_id': cmap_labels})

# One mask panel per sample, from the lowest to the highest infiltration
fig, axs = plt.subplots(6, 5, figsize=(28, 14), dpi=300)
for i,s in enumerate(ind1):
    sh.pl.spatial(so, all_samples[s], 'hierarchy_id', mode='mask', ax=axs.flat[i])

# Cell infiltration score - single-cell level

In [None]:
# Compute infiltration on a cell level ('CD4 T cells' into 'RBCs', radius_20 graph)
spl = so.spl.index[1]  # example sample inspected at the end of the cell

for s in tqdm(samples2):
    # The column handed to the score ('cell_type') must be categorical. The previous
    # check looked at the 'hierarchy' column of a single sample instead.
    assert isinstance(so.obs[s]['cell_type'].dtype, pd.CategoricalDtype), s
    sh.neigh.infiltration(so, s, 'cell_type',
                          interaction1=('CD4 T cells', 'RBCs'),
                          interaction2=('RBCs', 'RBCs'),
                          graph_key='radius_20',
                          local=True)

# Fraction of cells of the example sample that have no local infiltration value.
# (A bare `so.obs[spl].infiltration` line used to precede this one; it displayed
# nothing because only the last expression of a cell is shown.)
so.obs[spl].infiltration.isna().mean()

In [None]:
# Known-broken cell (original note: "it does not work").
# NOTE(review): 'C07_ROI4' does not follow the sample naming used elsewhere in this
# notebook (e.g. 'PM102-ROI1') — confirm that it exists in so.spl.index.
# NOTE(review): the third positional argument may need to be the column holding the
# local infiltration score rather than 'hierarchy_id' — check the ATHENA
# sh.pl.infiltration documentation.
spl = 'C07_ROI4'
# Three axes are created but only axs[1] and axs[2] are drawn on
fig, axs = plt.subplots(1,3, figsize=(16,8))
sh.pl.infiltration(so, spl, 'hierarchy_id',step_size= 10, ax=axs[1])
sh.pl.infiltration(so, spl, 'hierarchy_id', step_size= 5, ax=axs[2])

# Modularity

Modularity captures the structure of a graph by quantifying the degree at which it can be divided into communities of the same label. In the context of tumor heterogeneity, modularity can be thought of as the degree of self-organization of the cells with the same phenotype into spatially distinct communities. A graph of high modularity represents a tumor where connections between the cells within the same community are more dense than connections between cells of different communities.

In [None]:
# Compute the modularity of the cell_cluster_id labelling for every sample
for spl in so.spl.index:
    # NOTE(review): a radius graph is (re)built here with the builder defaults, while
    # modularity reads the graph stored as 'radius_20' — confirm that this call
    # produces that graph, or that rebuilding is needed at all.
    sh.graph.build_graph(so, spl, builder_type='radius', mask_key='cellmasks')
    sh.metrics.modularity(so, spl, 'cell_cluster_id', graph_key='radius_20')
# Display the sample table
so.spl

# LISA Clustering

LISA clustering Paper: https://www.biorxiv.org/content/10.1101/2021.08.16.456469v1

Web app: https://shiny.maths.usyd.edu.au/lisaClust/

Exporting data for LISA analysis

Export a simplified version of the .obs dataframe with the information we need to do the LISA analysis

In [None]:
# Name of the adata.obs column that holds the population of each cell
cluster_id = 'pheno_cluster'

# Write the ROI, population and coordinates of every cell to the file
# that is uploaded to the LISA web app
adata_subset3.obs.loc[:, ['ROI', cluster_id, 'X_loc', 'Y_loc']].to_csv('lisaclust_export.csv')

Upload the 'lisaclust_export.csv' to the web app above, and then download the results as 'LISAclust.csv' in this folder

Importing data from LISA analysis from web app

This will add in the LISA results to the adata.obs as a new column

In [None]:
# Import the LISA results into adata_subset3; the column title passed here ('LISAclust')
# is the obs column used by the cells below.
# NOTE(review): the instructions above say to save the web-app results as 'LISAclust.csv',
# but this reads 'LISA_annotated_data_4regions.csv' — confirm the file name.
mikeimc_v2.lisa_import(adata_subset3,LISA_file = 'LISA_annotated_data_4regions.csv',LISA_col_title = 'LISAclust')

Stacked bar graphs

In [None]:
# Stacked bars: composition of every LISA cluster, once by cell type and once by hierarchy
for i in ['LISAclust']:
    for x in ['cell_type', 'hierarchy']:
        # Rows are LISA clusters; normalize='index' makes each row sum to 1
        tmp = pd.crosstab(
            adata_subset3.obs[i],
            adata_subset3.obs[x],
            normalize='index',
        )
        # Legend placed outside the plotting area
        tmp.plot.bar(stacked=True, figsize=(3, 3)).legend(bbox_to_anchor=(1.02, 1))

In [None]:
# Load the colour table and turn it into nested dictionaries keyed by column name,
# then by pheno_cluster
col_df = pd.read_csv('mikeimc_approach/colours/pheno_colours.csv')
colour_palette = col_df.set_index('pheno_cluster').to_dict()
# Display the pheno_cluster -> colour mapping used for the stacked bars below
colour_palette['colour']

In [None]:
# Stacked bars: pheno_cluster composition of every LISA cluster, in the palette colours
for i in ['LISAclust']:
    # Rows are LISA clusters; normalize='index' makes each row sum to 1
    tmp = pd.crosstab(
        adata_subset3.obs[i],
        adata_subset3.obs['pheno_cluster'],
        normalize='index',
    )
    # Legend placed outside the plotting area
    tmp.plot.bar(
        stacked=True,
        color=colour_palette['colour'],
        figsize=(3, 3),
    ).legend(bbox_to_anchor=(1.02, 1))

Heatmaps

In [None]:
# Heatmap of how each population is distributed over the LISA clusters
LISA_clust_obs, population_id = 'LISAclust', 'pheno_cluster'

# Colour scale limits and figure size
vmin, vmax = 0, 0.6
figsize = (30, 16)

fig, axs = plt.subplots(figsize=figsize)

# Rows are populations; normalize='index' makes each row sum to 1
tmp = pd.crosstab(
    adata_subset3.obs[population_id],
    adata_subset3.obs[LISA_clust_obs],
    normalize='index',
)
sb.heatmap(
    data=tmp,
    robust=True,
    linewidths=.5,
    square=True,
    cmap='viridis',
    vmax=vmax,
    vmin=vmin,
    ax=axs,
)
fig.savefig('LISAclust_heatmap.png', bbox_inches='tight')

# UMAP based on cluster proportion in an ROI

In [None]:
# UMAP of the ROIs based on their LISA cluster proportions, coloured by Group
mikeimc_v2.cellabundance_UMAP(
    adata_subset3,
    'ROI',
    population='LISAclust',
    colour_by='Group',
    normalize='index',
    dim_red='UMAP',
)

Edge vs core of lisa clusters

Normalising over ROI, so it becomes proportion of cells in each region

In [None]:
# LISA cluster proportions per ROI, compared between groups.
# Per the original note, display_tables=True also runs statistics on the groups.
mikeimc_v2.grouped_graph(
    adata_subset3,
    ROI_id='ROI',
    group_by_obs='Group',
    x_axis='LISAclust',
    fig_size=(3, 3),
    log_scale=False,
    display_tables=True,
    crosstab_norm='index',
)
plt.show()

In [None]:
# Draw all Voronoi diagrams and save them

In [None]:
# Draw the Voronoi diagram of every ROI, coloured by LISA cluster, and save it as a PNG.
# draw_voronoi_scatter is imported here and reused by the overlay cell further down.
from mikeimc_v2 import draw_voronoi_scatter

for i in adata_subset3.obs['ROI'].unique().tolist():

    # Cells that belong to the current ROI
    spot = adata_subset3.obs[adata_subset3.obs['ROI'] == i]

    # No scatter overlay here (c is empty); only the Voronoi regions are drawn
    _ = draw_voronoi_scatter(
        spot=spot,
        c=[],
        voronoi_palette=sc.pl.palettes.vega_20_scanpy,
        X='X_loc',
        Y='Y_loc',
        voronoi_hue='LISAclust',
    )
    #plt.savefig(str(i)+'_voronoi.svg')
    plt.savefig(f'{i}_voronoi.png')

In [None]:
# Spatial scatter of every ROI, coloured by LISA cluster, saved as '<ROI>.png' by scanpy
for i in adata_subset3.obs['ROI'].unique().tolist():
    sc.pl.spatial(
        adata_subset3[adata_subset3.obs["ROI"] == i],
        color='LISAclust',
        neighbors_key="spatial_neighbors",
        spot_size=10,
        edges=False,
        edges_width=1,
        edges_color='black',
        img_key=None,
        title=i,
        add_outline=False,
        return_fig=True,
        save=f'{i}.png',
    )

Voronoi, with a specific cell population drawn ontop

In [None]:
# Voronoi diagram of one ROI with the lymphoid cells drawn on top.
# Relies on draw_voronoi_scatter having been imported by the "draw all voronoi" cell.

# Cells of the ROI to look at
ROI = adata_subset3.obs[adata_subset3.obs['ROI'] == 'PM102-ROI1']

# Cell populations to overlay, with a 'colour' column holding one number per
# pheno_cluster category (used to colour the overlaid cells)
specific_cells = ROI[ROI.hierarchy == 'Lymphoid'].assign(
    colour=lambda cells: cells['pheno_cluster'].cat.codes
)

_ = draw_voronoi_scatter(
    spot=ROI,
    c=specific_cells,
    voronoi_palette=sc.pl.palettes.vega_20_scanpy,
    X='X_loc',
    Y='Y_loc',
    voronoi_hue='LISAclust',
    scatter_hue='colour',
    scatter_palette=sc.pl.palettes.vega_20_scanpy,
    scatter_kwargs={'s': 10},
    figsize=(5, 5),
)