In [None]:
import os, sys, re, random, math, time, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import MiniBatchKMeans
import seaborn as sns
from pprint import pprint
import yaml
import uuid
import time
import pickle

import anndata as ad
print(f"anndata=={ad.__version__}")
import scanpy as sc
import squidpy as sq
print(f"squidpy=={sq.__version__}")
import scimap as sm
print(f"scimap=={sm.__version__}")

from scipy.stats import kurtosis
from scipy.stats import skew
from scipy.stats import iqr

%matplotlib inline

In [None]:
# Load the classification table that every later cell works from (path is relative to the notebook).
allClassData = pd.read_csv(filepath_or_buffer="./allClassData.csv")

In [None]:
# Number of distinct (non-missing) ROI labels in the table.
allClassData["ROI"].nunique(dropna=True)

# Aggregate Stats & Permutation Test (SciMap Tool)

##### [ obs connections /  random permutations of connections ]

In [None]:
## Create AnnData object
# Drop every row whose Class label contains the artifact prefix; rows with a
# missing Class are kept (na=False).
noBad = allClassData[~allClassData['Class'].str.contains("ARTIFACT: ", na=False)]

# Marker measurements: columns whose name contains "_Cell_", minus the
# "_Max" / "_Mean" columns.
singleJustVars = noBad.filter(regex='(_Cell_)', axis=1)
singleJustVars = singleJustVars.drop(columns=list(singleJustVars.filter(regex='(_Max|_Mean)')))

adata = ad.AnnData(singleJustVars)  # rows = cells, columns = marker measurements
adata.obs_names = [str(e) for e in noBad['uuid'].to_list()]  # observation names come from the uuid column
adata.var_names = singleJustVars.columns.to_list()           # variable names are the retained column names
adata.obsm = {
    "spatial": noBad[['Centroid_X_um', 'invertY']].to_numpy(),   # coordinates
    "Nucleus_Area": noBad[['Nucleus_Area_um2']].to_numpy(),      # nucleus area
    "Cell_Area": noBad[['Cell_Area_um2']].to_numpy(),            # cell area
}

# Per-cell annotations live in .obs so they are sliced together with the cells.
adata.obs["cell_type"] = pd.Categorical(noBad['Class'])      # Class annotations
adata.obs["imageid"] = pd.Categorical(noBad["ROI"])          # ROI label
adata.obs["cohort_site"] = pd.Categorical(noBad["Origin"])
# Slide has one value per cell, so it belongs in .obs as well: entries in .uns
# are not subset when adata is sliced by ROI or cohort.
adata.obs["Slide"] = pd.Categorical(noBad["Slide"])
# Retained only for backward compatibility with code that reads adata.uns["Slide"].
adata.uns["Slide"] = noBad["Slide"]

adata

In [None]:
# Row count per Origin value after artifact removal.
noBad.loc[:, "Origin"].value_counts()

In [None]:
# Top 25 variables by expression, drawn on a wide canvas; `save` asks scanpy to
# write the figure to disk as well.
fig = plt.figure(figsize=(15, 9))
ax1 = fig.add_subplot(1, 1, 1)
sc.pl.highest_expr_genes(
    adata,
    n_top=25,
    ax=ax1,
    save='highest_expressed_markers.png',
)

In [None]:
# Compute QC metrics in place on adata. With var_type="antibodies" the
# per-cell count column is named n_antibodies_by_counts (used below).
sc.pp.calculate_qc_metrics(
    adata,
    percent_top=(5, 10, 15),
    var_type="antibodies",
    inplace=True
)

# log1p_total_counts = log(1+x) where x is total count of genes in a cell
# n_antibodies_by_counts = number of antibodies with at least 1 count in a cell, calculated across all cells
# x / y / data are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (0.11 only warned), so the positional form raises TypeError.
sns.jointplot(
    data=adata.obs,
    x="log1p_total_counts",
    y="n_antibodies_by_counts",
    kind="hex",
    norm=mpl.colors.LogNorm(),
)
# Make sure the output directory exists so the cell also works on a fresh checkout.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/n_antibodies_by_counts.png')

In [None]:
# Inspect a single ROI: select the cells whose imageid is 'controllymph1',
# report the shape of the selection and plot them coloured by cell type.
roi_mask = adata.obs['imageid'] == 'controllymph1'
justRandROI = adata[roi_mask]
print(justRandROI.shape)

fig = plt.figure(figsize=(14, 9))
ax1 = fig.add_subplot(1, 1, 1)
sc.pl.spatial(
    justRandROI,
    color="cell_type",
    spot_size=12,
    ax=ax1,
)

# Neighborhood enrichment
 -----
 
A similar analysis that can inform on the neighbor structure of the tissue is the *neighborhood enrichment test*.
Such a score can be computed with the function `squidpy.gr.nhood_enrichment`.
In short, it is an enrichment score on the spatial proximity of clusters: if spots belonging to two different clusters are often close to each other, they will have a high score and can be described as *enriched*.
On the other hand, if they are far apart, the score will be low and they can be described as *depleted*.

This score is based on a permutation test, and the number of permutations can be set with the `n_perms` argument (default is 1000).
Since the function works on a connectivity matrix, that matrix needs to be computed first.
This can be done with `squidpy.gr.spatial_neighbors`.

In [None]:
#  https://scimap.xyz/tutorials/5-Simple_Spatial_Analysis/

# The call below takes its coordinates as .obs column names, so expose the two
# columns of obsm['spatial'] as "X" and "Y".
spatial_xy = adata.obsm['spatial']
adata.obs["X"] = spatial_xy[:, 0]
adata.obs["Y"] = spatial_xy[:, 1]

# Average shortest distance between phenotypes or clusters of interest
# (3D data supported); the wall-clock time of the call is reported in minutes.
start_time = time.time()
adata = sm.tl.spatial_distance(
    adata,
    x_coordinate='X',
    y_coordinate='Y',
    z_coordinate=None,
    phenotype='cell_type',
    subset=None,
    imageid='imageid',
    label='spatial_distance',
)
elapsed_minutes = (time.time() - start_time) / 60
print("--- %s minutes ---" % elapsed_minutes)

In [None]:
# Show the result stored under the 'spatial_distance' label by the previous cell.
adata.uns["spatial_distance"]

In [None]:
# Plot the average shortest distances between cell types.
# The figure size is set through rcParams, so it stays in effect for later cells too.
plt.rcParams.update({'figure.figsize': [5, 3]})
sm.pl.spatial_distance(adata, phenotype='cell_type')

## Case -- Lymph Node

In [None]:
# Compute how likely cell types are to be found next to one another compared to a random background.
# Boolean indexing returns a view of `adata`; take an explicit copy so the
# result attached by spatial_interaction (read back from .uns in the next cell)
# lands on an independent object instead of relying on anndata's implicit
# view-to-copy conversion.
caseLymph = adata[adata.obs['cohort_site'] == 'Case-Lymphnode'].copy()

# Use the radius method to identify local neighbours and compute P-values.
start_time = time.time()
caseLymph = sm.tl.spatial_interaction(
    caseLymph,
    x_coordinate='X',
    y_coordinate='Y',
    z_coordinate=None,
    imageid='imageid',
    phenotype='cell_type',
    method='radius',
    radius=45,
    label='spatial_interaction_radius',
    permutation=2000,
)
print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [None]:
# Show the interaction result stored under the label used in the previous cell.
caseLymph.uns["spatial_interaction_radius"]

In [None]:
# View results: heatmap of the radius-based spatial interaction results for the
# Case-Lymphnode subset (summarize_plot=True), with row labels shown.
sm.pl.spatial_interaction(
    caseLymph,
    spatial_interaction='spatial_interaction_radius',
    summarize_plot=True,
    row_cluster=False,
    yticklabels=True,
    linewidths=0.75,
    linecolor='black',
)
plt.show()

## Control - Lymph Node

In [None]:
# Control-Lymphnode cells only. Boolean indexing returns a view of `adata`;
# take an explicit copy so the result attached by spatial_interaction (read back
# from .uns in the next cell) lands on an independent object instead of relying
# on anndata's implicit view-to-copy conversion.
cntlLymph = adata[adata.obs['cohort_site'] == 'Control-Lymphnode'].copy()

# Use the radius method to identify local neighbours and compute P-values.
start_time = time.time()
cntlLymph = sm.tl.spatial_interaction(
    cntlLymph,
    x_coordinate='X',
    y_coordinate='Y',
    z_coordinate=None,
    imageid='imageid',
    phenotype='cell_type',
    method='radius',
    radius=45,
    label='spatial_interaction_radius',
    permutation=2000,
)
print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [None]:
# Show the interaction result stored under the label used in the previous cell.
cntlLymph.uns["spatial_interaction_radius"]

In [None]:
# View results: heatmap of the radius-based spatial interaction results for the
# Control-Lymphnode subset (summarize_plot=True).
sm.pl.spatial_interaction(
    cntlLymph,
    spatial_interaction='spatial_interaction_radius',
    summarize_plot=True,
    row_cluster=False,
    linecolor='black',
    linewidths=0.75,
)

## Control -- Primary Site

In [None]:
# Control-PrimarySite cells only. Boolean indexing returns a view of `adata`;
# take an explicit copy so the result attached by spatial_interaction (read back
# from .uns in the next cell) lands on an independent object instead of relying
# on anndata's implicit view-to-copy conversion.
cntlPS = adata[adata.obs['cohort_site'] == 'Control-PrimarySite'].copy()

# Use the radius method to identify local neighbours and compute P-values.
start_time = time.time()
cntlPS = sm.tl.spatial_interaction(
    cntlPS,
    x_coordinate='X',
    y_coordinate='Y',
    z_coordinate=None,
    imageid='imageid',
    phenotype='cell_type',
    method='radius',
    radius=45,
    label='spatial_interaction_radius',
    permutation=2000,
)
print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [None]:
# Show the interaction result stored under the label used in the previous cell.
cntlPS.uns["spatial_interaction_radius"]

In [None]:
# View results: heatmap of the radius-based spatial interaction results for the
# Control-PrimarySite subset (summarize_plot=True).
sm.pl.spatial_interaction(
    cntlPS,
    spatial_interaction='spatial_interaction_radius',
    summarize_plot=True,
    row_cluster=False,
    linecolor='black',
    linewidths=0.75,
)

## Case -- Primary Site

In [None]:
# Case-PrimarySite cells only. Boolean indexing returns a view of `adata`;
# take an explicit copy so the result attached by spatial_interaction (read back
# from .uns in the next cell) lands on an independent object instead of relying
# on anndata's implicit view-to-copy conversion.
casePS = adata[adata.obs['cohort_site'] == 'Case-PrimarySite'].copy()

# Use the radius method to identify local neighbours and compute P-values.
start_time = time.time()
casePS = sm.tl.spatial_interaction(
    casePS,
    x_coordinate='X',
    y_coordinate='Y',
    z_coordinate=None,
    imageid='imageid',
    phenotype='cell_type',
    method='radius',
    radius=45,
    label='spatial_interaction_radius',
    permutation=2000,
)
print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [None]:
# Show the interaction result stored under the label used in the previous cell.
casePS.uns["spatial_interaction_radius"]

In [None]:
# View results: heatmap of the radius-based spatial interaction results for the
# Case-PrimarySite subset (summarize_plot=True).
sm.pl.spatial_interaction(
    casePS,
    spatial_interaction='spatial_interaction_radius',
    summarize_plot=True,
    row_cluster=False,
    linecolor='black',
    linewidths=0.75,
)

# Distinct Pairwise Distances (QuPath Tool)

#### Subset Dataset - Cases ONLY

Note: the "Distance" columns must already be present in the Quant file before proceeding (the full dataset is required).

In [None]:
# Mean of every "Distance_" column per Class, for Case rows at each site.
# Both sites go through the same steps, so they share one loop instead of two
# copy-pasted blocks.
case_tables = {}
for site in ("Lymphnode", "PrimarySite"):
    subset = allClassData.loc[(allClassData['Cohort'] == "Case") & (allClassData['SiteLoc'] == site)]
    # Keep the Class label plus the distance columns, without the artifact distance columns.
    subset = subset.filter(regex='(Class|Distance_)')
    subset = subset.drop(columns=list(subset.filter(regex='Distance_to_ARTIFACT')))
    # na=False (as in the AnnData cell): a missing Class must not break the
    # boolean mask; such rows are kept and then ignored by groupby.
    subset = subset[~subset['Class'].str.contains("ARTIFACT: ", na=False)]

    # Rows and columns are both sorted so the heatmaps line up;
    # missing means are shown as 0.
    table = subset.groupby(['Class']).agg('mean').fillna(0).sort_index()
    case_tables[site] = table.reindex(sorted(table.columns), axis=1)

caseTableLN = case_tables["Lymphnode"]
caseTablePS = case_tables["PrimarySite"]

f, ax = plt.subplots(1, 2, figsize=(15, 6))
sns.heatmap(caseTableLN, annot=True, linewidths=.5, cmap=sns.cm.rocket_r, ax=ax[0]).set(title='Average Case-Lymphnode')
sns.heatmap(caseTablePS, annot=True, linewidths=.5, cmap=sns.cm.rocket_r, ax=ax[1]).set(title='Average Case-PrimarySite')

# Make sure the output directory exists so the cell also works on a fresh checkout.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/caseLymph_vs_casePS_avgdist.pdf', format='pdf')

#### Subset Dataset - Controls ONLY

In [None]:
# Mean of every "Distance_" column per Class, for Control rows at each site.
# Both sites go through the same steps, so they share one loop instead of two
# copy-pasted blocks.
control_tables = {}
for site in ("Lymphnode", "PrimarySite"):
    subset = allClassData.loc[(allClassData['Cohort'] == "Control") & (allClassData['SiteLoc'] == site)]
    # Keep the Class label plus the distance columns, without the artifact distance columns.
    subset = subset.filter(regex='(Class|Distance_)')
    subset = subset.drop(columns=list(subset.filter(regex='Distance_to_ARTIFACT')))
    # na=False (as in the AnnData cell): a missing Class must not break the
    # boolean mask; such rows are kept and then ignored by groupby.
    subset = subset[~subset['Class'].str.contains("ARTIFACT: ", na=False)]

    # Rows and columns are both sorted so the heatmaps line up;
    # missing means are shown as 0.
    table = subset.groupby(['Class']).agg('mean').fillna(0).sort_index()
    control_tables[site] = table.reindex(sorted(table.columns), axis=1)

crtlTableLN = control_tables["Lymphnode"]
crtlTablePS = control_tables["PrimarySite"]

f, ax = plt.subplots(1, 2, figsize=(15, 6))
sns.heatmap(crtlTableLN, annot=True, linewidths=.5, cmap=sns.cm.rocket_r, ax=ax[0]).set(title='Average Control-Lymphnode')
# The result of .set(...) is deliberately not assigned: binding it to `ax`
# would replace the axes array with the return value of .set.
sns.heatmap(crtlTablePS, annot=True, linewidths=.5, cmap=sns.cm.rocket_r, ax=ax[1]).set(title='Average Control-PrimarySite')

# Make sure the output directory exists so the cell also works on a fresh checkout.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/ctrlLymph_vs_ctrlPS_avgdist.pdf', format='pdf')

### Case Vs. Control Pairwise Distance Deltas

In [None]:
# Case minus Control mean distances, per site. Entries present in only one of
# the two tables are treated as 0 on the missing side (fill_value=0).
## Blue = Closer in Cases
## Red = Further away in Cases
# center=0 pins the neutral colour of the diverging map to a delta of zero.
# With the asymmetric limits (-30, 40) alone, the midpoint would sit at +5 and
# small positive deltas would be drawn blue, contradicting the legend above.
f, ax = plt.subplots(1, 2, figsize=(15, 6))

deltas = caseTableLN.subtract(crtlTableLN, fill_value=0, axis=1)
sns.heatmap(deltas, annot=True, linewidths=.5, cmap=sns.cm.vlag, vmin=-30, vmax=40, center=0,
            ax=ax[0]).set(title='Case vs. Control [Lymphnode] Deltas (um)')

deltas = caseTablePS.subtract(crtlTablePS, fill_value=0, axis=1)
# The result of .set(...) is not bound to a name; it is not an Axes object.
sns.heatmap(deltas, annot=True, linewidths=.5, cmap=sns.cm.vlag, vmin=-30, vmax=40, center=0,
            ax=ax[1]).set(title='Case vs. Control [PrimarySite] Deltas (um)')

# Make sure the output directory exists so the cell also works on a fresh checkout.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/case_vs_ctrl_distance_deltas.pdf', format='pdf')