
> **ISO2024 INTRODUCTORY SPATIAL 'OMICS ANALYSIS**
>
>
>- HYBRID : TORONTO & ZOOM
>- 10TH JULY 2024 <br>

>**Module 5 : Realizing the spatial potential in your datasets, part 1** <BR>
>
>**Instructor : Shamini Ayyadhury**
>
---

> TOPICS COVERED

* A. Centrality scores
* B. Ripley's L statistics

***

In [None]:
### Import packages

### we will be using the built-in functions of squidpy to perform the analysis

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import squidpy as sq


In [None]:
### Data directory.
### Set the SPATIAL_WORKSHOP_OUT environment variable to point at your own copy of the
### workshop outputs; when it is not set, the original instructor path is used.
### os.path.join(..., '') guarantees a trailing separator, which later cells rely on
### because they build file paths with `out + 'module3/...'`.
out = os.path.join(
    os.environ.get('SPATIAL_WORKSHOP_OUT', '/home/shamini/data/projects/spatial_workshop/out/'),
    '',
)

DATASETS:
1. For this lesson, we will set the stage using datasets from module 2/3.

ANALYSIS :
The code for this pipeline was predominantly reproduced and adapted from <https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/tutorial_vizgen_mouse_liver.html#network-centrality-scores>

In [None]:
### Load the AnnData object saved at the end of module 3.
adata = sc.read_h5ad(out + 'module3/TgCRND8_17_8mths/adata_module3b_banksy.h5ad')

### Keep only the cells that carry a cell-type label.
### Subsetting an AnnData object returns a *view* of the original. The squidpy functions in
### the following cells write their results back into `adata`, so we take an explicit copy
### here instead of letting the view be converted implicitly on the first write.
adata = adata[adata.obs['cell_label'].notna()].copy()
adata

In [None]:
from copy import deepcopy


### Step 1: Calculate spatial neighbors
### coord_type="generic" treats the coordinates as free-form points, and delaunay=True
### builds the neighbour graph from their Delaunay triangulation.
### NOTE: the previous `n_rings=2` argument was removed - squidpy only uses `n_rings` for
### coord_type="grid", so it had no effect here and suggested a setting that was never applied.
sq.gr.spatial_neighbors(adata, coord_type="generic", delaunay=True)
### QUESTION - what difference would it make if we set delaunay=False?

### Step 2: Call the squidpy function that calculates the centrality scores for each
### 'cell_label' group on the neighbour graph built above.
sq.gr.centrality_scores(adata, "cell_label")

### Default figure size for the plots drawn later in this notebook (global scanpy setting).
sc.set_figure_params(figsize=(20, 8))

# Copy the centrality table out of adata.uns into a new DataFrame so that later cells
# work on their own object rather than on the one stored inside `adata`.
df_central = deepcopy(adata.uns["cell_label_centrality_scores"])


In [None]:
### Rank the centrality table by two different scores, highest score first in both cases.
### Each ranking is written to its own DataFrame, so `df_central` is left untouched and
### re-running this cell always gives the same output.

# SORT BY :
### DEGREE CENTRALITY - fraction of non-group members connected to group members.
df_central_by_degree = df_central.sort_values(by="degree_centrality", ascending=False)


# SORT BY :
### CLOSENESS CENTRALITY - measure of how close the group is to other nodes.
df_central_by_closeness = df_central.sort_values(by="closeness_centrality", ascending=False)

### Show both rankings (`display` is provided by the Jupyter/IPython kernel).
display(df_central_by_degree, df_central_by_closeness)

In [None]:

# Rank the clusters (cell labels) for each centrality metric, highest score first.
#
# closeness_centrality - used below to pick the top / bottom clusters.
# degree_centrality    - the degree centrality for a node v is the fraction of nodes it is
#                        connected to; see
#   [Networkx](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.degree_centrality.html#networkx.algorithms.centrality.degree_centrality)
# average_clustering   - clustering coefficient: measure of the degree to which nodes
#                        cluster together.
ser_closeness, ser_degree, ser_cluster = (
    df_central[metric].sort_values(ascending=False)
    for metric in ("closeness_centrality", "degree_centrality", "average_clustering")
)

Now we will take the top 5 and bottom 5 cell labels ranked by their closeness centrality score

In [None]:
# Cell labels ordered from highest to lowest closeness centrality.
ranked_labels = ser_closeness.index.tolist()

# One figure, two panels: left = 5 highest-ranked labels, right = 5 lowest-ranked labels.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))
fig.suptitle("Top 5 clusters and bottom 5 clusters based on closeness centrality", fontsize=24, y=1.05, x=0.4)

# (axis, labels to highlight, panel title, extra plotting options)
# NOTE(review): only the right-hand panel passes palette="tab20" - confirm this is intended.
panels = [
    (axs[0], ranked_labels[:5], "Top 5 clusters based on closeness centrality", {}),
    (axs[1], ranked_labels[-5:], "Bottom 5 clusters based on closeness centrality", {"palette": "tab20"}),
]

for panel_ax, inst_clusters, panel_title, extra_kwargs in panels:
    # Print the selected labels so they can be read alongside the plot.
    print(inst_clusters)
    sq.pl.spatial_scatter(
        adata,
        groups=inst_clusters,
        color="cell_label",
        size=15,
        img=False,
        figsize=(10, 10),
        ax=panel_ax,
        **extra_kwargs,
    )
    panel_ax.set_title(panel_title, loc="left")

>>> NOW LET'S TRY TO COMPLEMENT THE CENTRALITY SCORES WITH RIPLEY'S L FUNCTION 

In [None]:
### Compute Ripley's L statistic for every 'cell_label' group, together with a simulated
### reference. With copy=False the result is written into `adata`; the following cells read
### it from adata.uns['cell_label_ripley_L'].
###
### `seed` is fixed (it was None) so that the random simulations - and therefore the
### simulated curve and p-values shown below - are identical every time the notebook is run.
sq.gr.ripley(
    adata,
    cluster_key='cell_label',
    mode='L',
    spatial_key='spatial',
    metric='euclidean',
    n_neigh=2,
    n_simulations=50,
    n_observations=1000,
    max_dist=None,
    n_steps=50,
    seed=42,
    copy=False,
)


The Ripley output

In [None]:
# Observed Ripley's L statistic, stored by sq.gr.ripley under the 'L_stat' key.
# Later cells use the 'bins', 'cell_label' and 'stats' columns of this table.
ripley_l_stat = adata.uns['cell_label_ripley_L']['L_stat']
df_cell_ripley = pd.DataFrame(ripley_l_stat)
df_cell_ripley

In [None]:
# Simulated Ripley statistics, stored by sq.gr.ripley under the 'sims_stat' key.
# pd.DataFrame(<existing DataFrame>) does not copy by default, so take an explicit copy:
# the column edits below (and the 'source' column added in later cells) must not leak
# back into the result kept in adata.uns, otherwise re-running this cell could fail.
sim = pd.DataFrame(adata.uns['cell_label_ripley_L']['sims_stat']).copy()

# Rename the columns so they line up with the observed table ('bins', 'cell_label', 'stats').
# Assumes the stored table has exactly three columns in this order - TODO confirm.
sim.columns = ['bins', 'cell_label', 'stats']

# Give every simulated row the same label so all simulations are plotted as one group.
sim['cell_label'] = 'sim'
sim


In [None]:
# p-values stored by sq.gr.ripley under the 'pvalues' key, shown as a table.
ripley_pvalues = adata.uns['cell_label_ripley_L']['pvalues']
pd.DataFrame(ripley_pvalues)

Let's compare the centrality scores with their corresponding Ripley's L scores

In [None]:

# Cell labels ordered from highest to lowest closeness centrality.
closeness_ranked = ser_closeness.index.tolist()

# Row masks selecting the Ripley statistics of the 5 highest / 5 lowest ranked labels.
top5_mask = df_cell_ripley['cell_label'].isin(closeness_ranked[:5])
bottom5_mask = df_cell_ripley['cell_label'].isin(closeness_ranked[-5:])

# Independent copies, so columns can be added to them later without touching df_cell_ripley.
df_cell_ripley_high_cen = df_cell_ripley.loc[top5_mask].copy()
df_cell_ripley_low_cen = df_cell_ripley.loc[bottom5_mask].copy()

In [None]:


### Ripley's L curves of the high-closeness-centrality cell labels, overlaid on the
### simulated reference.

# Tag each table with where its rows come from; 'source' drives the line style below.
df_cell_ripley_high_cen['source'] = 'High Closeness Centrality'

# The simulated rows are tagged on a copy (`assign`) so the shared `sim` frame is not
# modified by this cell. `sim` always has a 'cell_label' column (set when it was created),
# so the former "add it if missing" guard was dead code and has been removed.
combined_df = pd.concat([df_cell_ripley_high_cen, sim.assign(source='Simulated Data')])

# Plotting: one line per cell label, line style by source.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=combined_df, x='bins', y='stats', hue='cell_label', style='source', legend='brief', ax=ax)

# Customize the legend: keep only entries that name a cell label present in the data
# (this drops seaborn's section headings and the 'source' style entries).
handles, labels = ax.get_legend_handles_labels()
plotted_labels = set(combined_df['cell_label'].unique())
filtered_handles_labels = [(h, l) for h, l in zip(handles, labels) if l in plotted_labels]
if filtered_handles_labels:
    # Guarded: unpacking zip(*[]) would raise if no entry matched.
    handles, labels = zip(*filtered_handles_labels)
    ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), title='Legend')

# Set plot title and labels
ax.set_title('Ripley L-function for High Closeness Centrality and Simulated Data')
ax.set_xlabel('Bins')
ax.set_ylabel('Stats')

# Show plot
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Ripley's L curves of the low-closeness-centrality cell labels, overlaid on the
### simulated reference, with a vertical reference line.

# Tag each table with where its rows come from; 'source' drives the line style below.
df_cell_ripley_low_cen['source'] = 'Low Closeness Centrality'

# The simulated rows are tagged on a copy (`assign`) so the shared `sim` frame is not
# modified by this cell. `sim` always has a 'cell_label' column (set when it was created),
# so the former "add it if missing" guard was dead code and has been removed.
combined_df_low_cen = pd.concat([df_cell_ripley_low_cen, sim.assign(source='Simulated Data')])

# Plotting: one line per cell label, line style by source.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=combined_df_low_cen, x='bins', y='stats', hue='cell_label', style='source', legend='brief', ax=ax)

# Reference line. It is drawn BEFORE the legend is rebuilt so that its 'Threshold' label
# is actually listed (previously it was added after the legend and never appeared).
# NOTE(review): x=500 and the 0-40 y-range are hard-coded - confirm they suit this dataset.
ax.vlines(x=500, ymin=0, ymax=40, color='red', linestyle='--', label='Threshold')

# Customize the legend: keep only entries that name a cell label present in the data
# (this drops seaborn's section headings and the 'source' style entries), plus the threshold.
handles, labels = ax.get_legend_handles_labels()
legend_labels = set(combined_df_low_cen['cell_label'].unique()) | {'Threshold'}
filtered_handles_labels = [(h, l) for h, l in zip(handles, labels) if l in legend_labels]
if filtered_handles_labels:
    # Guarded: unpacking zip(*[]) would raise if no entry matched.
    handles, labels = zip(*filtered_handles_labels)
    ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), title='Legend')

# Set plot title and labels
ax.set_title('Ripley L-function for Low Closeness Centrality and Simulated Data')
ax.set_xlabel('Bins')
ax.set_ylabel('Stats')

# Show plot
plt.show()


>>> DISCUSSION : WHAT CAN WE INFER ABOUT THE CHANGES IN DISTRIBUTION FOR THE CELL-TYPES WITH LOW CLOSENESS CENTRALITY SCORES?

NOW : PARTICIPANTS TO TRY ON THEIR OWN IN THE NEXT SCRIPT
Script 6 has been updated with additional code that we did not have time to review during the workshop