
> **ISO2024 INTRODUCTORY SPATIAL 'OMICS ANALYSIS**
>
>
>- HYBRID : TORONTO & ZOOM
>- 10TH JULY 2024 <br>

>**Module 6 : Realizing the spatial potential in your datasets, part 2** <BR>
>
>**Instructor : Shamini Ayyadhury**
>
---

> TOPICS COVERED

* A. Centrality scores
* B. Ripley's L statistics

***

In [None]:
### Import packages

### we will be using the built-in functions of squidpy to perform the analysis

import squidpy as sq
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [None]:
# Directory the AnnData (.h5ad) inputs are read from.
# NOTE(review): absolute path from the original author's machine - adjust to your own data location.
out = '/home/shamini/data/projects/spatial_workshop/out/module3/' ### data directory

# Palette used to give every cell label its own colour.
# All 31 entries are distinct: the original list repeated '#8B4513' and '#4682B4',
# which let two different cell labels end up with the same colour.
colors = ['#8B0000', '#006400', '#00008B', '#8B008B', '#556B2F', '#FF8C00', '#8B4513',
'#2F4F4F', '#4682B4', '#8A2BE2', '#5F9EA0', '#D2691E', '#DC143C', '#9400D3',
'#B22222', '#228B22', '#4B0082', '#6B8E23', '#B8860B', '#9932CC', '#A0522D',
'#FF4500', '#6A5ACD', '#708090', '#FF6347', '#1E90FF', '#8FBC8F', '#483D8B',
'#2E8B57', '#FF69B4', '#CD5C5C']

# Display names of the datasets, in the order they are appended to `adatas`.
names = ['wt', 'AD']

In [None]:
# Input files relative to `out`, listed in the same order as `names` (wild-type first, AD model second).
sample_files = [
    'wt_13_4mths/adata_wt_banksy.h5ad',
    'TgCRND8_17_8mths/adata_module3b_banksy.h5ad',
]

adatas = []

for sample_file in sample_files:
    adata = sc.read_h5ad(out + sample_file)
    # Keep only cells that carry a cell_label.
    # Boolean indexing returns a *view* of the AnnData; `.copy()` makes it a standalone
    # object so that later cells can store results in it (graphs, .uns entries)
    # without relying on an implicit view-to-copy conversion.
    adata = adata[adata.obs['cell_label'].notna()].copy()
    adatas.append(adata)

# Print a summary of each loaded dataset.
for adata in adatas:
    print(adata)
    print('')


In [None]:


### PARTICIPANTS TO PLAY AROUND WITH THE PARAMETERS

# Settings for the spatial neighbourhood graph, gathered in one place so they are easy to tweak.
# NOTE(review): the squidpy docs describe `n_rings` as applying to grid coordinates only,
# so it may have no effect with coord_type="generic" - confirm before tuning it.
neighbor_kwargs = dict(coord_type="generic", n_rings=2, delaunay=True)

# Build the graph on every dataset.
for adata in adatas:
    sq.gr.spatial_neighbors(adata, **neighbor_kwargs)


COMPUTE CENTRALITY SCORES

In [None]:
from copy import deepcopy

# One entry per dataset, in the same order as `adatas`.
deep_centrals, ser_closeness, ser_degree, ser_cluster = [], [], [], []

# (centrality column, list that collects its ranking sorted high -> low)
ranked_outputs = (
    ("closeness_centrality", ser_closeness),
    ("degree_centrality", ser_degree),
    ("average_clustering", ser_cluster),
)

for adata in adatas:
    # Compute the scores per cell_label; the table is read back from adata.uns below.
    sq.gr.centrality_scores(adata, "cell_label")

    # Work on an independent copy so the table stored in adata.uns is left untouched.
    scores = deepcopy(adata.uns["cell_label_centrality_scores"])

    for column, bucket in ranked_outputs:
        bucket.append(scores[column].sort_values(ascending=False))

    deep_centrals.append(scores)

### We will tag each cell_label with a unique color for easy visualization and comparison between control and AD models

In [None]:

# Collect the cell_label categories of *every* dataset (ordered, de-duplicated).
# The original used only the leftover loop variable `adata` (i.e. the last dataset) and only
# the labels actually observed in it, so a category present elsewhere made the colour
# lookup below fail with a KeyError.
unique_labels = list(dict.fromkeys(
    label
    for dataset in adatas
    for label in dataset.obs['cell_label'].cat.categories
))
if len(colors) < len(unique_labels):
    raise ValueError("Not enough colors provided for the unique cell labels.")

# Create the dictionary of colors (label -> hex colour), shared by all datasets
dict_colors = dict(zip(unique_labels, colors))

# Assign colors to the adata object, in the order of each dataset's own categories
for adata in adatas:
    adata.uns['cell_label_colors'] = pd.Series(dict_colors).loc[adata.obs['cell_label'].cat.categories].values

# ser_degree holds, per dataset, the degree centrality of each cell label sorted high -> low.
for i, degree in enumerate(ser_degree):
    ranked_labels = degree.index.tolist()
    inst_clusters_top = ranked_labels[:5]      # 5 labels with the highest degree centrality
    inst_clusters_bottom = ranked_labels[-5:]  # 5 labels with the lowest degree centrality

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    fig.suptitle(f"Top 5 clusters and bottom 5 clusters based on degree centrality for {names[i]}", fontsize=24, y=1.05, x=0.4)

    # Left panel: top 5 clusters, right panel: bottom 5 clusters
    panels = (
        (axs[0], inst_clusters_top, "Top 5 clusters"),
        (axs[1], inst_clusters_bottom, "Bottom 5 clusters"),
    )
    for ax, groups, title in panels:
        sq.pl.spatial_scatter(
            adatas[i], groups=groups, color="cell_label", size=15, img=False, figsize=(10, 10), ax=ax
        )
        ax.set_title(title, loc="left")

    # Lay out and show each figure inside the loop; done once after the loop,
    # tight_layout() only affected the last figure.
    fig.tight_layout()
    plt.show()


```
1. If you observe the plots above, you will notice that the distribution of certain cell-types has changed between the WT and AD-model.
2. Now let's dig in a bit more

```

In [None]:

# Collect the cell_label categories of every dataset (ordered, de-duplicated) rather than
# relying on the leftover loop variable `adata`, which only refers to the last dataset.
unique_labels = list(dict.fromkeys(
    label
    for dataset in adatas
    for label in dataset.obs['cell_label'].cat.categories
))
if len(colors) < len(unique_labels):
    raise ValueError("Not enough colors provided for the unique cell labels.")

# Create the dictionary of colors (label -> hex colour), shared by all datasets
dict_colors = dict(zip(unique_labels, colors))


In [None]:

# Assign colors to the adata object, in the order of each dataset's own categories
for adata in adatas:
    adata.uns['cell_label_colors'] = pd.Series(dict_colors).loc[adata.obs['cell_label'].cat.categories].values

# ser_degree holds, per dataset, the degree centrality of each cell label sorted high -> low.
for i, degree in enumerate(ser_degree):

    inst_clusters_top = degree.index.tolist()[:5] # Get top 5 clusters
    cell_labels = adatas[i].obs["cell_label"]

    # One overview panel plus one panel per cluster
    fig, axs = plt.subplots(1, len(inst_clusters_top)+1, figsize=(20, 3.5))
    fig.suptitle(f"Top 5 clusters based on degree centrality for {names[i]}", fontsize=24, y=1.05, x=0.4)

    # Overview: all top 5 clusters together
    adata_subset = adatas[i][cell_labels.isin(inst_clusters_top)] # Subset the adata object
    # `data=` is passed by keyword so the call does not depend on seaborn's positional signature
    sns.scatterplot(data=adata_subset.obs, x='x_location', y='y_location', hue='cell_label', palette=dict_colors, ax=axs[0], s=1)
    axs[0].set_title("Top 5 clusters", loc="left")

    ### plot individual clusters
    for j, cluster in enumerate(inst_clusters_top):
        adata_subset = adatas[i][cell_labels == cluster]
        sns.scatterplot(data=adata_subset.obs, x='x_location', y='y_location', hue='cell_label', palette=dict_colors, ax=axs[j+1], s=1)
        axs[j+1].set_title(f"Cluster {cluster}", loc="left")

    # Lay out and show each figure inside the loop; done once after the loop,
    # tight_layout() only affected the last figure.
    fig.tight_layout()
    plt.show()


>>> 1. The purpose of performing or utilizing spatial statistics is to understand the broad patterns of distributions of points.
>>> 2. Here, points can be either cells or transcripts 
>>> 3. You can evaluate spatial patterns using cell-types, clusters or other biomarkers.


>>> 4. In the above plot, we can see that the top well-connected cell-types change in the AD model. And why do we want to look at these spatial shifts?

>>> Looking at overall distributions of cells can give us a sense of broad patterns of changes before we dive deep into cluster or anatomical regional pattern characterization.

>>> Though we could not delve deep into this at the workshop, try repeating the above methods, and others found in packages such as squidpy and sopa, on clusters, cell types and cell states.

In [None]:

# ser_degree holds, per dataset, the degree centrality of each cell label sorted high -> low,
# so the last 5 entries are the labels with the lowest degree centrality.
for i, degree in enumerate(ser_degree):
    inst_clusters_bottom = degree.index.tolist()[-5:]
    cell_labels = adatas[i].obs["cell_label"]

    # One overview panel plus one panel per cluster
    fig, axs = plt.subplots(1, len(inst_clusters_bottom)+1, figsize=(20, 3.5))
    # The title now matches what is plotted: the original said "Top 5 ... closeness centrality"
    # although this cell shows the bottom 5 clusters ranked by degree centrality.
    fig.suptitle(f"Bottom 5 clusters based on degree centrality for {names[i]}", fontsize=24, y=1.05, x=0.4)

    # Overview: all bottom 5 clusters together
    adata_subset = adatas[i][cell_labels.isin(inst_clusters_bottom)]
    # `data=` is passed by keyword so the call does not depend on seaborn's positional signature
    sns.scatterplot(data=adata_subset.obs, x='x_location', y='y_location', hue='cell_label', palette=dict_colors, ax=axs[0], s=3)
    axs[0].set_title("Bottom 5 clusters", loc="left")
    axs[0].legend(loc='upper right')

    ### plot individual clusters
    for j, cluster in enumerate(inst_clusters_bottom):
        adata_subset = adatas[i][cell_labels == cluster]
        sns.scatterplot(data=adata_subset.obs, x='x_location', y='y_location', hue='cell_label', palette=dict_colors, ax=axs[j+1], s=3)
        axs[j+1].set_title(f"Cluster {cluster}", loc="left")
        axs[j+1].legend(loc='upper right')

    # Lay out and show each figure inside the loop; done once after the loop,
    # tight_layout() only affected the last figure.
    fig.tight_layout()
    plt.show()

>>> The same analysis, repeated on the bottom 5 cell-types.
>>> The bottom 5 cell-types are clustered in select regions.
>>> Question: Do you think these cell-types group together within the same anatomical regions or clusters?

>>> Now repeat using the other centrality measures. Which ones do you think are informative?

>>> DO IT YOURSELF


COMPUTE RIPLEY'S FUNCTION

Now let's try out Ripley's spatial statistic

In [None]:
# Ripley settings, shared by all datasets so both are analysed the same way.
# NOTE(review): seed=None presumably leaves the simulations unseeded, so the simulated
# curves may differ from run to run - set an integer seed if reproducibility matters.
ripley_kwargs = dict(
    cluster_key='cell_label',
    mode='L',
    spatial_key='spatial',
    metric='euclidean',
    n_neigh=2,
    n_simulations=50,
    n_observations=1000,
    max_dist=None,
    n_steps=50,
    seed=None,
    copy=False,
)

# The results are read back from adata.uns['cell_label_ripley_L'] in the next cell.
for adata in adatas:
    sq.gr.ripley(adata, **ripley_kwargs)


In [None]:
# Per dataset: the observed Ripley statistic and the simulated reference, as DataFrames.
df_cell_ripley = []
sim = []

for adata in adatas:
    ripley_result = adata.uns['cell_label_ripley_L']

    # Observed statistic per cell label
    df_cell_ripley.append(pd.DataFrame(ripley_result['L_stat']))

    # Simulated statistic: rename the columns to match the observed table and tag every
    # row with the pseudo-label 'sim' so both tables can be concatenated and plotted together.
    sim_df = pd.DataFrame(ripley_result['sims_stat'])
    sim_df.columns = ['bins', 'cell_label', 'stats']
    sim_df['cell_label'] = 'sim'
    sim.append(sim_df)


In [None]:
# Ripley tables restricted to the 5 labels with the highest / lowest closeness centrality.
df_cell_ripley_high_cen = []
df_cell_ripley_low_cen = []

# ser_closeness is sorted high -> low, so the first 5 index entries are the top labels
# and the last 5 are the bottom labels.
for i, closeness in enumerate(ser_closeness):
    ripley_df = df_cell_ripley[i]
    ranked_labels = closeness.index.tolist()

    is_high = ripley_df['cell_label'].isin(ranked_labels[:5])
    df_cell_ripley_high_cen.append(ripley_df[is_high].copy())

    is_low = ripley_df['cell_label'].isin(ranked_labels[-5:])
    df_cell_ripley_low_cen.append(ripley_df[is_low].copy())

In [None]:

# One panel per dataset: observed curves of the high-closeness labels vs. the simulations.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))

for i, adata in enumerate(adatas):
    observed = df_cell_ripley_high_cen[i]
    simulated = sim[i]

    # Tag each table with its origin (written into the stored tables in place);
    # 'source' drives the line style in the plot.
    observed['source'] = 'High Closeness Centrality'
    simulated['source'] = 'Simulated Data'

    # Fallback label in case the simulated table has no cell_label column
    if 'cell_label' not in simulated.columns:
        simulated['cell_label'] = 'Simulated'

    combined_df = pd.concat([observed, simulated])

    # Plotting
    ax = axs[i]
    sns.lineplot(data=combined_df, x='bins', y='stats', hue='cell_label', style='source', legend='brief', ax=ax)

    # Keep only legend entries that are actual cell_label values
    # (drops any other entries, e.g. section headers and the 'source' style keys).
    present_labels = combined_df['cell_label'].unique()
    all_handles, all_labels = ax.get_legend_handles_labels()
    kept = [(handle, label) for handle, label in zip(all_handles, all_labels) if label in present_labels]
    handles, labels = zip(*kept)
    ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), title='Legend')

    # Set plot title and labels
    ax.set_title(f'Ripley L-function for High Closeness Centrality and Simulated Data for {names[i]}')
    ax.set_xlabel('Bins')
    ax.set_ylabel('Stats')

plt.tight_layout()
# Show plot
plt.show()


In [None]:
### Now repeat using the low closeness centrality clusters
### Do it yourself

>>> END OF WORKSHOP <br>
>>> THANK YOU FOR JOINING US
>>>