
> **ISO2024 INTRODUCTORY SPATIAL 'OMICS ANALYSIS**
>
>
>- HYBRID : TORONTO & ZOOM
>- 10TH JULY 2024 <br>

>**Module 6 : Realizing the spatial potential in your datasets, part 2** <BR>
>
>**Instructor : Shamini Ayyadhury**
>
---

> TOPICS COVERED

* A. Centrality scores
* B. Ripley's L statistics

***

In [None]:
### Import packages
### we will be using the built-in functions of squidpy to perform the analysis

import os # os is part of the Python standard library; used here to read an optional data-directory override from the environment
from copy import deepcopy # deepcopy returns an independent copy of an object, including everything nested inside it

import matplotlib.pyplot as plt # matplotlib is a package that provides tools for the visualization of data
import numpy as np # numpy is a package that provides tools for the manipulation of data
import pandas as pd # pandas is a package that provides tools for the manipulation of data
import scanpy as sc # scanpy is a package that provides tools for the analysis of single-cell RNA-seq data
import seaborn as sns # seaborn is a package that provides tools for the visualization of data
import squidpy as sq # squidpy is a package that provides tools for the analysis of spatial transcriptomics data


In [None]:
# Directory that holds the module 3 outputs (the saved AnnData files read below).
# The default is the path used when the notebook was written; set the
# SPATIAL_WORKSHOP_OUT environment variable to point at your own copy instead of
# editing the notebook.
out = os.environ.get('SPATIAL_WORKSHOP_OUT') or '/home/shamini/data/projects/spatial_workshop/out/module3/'
if not out.endswith('/'):
    out += '/'  # later cells build file paths with `out + 'sub/dir/file'`

# Short sample labels used in the plot titles; names[i] labels adatas[i], so the
# order here must match the order in which the samples are loaded.
names = ['wt', 'AD']

>>> Read anndata object created in module 3

In [None]:
# Files written by module 3, one per sample. The order must match `names`,
# because later cells pair adatas[i] with names[i].
adata_paths = [
    out + 'wt_13_4mths/adata_wt_banksy.h5ad',
    out + 'TgCRND8_17_8mths/adata_module3b_banksy.h5ad',
]

adatas = []
for adata_path in adata_paths:
    adata = sc.read_h5ad(adata_path)
    # Keep only cells that carry a cell_label. Subsetting an AnnData returns a
    # view of the original; .copy() turns it into an independent object so the
    # later cells that write results into it (graph, centrality, Ripley) do not
    # trigger an implicit view-to-copy conversion.
    adata = adata[adata.obs['cell_label'].notna()].copy()
    adatas.append(adata)

# Print a summary of each loaded object
for adata in adatas:
    print(adata)
    print('')


In [None]:
### Calculate spatial neighbors
### The neighbor graph of each sample is built from a Delaunay triangulation of the cell coordinates.
### n_rings is not passed: squidpy documents it as used only when coord_type="grid",
### so it has no effect with coord_type="generic".

for adata in adatas:
    sq.gr.spatial_neighbors(adata, coord_type="generic", delaunay=True)


>>> 1. The centrality function computes 3 scores for each cell label: closeness centrality, degree centrality and average clustering.
>>> 2. We will compute all three, but review only one (closeness centrality) as an example.

COMPUTE CENTRALITY SCORES

In [None]:
# Per-sample results, in the same order as `adatas`:
deep_centrals = []   # full centrality table of each sample
ser_closeness = []   # closeness centrality per cell label, highest first
ser_degree = []      # degree centrality per cell label, highest first
ser_cluster = []     # average clustering per cell label, highest first

for adata in adatas:
    # Stores its result in adata.uns["cell_label_centrality_scores"]
    sq.gr.centrality_scores(adata, "cell_label")

    # Work on an independent copy so the table stored in adata.uns stays untouched
    deep_central = deepcopy(adata.uns["cell_label_centrality_scores"])

    # Rank the cell labels by each score, from highest to lowest
    ser_closeness.append(deep_central["closeness_centrality"].sort_values(ascending=False))
    ser_degree.append(deep_central["degree_centrality"].sort_values(ascending=False))
    ser_cluster.append(deep_central["average_clustering"].sort_values(ascending=False))

    deep_centrals.append(deep_central)

In [None]:

# For every sample, draw two spatial scatter plots side by side: the 5 cell
# labels with the highest closeness centrality (left) and the 5 with the
# lowest (right).
for i, closeness in enumerate(ser_closeness):
    ranked_labels = closeness.index.tolist()
    panels = [
        (ranked_labels[:5], "Top 5 clusters based on closeness centrality"),
        (ranked_labels[-5:], "Bottom 5 clusters based on closeness centrality"),
    ]

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    fig.suptitle(
        f"Top 5 clusters and bottom 5 clusters based on closeness centrality for {names[i]}",
        fontsize=24,
        y=1.05,
        x=0.4,
    )

    for panel_ax, (panel_labels, panel_title) in zip(axs, panels):
        print(panel_labels)
        sq.pl.spatial_scatter(
            adatas[i],
            groups=panel_labels,
            color="cell_label",
            size=15,
            img=False,
            figsize=(10, 10),
            palette="tab20",
            ax=panel_ax,
        )
        panel_ax.set_title(panel_title, loc="left")

COMPUTE RIPLEY'S FUNCTION

In [None]:
# Compute Ripley's L statistic per cell label for each sample.
# The result is stored in adata.uns['cell_label_ripley_L'] (copy=False).
for adata in adatas:
    sq.gr.ripley(
        adata,
        cluster_key='cell_label',
        mode='L',
        spatial_key='spatial',
        metric='euclidean',
        n_neigh=2,
        n_simulations=50,
        n_observations=1000,
        max_dist=None,
        n_steps=50,
        seed=42,  # fixed seed so the simulated curves are the same on every run
        copy=False,
    )


In [None]:
# Pull the Ripley's L results out of each sample, in the same order as `adatas`:
df_cell_ripley = []   # observed L statistic per cell label
sim = []              # simulated L statistic, relabelled so it plots as one 'sim' group

for adata in adatas:
    ripley_result = adata.uns['cell_label_ripley_L']

    # Explicit copies: depending on the pandas version, pd.DataFrame(existing_df)
    # can share data with its input, and the simulation frame is renamed and
    # overwritten below. Copying keeps the results stored in adata.uns intact.
    df_cell_ripley.append(ripley_result['L_stat'].copy())

    s = ripley_result['sims_stat'].copy()
    # Rename the three columns positionally so the frame lines up with the observed table
    s.columns = ['bins', 'cell_label', 'stats']
    # Collapse all simulations into a single pseudo cell label
    s['cell_label'] = 'sim'
    sim.append(s)


In [None]:
# Split each sample's observed Ripley table into the rows belonging to the
# 5 cell labels ranked first in closeness centrality and the 5 ranked last.
df_cell_ripley_high_cen, df_cell_ripley_low_cen = [], []

for i, closeness in enumerate(ser_closeness):
    ripley_df = df_cell_ripley[i]
    ranked_labels = closeness.index.tolist()

    in_top = ripley_df['cell_label'].isin(ranked_labels[:5])
    in_bottom = ripley_df['cell_label'].isin(ranked_labels[-5:])

    # .copy() so the subsets are independent of the full table
    df_cell_ripley_high_cen.append(ripley_df[in_top].copy())
    df_cell_ripley_low_cen.append(ripley_df[in_bottom].copy())

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# One panel per sample: Ripley's L curves of the high-closeness cell labels
# plotted together with the simulated curves.
fig, axs = plt.subplots(1, 2, figsize=(20, 6))

for i in range(len(adatas)):
    # .assign returns new frames, so the shared lists are not modified and
    # re-running this cell gives the same result.
    observed = df_cell_ripley_high_cen[i].assign(source='High Closeness Centrality')
    simulated = sim[i].assign(source='Simulated Data')
    combined_df = pd.concat([observed, simulated])

    ax = sns.lineplot(data=combined_df, x='bins', y='stats', hue='cell_label', style='source', legend='brief', ax=axs[i])

    # Keep only the legend entries whose label is one of the plotted cell_label
    # values; the remaining entries of the default legend are dropped.
    shown_labels = set(combined_df['cell_label'].unique())
    handles, labels = ax.get_legend_handles_labels()
    filtered_handles_labels = [(h, l) for h, l in zip(handles, labels) if l in shown_labels]
    if filtered_handles_labels:  # nothing matched -> leave the default legend in place
        handles, labels = zip(*filtered_handles_labels)
        ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), title='Legend')

    ax.set_title(f'Ripley L-function for High Closeness Centrality and Simulated Data for {names[i]}')
    ax.set_xlabel('Bins')
    ax.set_ylabel('Stats')

plt.tight_layout()
plt.show()


>>> END OF WORKSHOP <br>
>>> THANK YOU FOR JOINING US
>>>