
> **ISO2024 INTRODUCTORY SPATIAL 'OMICS ANALYSIS**
>
>
>- HYBRID : TORONTO & ZOOM
>- 9TH JULY 2024 <br>

>**Module 5 : Realizing the spatial potential in your datasets, part 1** <BR>
>
>**Instructor : Shamini Ayyadhury**
>
---

> TOPICS COVERED

* A. Centrality scores
* B. Ripley's L statistics

***

In [None]:
### Import packages

### we will be using the built-in functions of squidpy to perform the analysis

import squidpy as sq
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [None]:
# Folder that holds the input .h5ad files for this module.
# The loading cell builds file paths with `data_dir + '<file name>'`, so the
# trailing '/' must be kept when pointing this at your own copy of the data.
data_dir = '/home/shamini/data1/data_orig/data/spatial/xenium/10xGenomics/mice_AD_model/wt/module6/' ### data directory

In [None]:
### Load one AnnData object per entry in `h5ad_files` and keep only the cells
### that carry a 'cell_label' annotation.
# NOTE(review): both entries point at the same file, exactly as in the original
# cell - replace the second entry with the second sample's file if two
# different samples are meant to be compared.
h5ad_files = ['adata_wt_banksy.h5ad', 'adata_wt_banksy.h5ad']

adatas = []
for h5ad_file in h5ad_files:
    adata = sc.read_h5ad(data_dir + h5ad_file)
    # Subsetting with a boolean mask returns a *view* of the parent object;
    # .copy() materialises it so the later squidpy calls (which write into
    # .obsp / .uns) operate on a standalone AnnData instead of a view.
    adata = adata[adata.obs['cell_label'].notna()].copy()
    adatas.append(adata)

### quick overview of what was loaded
for adata in adatas:
    print(adata)
    print('')


In [None]:


### Step 1: Build the spatial neighbour graph for every sample
### PARTICIPANTS: edit the values in `neighbor_params` and re-run to see the effect
# NOTE(review): per the squidpy documentation `n_rings` appears to be used only
# when coord_type="grid", so it is presumably ignored here - confirm.
neighbor_params = dict(
    coord_type="generic",
    n_rings=2,
    delaunay=True,
)

for adata in adatas:
    sq.gr.spatial_neighbors(adata, **neighbor_params)


In [None]:
from copy import deepcopy

### Step 2: Centrality scores per cell type, for every sample.
### Each list below holds one entry per sample, in the same order as `adatas`.
df_centrals = []     # full centrality table (copied out of adata.uns)
ser_closeness = []   # closeness centrality, highest first
ser_degree = []      # degree centrality, highest first
ser_cluster = []     # average clustering coefficient, highest first

for adata in adatas:
    ### calling squidpy function to calculate the centrality scores
    sq.gr.centrality_scores(adata, "cell_label")

    # copy centrality data to a new DataFrame so that sorting/editing it here
    # cannot touch the table stored in adata.uns
    df_central = deepcopy(adata.uns["cell_label_centrality_scores"])
    df_centrals.append(df_central)

    # sort clusters based on centrality scores
    # closeness centrality - measure of how close the group is to other nodes.
    ser_closeness.append(df_central["closeness_centrality"].sort_values(ascending=False))

    ser_degree.append(df_central["degree_centrality"].sort_values(ascending=False))

    # clustering coefficient - measure of the degree to which nodes cluster together.
    ser_cluster.append(df_central["average_clustering"].sort_values(ascending=False))

In [None]:

### For every sample, draw the five cell types with the highest and the five
### with the lowest closeness centrality side by side.
# Each sample is paired with its own closeness Series; the loop previously
# indexed the list of Series itself and always plotted the last loaded sample.
for i, (adata, closeness) in enumerate(zip(adatas, ser_closeness)):
    # `closeness` was sorted with ascending=False, so the head of the index is
    # the most central cell types and the tail the least central ones.
    ranked_clusters = closeness.index.tolist()
    panels = [
        ("Top 5 clusters based on closeness centrality", ranked_clusters[:5]),
        ("Bottom 5 clusters based on closeness centrality", ranked_clusters[-5:]),
    ]

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))
    # No list of sample names is defined in this notebook, so the sample is
    # identified by its position in `adatas`.
    fig.suptitle(
        f"Top 5 clusters and bottom 5 clusters based on closeness centrality for sample {i + 1}",
        fontsize=24, y=1.05, x=0.4,
    )

    for ax, (panel_title, inst_clusters) in zip(axs, panels):
        print(inst_clusters)
        sq.pl.spatial_scatter(
            adata, groups=inst_clusters, color="cell_label", size=15, img=False, figsize=(10, 10), palette="tab20", ax=ax
        )
        ax.set_title(panel_title, loc="left")

In [None]:
### Step 3: Ripley's L statistic per cell type, for every sample.
# All settings live in one dict so they are easy to tweak and are guaranteed
# to be identical across samples.
ripley_params = dict(
    cluster_key='cell_label',
    mode='L',
    spatial_key='spatial',
    metric='euclidean',
    n_neigh=2,
    n_simulations=50,
    n_observations=1000,
    max_dist=None,
    n_steps=50,
    seed=None,    # no fixed seed is passed - presumably results vary between runs
    copy=False,   # nothing is returned; the next cell reads adata.uns['cell_label_ripley_L']
)

for adata in adatas:
    sq.gr.ripley(adata, **ripley_params)


In [None]:
### Pull the Ripley results out of every sample.
### Both lists hold one DataFrame per sample, in the same order as `adatas`.
df_cell_ripley = []   # observed L statistic ('L_stat')
sim = []              # simulated L statistic ('sims_stat')

for adata in adatas:
    ripley_res = adata.uns['cell_label_ripley_L']

    # .copy() keeps the tables stored in adata.uns untouched by the edits below
    df_cell_ripley.append(pd.DataFrame(ripley_res['L_stat']).copy())

    # Relabel the per-sample simulation table (not the list that collects them)
    # so it has the same three columns as the observed table and every row is
    # tagged with the pseudo cell type 'sim'.
    sim_df = pd.DataFrame(ripley_res['sims_stat']).copy()
    sim_df.columns = ['bins', 'cell_label', 'stats']
    sim_df['cell_label'] = 'sim'
    sim.append(sim_df)


In [None]:
### Split each sample's observed Ripley table into the five cell types with the
### highest and the five with the lowest closeness centrality.
### Both lists hold one DataFrame per sample.
df_cell_ripley_high_cen = []
df_cell_ripley_low_cen = []

# Pair every Ripley table with the closeness ranking of the same sample;
# `df_cell_ripley` is a list, so it cannot be filtered directly.
for ripley_df, closeness in zip(df_cell_ripley, ser_closeness):
    ranked_clusters = closeness.index.tolist()   # sorted highest closeness first
    cell_labels = ripley_df['cell_label']

    # .copy() so that columns added later do not write into a slice of ripley_df
    df_cell_ripley_high_cen.append(ripley_df[cell_labels.isin(ranked_clusters[:5])].copy())
    df_cell_ripley_low_cen.append(ripley_df[cell_labels.isin(ranked_clusters[-5:])].copy())

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Plot, per sample, the observed Ripley L curves of the high-closeness cell
### types together with the simulated curve.
# Each sim[i] is expected to have the columns 'bins', 'cell_label' and 'stats',
# matching df_cell_ripley_high_cen[i].

# One panel per sample; squeeze=False keeps `axs` two-dimensional even when
# there is a single sample, and max(..., 1) avoids asking for zero columns.
n_samples = len(df_cell_ripley_high_cen)
fig, axs = plt.subplots(1, max(n_samples, 1), figsize=(20, 6), squeeze=False)

for i, (high_df, sim_df) in enumerate(zip(df_cell_ripley_high_cen, sim)):
    ax = axs[0][i]

    # Add a distinguishing column (written into the stored frames)
    high_df['source'] = 'High Closeness Centrality'
    sim_df['source'] = 'Simulated Data'

    # Make sure the simulated rows carry a label to colour by
    if 'cell_label' not in sim_df.columns:
        sim_df['cell_label'] = 'Simulated'  # or any other distinguishing label

    # Concatenate this sample's two DataFrames (not the lists that hold them)
    combined_df = pd.concat([high_df, sim_df])

    # Plotting
    sns.lineplot(data=combined_df, x='bins', y='stats', hue='cell_label', style='source', legend='brief', ax=ax)

    # Customize the legend: keep only entries that are cell labels, dropping
    # the section headers and the 'source' style entries seaborn adds
    handles, labels = ax.get_legend_handles_labels()
    present_labels = set(combined_df['cell_label'].unique())
    kept = [(h, l) for h, l in zip(handles, labels) if l in present_labels]
    if kept:  # zip(*[]) cannot be unpacked into two names
        handles, labels = zip(*kept)
        ax.legend(handles, labels, loc='center left', bbox_to_anchor=(1, 0.5), title='Legend')

    # Set plot title and labels. No list of sample names is defined in this
    # notebook, so the sample is identified by its position.
    ax.set_title(f'Ripley L-function for High Closeness Centrality and Simulated Data for sample {i + 1}')
    ax.set_xlabel('Bins')
    ax.set_ylabel('Stats')

# Show plot
plt.show()
