
> **ISO2024 INTRODUCTORY SPATIAL 'OMICS ANALYSIS**
>
>
>- HYBRID : TORONTO & ZOOM
>- 9TH JULY 2024 <br>

>**Module 3 : Building your spatial model** <BR>
>
>**Instructor : Shamini Ayyadhury**
>
---

```
MODULE 3 : Supplementary script02 - CREATING BANKSY OBJECT FOR WT MOUSE SAMPLE
Pearson residual transformation, followed by Banksy spatial clustering, was repeated for the WT mouse sample and the results were stored in the module 3 output folder.
```

In [None]:
### import the following libraries. Some of these were used in the previous notebook and we are using them here as well
import sys # system specific parameters and functions
import pandas as pd # data manipulation and analysis
import numpy as np # numerical computing library
import matplotlib.pyplot as plt # plotting library
import seaborn as sns # data visualization library based on matplotlib
import scanpy as sc # single-cell analysis in Python
import os # operating system dependent functionality

# Make the workshop helper module and the bundled Banksy_py checkout importable.
# The import system uses sys.path entries literally and never expands "~"
# (that is a shell feature), so the home directory is resolved explicitly here.
sys.path.append(os.path.expanduser('~/data/projects/spatial_workshop/'))
sys.path.append(os.path.expanduser('~/data/projects/spatial_workshop/Banksy_py'))
import pre_processing_fnc as ppf # from here onwards we will only use the function for memory regulation



In [None]:
### directory & filepaths

# "~" is expanded by the shell, not by os.makedirs: left as-is, makedirs would
# create a literal "~" directory inside the current working directory.
# Resolve the home directory once so every later `out + ...` path is absolute.
# The trailing "/" is kept because later cells build paths by concatenation.
data_dir = os.path.expanduser('~/data1/data_orig/data/spatial/xenium/10xGenomics/')
out = os.path.expanduser('~/data/projects/spatial_workshop/out/')

os.makedirs(out+'module3/wt_13_4mths', exist_ok=True) # create a new directory to store the output files


color = ppf.colors()  # palette from the workshop helper module; passed as `palette=` to the seaborn plots in later cells


In [None]:
### IMPORT DATA

# ANNDATA OBJECT FROM MODULE 2
# The spatial analysis starts from the AnnData object written out in module 2.
adata = sc.read_h5ad(f'{out}module2/wt_13_4mths/adata_wt.h5ad')

# Per-cell annotation table, indexed by its first column; only the
# 'predicted.id' column is kept.
cell_label = pd.read_csv(f'{out}module3/allen_annotations/wt_allen.csv', index_col=0)['predicted.id']

# Keep only the cells that appear in both adata and the annotation table
common_indices = adata.obs.index.intersection(cell_label.index)
adata = adata[common_indices].copy()

# Put the labels in adata's cell order and attach them as an obs column
cell_label_subset = cell_label.reindex(adata.obs.index)
adata.obs['cell_label'] = cell_label_subset

# Discard cells whose label is still missing after the alignment
adata = adata[adata.obs['cell_label'].notna()].copy()

print(adata)

We will work on the basic code for spatial and non-spatial clustering. After understanding the principles, you will be provided with an exercise using one of the various spatial clustering packages (Banksy).

1. Transform data using PCA and apply clustering algorithm

In [None]:
    
### Normalise the expression matrix with Pearson residuals (PCA and clustering happen in later cells)
# Overwrite adata.X with a copy of the matrix stored in adata.raw, so .raw itself is left untouched.
# NOTE(review): .copy() does not convert a sparse matrix to dense; presumably adata.raw holds the
# unnormalised counts from module 2 - confirm.
adata.X = adata.raw.X.copy()
sc.experimental.pp.normalize_pearson_residuals(adata)  # called without a return value, so it operates on adata directly
print('normalized')



In [None]:
'''
1. FIRST WE WILL PERFORM A NEAREST NEIGHBOR BASED DISTANCE CALCULATION TO COMPUTE THE NECESSARY DISTANCES BETWEEN THE CELLS
'''

# set params (consumed by initialize_banksy / generate_banksy_matrix in the following cells)
# ==========
plot_graph_weights = True  # NOTE(review): not referenced by any other cell visible in this notebook
k_geom = 15 # only for fixed type; passed positionally to initialize_banksy
max_m = 1 # azimuthal transform up to kth order
nbr_weight_decay = "scaled_gaussian" # can also be "reciprocal", "uniform" or "ranked"


In [None]:
from Banksy_py.banksy.main import median_dist_to_nearest_neighbour

# Find median distance to closest neighbours, the median distance will be `sigma`
# NOTE(review): `nbrs` is not used again in the cells visible here
nbrs = median_dist_to_nearest_neighbour(adata, key = 'spatial')

# NOTE(review): this import goes through the top-level `banksy` package while the one above uses
# `Banksy_py.banksy`; both resolve only because both parent folders were appended to sys.path.
from banksy.initialize_banksy import initialize_banksy

# Create the Banksy dictionary from adata; every diagnostic plot is switched off.
# The tuple presumably names the obs x/y coordinate columns and the obsm coordinate key - confirm.
banksy_dict = initialize_banksy(
    adata,
    ('x_location', 'y_location', 'spatial'),
    k_geom,
    nbr_weight_decay=nbr_weight_decay,
    max_m=max_m,
    plt_edge_hist=False,
    plt_nbr_weights=False,
    plt_agf_angles=False, # takes long time to plot
    plt_theta=False,
)

banksy_dict  # bare expression: in a notebook this displays the dictionary as the cell output

### remove all the warnings and messages from the output



In [None]:
'''
2. NEXT WE WILL CONSTRUCT A BANKSY MATRIX
'''

from Banksy_py.banksy.embed_banksy import generate_banksy_matrix

### the following are the main hyperparameters for the banksy algorithm
### -------------------------------------------------------------------

resolutions = [0.3] ### clustering resolution (passed to run_Leiden_partition in a later cell)
pca_dims = [18] ### number of dimensions the data is reduced to (passed to pca_umap in a later cell)
lamda_list = [0, 0.25, 0.50, 0.75, 1.00] ### list of lambda values; a higher value results in more domain-specific clustering

# Returns the updated banksy_dict together with the Banksy matrix
banksy_dict, banksy_matrix = generate_banksy_matrix(adata, banksy_dict, lamda_list, max_m, verbose=False)


In [None]:
### append non-spatial results to the banksy_dict for comparison

from banksy.main import concatenate_all
# The entry is stored under the key 'nonspatial' with lambda 0.0; the later label columns are
# named 'labels_nonspatial_...nc0.00...' accordingly.
banksy_dict['nonspatial'] = {### here we add the non-spatial matrix (adata.X) to obtain the non-spatial clustering results
    0.0: {"adata": concatenate_all([adata.X], 0, adata=adata), }
    }

In [None]:
'''
3. BANKSY APPLIES PCA AND UMAP OVER THE SPATIAL DERIVED MATRIX, FOLLOWING BY LEIDEN CLUSTERING
'''

from banksy_utils.umap_pca import pca_umap

# Called for its effect on banksy_dict (no return value is captured); diagnostic output is off.
pca_umap(banksy_dict,
         pca_dims = pca_dims,
         add_umap = True,
         plt_remaining_var = False,
         verbose = False)

from banksy.cluster_methods import run_Leiden_partition
seed=329  # fixed seed handed to the partitioning step below

# results_df is indexed by parameter-set name; the next cell reads its 'relabeled' and 'adata' columns.
# NOTE(review): num_iterations = -1 presumably means "iterate until convergence" - confirm against the Banksy docs.
results_df, max_num_labels = run_Leiden_partition(
    banksy_dict,
    resolutions,
    num_nn = 50,
    num_iterations = -1,
    partition_seed = seed,
    match_labels = True,
    verbose = False
)



In [None]:

### Save the results to the appropriate slots of the original adata object

p_names = results_df.index

for p_name in p_names:
    labels = results_df.loc[p_name, 'relabeled']
    adata_results = results_df.loc[p_name, "adata"]

    # adata.X and adata.obsm are positional, so the result object must list the
    # same cells in the same order. Replacing adata.obs with a re-ordered frame
    # would silently detach the annotations from the expression matrix, so a
    # mismatch is reported instead of being papered over.
    if not adata_results.obs.index.equals(adata.obs.index):
        raise ValueError(
            f"cell index of result '{p_name}' does not match adata.obs; "
            "cannot copy labels/embeddings without misaligning cells"
        )

    label_name = f"labels_{p_name}"
    print(label_name)

    # Cluster ids are stored as strings and then as a categorical so that the
    # plotting functions treat them as discrete groups.
    adata_results.obs[label_name] = np.char.mod('%d', labels.dense)
    adata_results.obs[label_name] = adata_results.obs[label_name].astype('category')

    # Series assignment aligns on the (verified identical) cell index
    adata.obs[label_name] = adata_results.obs[label_name]

# `adata_results` is whatever the loop saw last, i.e. the final row of results_df.
# NOTE(review): confirm that this row is the run whose PCA/UMAP should be stored as the
# "banksy" embedding (the 'nonspatial' entry was appended to banksy_dict last).
adata.obsm['pc18_banksy'] = adata_results.obsm['reduced_pc_18'].copy()
adata.obsm['umap18_banksy'] = adata_results.obsm['reduced_pc_18_umap'].copy()



PLOT AND DISCUSS

In [None]:

# Settings shared by every embedding panel in this cell
embedding_kwargs = dict(basis='umap18_banksy', show=False, legend_loc='on data', legend_fontsize=6)

# B2: stored Banksy UMAP coloured by the annotated cell type
fig, ax = plt.subplots(figsize=(4, 4))
fig.suptitle('B2. Banksy clustering - labeled by cell-type', fontsize=16, x=0.5, y=1.05)
sc.pl.embedding(adata, color='cell_label', ax=ax, **embedding_kwargs)

# B3: the same embedding coloured by four of the clustering results, one per panel
fig, axes = plt.subplots(2, 2, figsize=(9, 9))
axes = axes.flatten()
fig.suptitle('B3. Banksy clustering - labeled by Banksy clustering', fontsize=16, x=0.3, y=0.95)

cluster_keys = (
    'labels_nonspatial_pc18_nc0.00_r0.30',
    'labels_scaled_gaussian_pc18_nc0.25_r0.30',
    'labels_scaled_gaussian_pc18_nc0.75_r0.30',
    'labels_scaled_gaussian_pc18_nc1.00_r0.30',
)
for panel, cluster_key in zip(axes, cluster_keys):
    sc.pl.embedding(adata, color=cluster_key, ax=panel, **embedding_kwargs)



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines

# Plot parameters
# ---------------
color = ppf.colors()
marker = 'o'
s = 1.5
markersize = 9
# ---------------


def _b4_scatter_panel(cell, hue_key, title):
    """Draw one spatial scatter panel; return the axes and its detached legend."""
    panel = plt.subplot(cell)
    sns.scatterplot(data=adata.obs, x='x_location', y='y_location', hue=hue_key, palette=color, s=s, ax=panel)
    panel.set_title(title, fontsize=15, loc='left')
    detached = panel.get_legend()  # keep a handle on the legend before taking it off the plot
    detached.remove()
    return panel, detached


def _b4_legend_panel(cell, source_legend, title, fontsize):
    """Fill an otherwise empty grid cell with a legend rebuilt from `source_legend`."""
    panel = plt.subplot(cell)
    panel.axis('off')
    proxies = [
        mlines.Line2D([], [], color=handle.get_color(), marker=marker, linestyle='', markersize=markersize)
        for handle in source_legend.legend_handles
    ]
    texts = [entry.get_text() for entry in source_legend.get_texts()]
    panel.legend(proxies, texts, loc='center', ncol=1, fontsize=fontsize, title=title)
    return panel


# Figure layout: scatter | legend | scatter | scatter | legend
fig = plt.figure(figsize=(24, 6.5))
fig.suptitle('B4. Comparison of cell labels, non-spatial and spatial clustering', fontsize=24, x=0.2, y=1.05)

gs = gridspec.GridSpec(1, 5, width_ratios=[1, 0.2, 1, 1, 0.2])

# Annotated cell types and their legend
ax0, ax0_legend = _b4_scatter_panel(gs[0], 'cell_label', 'Cell Label')
ax1 = _b4_legend_panel(gs[1], ax0_legend, 'cell_label', 8)

# Non-spatial and spatial clustering; the cluster legend is rebuilt from the non-spatial panel
ax2, ax2_legend = _b4_scatter_panel(gs[2], 'labels_nonspatial_pc18_nc0.00_r0.30', 'Non-spatial clustering')
ax3, _ax3_legend = _b4_scatter_panel(gs[3], 'labels_scaled_gaussian_pc18_nc0.75_r0.30', 'Spatial clustering')
ax4 = _b4_legend_panel(gs[4], ax2_legend, 'clusters', 9)

# Adjust the space between the subplots
plt.subplots_adjust(wspace=0.01)
sns.despine()
plt.tight_layout()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines

# Plot parameters
# ---------------
fig = plt.figure(figsize=(30, 15))
fig.suptitle('B5. Degree of cluster resolution on collective grouping of cell-types', fontsize=27, x=0.2, y=1.05)

color = ppf.colors()
marker = 'o'
s = 2.7
markersize = 12

# Two stacked rows: three equal panels on top; two panels, a legend slot and a spacer below
gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
top_gs = gridspec.GridSpecFromSubplotSpec(1, 3, subplot_spec=gs[0], width_ratios=[1, 1, 1])
bottom_gs = gridspec.GridSpecFromSubplotSpec(1, 4, subplot_spec=gs[1], width_ratios=[1, 1, 0.2, 0.8])
# ---------------

# (grid cell, obs column used for colouring, panel title), in drawing order
panel_specs = [
    (top_gs[0], 'labels_nonspatial_pc18_nc0.00_r0.30', 'Non-spatial clustering'),
    (top_gs[1], 'labels_scaled_gaussian_pc18_nc0.25_r0.30', 'Spatial clustering - 0.25'),
    (top_gs[2], 'labels_scaled_gaussian_pc18_nc0.50_r0.30', 'Spatial clustering - 0.50'),
    (bottom_gs[0], 'labels_scaled_gaussian_pc18_nc0.75_r0.30', 'Spatial clustering - 0.75'),
    (bottom_gs[1], 'labels_scaled_gaussian_pc18_nc1.00_r0.30', 'Spatial clustering - 1.00'),
]

scatter_axes = []
detached_legends = []
for cell, hue_key, panel_title in panel_specs:
    panel = plt.subplot(cell)
    sns.scatterplot(data=adata.obs, x='x_location', y='y_location', hue=hue_key, palette=color, s=s, ax=panel)
    panel.set_title(panel_title, fontsize=18, loc='left')
    panel_legend = panel.get_legend()  # keep the legend object, then take it off the plot
    panel_legend.remove()
    scatter_axes.append(panel)
    detached_legends.append(panel_legend)

ax0, ax1, ax2, ax3, ax4 = scatter_axes
ax0_legend = detached_legends[0]  # the shared cluster legend is rebuilt from the non-spatial panel

# Legend-only cell for the cluster colours
ax5 = plt.subplot(bottom_gs[2])
ax5.axis('off')
handles_clusters = [
    mlines.Line2D([], [], color=handle.get_color(), marker='o', linestyle='', markersize=markersize)
    for handle in ax0_legend.legend_handles
]
labels_clusters = [entry.get_text() for entry in ax0_legend.get_texts()]
ax5.legend(handles_clusters, labels_clusters, loc='center', ncol=1, fontsize=8, title='clusters')

# Adjust the space between the subplots
plt.subplots_adjust(wspace=0.01)
sns.despine()
plt.tight_layout()


In [None]:

import matplotlib.gridspec as gridspec

# B6: stacked bar charts showing, for every annotated cell type (x axis), how its
# cells are distributed over the clusters (hue) - non-spatial vs. spatial clustering.
fig = plt.figure(figsize=(15, 4.5))
fig.suptitle('B6. Comparison of cell labels, non-spatial and spatial clustering', fontsize=16, x=0.2, y=1.05)

gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 0.2])

# First plot: non-spatial clustering
ax1 = fig.add_subplot(gs[0])
sns.histplot(data=adata.obs, hue='labels_nonspatial_pc18_nc0.00_r0.30', x='cell_label', palette=color, multiple='stack', shrink=0.8, ax=ax1)
ax1.get_legend().remove()  # Remove the legend from the first plot
ax1.xaxis.set_tick_params(rotation=45)
ax1.set_title('non-spatial clustering of cell types', loc='left')
for label in ax1.get_xticklabels():
    label.set_ha('right')  # anchor rotated tick labels at their right end

# Second plot: spatial clustering (lambda = 0.75)
ax2 = fig.add_subplot(gs[1])
sns.histplot(data=adata.obs, hue='labels_scaled_gaussian_pc18_nc0.75_r0.30', x='cell_label', palette=color, multiple='stack', shrink=0.8, ax=ax2)
ax2.set_title('spatial clustering of cell-types', loc='left')
ax2_legend = ax2.legend_  # Get the legend from the second plot
ax2.get_legend().remove()  # Remove the legend from the second plot
ax2.xaxis.set_tick_params(rotation=45)
for label in ax2.get_xticklabels():
    label.set_ha('right')

# The third GridSpec cell only reserves room; the legend itself is attached to the figure
ax3 = fig.add_subplot(gs[2])
ax3.axis('off')
handles, labels = ax2_legend.legend_handles, [t.get_text() for t in ax2_legend.get_texts()]
# The entries are the hue levels (cluster ids), not cell types, so the legend is
# titled 'clusters' like the cluster legends of the other figures.
fig.legend(handles, labels, loc='center right', ncol=1, fontsize=8, title='clusters', bbox_to_anchor=(.985, 0.55))

sns.despine()
plt.tight_layout()
plt.show()


In [None]:
adata.write_h5ad(out+'module3/wt_13_4mths/adata_wt_banksy.h5ad') ### save the anndata object, now carrying the cluster label columns in .obs and the 'pc18_banksy' / 'umap18_banksy' arrays in .obsm