# The Single-Cell Spatial Transcriptomics Analysis (ScSTA) Cookbook

```
Arun Das
Postdoctoral Associate
Hillman Cancer Center
University of Pittsburgh Medical Center
```

## Non-Small Cell Lung Cancer Exploration

## Install packages

## Import packages

In [None]:
%matplotlib inline
import warnings
# Suppress all warnings
warnings.filterwarnings('ignore')

import os
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
import scanpy as sc
import numpy as np
import squidpy as sq
import pandas as pd

import tqdm as notebook_tqdm

pd.set_option("display.max_columns", None)

# Plotting options, change to your liking.
# All options are passed in ONE call on purpose: each call to
# `set_figure_params` re-applies its defaults for the arguments that are not
# given, so spreading the options over several calls resets `frameon` to True.
sc.set_figure_params(dpi=80, frameon=False, facecolor="white", figsize=(4, 4))
sc.settings.verbosity = 0

## Download CosMx SMI NSCLC FFPE Dataset for analysis

### https://nanostring.com/products/cosmx-spatial-molecular-imager/nsclc-ffpe-dataset/

In [None]:
# Dataset location, resolved against the current working directory
nanostring_dir = Path().resolve().joinpath("tutorial_data", "nanostring_data")
sample_dir = nanostring_dir.joinpath("Lung9_Rep1", "Lung9_Rep1-Flat_files_and_images")

# Flat files of the Lung9_Rep1 sample that are handed to the reader
nanostring_files = {
    "counts_file": "Lung9_Rep1_exprMat_file.csv",
    "meta_file": "Lung9_Rep1_metadata_file.csv",
    "fov_file": "Lung9_Rep1_fov_positions_file.csv",
}

adata = sq.read.nanostring(path=sample_dir, **nanostring_files)

In [None]:
adata

In [None]:
adata.obs

In [None]:
# Tag every cell with the name of the sample it was loaded from
adata.obs['tissue'] = "Lung9_Rep1"

In [None]:
adata.obs.head()

## Load the cell type information for the NSCLC sample

In [None]:
ctype_csv = sample_dir / 'Lung9_Rep1_ctypes.csv'
df = pd.read_csv(ctype_csv, index_col=0)

# Re-key the rows as "<cell_ID>_<fov>"; this index is the join key used when
# the annotation is combined with `adata.obs`
df.index = [f'{cell_id}_{fov}' for cell_id, fov in zip(df['cell_ID'], df['fov'])]
df

## Combine the cell type information with the gene expressions

In [None]:
# Attach the cell-type annotation to the observation table.
#
# The annotation is aligned on the cell index (a left join): exactly the cells
# present in `adata` are kept, in their original order, and every existing
# `adata.obs` column keeps its dtype because it is never touched.
cell_types = df['cell_type']

# If a cell is annotated more than once, keep its first annotation so that
# the alignment below is unambiguous
cell_types = cell_types[~cell_types.index.duplicated(keep='first')]

# Cells without an annotation are labelled 'unknown'. The result is assigned
# back explicitly instead of using `fillna(inplace=True)` on a column
# selection, which does not reliably modify the parent frame.
cell_types = cell_types.reindex(adata.obs.index).fillna('unknown')

# Collapse the tumor sub-clusters into a single 'tumor' label; the replacement
# is applied to the cell-type labels only, not to the whole observation table
tumor_subclusters = [f'tumor {i}' for i in (5, 6, 9, 12, 13)]
cell_types = cell_types.replace(dict.fromkeys(tumor_subclusters, 'tumor'))

adata_ctype = adata.obs.copy()
adata_ctype['cell_type'] = cell_types

adata.obs = adata_ctype

In [None]:
adata.obs

In [None]:
adata

## Quality Control

In [None]:
# Flag the probes whose name starts with "NegPrb" and include them as a
# QC variable when computing the per-cell / per-gene metrics
is_negative_probe = adata.var_names.str.startswith("NegPrb")
adata.var["NegPrb"] = is_negative_probe
sc.pp.calculate_qc_metrics(adata, qc_vars=["NegPrb"], inplace=True)

In [None]:
# Drop the "NegPrb" probes. `.copy()` turns the subset into an independent
# AnnData object instead of a view, because the following cells filter it
# in place.
adata = adata[:, ~adata.var_names.str.startswith("NegPrb")].copy()

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(15, 4))

# One histogram per panel: (panel title, values to plot)
qc_panels = [
    ("Total transcripts per cell", adata.obs["total_counts"]),
    ("Unique transcripts per cell", adata.obs["n_genes_by_counts"]),
    ("Transcripts per FOV", adata.obs.groupby("fov")["total_counts"].sum()),
]

for panel_ax, (panel_title, values) in zip(axs, qc_panels):
    panel_ax.set_title(panel_title)
    sns.histplot(values, kde=False, ax=panel_ax)

plt.tight_layout()

## Morphology Image Summary

In [None]:
fig, axs = plt.subplots(1, 4, figsize=(15, 4))

# One histogram per stain: (panel title, column of `adata.obs`)
stain_panels = [
    ("Membrane Stain", "Mean.MembraneStain"),
    ("PanCK", "Mean.PanCK"),
    ("CD45", "Mean.CD45"),
    ("CD3", "Mean.CD3"),
]

for panel_ax, (panel_title, obs_column) in zip(axs, stain_panels):
    panel_ax.set_title(panel_title)
    sns.histplot(adata.obs[obs_column], kde=False, ax=panel_ax)

plt.tight_layout()

## Filter the cells and genes

In [None]:
# Filtering thresholds
min_counts_per_cell = 100
min_cells_per_gene = 100

sc.pp.filter_cells(adata, min_counts=min_counts_per_cell)
sc.pp.filter_genes(adata, min_cells=min_cells_per_gene)

In [None]:
adata.obs

In [None]:
# FOV identifiers are kept as strings
selected_fovs = [str(fov_id) for fov_id in (17, 18, 19, 13, 14, 15)]

## Spatial plot of cell types (with cell segmentations)

In [None]:
plot_focus = "cell_type"

# Layout and appearance of the grid of selected FOVs
segment_kwargs = dict(
    color=plot_focus,
    library_key="fov",
    library_id=selected_fovs,
    seg_cell_id="cell_ID",
    seg_outline=True,
    img=False,
    colorbar=False,
    hspace=0,
    wspace=0,
    ncols=3,
    legend_loc=None,
    outline=False,
    axis_label=None,
    figsize=(7, 4.5),
    frameon=False,
    title=None,
    return_ax=True,
)
axs_scatter = sq.pl.spatial_segment(adata, **segment_kwargs)

# Blank out the title of every panel
for panel_ax in axs_scatter:
    panel_ax.set_title("")

# Pack the panels tightly together
plt.subplots_adjust(hspace=0.0001, wspace=0.0001)

# Optional: save the figure before showing it
# plt.savefig(os.path.join(sample_dir, 'generated_figures', f'whole_tissue_{plot_focus}.png'), dpi=200)
plt.show()

In [None]:
adata

## Subset the AnnData object

In [None]:
# Keep only the cells of the selected FOVs. `.copy()` makes the subset an
# independent AnnData object instead of a view, because the following cells
# write to it (layers, normalization, embeddings).
adata = adata[adata.obs['fov'].isin(selected_fovs)].copy()

## Spatial plot of cell types (2D scatter plot)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(14, 5))

# Scatter the cells at their global pixel coordinates, colored by cell type
ax = sc.pl.scatter(
    adata,
    x='CenterX_global_px',
    y='CenterY_global_px',
    color='cell_type',
    show=False,
    size=15,
    frameon=False,
    ax=ax,
)

# Turn off x and y axis ticks
ax.set_xticks([])
ax.set_yticks([])

plt.tight_layout()

# `savefig` does not create missing directories, so make sure the output
# folder exists before writing the figure
figure_dir = Path(sample_dir) / 'generated_figures'
figure_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_dir / 'scatter_plot_celltypes.png', dpi=200)

## Carry out Preprocessing

In [None]:
# Keep a copy of the expression matrix as it is at this point in a separate
# layer; the following cells call `normalize_total` and `log1p` on `adata`
adata.layers["counts"] = adata.X.copy()

### Normalize the data based on the total reads

In [None]:
sc.pp.normalize_total(adata)

### Log normalize the dataset

In [None]:
sc.pp.log1p(adata)

### Find the highly variable genes

In [None]:
sc.pp.highly_variable_genes(adata)

In [None]:
sc.pl.highly_variable_genes(adata)

### Carry out PCA to reduce the dimensions of the dataset

In [None]:
sc.pp.pca(adata, svd_solver='arpack')

In [None]:
sc.pl.pca(adata, color='NDRG1')

In [None]:
sc.pl.pca(adata, color='COL1A1')

### Compute the neighborhood graph

In [None]:
sc.pp.neighbors(adata, n_neighbors=100)

In [None]:
adata

### Compute the UMAP

In [None]:
sc.tl.umap(adata)

### Cluster the UMAP

#### We can use any clustering algorithm here. Two of the most popular are Leiden and Louvain.

In [None]:
sc.tl.louvain(adata)

In [None]:
sc.tl.leiden(adata)

In [None]:
# One UMAP panel per key: two QC metrics followed by the two clusterings
umap_color_keys = ["total_counts", "n_genes_by_counts", "leiden", "louvain"]
sc.pl.umap(adata, color=umap_color_keys, wspace=0.4)

## Plot the UMAP and paint each cell by the cell-type

In [None]:
dotsize = 10
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

# Single UMAP panel drawn on the prepared axes, colored by cell type
sc.pl.umap(adata, color=['cell_type'], size=dotsize, wspace=0.4, ax=ax, palette='tab20')


## Plot T-cells in the UMAP

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
dotsize = 10

# Only these cell types are highlighted
t_cell_groups = ["T CD4 memory", "T CD4 naive", "T CD8 memory", "T CD8 naive"]
ax = sc.pl.umap(adata, color=['cell_type'], groups=t_cell_groups, show=False, size=dotsize, ax=ax)

# All cells outside of the specified groups are listed as 'NA' in the legend;
# give that entry a more descriptive label
legend_texts = ax.get_legend().get_texts()
for legend_text in legend_texts:
    if legend_text.get_text() != "NA":
        continue
    legend_text.set_text('other cell types')

## Plot gene expressions of tumor cells

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
dot_size = 10

# Background layer: all cells, drawn without a color key
ax = sc.pl.umap(adata, show=False, s=dot_size, ax=ax)

# Foreground layer: only the tumor cells, colored by NDRG1 expression
tumor_cells = adata[adata.obs.cell_type == 'tumor', :]
sc.pl.umap(tumor_cells, color='NDRG1', ax=ax, s=dot_size)

## Plot the Leiden and Louvain clusters of the selected FOVs

In [None]:
# Layout and appearance of the grid of selected FOVs
leiden_grid_kwargs = dict(
    library_key="fov",
    library_id=selected_fovs,
    seg_cell_id="cell_ID",
    seg_outline=True,
    img=False,
    colorbar=False,
    hspace=0,
    wspace=0,
    ncols=3,
    legend_loc=None,
    outline=False,
    axis_label=None,
    figsize=(7, 4.5),
    frameon=False,
    title='',
)
sq.pl.spatial_segment(adata, color='leiden', **leiden_grid_kwargs)

In [None]:
# Layout and appearance of the grid of selected FOVs
louvain_grid_kwargs = dict(
    library_key="fov",
    library_id=selected_fovs,
    seg_cell_id="cell_ID",
    seg_outline=True,
    img=False,
    colorbar=False,
    hspace=0.0001,
    wspace=0.0001,
    ncols=3,
    legend_loc=None,
    outline=False,
    axis_label=None,
    figsize=(7, 4.5),
    frameon=False,
    title='',
)
sq.pl.spatial_segment(adata, color='louvain', **louvain_grid_kwargs)

In [None]:
fig, ax = plt.subplots(1, 3, figsize=(15, 7))

for _ax in ax:
    _ax.set_facecolor('white')

# FOV to display; one of 17, 18, 19, 13, 14, 15
selected_fov = 19

# Same FOV in every panel, colored by a different annotation each time
for panel_ax, color_key in zip(ax, ("leiden", "louvain", "cell_type")):
    sq.pl.spatial_segment(
        adata,
        shape="hex",
        color=color_key,
        library_key="fov",
        library_id=f"{selected_fov}",
        seg_cell_id="cell_ID",
        img=False,
        size=60,
        ax=panel_ax,
    )

plt.tight_layout()

## Plot the spatial map of the PanCK staining intensity

In [None]:
# Layout and appearance of the grid of selected FOVs
panck_grid_kwargs = dict(
    library_key="fov",
    library_id=selected_fovs,
    seg_cell_id="cell_ID",
    seg_outline=True,
    img=False,
    colorbar=False,
    hspace=0,
    wspace=0,
    ncols=3,
    legend_loc=None,
    outline=False,
    axis_label=None,
    figsize=(7, 4.5),
    frameon=False,
    title='',
)
sq.pl.spatial_segment(adata, color='Max.PanCK', **panck_grid_kwargs)

## Spatial Connectivity Analysis

In [None]:
adata

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 15))

# Positional index (within the current `adata`) of the cell whose graph
# neighborhood is drawn in both panels
focus_cell = 1111

# (panel title, extra arguments for the graph construction); the Delaunay
# graph is built last, so it is the one left in `adata.obsp` afterwards
# NOTE(review): `n_neighs` is presumably ignored when `delaunay=True` - confirm
graph_variants = (
    ("K-nearest neighbors", {}),
    ("Delaunay triangulation", {"delaunay": True}),
)

for panel_ax, (panel_title, graph_kwargs) in zip(ax, graph_variants):
    sq.gr.spatial_neighbors(
        adata,
        n_neighs=15,
        coord_type="generic",
        **graph_kwargs,
    )

    # The focus cell together with all cells it is connected to
    _, idx = adata.obsp["spatial_connectivities"][focus_cell, :].nonzero()
    idx = np.append(idx, focus_cell)

    # NOTE(review): FOV "16" is not part of `selected_fovs`; confirm that this
    # is the intended `library_id` for the subset of cells plotted here
    sq.pl.spatial_scatter(
        adata[idx, :],
        library_id="16",
        color="cell_type",
        connectivity_key="spatial_connectivities",
        size=3,
        edges_width=1,
        edges_color="black",
        img=False,
        title=panel_title,
        ax=panel_ax,
    )

plt.tight_layout()

In [None]:
sq.gr.centrality_scores(adata, cluster_key="cell_type")

- closeness centrality - measure of how close the group is to other nodes.
- clustering coefficient - measure of the degree to which nodes cluster together.
- degree centrality - fraction of non-group members connected to group members.

In [None]:
sq.pl.centrality_scores(adata, cluster_key="cell_type", figsize=(15, 6))

## Spatial Co-occurrence

In [None]:
adataset = adata[adata.obs.fov.isin(["17", "18"])].copy()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(14, 5))

# Scatter the cells of the two-FOV subset at their global pixel coordinates
scatter_kwargs = dict(
    x='CenterX_global_px',
    y='CenterY_global_px',
    color='cell_type',
    show=False,
    size=15,
    frameon=False,
    ax=ax,
)
ax = sc.pl.scatter(adataset, **scatter_kwargs)

# Hide the tick marks on both axes
for set_ticks in (ax.set_xticks, ax.set_yticks):
    set_ticks([])

plt.tight_layout()

In [None]:
sq.gr.co_occurrence(
    adataset,
    cluster_key="cell_type",
)

In [None]:
sq.pl.co_occurrence(
    adataset,
    cluster_key="cell_type",
    clusters='tumor', figsize=(15, 7), 
)

## Neighborhood Enrichment Analysis

In [None]:
sq.gr.nhood_enrichment(adata, cluster_key="cell_type")

In [None]:
sq.gr.nhood_enrichment(adataset, cluster_key="cell_type")

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(22, 22))

# Both heatmaps share the same color scale so that they can be compared.
# The limits must be ascending (vmin < vcenter < vmax): the scale is
# symmetric around 0 and drawn with a diverging colormap.
heatmap_kwargs = dict(
    cluster_key="cell_type",
    figsize=(3, 3),
    vmin=-12,
    vmax=12,
    vcenter=0,
    cmap='RdBu_r',
)

sq.pl.nhood_enrichment(
    adata,
    ax=ax[0],
    title="Neighborhood enrichment adata",
    **heatmap_kwargs,
)
sq.pl.nhood_enrichment(
    adataset,
    ax=ax[1],
    title="Neighborhood enrichment adataset",
    **heatmap_kwargs,
)

plt.tight_layout()

## Spatial statistics

### Ripley's L

https://en.wikipedia.org/wiki/Spatial_descriptive_statistics

In [None]:
mode = "L"
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
ripley_ax, tissue_ax = ax

# Left panel: Ripley's statistic per cell type
sq.gr.ripley(adataset, cluster_key="cell_type", mode=mode)
sq.pl.ripley(adataset, cluster_key="cell_type", mode=mode, ax=ripley_ax)

# Right panel: the cells of FOV 17, colored by cell type
tissue_kwargs = dict(
    shape="hex",
    color="cell_type",
    library_id=["17"],
    library_key="fov",
    seg_cell_id="cell_ID",
    img=False,
    size=60,
    ax=tissue_ax,
)
sq.pl.spatial_segment(adataset, **tissue_kwargs)

plt.tight_layout()

### Spatial Correlation Analysis of genes

In [None]:
# Build the spatial graph on the two-FOV subset
graph_kwargs = dict(coord_type="generic", delaunay=True)
sq.gr.spatial_neighbors(adataset, **graph_kwargs)

# Score the genes for spatial autocorrelation (results go to `.uns["moranI"]`)
autocorr_kwargs = dict(mode="moran", n_perms=100, n_jobs=1)
sq.gr.spatial_autocorr(adataset, **autocorr_kwargs)

# First ten rows of the result table
adataset.uns["moranI"].head(10)

In [None]:
# NOTE(review): this cell repeats the previous cell verbatim, so the spatial
# graph and the "moran" autocorrelation are computed a second time and
# `adataset.uns["moranI"]` is overwritten. No seed is passed, so presumably the
# permutation-based values can differ between the two runs - confirm whether
# this cell is needed.
sq.gr.spatial_neighbors(adataset, coord_type="generic", delaunay=True)
sq.gr.spatial_autocorr(
    adataset,
    mode="moran",
    n_perms=100,
    n_jobs=1,
)
adataset.uns["moranI"].head(10)

In [None]:
adataset.uns["moranI"].index[:10]

In [None]:
# Genes to display on FOV 17, one panel per gene
genes_to_plot = ["NDRG1", "HSP90AB1", "SOX4"]

sq.pl.spatial_segment(
    adataset,
    shape="hex",
    color=genes_to_plot,
    library_id=["17"],
    library_key="fov",
    seg_cell_id="cell_ID",
    palette=None,
    legend_loc=None,
    img=False,
    ncols=4,
    size=40,
)

plt.tight_layout()

In [None]:
# NDRG1 expression on FOVs 17 and 18, one panel per FOV
ndrg1_plot_kwargs = dict(
    shape="hex",
    color=["NDRG1"],
    library_id=['17', '18'],
    library_key="fov",
    seg_cell_id="cell_ID",
    palette=None,
    legend_loc=None,
    img=False,
    ncols=2,
    size=40,
    wspace=0,
    hspace=0.3,
    figsize=(12, 8),
)
sq.pl.spatial_segment(adataset, **ndrg1_plot_kwargs)

plt.tight_layout()

In [None]:
# HSP90AB1 expression on FOVs 17 and 18, one panel per FOV
hsp90ab1_plot_kwargs = dict(
    shape="hex",
    color=["HSP90AB1"],
    library_id=['17', '18'],
    library_key="fov",
    seg_cell_id="cell_ID",
    palette=None,
    legend_loc=None,
    img=False,
    ncols=2,
    size=40,
    wspace=0,
    hspace=0.3,
    figsize=(12, 8),
)
sq.pl.spatial_segment(adataset, **hsp90ab1_plot_kwargs)

plt.tight_layout()

In [None]:
# Cell types on FOV 18
fov18_cell_type_kwargs = dict(
    shape="hex",
    color="cell_type",
    library_id=["18"],
    library_key="fov",
    seg_cell_id="cell_ID",
    img=False,
    figsize=(8, 3),
)
sq.pl.spatial_segment(adataset, **fov18_cell_type_kwargs)

In [None]:
# Genes to display on FOV 18, one panel per gene
fov18_genes = ['NDRG1', 'COL1A1', 'COL1A2', 'COL3A1', 'SOX4', 'SLPI', 'MMP1']

sq.pl.spatial_segment(
    adataset,
    shape="hex",
    color=fov18_genes,
    library_id=["18"],
    library_key="fov",
    seg_cell_id="cell_ID",
    palette=None,
    legend_loc=None,
    img=False,
    ncols=3,
    size=40,
    wspace=0,
    hspace=0.3,
    figsize=(12, 8),
)

plt.tight_layout()