# Hartmann-2021 dataset
[Hartmann, FJ et al. (2021). Single-cell metabolic profiling of human cytotoxic T cells. Nat Biotechnol 39, 186–197](https://www.nature.com/articles/s41587-020-0651-8)

[Download link](https://zenodo.org/record/3951613)

Focused on MIBI-TOF multiplexed images and segmented single-cell data of colorectal carcinoma and healthy adjacent colon tissue.

- MIBI-TOF images have undergone noise removal as described in Keren et al. (2018)
- Cell Segmentation masks for MIBI-TOF data contain large non-cellular regions that need to be removed during downstream processing (Not corrected in this script)
- MIBI-TOF derived single-cell data is cell size normalized, arcsinh transformed and percentile normalized and contains manually annotated FlowSOM clustering results
- Image properties:
  - 1024 x 1024 pixels
  - 400 µm x 400 µm
  - 1100 ± 524 mean spots per image
  - 1 cell per row


This script takes some inspiration from the scanpy and squidpy tutorials.

# User input

In [None]:
# Healthy sample image
# Will come in handy later and can be changed post-hoc
point_healthy = 49
# Library id string is derived from the number so the two can never drift apart
Point_healthy = f"Point{point_healthy}"

# Cancer sample image
# Will come in handy later and can be changed post-hoc
point_cancer = 23
Point_cancer = f"Point{point_cancer}"

# Optional
lineage_markers = ['vimentin', 'SMA', 'CD45', 'CD4', 'CD14', 'Ecad', 'CD3', 'CD8', 'CD31', 'CK', 'CD11c', 'CD68']
custom_markers = ['CD45', 'CK', 'vimentin']
# Micrometres per pixel (400 µm across 1024 px). Can be used for accurate scalebar insertion in spatial plots.
# The scale factor must be equal between the spatial points (`adata.obsm['spatial']`)
# and the image (adata.uns['spatial']['PointX']['scalefactors']['tissue_hires_scalef'])
scale_factor = 400/1024
# colours = ['#1E77B3', '#E2191B', '#B15827', '#7F7F7F', '#98BAE2', '#FCBF6E', '#329F2B', '#DD73EF']

# ccmap = ListedColormap(colours)

# Replace user_input_channels from `hartmann_data-preparation`
# user_input_channels_names = ['145_CD45_RGB-0', '174_CK_RGB-1', '113_vimentin_RGB-2']

# ccmap

# Load libraries

In [None]:
#mpl.rcParams['savefig.dpi'] = 100 
# Keep modules updated if they change within the session
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Load modules
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.colors as mplc
import squidpy as sq
import anndata as ad
import scanpy as sc
import scipy as sp

# Settings
import warnings
# Silence all Python warnings from this point on
warnings.filterwarnings('ignore')
# Resolution (dots per inch) matplotlib uses when saving figures
mpl.rcParams['savefig.dpi'] = 600

# Print versions
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

# Import prepped H5AD file

In [None]:
# Read the light object and the full object written by the preparation step
adata_light = ad.read_h5ad('../output-data/hartmann/hartmann_light.h5ad')
adata = ad.read_h5ad('../output-data/hartmann/hartmann.h5ad')

# Add a single image in adata_light to make sq.pl.spatial_scatter work:
# an all-zero 1024 x 1024 x 3 uint8 array stored as the 'hires' image.
# CANCER
adata_light.uns['spatial'] = {
    Point_cancer: {'images': {'hires': np.zeros([1024,1024,3], dtype='uint8')}}
}

# HEALTHY
# NOTE(review): this rebinds `uns['spatial']`, so the cancer entry created just
# above is discarded and only the healthy placeholder remains. Kept as-is because
# the notebook later states that only one saved image is supported - confirm intent.
adata_light.uns['spatial'] = {
    Point_healthy: {'images': {'hires': np.zeros([1024,1024,3], dtype='uint8')}}
}

In [None]:

# Data subsets (independent copies, split by diagnosis)
obs_light = adata_light.obs
adata_cancer = adata_light[obs_light['diagnosis'] == 'Colorectal carcinoma'].copy()
# adata_tib= adata_light[adata_light.obs['tumour_immune_border'] == True].copy()
adata_healthy = adata_light[obs_light['diagnosis'] == 'Healthy', :].copy()

# One representative image per condition, selected by its numeric `point` value
adata_pCancer = adata_light[obs_light['point'] == point_cancer].copy()
adata_pHealthy = adata_light[obs_light['point'] == point_healthy].copy()

# Free up unused space
# del adata_light

# Summaries of every object, separated by a blank line
print(adata, adata_cancer, adata_healthy, adata_pCancer, adata_pHealthy, sep="\n\n")


In [None]:
# Get existing user-input channels

# del user_input_channels

# Mapping from channel name to channel index, read from the first image
channel_keys = adata.uns['spatial']['Point1']['channel_keys']

# Get RGB channels names: keep the user's list when one was defined earlier in
# the session, otherwise fall back to every channel whose name contains "_RGB-"
if 'user_input_channels_names' not in globals():

    user_input_channels_names = [rgb_channel for rgb_channel in channel_keys.keys() if "_RGB-" in rgb_channel]

# Translate the channel names into the values stored under `channel_keys`
user_input_channels = [channel_keys[ch] for ch in user_input_channels_names]

print(f'`user_input_channels_names`: \t {user_input_channels_names}')
print(f'`user_input_channels`: \t\t {user_input_channels}')

# Samples

### Segmentation images

In [None]:
# Four pseudo-arbitrarily chosen images per donor to show
# Patients 90de and 21d7: colorectal carcinoma. Per Hartmann-2021 the first two
# points of each list have a tumour-immune border and the last two do not.
# Patients d3d3 and 99c0: healthy colon.
sample_dict = dict([
    ('90de', ['Point8', 'Point16', 'Point9', 'Point13']),
    ('21d7', ['Point23', 'Point36', 'Point25', 'Point28']),
    ('d3d3', [f'Point{i}' for i in (41, 42, 43, 44)]),
    ('99c0', ['Point48', 'Point49', 'Point52', 'Point57']),
])

In [None]:
# One segmentation figure per donor: the donor's images in two columns,
# cells coloured by their annotated cluster
for donor, points in sample_dict.items():

    segment_options = {
        'library_key': 'library_id',
        'seg_cell_id': 'cell_id',
        'color': 'Cluster',
        'library_id': points,
        # Aesthetics
        'img': False,
        'outline': True,
        'scalebar_dx': scale_factor,
        'scalebar_kwargs': {'scale_loc': 'bottom', 'location': 'lower right'},
        'ncols': 2,
        # Format
        'figsize': [5,5],
        'dpi': 300,
        'legend_fontsize': 10,
        'wspace': 0.25,
        'hspace': 0.1,
        'legend_na': True,
        # Labels: one title per image, no axis labels
        'title': list(points),
        'axis_label': ["", ""],
        'save': f"../../figures/hartmann/spatial_segment_{donor}.png",
    }

    sq.pl.spatial_segment(adata, **segment_options)
    plt.show()

In [None]:
# For individual sample exploration

# sq.pl.spatial_segment(
#     adata, 
#     img=False,
#     library_key='library_id', 
#     seg_cell_id='cell_id', 
#     color='Cluster', 
#     # library_id=['Point' + str(i) for i in range(1, 21)], # Patient 90de - colorectal carcinoma
#     library_id=sample_dict['90de'],
#     # library_id=['Point' + str(i) for i in range(21, 41)], # Patient 21d7 - colorectal carcinoma
#     # library_id=sample_dict['21d7'],
#     # library_id=['Point' + str(i) for i in range(41, 45)],# Patient d3d3 - healthy colon
#     # library_id=['Point' + str(i) for i in range(45, 59)], # Patient 99c0 - healthy colon
#     # library_id=sample_dict['99c0'],
#     seg_outline=True,
#     img_channel=0,
#     # img_cmap='magma',
#     legend=False
# )

### MIBI images

In [None]:
# CANCER

# Channel names of the cancer image, ordered by the text after the first four
# characters (e.g. past a prefix such as '145_'), with the RGB composites dropped
channels = [
    name
    for name in sorted(adata.uns['spatial'][Point_cancer]['channel_keys'].keys(), key=lambda a: a[4:])
    if 'RGB' not in name
]

# Grid layout: five panels per row
ncols = 5
tot = len(channels)
nrows = tot // ncols + 1

fig, axs = plt.subplots(nrows, ncols, figsize=(20,40))

for counter, channel in enumerate(channels):

    ch = adata.uns['spatial'][Point_cancer]['channel_keys'][channel]
    panel = axs[counter // ncols, counter % ncols]

    # Only the last panel carries a scalebar
    is_last = counter == tot - 1
    sf = scale_factor if is_last else None
    skw = {'scale_loc': 'bottom', 'location': 'lower right'} if is_last else {}

    sq.pl.spatial_scatter(
        adata,
        library_key='library_id',
        library_id=Point_cancer,
        # Show the image channel only, no points coloured
        color=None,
        img_channel=int(ch),
        img_cmap='Greys_r',
        # Plot elements
        scalebar_dx=sf,
        scalebar_kwargs=skw,
        wspace= 0.00,
        hspace=0.00,
        legend_na=True,
        # Labels
        title=channel[4:],
        axis_label=["", ""],
        ax=panel,
    )

    # Re-set the title to control its font size
    panel.set_title(channel[4:], fontsize=20)


# Remove empty axes: everything in the last row past the final channel
for blank in axs[-1, tot % ncols:]:

    blank.axis('off')


# Colour bar spanning 0-1 in the same grey map used for the images
cmap = plt.get_cmap('Greys_r')
norm = mpl.colors.Normalize(vmin=0, vmax=1)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
plt.colorbar(sm, ax=axs.flat[-4], anchor=(-8.2,0.5), label='Signal', shrink=0.45)

# Subplot spacing
plt.subplots_adjust(wspace=0.02, hspace=-0.5)

plt.savefig("../figures/hartmann/cancer_sample_mibi_image.jpg")

plt.show()

In [None]:
# HEALTHY

# Channel lookup of the healthy image (channel name -> channel index)
healthy_channel_keys = adata.uns['spatial'][Point_healthy]['channel_keys']

# Channel names ordered by the text after the first four characters
# (e.g. past a prefix such as '145_'), with the RGB composites dropped
channels = sorted(healthy_channel_keys.keys(), key=lambda a: a[4:])
channels = [i for i in channels if 'RGB' not in i]

# Grid layout: five panels per row
ncols = 5
tot = len(channels)
nrows = tot // ncols + 1

fig, axs = plt.subplots(nrows, ncols, figsize=(20,40))

for counter, channel in enumerate(channels):

    # Fix: read the channel index from the healthy image
    # (the cancer image's lookup was used here before)
    ch = healthy_channel_keys[channel]
    panel = axs[counter // ncols, counter % ncols]

    # Only the last panel carries a scalebar
    if counter == tot - 1:

        sf = scale_factor
        skw = {'scale_loc': 'bottom', 'location': 'lower right'}

    else:

        sf = None
        skw = {}

    sq.pl.spatial_scatter(

        adata,
        library_key='library_id',

        # Fix: plot the healthy image; this figure is saved as the healthy sample
        library_id=Point_healthy,

        # Show the image channel only, no points coloured
        color=None,
        img_channel=int(ch),
        img_cmap='Greys_r',

        # Plot elements
        scalebar_dx=sf,
        scalebar_kwargs=skw,

        wspace= 0.00,
        hspace=0.00,
        legend_na=True,

        # Labels
        title=channel[4:],
        axis_label=["", ""],

        ax=panel

    )

    # Re-set the title to control its font size
    panel.set_title(channel[4:], fontsize=20)


# Remove empty axes: everything in the last row past the final channel
for e in range(1, ncols - (tot % ncols) + 1):

    axs[-1, -e].axis('off')


# Colour bar spanning 0-1 in the same grey map used for the images
cmap = plt.get_cmap('Greys_r')
norm = mpl.colors.Normalize(vmin=0, vmax=1)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
plt.colorbar(sm, ax=axs.ravel().tolist()[-4], anchor=(-8.2,0.5), label='Signal', shrink=0.45)

# Subplot spacing
plt.subplots_adjust(wspace=0.02, hspace=-0.3)

plt.savefig("../figures/hartmann/healthy_sample_mibi_image.jpg")

plt.show()

### PCA & UMAP by sample

In [None]:
# PCA single cancer sample
# Compute principal components on the single cancer image, then plot them coloured by cluster
sc.pp.pca(adata_pCancer)
sc.pl.pca(adata_pCancer, color=['Cluster'])

# UMAP single cancer sample
# Neighbour graph on the first 10 PCs, followed by the UMAP embedding and its plot
sc.pp.neighbors(adata_pCancer, n_pcs=10)#, n_neighbors=3)
sc.tl.umap(adata_pCancer)
sc.pl.umap(adata_pCancer, color=['Cluster'])

In [None]:
# PCA single healthy sample
# Compute principal components on the single healthy image, then plot them coloured by cluster
sc.pp.pca(adata_pHealthy)
sc.pl.pca(adata_pHealthy, color=['Cluster'])

# UMAP single healthy sample
# Neighbour graph on the first 10 PCs; the UMAP here uses spread=3 (the cancer cell uses the default)
sc.pp.neighbors(adata_pHealthy, n_pcs=10)#, n_neighbors=20)
sc.tl.umap(adata_pHealthy, spread=3)#, min_dist=3)#maxiter=80)
sc.pl.umap(adata_pHealthy, color=['Cluster'])

# Cell-type distributions

### By sample

In [None]:
# Count cells per (sample, cell type) pair and pivot so that rows are samples
# and columns are cell types
pair_sizes = adata.obs[['library_id', 'Cluster']].groupby(['library_id', 'Cluster']).size()
df_raw = pair_sizes.unstack()

# Divide every row by its total to obtain per-sample fractions
sample_totals = df_raw.sum(axis=1)
df = df_raw.div(sample_totals, axis=0)

In [None]:
fig, ax = plt.subplots(figsize=[18,5])

# Stacked bars: one bar per sample (row of `df`), one segment per cell type
df.plot(
    kind='bar',
    stacked=True,
    ax=ax,
# color=["orange", "cyan"])
# colormap='viridis',
)

ax.set_title("Cell-type distribution per sample", fontsize=20, y=1.05, x=0.55)
ax.set_xlabel(None)
# Label every bar with the number of the sample it actually shows. The labels are
# taken from the table index rather than a fixed 1..58 range, so they stay correct
# whatever the number or ordering of samples in `df`.
ax.set_xticklabels([str(sample).replace('Point', '') for sample in df.index], rotation=90, fontsize=13)
ax.set_yticks(np.linspace(0,1,5))
ax.set_yticklabels(np.linspace(0,1,5), fontsize=16)
# Legend placed to the right of the axes; passed to savefig so it is not cropped
lgd = ax.legend(bbox_to_anchor=(0,0,1.2,1), loc='right', fontsize=16)

fig.savefig('../figures/hartmann/cell-type_distribution_by_samples.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

### Across all samples

In [None]:
# Distinct cell types and the number of cells in each, over all samples
ct_data = np.unique(adata.obs['Cluster'], return_counts=True)
ct, pct = ct_data[0], ct_data[1]  # raw counts; plt.pie derives the percentages

pie_style = dict(
    autopct='%1.1f%%',
    startangle=90,
    labeldistance=1.15,
    pctdistance=0.75,
    textprops={'fontsize':14},
)
plt.pie(pct, labels=ct, **pie_style)
plt.title("") #'Cell-type distribution across all samples and donors')
plt.axis('equal')
plt.savefig("../figures/hartmann/cell-type_distribution_all.png")


### By tumour-immune border

In [None]:
# Work on an explicit copy so that adding/overwriting a column below is a plain
# assignment on our own frame (no chained-assignment ambiguity with `adata_light.obs`)
df = adata_light.obs[['Cluster', 'diagnosis', 'tumour_immune_border']].copy()
# Turn the border flag into a readable label
df['tumour_immune_border'] = np.where(df['tumour_immune_border'], 'tumour imm border', 'no tumour imm border')
# Cells per cell type (rows) for every diagnosis / border combination (columns)
df = df.groupby(['Cluster', 'diagnosis', 'tumour_immune_border']).size().unstack(level=['diagnosis', 'tumour_immune_border'], fill_value=np.nan)

# Flatten the two-level column index into "<diagnosis> - <border label>"
df.columns=df.columns.map(' - '.join)
# Drop the healthy/border combination and expose the remaining healthy column as 'Healthy'
df = df.loc[:, df.columns!='Healthy - tumour imm border']
df['Healthy'] = df['Healthy - no tumour imm border']
df = df.drop('Healthy - no tumour imm border', axis=1)

In [None]:
# Three grey levels, one per column of `df`
colours = ['#C1C1C1', '#6A6868', '#000000']
ccmap2 = mplc.ListedColormap(colours)


fig, ax = plt.subplots(figsize=[7,4])

df.plot(kind='bar', ax=ax, colormap=ccmap2)

ax.set_title("") #("Cell-type distribution by diagnosis", fontsize=20, y=1.05, x=0.48)
ax.set_xlabel(None)
ax.set_xticklabels([item.get_text() for item in ax.get_xticklabels()], fontsize=13)
# Only the font size of the y tick labels is meant to change: set it through
# tick_params instead of re-assigning label texts read back from the axis
ax.tick_params(axis='y', labelsize=16)
# Keep a handle on THIS figure's legend: it is handed to savefig below. Previously
# `lgd` was not assigned here, so a legend object left over from an earlier cell
# (belonging to a different figure) was passed instead.
lgd = ax.legend(loc='upper center', framealpha=0.3, fontsize=13)
# lgd = ax.legend(bbox_to_anchor=(0,0,1.9,1), loc='right', fontsize=12)

fig.savefig('../figures/hartmann/cell-type_distribution_by_diagnosis.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

# Global exploration

In [None]:
# Scanpy figure defaults for the following plots: font size, on-screen and saved dpi, jpg output
sc.set_figure_params(fontsize=16, dpi=150, dpi_save=300, format='jpg')

### Gene expression of pooled single-cell data by cell-type 

In [None]:
# All features in alphabetical order
features_sorted = sorted(adata.var_names)

# Matrix plot grouped by cell type; with swap_axes the features and groups
# trade places on the axes. The plot object is returned so it can be saved.
fig = sc.pl.matrixplot(adata, features_sorted, groupby='Cluster', cmap='magma', swap_axes=True, return_fig=True)

fig.savefig('../figures/hartmann/feature_expression_by_cell-type_heatmap.png', bbox_inches="tight")

### Additional complementary plots

In [None]:
# sc.pl.stacked_violin(adata, adata.var_names, groupby='Cluster', rotation=90);

In [None]:
# fig, ax = plt.subplots(figsize=[13,4])

# sc.pl.dotplot(

#     adata, 
#     var_names=sorted(adata.var_names), 
#     groupby='Cluster',

#     cmap='Reds',

#     ax=ax,

# )

# ax.set_title(None)

# fig.savefig('../figures/hartmann/feature_expression_by_cell-type_dotplot.png', bbox_inches="tight")

### PCA & UMAP

In [None]:
# PCA
sc.pp.pca(adata)

# One PCA scatter per annotation, each saved under its own file-name suffix.
# All four plots now use the same call: the extra `save=` argument that only the
# 'diagnosis' plot carried was dropped, since the figure is written explicitly
# with fig.savefig like the others.
# NOTE(review): scanpy appears to skip its own saving when return_fig=True, so
# no file should go missing - confirm if an 'all-images_PCA_condition' file was used.
pca_outputs = {
    'Cluster': 'cell-type',
    'library_id': 'sample-image',
    'donor': 'donor',
    'diagnosis': 'condition',
}

for obs_key, suffix in pca_outputs.items():

    fig = sc.pl.pca(adata, color=obs_key, title="", size=10, return_fig=True)
    fig.savefig(f"../figures/hartmann/all-samples_PCA_{suffix}.png")
    plt.show()

In [None]:
# Variance explained per principal component, then feature loadings of components 1-3
sc.pl.pca_variance_ratio(adata)
sc.pl.pca_loadings(adata, components=[1,2,3])

PCA by all markers

In [None]:
# sc.pl.pca(
#     adata, 
#     color=[i for i in sorted(adata.var_names)]
# )

In [None]:
# UMAP
# Neighbour graph on the first 10 PCs, then the embedding
sc.pp.neighbors(adata, n_pcs=10)
sc.tl.umap(adata)

# One UMAP per annotation, in the same order as the PCA plots
for obs_key in ('Cluster', 'library_id', 'donor', 'diagnosis'):
    sc.pl.umap(adata, color=[obs_key])

## Major immune lineage marker recovery

### Feature expression

In [None]:
fig, ax = plt.subplots(figsize=[6,4])

# Lineage-marker matrix plot grouped by cell type, drawn into the axes created above
matrixplot_options = {'groupby': 'Cluster', 'cmap': 'magma', 'ax': ax}
sc.pl.matrixplot(adata, lineage_markers, **matrixplot_options)

ax.set_title(None)

fig.savefig('../figures/hartmann/feature_expression_by_cell-type_lineage-markers_heatmap.png', bbox_inches="tight")

### UMAP

In [None]:
# Font size for the following scanpy figures
sc.set_figure_params(fontsize=16)

# One UMAP panel per lineage marker (alphabetical order), three panels per row
sc.pl.umap(
    adata,
    color=sorted(lineage_markers),
    cmap='magma',
    ncols=3,
    hspace=0.23,
    wspace=0.27,
)

## HVGs 

In [None]:
# Flag highly variable features on the pooled data
sc.pp.highly_variable_genes(adata, min_disp=0.5) #, min_mean=0.0125, max_mean=3

# Names of the flagged features as a plain list
hvg_mask = adata.var.highly_variable
highly_vargs = adata.var_names[hvg_mask].tolist()

highly_vargs

In [None]:
# Highly variable features that are not lineage markers
hvgs_non_lin = set(highly_vargs) - set(lineage_markers)
hvgs_non_lin

In [None]:
# One UMAP panel per non-lineage highly variable feature (alphabetical order), three per row
sc.pl.umap(
    adata,
    color=sorted(hvgs_non_lin),
    cmap='magma',
    ncols=3,
    hspace=0.18,
    wspace=0.27
)

# Specific exploration: cancer vs non-cancer

### Removed dimensionality reduction on different levels

In [None]:
# # PCA cancer
# sc.pp.pca(adata_cancer)
# sc.pl.pca(adata_cancer, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

# # UMAP cancer
# sc.pp.neighbors(adata_cancer)
# sc.tl.umap(adata_cancer)
# sc.pl.umap(adata_cancer, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

In [None]:
# # PCA tib
# sc.pp.pca(adata_tib)
# sc.pl.pca(adata_tib, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

# # UMAP tib
# sc.pp.neighbors(adata_tib)
# sc.tl.umap(adata_tib)
# sc.pl.umap(adata_tib, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

In [None]:
# # PCA healthy
# sc.pp.pca(adata_healthy)
# sc.pl.pca(adata_healthy, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

# # UMAP healthy
# sc.pp.neighbors(adata_healthy)
# sc.tl.umap(adata_healthy)
# sc.pl.umap(adata_healthy, color=['Cluster', 'library_id', 'donor', 'diagnosis'], wspace=0.6)

### Segmentation by cell-type

In [None]:
# Side-by-side segmentation masks: cancer image on the left, healthy on the right
figs = [Point_cancer, Point_healthy]

fig, axs = plt.subplots(1, 2, figsize=(10,20))

for panel, point in zip(axs, figs):

    sq.pl.spatial_segment(
        adata,
        library_key='library_id',
        seg_cell_id='cell_id',
        library_id=point,
        color='Cluster',
        # Aesthetics
        img=False,
        outline=True,
        scalebar_dx=scale_factor,
        scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
        # Format
        legend_fontsize=15,
        wspace= 0.25,
        hspace=0.1,
        legend_na=True,
        # Labels
        axis_label=["", ""],
        ax=panel,
    )

    # Same heading on both panels, set afterwards to control size and offset
    panel.set_title("Segmentation mask", y=1.05, fontdict={'fontsize': 25})

# Margins and spacing between the two panels
plt.subplots_adjust(bottom=0.1, left=0.1, top=0.9, right=0.9, wspace=0.05, hspace=0.5)


plt.show()

### Lineage marker recovery

In [None]:
# CANCER
markers = sorted(lineage_markers)

# Grid layout: four panels per row. The row count is rounded up so that a marker
# list whose length is not a multiple of four still fits, and squeeze=False keeps
# `axs` two-dimensional even when everything fits in a single row.
ncols = 4
tot = len(markers)
nrows = -(-tot // ncols)

fig, axs = plt.subplots(nrows, ncols, figsize=(40,20), squeeze=False)

for counter, marker in enumerate(markers):

    panel = axs[counter // ncols, counter % ncols]

    # Segmentation mask of the cancer image, cells coloured by this marker
    sq.pl.spatial_segment(

        adata,
        color=marker,
        library_key='library_id',
        library_id=Point_cancer,
        seg_cell_id='cell_id',

        # Aesthetics
        img=False,
        outline=True,
        # seg_outline=True,
        scalebar_dx=scale_factor,
        scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
        cmap='viridis',

        # Format
        legend_fontsize=10,
        legend_na=True,

        # Labels
        axis_label=["", ""],

        ax=panel

    )

    # Re-set the title to control its font size
    panel.set_title(marker, fontsize=40)


# Hide panels left over when the grid has more cells than markers
for unused in axs.ravel()[tot:]:

    unused.axis('off')


# Subplot spacing
plt.subplots_adjust(wspace=-0.5, hspace=0.15)

In [None]:
# HEALTHY
markers = sorted(lineage_markers)

# Grid layout: four panels per row. The row count is rounded up so that a marker
# list whose length is not a multiple of four still fits, and squeeze=False keeps
# `axs` two-dimensional even when everything fits in a single row.
ncols = 4
tot = len(markers)
nrows = -(-tot // ncols)

fig, axs = plt.subplots(nrows, ncols, figsize=(40,20), squeeze=False)

for counter, marker in enumerate(markers):

    panel = axs[counter // ncols, counter % ncols]

    # Segmentation mask of the healthy image, cells coloured by this marker
    sq.pl.spatial_segment(

        adata,
        color=marker,
        library_key='library_id',
        library_id=Point_healthy,
        seg_cell_id='cell_id',

        # Aesthetics
        img=False,
        outline=True,
        # seg_outline=True,
        scalebar_dx=scale_factor,
        scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
        cmap='viridis',

        # Format
        legend_fontsize=10,
        legend_na=True,

        # Labels
        axis_label=["", ""],

        ax=panel

    )

    # Re-set the title to control its font size
    panel.set_title(marker, fontsize=40)


# Hide panels left over when the grid has more cells than markers
for unused in axs.ravel()[tot:]:

    unused.axis('off')


# Subplot spacing
plt.subplots_adjust(wspace=-0.5, hspace=0.15)

## PCA & UMAP

In [None]:
# Scanpy figure defaults for the per-sample plots: font size, on-screen and saved dpi, jpg output
sc.set_figure_params(fontsize=16, dpi=150, dpi_save=300, format='jpg')

In [None]:
# CANCER
# Recompute the PCA of the single cancer image and keep the returned figure in `fig`
sc.pp.pca(adata_pCancer)
fig = sc.pl.pca(adata_pCancer, color='Cluster', title="", size=30, return_fig=True)

In [None]:

# Neighbour graph on the first 10 PCs, UMAP embedding, then the plot coloured by cluster
sc.pp.neighbors(adata_pCancer, n_pcs=10)
sc.tl.umap(adata_pCancer)
sc.pl.umap(adata_pCancer, color='Cluster', size=30, title="")

In [None]:
# HEALTHY
# Recompute the PCA of the single healthy image and keep the returned figure in `fig`
sc.pp.pca(adata_pHealthy)
fig = sc.pl.pca(adata_pHealthy, color='Cluster', title="", size=30, return_fig=True)

In [None]:

# Neighbour graph on the first 10 PCs, UMAP embedding, then the plot coloured by cluster
sc.pp.neighbors(adata_pHealthy, n_pcs=10)
sc.tl.umap(adata_pHealthy)
sc.pl.umap(adata_pHealthy, color='Cluster', size=30, title="")

## UMAP marker expression

In [None]:
# CANCER
# Rebuild the neighbour graph and embedding, then draw one UMAP panel per lineage marker
sc.pp.neighbors(adata_pCancer, n_pcs=10)
sc.tl.umap(adata_pCancer)
sc.pl.umap(adata_pCancer, color=lineage_markers)

In [None]:
# HEALTHY
# One UMAP panel per lineage marker. No embedding is computed in this cell,
# so it relies on the UMAP already stored in `adata_pHealthy`.

sc.pl.umap(adata_pHealthy, color=lineage_markers)

## Spatial marker expression

In [None]:
# CANCER

# One segmentation panel per lineage marker plus a final panel coloured by cluster
panel_colours = lineage_markers+['Cluster']

segment_options = {
    'color': panel_colours,
    'library_key': 'library_id',
    'library_id': Point_cancer,
    'seg_cell_id': 'cell_id',
    # Aesthetics
    'img': False,
    'outline': True,
    'scalebar_dx': scale_factor,
    'scalebar_kwargs': {'scale_loc': 'bottom', 'location': 'lower right'},
    'ncols': 4,
    'cmap': 'viridis',
    # Format
    'figsize': [5,5],
    'dpi': 300,
    'legend_fontsize': 10,
    'wspace': 0.25,
    'hspace': 0.1,
    'legend_na': True,
    # Labels
    'axis_label': ["", ""],
    'save': "../../figures/hartmann/spatial_segment_" + "point-cancer_lineage_markers" + ".png",
}

sq.pl.spatial_segment(adata, **segment_options)

In [None]:
# HEALTHY

# One segmentation panel per lineage marker plus a final panel coloured by cluster
panel_colours = lineage_markers+['Cluster']

segment_options = {
    'color': panel_colours,
    'library_key': 'library_id',
    'library_id': Point_healthy,
    'seg_cell_id': 'cell_id',
    # Aesthetics
    'img': False,
    'outline': True,
    'scalebar_dx': scale_factor,
    'scalebar_kwargs': {'scale_loc': 'bottom', 'location': 'lower right'},
    'ncols': 4,
    'cmap': 'viridis',
    # Format
    'figsize': [5,5],
    'dpi': 300,
    'legend_fontsize': 10,
    'wspace': 0.25,
    'hspace': 0.1,
    'legend_na': True,
    # Labels
    'axis_label': ["", ""],
    'save': "../../figures/hartmann/spatial_segment_" + "point-healthy" + ".png",
}

sq.pl.spatial_segment(adata, **segment_options)

# Highly variable genes 

In [None]:
## All samples
# Flag highly variable features on the pooled data (result stored in adata.var.highly_variable)
sc.pp.highly_variable_genes(adata, min_disp=0.5) #, min_mean=0.0125, max_mean=3

In [None]:
## Sample subsets
# Same threshold as for the pooled data, applied to the cancer subset and then the healthy subset
for diagnosis_subset in (adata_cancer, adata_healthy):
    sc.pp.highly_variable_genes(diagnosis_subset, min_disp=0.5) #, min_mean=0.0125, max_mean=3

In [None]:
# Highly variable genes list dataframe
# One boolean column per data set (global / cancer / healthy), one row per feature
hvg_dict = {
    label: subset.var.highly_variable
    for label, subset in (('global', adata), ('cancer', adata_cancer), ('healthy', adata_healthy))
}

hvg_df = pd.DataFrame(hvg_dict)
# Shown transposed: data sets as rows, features as columns
hvg_df.T

In [None]:
# No grid lines over the image
mpl.rcParams['axes.grid'] = False
fig, ax = plt.subplots(figsize=(15, 5))

# Boolean matrix as an image: data sets as rows, features as columns.
# The image handle is not stored in `fig` any more, so `fig` keeps referring
# to the Figure created above.
ax.imshow(hvg_df.T, cmap='viridis')
# One tick per data set (y) and per feature (x), labelled with their names
ax.set_yticks(np.arange(len(hvg_df.columns)), labels=hvg_df.columns.values)
ax.set_xticks(np.arange(len(hvg_df.index)), labels=hvg_df.index.values, rotation=90)
ax.set_title("Highly variable genes in different sample subsets", y=1.1, fontsize=18)

plt.show()

In [None]:
from matplotlib_venn import venn3

# Sets of highly variable feature names for the pooled, cancer and healthy data
hvg_name_sets = [
    set(subset.var_names[subset.var.highly_variable])
    for subset in (adata, adata_cancer, adata_healthy)
]

# Three-set Venn diagram of their overlap
venn3(subsets=hvg_name_sets, set_labels=('Global', 'Cancer', 'Healthy'))

In [None]:
# Features flagged as highly variable in at least one of the three data sets
flagged_anywhere = hvg_df['global'] | hvg_df['cancer'] | hvg_df['healthy']
hvgs = hvg_df.index[flagged_anywhere].tolist()
hvg_set = set(hvgs)

hvgs

## HVGs not in lineage markers

In [None]:
# Lineage markers and the per-diagnosis highly variable features as sets
lineage_markers_set = set(lineage_markers)
hvg_cancer_set, hvg_healthy_set = (
    set(subset.var_names[subset.var.highly_variable])
    for subset in (adata_cancer, adata_healthy)
)

In [None]:
# Highly variable features that are not lineage markers, alphabetically sorted
hvg_cancer_diff = sorted(hvg_cancer_set - lineage_markers_set)
hvg_healthy_diff = sorted(hvg_healthy_set - lineage_markers_set)

In [None]:
# # CANCER
# # First run on all cancer hvgs, then selected interesting ones

# sq.pl.spatial_segment(

#     adata,
#     color=hvg_cancer_diff+['Cluster'],
#     library_key='library_id',
#     library_id=Point_cancer,
#     seg_cell_id='cell_id',
    
#     # Aesthetics
#     img=False,
#     outline=True,
#     # seg_outline=True,
#     scalebar_dx=scale_factor,
#     scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
#     ncols=4,
#     cmap='viridis',
#     # palette=[]

#     # Format
#     figsize=[5,5],
#     dpi=300,
#     legend_fontsize=10,
#     wspace= 0.25,
#     hspace=0.1,
#     legend_na=True,
    
#     # Labels
#     # title= [Point_healthy],
#     axis_label=["", ""],
    
#     save="../../figures/hartmann/spatial_segment_" + "point-cancer_hvg_diff" + ".png" 

# )

In [None]:
# Larger font for these panels
sc.set_figure_params(fontsize=20, dpi=150, dpi_save=300, format='jpg')

# Selected markers shown on the cancer image's segmentation mask
selected_markers = ['CD98', 'Ki67', 'NaKATPase']

segment_options = {
    'color': selected_markers,
    'library_key': 'library_id',
    'library_id': Point_cancer,
    'seg_cell_id': 'cell_id',
    # Aesthetics
    'img': False,
    'outline': True,
    'scalebar_dx': scale_factor,
    'scalebar_kwargs': {'scale_loc': 'bottom', 'location': 'lower right'},
    'cmap': 'viridis',
    # Format
    'legend_na': True,
    # Labels
    'axis_label': ["", ""],
}

sq.pl.spatial_segment(adata, **segment_options)

In [None]:
# # HEALTHY
# # First run on all cancer hvgs, then selected interesting ones

# sq.pl.spatial_segment(

#     adata,
#     color=hvg_cancer_diff+['Cluster'],
#     library_key='library_id',
#     library_id=Point_healthy,
#     seg_cell_id='cell_id',
    
#     # Aesthetics
#     img=False,
#     outline=True,
#     # seg_outline=True,
#     scalebar_dx=scale_factor,
#     scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
#     ncols=4,
#     cmap='viridis',
#     # palette=[]

#     # Format
#     figsize=[5,5],
#     dpi=300,
#     legend_fontsize=10,
#     wspace= 0.25,
#     hspace=0.1,
#     legend_na=True,
    
#     # Labels
#     # title= [Point_healthy],
#     axis_label=["", ""],
    
#     save="../../figures/hartmann/spatial_segment_" + "point-healthy_hvg_diff" + ".png" 

# )

In [None]:
# HEALTHY
# First run on all cancer hvgs, then selected interesting ones

# Selected markers shown on the healthy image's segmentation mask
selected_markers = ['CD98', 'Ki67', 'LDHA', 'NaKATPase'] #hvg_cancer_diff+['Cluster']

segment_options = {
    'color': selected_markers,
    'library_key': 'library_id',
    'library_id': Point_healthy,
    'seg_cell_id': 'cell_id',
    # Aesthetics
    'img': False,
    'outline': True,
    'scalebar_dx': scale_factor,
    'scalebar_kwargs': {'scale_loc': 'bottom', 'location': 'lower right'},
    'ncols': 4,
    'cmap': 'viridis',
    # Format
    'legend_fontsize': 10,
    'wspace': 0.25,
    'hspace': 0.1,
    'legend_na': True,
    # Labels
    'axis_label': ["", ""],
}

sq.pl.spatial_segment(adata, **segment_options)

# Spatial pattern analysis

### Compute spatial neighbours graph
**Note:** `spatial_scatter(connectivity_key=True)` requires `adata` to have only one image saved!

In [None]:
# Radius-based neighbour graphs (radius of 35 in the units of the stored coordinates).
# `adata` pools many images whose coordinates share the same pixel range, so its
# graph is built per image via `library_key`; without it, cells from different
# images that happen to have nearby coordinates would be linked.
sq.gr.spatial_neighbors(adata, radius=35, coord_type='generic', library_key='library_id')
# The two single-image objects need no grouping
sq.gr.spatial_neighbors(adata_pCancer, radius=35, coord_type='generic')
sq.gr.spatial_neighbors(adata_pHealthy, radius=35, coord_type='generic')

In [None]:
# CANCER

# Neighbour graph of the cancer image drawn twice: the full field of view with
# small points, then a cropped region with larger points. The scalebar uses the
# notebook-wide `scale_factor` instead of a repeated literal, so a change in the
# user-input section carries over to these plots.
for point_size, view_options in (
    ([30], {}),
    ([180], {'crop_coord': [(50, 0, 500, 450)]}),
):

    sq.pl.spatial_scatter(
        adata_pCancer,
        img=None,
        shape=None,
        color='Cluster',
        library_key='library_id',
        connectivity_key='spatial_connectivities',
        edges_width=0.5,
        size=point_size,
        scalebar_dx=scale_factor,
        scalebar_kwargs={"scale_loc": "bottom", "location": "lower right"},
        axis_label=["", ""],
        title="",
        figsize=(8, 8),
        **view_options,
    )

In [None]:
# HEALTHY

# Neighbour graph of the healthy image drawn twice: the full field of view with
# small points, then a cropped region with larger points. The scalebar uses the
# notebook-wide `scale_factor` instead of a repeated literal, so a change in the
# user-input section carries over to these plots.
for point_size, view_options in (
    ([30], {}),
    ([180], {'crop_coord': [(50, 0, 500, 450)]}),
):

    sq.pl.spatial_scatter(
        adata_pHealthy,
        img=None,
        shape=None,
        color='Cluster',
        library_key='library_id',
        connectivity_key='spatial_connectivities',
        edges_width=0.5,
        size=point_size,
        scalebar_dx=scale_factor,
        scalebar_kwargs={"scale_loc": "bottom", "location": "lower right"},
        axis_label=["", ""],
        title="",
        figsize=(8, 8),
        **view_options,
    )

## Annotation/Cell-type based statistics

### Interaction matrix

In [None]:
# Cluster-by-cluster interaction matrix of the cancer image:
# first un-normalised, then normalised, each followed by its plot
for use_normalised in (False, True):
    sq.gr.interaction_matrix(adata_pCancer, cluster_key='Cluster', normalized=use_normalised)
    sq.pl.interaction_matrix(adata_pCancer, cluster_key='Cluster')

In [None]:
# Cluster-by-cluster interaction matrix of the healthy image:
# first un-normalised, then normalised, each followed by its plot
for use_normalised in (False, True):
    sq.gr.interaction_matrix(adata_pHealthy, cluster_key='Cluster', normalized=use_normalised)
    sq.pl.interaction_matrix(adata_pHealthy, cluster_key='Cluster')

### Neighbours enrichment analysis

In [None]:
# Neighbourhood enrichment between clusters, computed and plotted
# for the CANCER image first and the HEALTHY image second
for sample_adata in (adata_pCancer, adata_pHealthy):
    sq.gr.nhood_enrichment(sample_adata, cluster_key='Cluster')
    sq.pl.nhood_enrichment(sample_adata, cluster_key='Cluster')

### Co-occurrence probability

In [None]:
# Segmentation mask of the cancer image coloured by cluster, as a visual reference for the co-occurrence plot
sq.pl.spatial_segment(adata, color='Cluster', library_key='library_id', seg_cell_id='cell_id', library_id=[Point_cancer], img=None, axis_label=["", ""], title="", figsize=(5, 5))

In [None]:
# Cluster labels present in the cancer image
np.unique(adata_pCancer.obs['Cluster'])

In [None]:
# Turn axes grid lines off for the following plots
mpl.rcParams['axes.grid'] = False

In [None]:
# Co-occurrence between clusters in the cancer image (16 parallel jobs),
# plotted for the 'Epithelial' cluster
cooccurrence_target = adata_pCancer
sq.gr.co_occurrence(cooccurrence_target, cluster_key='Cluster', n_jobs=16)
sq.pl.co_occurrence(cooccurrence_target, cluster_key='Cluster', clusters='Epithelial')

In [None]:
# Segmentation mask of the healthy image coloured by cluster, as a visual reference for the co-occurrence plot
sq.pl.spatial_segment(adata, color='Cluster', library_key='library_id', seg_cell_id='cell_id', library_id=[Point_healthy], img=None, axis_label=["", ""], title="", figsize=(5, 5))

In [None]:
# Co-occurrence between clusters in the healthy image (16 parallel jobs),
# plotted for the 'Epithelial' cluster
cooccurrence_target = adata_pHealthy
sq.gr.co_occurrence(cooccurrence_target, cluster_key='Cluster', n_jobs=16)
sq.pl.co_occurrence(cooccurrence_target, cluster_key='Cluster', clusters='Epithelial')

### Ripley's L statistic

Ripley’s L function is a descriptive statistic generally used to determine whether points have a random, dispersed or clustered distribution pattern at a certain scale. Ripley’s L is a variance-normalized version of Ripley’s K statistic.

In [None]:
# Ripley's L per cluster, computed and plotted for the CANCER image
# first and the HEALTHY image second
for sample_adata in (adata_pCancer, adata_pHealthy):
    sq.gr.ripley(sample_adata, cluster_key="Cluster", mode="L")
    sq.pl.ripley(sample_adata, cluster_key="Cluster", mode="L")

### Centrality scores

This example shows how to compute centrality scores, given a spatial graph and cell type annotation.

The scores calculated are closeness centrality, degree centrality and clustering coefficient with the following properties:
- closeness centrality - measure of how close the group is to other nodes.
- clustering coefficient - measure of the degree to which nodes cluster together.
- degree centrality - fraction of non-group members connected to group members.

All scores are descriptive statistics of the spatial graph.

In [None]:
# CANCER
# Compute the centrality scores once, then draw one plot per score.
sq.gr.centrality_scores(adata_pCancer, 'Cluster')
for _score in ('closeness_centrality', 'average_clustering', 'degree_centrality'):
    sq.pl.centrality_scores(adata_pCancer, 'Cluster', score=_score)

In [None]:
# HEALTHY
# Compute the centrality scores once, then draw one plot per score.
sq.gr.centrality_scores(adata_pHealthy, 'Cluster')
for _score in ('closeness_centrality', 'average_clustering', 'degree_centrality'):
    sq.pl.centrality_scores(adata_pHealthy, 'Cluster', score=_score)

## Feature-based statistics

### Moran's score

This example shows how to compute the Moran’s I global spatial auto-correlation statistics.

The Moran’s I global spatial auto-correlation statistic evaluates whether features (e.g. genes or, in this dataset, markers) show a pattern that is clustered, dispersed or random in the tissue area under consideration.

In [None]:
# CANCER
# Moran's I for every feature of the cancer sample; squidpy stores the result
# table in adata_pCancer.uns['moranI'].
sq.gr.spatial_autocorr(
    adata_pCancer,
    mode="moran",
    genes=adata_pCancer.var_names,
    n_perms=100,
    n_jobs=8,
)

# Keep the statistic and the multiple-testing-corrected p-value column
# ('pval_norm_fdr_bh'), i.e. the same column the healthy sample reports, so the
# two samples are comparable. Rows are sorted by index (feature name).
moran_cancer = adata_pCancer.uns['moranI'][['I', 'pval_norm_fdr_bh']].sort_index()
moran_cancer.head(10)

In [None]:
# Names of the first ten rows of the Moran table, in the order squidpy stored them
# (presumably highest Moran's I first -- confirm).
moran_cancer_top10 = adata_pCancer.uns['moranI'].index[:10].tolist()
moran_cancer_top10

In [None]:
# Drop the lineage markers from the top-10 list; the result is an unordered set.
moran_cancer_top10_diff = set(moran_cancer_top10) - set(lineage_markers)
moran_cancer_top10_diff

In [None]:
# Highly variable features of the cancer subset. The boolean mask comes from
# adata_cancer.var, so index adata_cancer.var_names with it: mask and index are
# then guaranteed to be aligned.
adata_cancer.var_names[adata_cancer.var.highly_variable]

In [None]:
# Remove every entry of hvg_cancer_set from the remaining candidates.
# hvg_cancer_set is defined outside this section (presumably the highly
# variable features of the cancer sample -- confirm).
moran_cancer_top10_diff2 = moran_cancer_top10_diff.difference(hvg_cancer_set)
moran_cancer_top10_diff2

In [None]:
# Spatial maps of the remaining top Moran's I features in the cancer point.
sq.pl.spatial_segment(
    adata,
    img=None,
    # A set is unordered and is not a sequence; pass a sorted list so the
    # panels appear in the same order on every run.
    color=sorted(moran_cancer_top10_diff2),
    library_key='library_id',
    library_id=Point_cancer,
    seg_cell_id='cell_id',
    scalebar_dx=scale_factor,
    scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
    axis_label=["", ""],
    ncols=3,
    figsize=[3, 3],
    wspace=0.25,
    dpi=600,
)

In [None]:
# HEALTHY
# Moran's I for every feature of the healthy sample.
sq.gr.spatial_autocorr(
    adata_pHealthy, mode="moran", genes=adata_pHealthy.var_names, n_perms=100, n_jobs=8
)

# Statistic plus the 'pval_norm_fdr_bh' column, rows sorted by index (feature name).
moran_healthy = adata_pHealthy.uns['moranI'].loc[:, ['I', 'pval_norm_fdr_bh']].sort_index()
moran_healthy.head(10)


In [None]:
# Names of the first ten rows of the Moran table, in the order squidpy stored them
# (presumably highest Moran's I first -- confirm).
moran_healthy_top10 = adata_pHealthy.uns['moranI'].index[:10].tolist()
moran_healthy_top10

In [None]:
# Drop the lineage markers from the top-10 list; the result is an unordered set.
moran_healthy_top10_diff = set(moran_healthy_top10) - set(lineage_markers)
moran_healthy_top10_diff

In [None]:
# Remove every entry of hvg_healthy_set from the remaining candidates.
# hvg_healthy_set is defined outside this section (presumably the highly
# variable features of the healthy sample -- confirm).
moran_healthy_top10_diff2 = moran_healthy_top10_diff.difference(hvg_healthy_set)
moran_healthy_top10_diff2

In [None]:
# Spatial maps of the remaining top Moran's I features in the healthy point.
sq.pl.spatial_segment(
    adata,
    img=None,
    # A set is unordered and is not a sequence; pass a sorted list so the
    # panels appear in the same order on every run.
    color=sorted(moran_healthy_top10_diff2),
    library_key='library_id',
    library_id=Point_healthy,
    seg_cell_id='cell_id',
    scalebar_dx=scale_factor,
    scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
)

In [None]:
# Display the whole healthy-sample Moran table (all rows, not only the first ten).
moran_healthy

In [None]:
# Side-by-side Moran table for both samples. Each label is derived from the
# column it actually came from by adding a sample suffix, which avoids
# hand-typed names drifting from the data (the previous labels contained the
# typo 'I_heathy'). pd.concat aligns the rows on the shared feature index.
moran = pd.concat(
    [moran_cancer.add_suffix('_cancer'), moran_healthy.add_suffix('_healthy')],
    axis=1,
)
# to_markdown() only returns a string; print it so the table is readable
# instead of being shown as an escaped repr.
print(moran.to_markdown())

## Sepal score

- Only accepts grid-like spatial graphs
- It is useful to filter out genes that are expressed in very few observations and might be wrongly identified as being spatially variable. If you are pre-processing with Scanpy, there is a convenient function that can be used BEFORE normalization: `scanpy.pp.calculate_qc_metrics()`. It computes several useful summary statistics on both the observation and the feature axis. We will be using the `n_cells` column in `adata.var` to filter out genes that are expressed in fewer than 100 observations.

In [None]:
# genes = adata.var_names[(adata.var.n_cells > 100) & adata.var.highly_variable][0:100]
# sq.gr.sepal(adata, max_neighs=6, genes=genes, n_jobs=1)
# adata.uns["sepal_score"].head(10)

# sq.pl.spatial_scatter(adata, color=<top features found>)

## Receptor-ligand analysis: CellPhoneDB

It uses an efficient re-implementation of the [Efremova et al., 2020] algorithm which can handle large number of interacting pairs (100k+) and cluster combinations (100+).

In this specific case it is not applicable, since the dataset contains no interacting feature pairs (e.g. ligand–receptor pairs).

In [None]:
# rla = sq.gr.ligrec(
#     adata_pCancer,
#     n_perms=1000,
#     cluster_key='Cluster',
#     copy=True,
#     use_raw=False,
#     transmitter_params={"categories": "ligand"},
#     receiver_params={"categories": "receptor"},
#     corr_method='bonferroni'
# )

In [None]:
# rla['means'].head()

In [None]:
# rla['pvalues'].head()

In [None]:
# rla['metadata'].head()

In [None]:
# In the plot below, to highlight significance, we’ve marked all p-values <= 0.005 with tori.
# sq.pl.ligrec(rla, target_groups='Epithelial', alpha=0.005)

# Temporary plots to inspect Tanevski's results

"In particular, the gain for markers CD68, ki67, and SMA were found  to be the highest, suggesting that proliferation, presence, or absence of CD68 and  changes in vascularization in different grades and clinical subtypes are significantly  affected by the change in regulation as a result of intercellular interactions. " - Tanevski2022

In [None]:
# Marker maps for the cancer point, shown next to the cluster annotation, to
# compare with the Tanevski et al. passage quoted above this cell.
# NOTE(review): the quoted passage names CD68, whereas CD11c is plotted here -- confirm which is intended.
sq.pl.spatial_segment(
    adata,
    color=['CD11c', 'SMA', 'Ki67', 'Cluster'],
    img=None,
    library_key='library_id',
    library_id=Point_cancer,
    seg_cell_id='cell_id',
    outline=True,
    ncols=3,
    scalebar_dx=scale_factor,
    scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
)

# Temporary plots to inspect Hartmann's results (paper)

In [None]:
# Collapse the cluster annotation into three groups: 'Epithelial', 'Tcell_CD8'
# and 'Other' (everything else).
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
adata.obs['cat_epithelial'] = np.where(
    np.isin(adata.obs['Cluster'], ['Epithelial', 'Tcell_CD8']),
    adata.obs['Cluster'],
    'Other',
)

In [None]:
# Cancer point coloured by the three-level 'cat_epithelial' grouping.
sq.pl.spatial_segment(
    adata,
    color='cat_epithelial',
    library_key='library_id',
    library_id=Point_cancer,
    seg_cell_id='cell_id',
    scalebar_dx=scale_factor,
    scalebar_kwargs={'scale_loc': 'bottom', 'location': 'lower right'},
    # palette='magma_r' #adata_cancer.obs['cat_epithelial'].map({'Epithelial': 'tab:red', 'Tcell_CD8': 'tab:blue', 'Other': 'tab:gray'})
)

Hartmann et al. (2021) defined the tumour-immune border under the assumption that all epithelial cells were malignant and had undergone carcinogenesis.

# Analysis to include
- Image processing and feature extraction
    - Smooth image!

# Napari

In [None]:
# viewer = img.interactive(adata)

In [None]:
# viewer.close()