# Spatial transcriptomics unveils the in situ cellular and molecular hallmarks of the lung in fatal COVID-19

# Downstream analysis of spatially mapped and deconvoluted Visium ST data

**Author:** Carlos A. Garcia-Prieto

* This notebook explains the downstream analysis of the spatially mapped and deconvoluted Visium ST data with cell2location, including integration of Visium ST slides, estimated cell type abundances plots, identification of cell type marker genes and cellular compartments.
* We followed the [cell2location](https://cell2location.readthedocs.io/en/latest/notebooks/cell2location_tutorial.html) and single-cell best practices [spatial deconvolution](https://www.sc-best-practices.org/spatial/deconvolution.html#cell2location) tutorials.

## Import modules

In [1]:
import warnings
warnings.filterwarnings('ignore')
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import matplotlib as mpl
import cell2location
from cell2location.models import RegressionModel
from cell2location.utils import select_slide
from cell2location import run_colocation
import scvi
import os
import colorcet as cc
import seaborn as sns
from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs
pd.set_option('display.max_columns', 100)

Global seed set to 0
  new_rank_zero_deprecation(
  return new_rank_zero_deprecation(*args, **kwargs)


## Read reference model with cell type signatures

In [2]:
# Set data folders.
# The root defaults to the location used for the publication; set the
# COVID19_C2L_RESULTS_FOLDER environment variable to run the notebook elsewhere
# without editing this cell.
results_folder = os.environ.get(
    "COVID19_C2L_RESULTS_FOLDER",
    "/mnt/beegfs/cgarcia/Spatial/COVID19/cell2location/HLCA_publication/HLCA/",
)
# os.path.join accepts the root with or without a trailing separator.
ref_run_name = os.path.join(results_folder, "reference_signatures_finest")  # reference signature model folder
run_name = os.path.join(results_folder, "cell2location_map_finest")  # spatial mapping model folder

In [3]:
# Reload the reference AnnData and the trained regression model that was saved
# alongside it in the reference run folder.
adata_file_ref = f"{ref_run_name}/adata_ref_finest_post_prob.h5ad"
adata_ref = sc.read_h5ad(adata_file_ref)
mod_ref = RegressionModel.load(ref_run_name, adata_ref)

[34mINFO    [0m File                                                                                                      
         [35m/mnt/beegfs/cgarcia/Spatial/COVID19/cell2location/HLCA_publication/HLCA/reference_signatures_finest/[0m[95mmodel.[0m
         [95mpt[0m already downloaded                                                                                     


No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 1/60:   2%|█▍                                                                                   | 1/60 [00:04<04:14,  4.31s/it, v_num=1, elbo_train=3.16e+9]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 1/60:   2%|█▍                                                                                   | 1/60 [00:04<04:17,  4.36s/it, v_num=1, elbo_train=3.16e+9]


## Extract cell type specific gene markers

In [4]:
# Compute hierarchical clustering of the annotated cell types from the expression of all genes
sc.tl.dendrogram(
    adata_ref,
    groupby='ann_finest_level',
    var_names=adata_ref.var.index,
    use_raw=False,
)

In [5]:
# Rank marker genes for every cell type with a t-test on the "normalized_counts" layer
sc.tl.rank_genes_groups(
    adata_ref,
    groupby="ann_finest_level",
    layer="normalized_counts",
    use_raw=False,
    method="t-test",
)

## Plot cell type marker genes

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot Figure S1
</div>

In [6]:
# Dot plot of the top 3 marker genes per cell type, with groups ordered by the dendrogram
dotplot_rc = {'axes.facecolor': 'white', 'figure.figsize': [2, 2.5]}
dotplot_file = f"{ref_run_name}/Reference_Signatures_Marker_Genes_Dotplot_Paper.png"
with mpl.rc_context(dotplot_rc):
    sc.pl.rank_genes_groups_dotplot(
        adata_ref,
        groupby="ann_finest_level",
        n_genes=3,
        gene_symbols="feature_name",
        standard_scale='var',
        dendrogram="dendrogram_ann_finest_level",
        show=False,  # keep the figure open so it can be saved below
    )
    plt.savefig(dotplot_file, dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
    plt.close()

### Save reference cell type signatures with marker genes and extract the most differentially expressed marker genes with pval adj < 0.05 & lfc > 2

In [7]:
# Full marker gene table for all cell types
marker_genes = sc.get.rank_genes_groups_df(
    adata_ref,
    group=None,
    gene_symbols="feature_name",
)
marker_genes.to_csv(f"{ref_run_name}/Marker_genes_Paper.csv")

# Same table restricted to adjusted p-value < 0.05 and log2 fold change > 2
marker_genes_filter = sc.get.rank_genes_groups_df(
    adata_ref,
    group=None,
    gene_symbols="feature_name",
    pval_cutoff=0.05,
    log2fc_min=2,
)
marker_genes_filter.to_csv(f"{ref_run_name}/Marker_genes_filter_pvaladj005_logfc2_Paper.csv")

In [8]:
# Save the reference AnnData, which now also carries the marker gene results
adata_file_ref_markers = (
    f"{ref_run_name}/adata_ref_finest_post_prob_marker_genes_Paper.h5ad"
)
adata_ref.write(adata_file_ref_markers)

## Read cell2location model with mapped and deconvoluted Visium ST data

In [9]:
# Reload the Visium AnnData and the trained cell2location mapping model
# that was saved alongside it in the mapping run folder.
adata_file = f"{run_name}/ad_vis_post_distrib_finest.h5ad"
adata_vis = sc.read_h5ad(adata_file)
mod = cell2location.models.Cell2location.load(run_name, adata_vis)

[34mINFO    [0m File                                                                                                      
         [35m/mnt/beegfs/cgarcia/Spatial/COVID19/cell2location/HLCA_publication/HLCA/cell2location_map_finest/[0m[95mmodel.pt[0m 
         already downloaded                                                                                        


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 1/30000:   0%|                                                                             | 1/30000 [00:00<6:05:32,  1.37it/s, v_num=1, elbo_train=7.39e+8]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 1/30000:   0%|                                                                             | 1/30000 [00:00<6:08:51,  1.36it/s, v_num=1, elbo_train=7.39e+8]


In [10]:
# Display the loaded cell2location model object
mod



## Visualizing cell abundance in spatial coordinates

In [11]:
# Copy the 5% quantile of the posterior cell abundance into obs, one column per cell type.
# This quantile is the abundance the model has high confidence in
# ('at least this amount is present').
factor_names = adata_vis.uns['mod']['factor_names']
adata_vis.obs[factor_names] = adata_vis.obsm['q05_cell_abundance_w_sf']

In [12]:
# Show the cell type abundance columns now present in obs
adata_vis.obs[adata_vis.uns['mod']['factor_names']].columns

Index(['Basal resting', 'Suprabasal', 'Deuterosomal',
       'Multiciliated (non-nasal)', 'Club (non-nasal)', 'AT0',
       'pre-TB secretory', 'AT1', 'AT2', 'AT2 proliferating', 'EC arterial',
       'EC aerocyte capillary', 'EC general capillary', 'EC venous systemic',
       'EC venous pulmonary', 'Lymphatic EC mature',
       'Lymphatic EC differentiating', 'Peribronchial fibroblasts',
       'Adventitial fibroblasts', 'Alveolar fibroblasts', 'Pericytes',
       'Subpleural fibroblasts', 'Myofibroblasts', 'Smooth muscle',
       'SM activated stress response', 'Mesothelium', 'B cells',
       'Plasma cells', 'CD4 T cells', 'CD8 T cells', 'T cells proliferating',
       'NK cells', 'DC1', 'DC2', 'Migratory DCs', 'Plasmacytoid DCs',
       'Alveolar macrophages', 'Alveolar Mph CCL3+',
       'Alveolar Mph MT-positive', 'Alveolar Mph proliferating',
       'Monocyte-derived Mph', 'Interstitial Mph perivascular',
       'Classical monocytes', 'Non-classical monocytes', 'Mast cells'],
 

In [13]:
# Create the output directory for plots.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots'
os.makedirs(directory, exist_ok=True)

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panels of Figures 2B & S6
</div>

In [14]:
# Plot the estimated abundance of every cell type on each Visium slide
samples = ["L2P","L19P","L11P","HRC5","HRC6","HRC8","HRC10","HRC11","HRC12","HRC13","HRC16","HRC17","L5P","L14P","L24P","L12P","HRC2","HRC4","HRC18","L3C","L14C","L2C","CONTROL2"]
abundance_rc = {'axes.facecolor': 'white', 'figure.figsize': [4.5, 5]}
for sample in samples:
    print(sample)
    slide = select_slide(adata_vis, sample)
    # one panel per cell type, drawn on top of the tissue image
    with mpl.rc_context(abundance_rc):
        sc.pl.spatial(
            slide,
            color=adata_vis.uns['mod']['factor_names'],
            cmap='magma',
            ncols=4,
            size=1.3,
            img_key='hires',
            vmin=0,
            vmax='p99.9',  # cap the colour scale at the 99.9% quantile of cell abundance
            show=False,
        )
        plt.savefig(f"{run_name}/Plots/{sample}_cell_types_99_Paper.png", dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
        plt.close()

L2P
L19P
L11P
HRC5
HRC6
HRC8
HRC10
HRC11
HRC12
HRC13
HRC16
HRC17
L5P
L14P
L24P
L12P
HRC2
HRC4
HRC18
L3C
L14C
L2C
CONTROL2


## Identifying tissue regions by Leiden clustering

We cluster Visium spots using the estimated cell abundance of each cell type. We construct a K-nearest neighbour (KNN) graph representing the similarity of locations in estimated cell abundance and then apply Leiden clustering. The clustering is done jointly across all Visium slides and batches, hence the cluster identities are directly comparable.

In [15]:
# Build the KNN graph (8 neighbours) on the cell2location abundance estimates
sc.pp.neighbors(
    adata_vis,
    n_neighbors=8,
    use_rep='q05_cell_abundance_w_sf',
)

In [16]:
# Leiden clustering of the spots into tissue regions
sc.tl.leiden(
    adata_vis,
    resolution=0.5,
)

In [17]:
# Store the Leiden cluster labels as a categorical "region_cluster" column in obs
adata_vis.obs["region_cluster"] = adata_vis.obs["leiden"].astype("category")

## We use the location composition similarity graph to build a joint integrated UMAP representation of all Visium slides.

In [18]:
# UMAP embedding computed from the KNN graph of cell2location abundances
sc.tl.umap(
    adata_vis,
    min_dist=0.3,
    spread=1,
)

## Plot integrated UMAP by disease condition

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 1B
</div>

In [19]:
# Colour assigned to each disease condition
col_dict_subtype = dict(
    Control='#eb0077',
    Acute='#0000dd',
    Proliferative='#573b00',
)

In [20]:
# Work on a subsampled copy for the UMAP; fraction=1.0 keeps every spot.
# NOTE(review): presumably used to randomise the order in which spots are drawn — confirm.
new_adata_vis = sc.pp.subsample(adata_vis, copy=True, fraction=1.0)

In [21]:
# Integrated UMAP of all slides, coloured by condition subtype
# NOTE(review): col_dict_subtype is defined earlier in the notebook but is not passed as
# `palette` here — confirm whether those colours are meant to apply to this plot.
condition_rc = {'axes.facecolor': 'white', 'figure.figsize': [6, 6]}
condition_file = f"{run_name}/Plots/Condition_99_Paper.png"
with mpl.rc_context(condition_rc):
    sc.pl.umap(
        new_adata_vis,
        color=['condition_subtype'],
        size=30,
        wspace=0.5,
        color_map='RdPu',
        ncols=1,
        legend_loc=None,
        legend_fontsize=20,
        sort_order=False,  # keep the spot order of new_adata_vis when drawing
        frameon=False,
        vmin=[0, None],
        vmax=["p99.9", None],
        show=False,
    )
    plt.savefig(condition_file, dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
    plt.close()

## Plot integrated UMAP by sample and disease condition

In [22]:
# Create the output directory for the per-sample condition UMAPs.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots/Max_Abundance99_Subtype_Paper'
os.makedirs(directory, exist_ok=True)

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 1B
</div>

In [23]:
# Plot the integrated UMAP coordinates of each sample separately, titled with its condition subtype
samples = ["L2P","L19P","L11P","HRC5","HRC6","HRC8","HRC10","HRC11","HRC12","HRC13","HRC16","HRC17","L5P","L14P","L24P","L12P","HRC2","HRC4","HRC18","L3C","L14C","L2C","CONTROL2"]

for sample in samples:
    print(sample)
    slide = select_slide(adata_vis, f'{sample}')
    # subsampled copy of the slide (fraction=1. keeps every spot)
    new_slide = sc.pp.subsample(slide, fraction=1., copy=True)
    # `title` must be a string (or a list of strings). Passing the whole obs column only
    # worked through positional indexing of a Series, which pandas has deprecated, so
    # the value of the first spot is taken explicitly instead.
    # NOTE(review): assumes every spot of a slide carries the same condition_subtype — confirm.
    condition = str(new_slide.obs['condition_subtype'].iloc[0])
    with mpl.rc_context({'axes.facecolor':  'white',
                         'figure.figsize': [6, 6]}):
        sc.pl.umap(new_slide, color=['condition_subtype'], size=50,
                   ncols = 1, show=False, sort_order=False,
                   legend_fontsize=20, legend_fontweight='bold', projection='2d', add_outline=False, legend_loc=None, frameon=False, title=condition,
                   color_map=sns.blend_palette(["lightgray", sns.xkcd_rgb["blood"]], as_cmap=True), vmax=["p99.9",None], vmin=[0,None])
        plt.savefig(f"{run_name}/Plots/Max_Abundance99_Subtype_Paper/{sample}_Max_Abundance99_subtype_Paper.png",dpi=300, format="png",pad_inches=0.2,bbox_inches="tight")
        plt.close()

L2P
L19P
L11P
HRC5
HRC6
HRC8
HRC10
HRC11
HRC12
HRC13
HRC16
HRC17
L5P
L14P
L24P
L12P
HRC2
HRC4
HRC18
L3C
L14C
L2C
CONTROL2


## Add most abundant cell type for each spot

In [24]:
# Highest estimated abundance among all cell types, per spot.
# The cell type columns are exactly those named in adata_vis.uns['mod']['factor_names'],
# so that list is used instead of a hand-maintained copy of the 45 names.
# DataFrame.max(axis=1) is vectorised, unlike a row-by-row apply(np.max).
cell_type_cols = list(adata_vis.uns['mod']['factor_names'])
adata_vis.obs["Max_Abundance"] = adata_vis.obs[cell_type_cols].max(axis=1)

In [25]:
# Name of the most abundant cell type in each spot.
# The cell type columns are exactly those named in adata_vis.uns['mod']['factor_names'],
# so that list is used instead of a hand-maintained copy of the 45 names.
cell_type_cols = list(adata_vis.uns['mod']['factor_names'])
adata_vis.obs["Max_Abundance_Cell"] = adata_vis.obs[cell_type_cols].idxmax(axis=1)

### Set color palette for each cell type

In [26]:
#!pip install colorcet

In [27]:
# Cell types that are the most abundant one in at least one spot, in alphabetical order
max_cells = sorted(adata_vis.obs['Max_Abundance_Cell'].unique())
print(max_cells)

# One Glasbey colour (colorcet) per cell type, converted to hex codes
pal = sns.color_palette(cc.glasbey, n_colors=len(max_cells))
pal_hex = [mpl.colors.rgb2hex(rgb) for rgb in pal]

# Map every cell type to its colour
col_dict = dict(zip(max_cells, pal_hex))

['AT0', 'AT1', 'AT2', 'AT2 proliferating', 'Adventitial fibroblasts', 'Alveolar Mph CCL3+', 'Alveolar Mph MT-positive', 'Alveolar fibroblasts', 'Alveolar macrophages', 'B cells', 'Basal resting', 'CD4 T cells', 'CD8 T cells', 'Classical monocytes', 'Club (non-nasal)', 'DC1', 'DC2', 'Deuterosomal', 'EC aerocyte capillary', 'EC arterial', 'EC general capillary', 'EC venous pulmonary', 'EC venous systemic', 'Interstitial Mph perivascular', 'Lymphatic EC mature', 'Mast cells', 'Migratory DCs', 'Monocyte-derived Mph', 'Multiciliated (non-nasal)', 'Myofibroblasts', 'NK cells', 'Non-classical monocytes', 'Peribronchial fibroblasts', 'Pericytes', 'Plasma cells', 'Plasmacytoid DCs', 'SM activated stress response', 'Smooth muscle', 'Subpleural fibroblasts', 'Suprabasal', 'T cells proliferating', 'pre-TB secretory']


In [28]:
# Display the colour palette
pal

## Plot integrated UMAP by individual cell type

In [29]:
# Create the output directory for the per-cell-type UMAPs.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots/Max_Abundance99_Celltype_Paper'
os.makedirs(directory, exist_ok=True)

In [30]:
# Independent copy of the cell type palette, so that col_dict itself stays untouched
col_dict_celltype = dict(col_dict)

In [31]:
# Set every cell type in the copied palette to the same light grey
col_dict_celltype.update(dict.fromkeys(col_dict_celltype, '#E5E5E5'))

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 2B
</div>

In [32]:
# Work on a subsampled copy for the UMAP; fraction=1.0 keeps every spot.
# NOTE(review): presumably used to randomise the order in which spots are drawn — confirm.
new_adata_vis = sc.pp.subsample(adata_vis, copy=True, fraction=1.0)

In [33]:
# One UMAP per cell type: the current cell type keeps its own colour from col_dict,
# every other cell type uses the grey from col_dict_celltype.
celltype_rc = {'axes.facecolor': 'white', 'figure.figsize': [6, 6]}
for celltype in col_dict_celltype:
    print(celltype)
    # grey palette with only the current cell type switched back to its colour
    col_dict_cell = {**col_dict_celltype, celltype: col_dict[celltype]}
    with mpl.rc_context(celltype_rc):
        sc.pl.umap(
            new_adata_vis,
            color=['Max_Abundance_Cell'],
            palette=col_dict_cell,
            title=f"{celltype}",
            size=15,
            ncols=1,
            sort_order=False,
            legend_loc=None,
            legend_fontsize=5,
            legend_fontweight='bold',
            projection='2d',
            add_outline=False,
            frameon=False,
            color_map=sns.blend_palette(["lightgray", sns.xkcd_rgb["blood"]], as_cmap=True),
            vmin=[0, None],
            vmax=["p99.9", None],
            show=False,
        )
        plt.savefig(f"{run_name}/Plots/Max_Abundance99_Celltype_Paper/Max_Abundance_99_{celltype}_Paper.png", dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
        plt.close()

AT0
AT1
AT2
AT2 proliferating
Adventitial fibroblasts
Alveolar Mph CCL3+
Alveolar Mph MT-positive
Alveolar fibroblasts
Alveolar macrophages
B cells
Basal resting
CD4 T cells
CD8 T cells
Classical monocytes
Club (non-nasal)
DC1
DC2
Deuterosomal
EC aerocyte capillary
EC arterial
EC general capillary
EC venous pulmonary
EC venous systemic
Interstitial Mph perivascular
Lymphatic EC mature
Mast cells
Migratory DCs
Monocyte-derived Mph
Multiciliated (non-nasal)
Myofibroblasts
NK cells
Non-classical monocytes
Peribronchial fibroblasts
Pericytes
Plasma cells
Plasmacytoid DCs
SM activated stress response
Smooth muscle
Subpleural fibroblasts
Suprabasal
T cells proliferating
pre-TB secretory


## Identify cellular compartments using non-negative matrix factorization (NMF)
### We performed non-negative matrix factorization (NMF) of the cell type abundance estimates from cell2location to identify the spatial co-occurrence of cell types.

In [34]:
# NMF of the cell type abundance estimates across all slides together
nmf_train_args = {
    'n_fact': np.arange(8,9),
    'sample_name_col': 'sample',
    'n_restarts': 3,
}
nmf_export_args = {'path': f'{run_name}/CoLocatedCombPaper/'}
res_dict, adata_vis = run_colocation(
    adata_vis,
    model_name='CoLocatedGroupsSklearnNMF',
    train_args=nmf_train_args,
    export_args=nmf_export_args,
)

### Analysis name: CoLocatedGroupsSklearnNMF_8combinations_91068locations_45factors


### Group samples by condition and apply NMF

In [35]:
# Sample identifiers grouped by condition
controls = ["L3C", "L14C", "L2C", "CONTROL2"]
covid19_acute = ["L5P", "L14P", "L24P", "L12P", "HRC2", "HRC4", "HRC18"]
covid19_proliferative = [
    "L2P", "L19P", "L11P",
    "HRC5", "HRC6", "HRC8", "HRC10", "HRC11", "HRC12", "HRC13", "HRC16", "HRC17",
]
# All COVID-19 samples: acute first, then proliferative
covid19 = covid19_acute + covid19_proliferative

In [36]:
# Split the spots by condition.
# Indexing an AnnData returns a view; .copy() makes each subset an independent object,
# so the later run_colocation calls (which return an updated object) do not depend on
# implicit copying of a view.
adata_vis_ctl = adata_vis[adata_vis.obs['sample'].isin(controls)].copy()
adata_vis_covid_acute = adata_vis[adata_vis.obs['sample'].isin(covid19_acute)].copy()
adata_vis_covid_proliferative = adata_vis[adata_vis.obs['sample'].isin(covid19_proliferative)].copy()

In [37]:
# NMF of the cell type abundance estimates, control samples only
nmf_train_args = {
    'n_fact': np.arange(8,9),
    'sample_name_col': 'sample',
    'n_restarts': 3,
}
nmf_export_args = {'path': f'{run_name}/CoLocatedCombControlsPaper/'}
res_dict, adata_vis_ctl = run_colocation(
    adata_vis_ctl,
    model_name='CoLocatedGroupsSklearnNMF',
    train_args=nmf_train_args,
    export_args=nmf_export_args,
)

### Analysis name: CoLocatedGroupsSklearnNMF_8combinations_13488locations_45factors


In [38]:
# NMF of the cell type abundance estimates, acute COVID-19 samples only
nmf_train_args = {
    'n_fact': np.arange(8,9),
    'sample_name_col': 'sample',
    'n_restarts': 3,
}
nmf_export_args = {'path': f'{run_name}/CoLocatedCombAcutePaper/'}
res_dict, adata_vis_covid_acute = run_colocation(
    adata_vis_covid_acute,
    model_name='CoLocatedGroupsSklearnNMF',
    train_args=nmf_train_args,
    export_args=nmf_export_args,
)

### Analysis name: CoLocatedGroupsSklearnNMF_8combinations_27951locations_45factors


In [39]:
# NMF of the cell type abundance estimates, proliferative COVID-19 samples only
nmf_train_args = {
    'n_fact': np.arange(8,9),
    'sample_name_col': 'sample',
    'n_restarts': 3,
}
nmf_export_args = {'path': f'{run_name}/CoLocatedCombProliferativePaper/'}
res_dict, adata_vis_covid_proliferative = run_colocation(
    adata_vis_covid_proliferative,
    model_name='CoLocatedGroupsSklearnNMF',
    train_args=nmf_train_args,
    export_args=nmf_export_args,
)

### Analysis name: CoLocatedGroupsSklearnNMF_8combinations_49629locations_45factors


In [40]:
# Output files for the per-condition AnnData objects holding the NMF results
adata_file_ctl = f"{run_name}/ad_vis_post_distrib_finest_NMF_Controls_Paper.h5ad"
adata_file_acute = f"{run_name}/ad_vis_post_distrib_finest_NMF_Acute_Paper.h5ad"
adata_file_proliferative = f"{run_name}/ad_vis_post_distrib_finest_NMF_Proliferative_Paper.h5ad"

# Write controls, acute and proliferative in that order
adata_vis_ctl.write(adata_file_ctl)
adata_vis_covid_acute.write(adata_file_acute)
adata_vis_covid_proliferative.write(adata_file_proliferative)

### Add cell type abundance by lineage

In [41]:
# Show the cell type abundance columns available for grouping into lineages
adata_vis.obs[adata_vis.uns['mod']['factor_names']].columns

Index(['Basal resting', 'Suprabasal', 'Deuterosomal',
       'Multiciliated (non-nasal)', 'Club (non-nasal)', 'AT0',
       'pre-TB secretory', 'AT1', 'AT2', 'AT2 proliferating', 'EC arterial',
       'EC aerocyte capillary', 'EC general capillary', 'EC venous systemic',
       'EC venous pulmonary', 'Lymphatic EC mature',
       'Lymphatic EC differentiating', 'Peribronchial fibroblasts',
       'Adventitial fibroblasts', 'Alveolar fibroblasts', 'Pericytes',
       'Subpleural fibroblasts', 'Myofibroblasts', 'Smooth muscle',
       'SM activated stress response', 'Mesothelium', 'B cells',
       'Plasma cells', 'CD4 T cells', 'CD8 T cells', 'T cells proliferating',
       'NK cells', 'DC1', 'DC2', 'Migratory DCs', 'Plasmacytoid DCs',
       'Alveolar macrophages', 'Alveolar Mph CCL3+',
       'Alveolar Mph MT-positive', 'Alveolar Mph proliferating',
       'Monocyte-derived Mph', 'Interstitial Mph perivascular',
       'Classical monocytes', 'Non-classical monocytes', 'Mast cells'],
 

In [42]:
# Cell types assigned to each of the four lineages
Epithelial_cell_types = [
    'Basal resting', 'Suprabasal', 'Multiciliated (non-nasal)', 'Deuterosomal',
    'Club (non-nasal)', 'pre-TB secretory', 'AT0', 'AT1', 'AT2', 'AT2 proliferating',
]
Stromal_cell_types = [
    'Adventitial fibroblasts', 'Alveolar fibroblasts', 'Pericytes',
    'Peribronchial fibroblasts', 'Subpleural fibroblasts', 'Myofibroblasts',
    'Smooth muscle', 'SM activated stress response', 'Mesothelium',
]
Immune_cell_types = [
    'B cells', 'Plasma cells', 'CD8 T cells', 'CD4 T cells', 'T cells proliferating',
    'NK cells', 'Monocyte-derived Mph', 'Alveolar Mph CCL3+',
    'Interstitial Mph perivascular', 'Alveolar Mph MT-positive',
    'Alveolar Mph proliferating', 'Alveolar macrophages', 'Mast cells',
    'DC1', 'DC2', 'Migratory DCs', 'Plasmacytoid DCs',
    'Classical monocytes', 'Non-classical monocytes',
]
Endothelial_cell_types = [
    'EC general capillary', 'EC arterial', 'EC aerocyte capillary',
    'EC venous systemic', 'EC venous pulmonary',
    'Lymphatic EC differentiating', 'Lymphatic EC mature',
]

In [43]:
# Number of cell types in each lineage (epithelial, stromal, immune, endothelial)
tuple(
    len(lineage)
    for lineage in (Epithelial_cell_types, Stromal_cell_types, Immune_cell_types, Endothelial_cell_types)
)

(10, 9, 19, 7)

In [44]:
# Total number of cell types across the four lineages
sum(map(len, (Epithelial_cell_types, Stromal_cell_types, Immune_cell_types, Endothelial_cell_types)))

45

In [45]:
# Epithelial abundance per spot: sum over the cell types listed in Epithelial_cell_types.
# Summing from the list keeps this column in step with the lineage definition instead of
# repeating every column name by hand. skipna=False makes a missing abundance yield NaN
# rather than a silently partial sum.
adata_vis.obs['Epithelial'] = adata_vis.obs[Epithelial_cell_types].sum(axis=1, skipna=False)

In [46]:
# Stromal abundance per spot: sum over the cell types listed in Stromal_cell_types.
# Summing from the list keeps this column in step with the lineage definition instead of
# repeating every column name by hand. skipna=False makes a missing abundance yield NaN
# rather than a silently partial sum.
adata_vis.obs['Stromal'] = adata_vis.obs[Stromal_cell_types].sum(axis=1, skipna=False)

In [47]:
# Immune abundance per spot: sum over the cell types listed in Immune_cell_types.
# Summing from the list keeps this column in step with the lineage definition instead of
# repeating every column name by hand. skipna=False makes a missing abundance yield NaN
# rather than a silently partial sum.
adata_vis.obs['Immune'] = adata_vis.obs[Immune_cell_types].sum(axis=1, skipna=False)

In [48]:
# Endothelial abundance per spot: sum over the cell types listed in Endothelial_cell_types.
# Summing from the list keeps this column in step with the lineage definition instead of
# repeating every column name by hand. skipna=False makes a missing abundance yield NaN
# rather than a silently partial sum.
adata_vis.obs['Endothelial'] = adata_vis.obs[Endothelial_cell_types].sum(axis=1, skipna=False)

### Add most abundant lineage for each spot

In [49]:
# Highest lineage abundance per spot; DataFrame.max(axis=1) is vectorised,
# unlike a row-by-row apply(np.max).
adata_vis.obs["Max_Abundance_Lineage"] = adata_vis.obs[['Endothelial', 'Epithelial', 'Immune', 'Stromal']].max(axis=1)

In [50]:
# Name of the most abundant lineage in each spot
lineage_cols = ['Endothelial', 'Epithelial', 'Immune', 'Stromal']
adata_vis.obs["Max_Abundance_Cell_Lineage"] = adata_vis.obs[lineage_cols].idxmax(axis="columns")

In [51]:
# Colour assigned to each lineage
col_dict_lineage = dict(
    Endothelial='#d60000',
    Epithelial='#97ff00',
    Immune='#ffa52f',
    Stromal='#005659',
)

## Plot integrated UMAP by most abundant lineage for each spot

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 1B
</div>

In [52]:
# Work on a subsampled copy for the UMAP; fraction=1.0 keeps every spot.
# NOTE(review): presumably used to randomise the order in which spots are drawn — confirm.
new_adata_vis = sc.pp.subsample(adata_vis, copy=True, fraction=1.0)

In [53]:
# Integrated UMAP of all slides, coloured by the most abundant lineage of each spot
lineage_rc = {'axes.facecolor': 'white', 'figure.figsize': [6, 6]}
lineage_file = f"{run_name}/Plots/Max_Abundance_Lineage_99_Paper.png"
with mpl.rc_context(lineage_rc):
    sc.pl.umap(
        new_adata_vis,
        color=['Max_Abundance_Cell_Lineage'],
        palette=col_dict_lineage,
        size=15,
        ncols=1,
        sort_order=False,
        legend_loc=None,
        legend_fontsize=10,
        legend_fontweight='bold',
        projection='2d',
        add_outline=False,
        frameon=False,
        color_map=sns.blend_palette(["lightgray", sns.xkcd_rgb["blood"]], as_cmap=True),
        vmin=[0, None],
        vmax=["p99.9", None],
        show=False,
    )
    plt.savefig(lineage_file, dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
    plt.close()

## Plot integrated UMAP by sample with most abundant lineage for each spot

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 1B
</div>

In [54]:
# Create the output directory for the per-sample lineage UMAPs.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots/Max_Abundance99_Lineage_Paper'
os.makedirs(directory, exist_ok=True)

In [55]:
# Plot the integrated UMAP coordinates of each sample separately,
# coloured by the most abundant lineage of each spot
samples = ["L2P","L19P","L11P","HRC5","HRC6","HRC8","HRC10","HRC11","HRC12","HRC13","HRC16","HRC17","L5P","L14P","L24P","L12P","HRC2","HRC4","HRC18","L3C","L14C","L2C","CONTROL2"]
sample_lineage_rc = {'axes.facecolor': 'white', 'figure.figsize': [6, 6]}

for sample in samples:
    print(sample)
    slide = select_slide(adata_vis, sample)
    # subsampled copy of the slide (fraction=1.0 keeps every spot)
    new_slide = sc.pp.subsample(slide, copy=True, fraction=1.0)
    with mpl.rc_context(sample_lineage_rc):
        sc.pl.umap(
            new_slide,
            color=['Max_Abundance_Cell_Lineage'],
            palette=col_dict_lineage,
            size=50,
            ncols=1,
            sort_order=False,
            legend_loc='right margin',
            legend_fontsize=10,
            legend_fontweight='bold',
            projection='2d',
            add_outline=False,
            frameon=False,
            color_map=sns.blend_palette(["lightgray", sns.xkcd_rgb["blood"]], as_cmap=True),
            vmin=[0, None],
            vmax=["p99.9", None],
            show=False,
        )
        plt.savefig(f"{run_name}/Plots/Max_Abundance99_Lineage_Paper/{sample}_Max_Abundance99_lineage_Paper.png", dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
        plt.close()

L2P
L19P
L11P
HRC5
HRC6
HRC8
HRC10
HRC11
HRC12
HRC13
HRC16
HRC17
L5P
L14P
L24P
L12P
HRC2
HRC4
HRC18
L3C
L14C
L2C
CONTROL2


## Plot most abundant cell type per spot on top of Visium ST images

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 2B
</div>

In [56]:
# Create the output directory for the cell type overlays on histology.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots/Max_Abundance_Cell_Histology_Paper'
os.makedirs(directory, exist_ok=True)

In [57]:
# Plot the most abundant cell type per spot in spatial coordinates, on top of the tissue image
samples = ["L2P","L19P","L11P","HRC5","HRC6","HRC8","HRC10","HRC11","HRC12","HRC13","HRC16","HRC17","L5P","L14P","L24P","L12P","HRC2","HRC4","HRC18","L3C","L14C","L2C","CONTROL2"]
cell_histology_rc = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}
for sample in samples:
    print(sample)
    slide = select_slide(adata_vis, sample)
    with mpl.rc_context(cell_histology_rc):
        sc.pl.spatial(
            slide,
            color=['Max_Abundance_Cell'],
            library_id=sample,
            palette=col_dict,
            img_key='hires',
            size=1.3,
            alpha=0.5,
            show=False,
        )
        plt.savefig(f"{run_name}/Plots/Max_Abundance_Cell_Histology_Paper/{sample}_Max_Abundance_Cell_Histology_Paper.png", dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
        plt.close()


L2P
L19P
L11P
HRC5
HRC6
HRC8
HRC10
HRC11
HRC12
HRC13
HRC16
HRC17
L5P
L14P
L24P
L12P
HRC2
HRC4
HRC18
L3C
L14C
L2C
CONTROL2


## Plot most abundant lineage per spot on top of Visium ST images

<div class="alert alert-info">
<b>Paper Figure!</b>
Plot panel Figure 1C
</div>

In [58]:
# Create the output directory for the lineage overlays on histology.
# exist_ok=True makes the call safe to re-run and avoids the check-then-create race.
directory = f'{run_name}/Plots/Max_Abundance_Lineage_Histology_Paper'
os.makedirs(directory, exist_ok=True)

In [59]:
# Plot the most abundant lineage per spot in spatial coordinates, on top of the tissue image
samples = ["L2P","L19P","L11P","HRC5","HRC6","HRC8","HRC10","HRC11","HRC12","HRC13","HRC16","HRC17","L5P","L14P","L24P","L12P","HRC2","HRC4","HRC18","L3C","L14C","L2C","CONTROL2"]
lineage_histology_rc = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}
for sample in samples:
    print(sample)
    slide = select_slide(adata_vis, sample)
    with mpl.rc_context(lineage_histology_rc):
        sc.pl.spatial(
            slide,
            color=['Max_Abundance_Cell_Lineage'],
            library_id=sample,
            palette=col_dict_lineage,
            img_key='hires',
            size=1.3,
            alpha=1,
            show=False,
        )
        plt.savefig(f"{run_name}/Plots/Max_Abundance_Lineage_Histology_Paper/{sample}_Max_Abundance_Lineage_Histology_Paper.png", dpi=300, format="png", pad_inches=0.2, bbox_inches="tight")
        plt.close()

L2P
L19P
L11P
HRC5
HRC6
HRC8
HRC10
HRC11
HRC12
HRC13
HRC16
HRC17
L5P
L14P
L24P
L12P
HRC2
HRC4
HRC18
L3C
L14C
L2C
CONTROL2


In [60]:
# Write the final AnnData (regions, NMF results and lineage columns) and show where it went
adata_file = (
    f"{run_name}/ad_vis_post_distrib_finest_discrete_region_NMF_lineage_Paper.h5ad"
)
adata_vis.write(adata_file)
adata_file

'/mnt/beegfs/cgarcia/Spatial/COVID19/cell2location/HLCA_publication/HLCA/cell2location_map_finest/ad_vis_post_distrib_finest_discrete_region_NMF_lineage_Paper.h5ad'

### Modules and their versions used for this analysis


Useful for debugging and reporting issues.

In [61]:
# Print the imported modules and their versions
cell2location.utils.list_imported_modules()

sys 3.9.16 (main, Jan 11 2023, 16:05:54) 
[GCC 11.2.0]
re 2.2.1
ipykernel._version 6.20.2
json 2.0.9
jupyter_client._version 8.0.1
logging 0.5.1.2
platform 1.0.8
_ctypes 1.1.0
ctypes 1.1.0
zmq.sugar.version 25.0.0
zmq.sugar 25.0.0
zmq 25.0.0
traitlets._version 5.8.1
traitlets 5.8.1
jupyter_core.version 5.1.5
jupyter_core 5.1.5
tornado 6.2
zlib 1.0
_curses b'2.2'
socketserver 0.4
argparse 1.1
dateutil._version 2.8.2
dateutil 2.8.2
six 1.16.0
_decimal 1.70
decimal 1.70
platformdirs.version 2.6.2
platformdirs 2.6.2
_csv 1.0
csv 1.0
jupyter_client 8.0.1
ipykernel 6.20.2
IPython.core.release 8.9.0
executing.version 1.2.0
executing 1.2.0
pure_eval.version 0.2.2
pure_eval 0.2.2
stack_data.version 0.6.2
stack_data 0.6.2
pygments 2.14.0
ptyprocess 0.7.0
pexpect 4.8.0
IPython.core.crashhandler 8.9.0
pickleshare 0.7.5
backcall 0.2.0
decorator 5.1.1
_sqlite3 2.6.0
sqlite3.dbapi2 2.6.0
sqlite3 2.6.0
wcwidth 0.2.6
prompt_toolkit 3.0.36
parso 0.8.3
jedi 0.18.2
urllib.request 3.9
IPython.core.magics.c