In [None]:
import scanpy as sc
from anndata import AnnData
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scanpy.external as sce
import triku as tk
seed = 0

# Trajectory inference of the fibroblast population
In this notebook we run the trajectory analysis and generate the graphs shown in **Figure 2C** of the manuscript.

## Preliminary analysis

In [None]:
# Load the fibroblast dataset (path is relative to the notebook's working directory).
# NOTE(review): the cells below plot a UMAP and build graph layouts without computing an
# embedding or neighbour graph first, so both are presumably stored in this file -- confirm.
adata = sc.read('../data/adata_fibroblast.h5')

In [None]:
# Fixed colour for each fibroblast subpopulation, so all figures share one palette.
# Written as (label, hex colour) pairs; the dict keeps them in this order.
# NOTE(review): the '0A' label spells 'Crispl2' while a later gene list uses 'Crispld2' -- confirm.
dict_fbs_colors = dict([
    ('0A: FB Ptgs2$^+$Crispl2$^+$', '#67dcf1'),
    ('0B: FB Hilpda$^+$Mt2$^+$', '#702a17'),
    ('1: FB Msc$^+$Itga7$^+$', '#df7935'),
    ('2: FB Serpine2$^+$Col23a1$^+$', '#20668d'),
    ('3: FB Cxcl1$^+$Ccl2$^+$', '#a05a2c'),
    ('4: FB Rab37$^+$Col22a1$^+$', '#d40055'),
    ('5: FB Clec3b$^+$Comp$^+$', '#ff5599'),
    ('6: FB Cilp2$^+$Fbn1$^+$', '#8e0c6f'),
    ('7: FB Cxcl12$^+$C1s1$^+$', '#ff9955'),
    ('8: FB Ccn5$^+$Phldb2$^+$', '#d3bc5f'),
    ('9: FB Ltbp2$^+$Chodl$^+$', '#ae54b8'),
    ('10: FB Coch$^+$Emid1$^+$', '#4d0765'),
    ('11: FB Cfh$^+$Spp1$^+$', '#229eb2'),
])

In [None]:
# Store the palette under the '<obs column>_colors' keys for the three subtype annotations.
# NOTE(review): only the colour values are stored, so they are matched to categories by
# position; this assumes the category order (and number) of each obs column follows
# dict_fbs_colors -- confirm.
adata.uns['subtype_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_number_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_name_colors'] = list(dict_fbs_colors.values())

In [None]:
# Overview of the subpopulations on the UMAP, labelled on the plot.
sc.pl.umap(adata, color='subtype_number', legend_loc='on data', alpha=0.8)

### Run PAGA

In [None]:
# Disabled alternative (not executed): rebuild the neighbour graph on a diffusion map.
# sc.tl.diffmap(adata)
# sc.pp.neighbors(adata, n_neighbors=10, use_rep='X_diffmap')

In [None]:
# Force-directed graph layout of the cells.
sc.tl.draw_graph(adata)

In [None]:
sc.pl.draw_graph(adata, color='subtype_number', legend_loc='on data')

In [None]:
# PAGA connectivities between the 'subtype_number' groups.
sc.tl.paga(adata, groups='subtype_number')

In [None]:
# Plot the coarse-grained PAGA graph; this call precedes the PAGA-initialised layout below.
sc.pl.paga(adata, color=['subtype_number'], fontsize=16, node_size_scale=10, cmap='viridis', fontoutline=4)

In [None]:
# Recompute the force-directed layout, initialised from the PAGA graph.
sc.tl.draw_graph(adata, init_pos='paga')

In [None]:
# PAGA graph coloured by subtype and by selected genes.
sc.pl.paga(adata, color=['subtype_number', 'Hilpda', 'Ptgs2', 'Pdpn', 'Il1rl1'],fontsize=16, node_size_scale=10, cmap='viridis', fontoutline=4)

### Generate graph with all fibroblast populations


In [None]:
# Cell embedding next to the PAGA graph; save=True asks scanpy to also write the figure to disk.
sc.pl.paga_compare(
    adata, threshold=0.03, title='', right_margin=0.2, size=10, edge_width_scale=0.5,
    legend_fontsize=12, fontsize=12, frameon=False, edges=True, save=True)

In [None]:
# Root cell for the pseudotime: index of the first cell whose subtype_number is '4'.
adata.uns['iroot'] = np.flatnonzero(adata.obs['subtype_number']  == '4')[0]

In [None]:
# NOTE(review): identical to the sc.tl.paga call in the "Run PAGA" cells -- looks redundant; confirm.
sc.tl.paga(adata, groups='subtype_number')

In [None]:
# Diffusion pseudotime; read below as obs['dpt_pseudotime'].
# NOTE(review): the sc.tl.diffmap call in this section is commented out -- confirm that
# dpt is meant to run without an explicit diffusion map step here.
sc.tl.dpt(adata)

In [None]:
# Marker genes shown on the UMAP and along the PAGA paths.
# Each gene is listed exactly once: a repeated entry ('Cd14') would be drawn as a
# duplicated UMAP panel and a duplicated heatmap row / CSV column.
markers = ['Postn', 'Tnc', 'Ccn2', 'Cd14', 'Pi16', 'Clu', 'Ecrg4']

In [None]:
# Pseudotime and marker expression on the UMAP (use_raw=False: values are taken from adata.X).
sc.pl.umap(adata, color=['dpt_pseudotime', 'Pdpn', 'Ptgs2', 'Hilpda', 'Il1rl1'] + markers, use_raw=False)

In [None]:
# NOTE(review): this assignment is overwritten by the next cell before it is used -- confirm
# which of the two gene lists is intended.
gene_names = ['Pdpn', 'Ptgs2', 'Hilpda', 'Il1rl1'] + markers

In [None]:
# Genes plotted along the paths below.
gene_names = markers

In [None]:
sc.pl.draw_graph(adata, color=['subtype_number', 'dpt_pseudotime'], legend_loc='on data')

In [None]:
# Trajectories as (label, ordered list of subtype_number groups) pairs.
paths = [('4-3-1-2', ['4', '3', '1', '2']),
         ('4-3-0', ['4', '3', '0'])]

In [None]:
# Expose the pseudotime under the name used for the path annotation ('distance').
adata.obs['distance'] = adata.obs['dpt_pseudotime']

In [None]:
adata.obs['clusters'] = adata.obs['subtype_number']  # just a cosmetic change: change names to subtype

In [None]:
# Give the 'clusters' alias the same colours as 'subtype_number'.
adata.uns['clusters_colors'] = adata.uns['subtype_number_colors']

In [None]:
# Heatmap of `gene_names` along each PAGA path (one panel per entry of `paths`), with the
# pseudotime ('distance') drawn as an annotation under each panel.
fig, axs = plt.subplots(ncols=2, figsize=(6, 4), gridspec_kw={'wspace': 0.05, 'left': 0.12})
fig.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
for ipath, (descr, path) in enumerate(paths):
    # With show=False and return_data=True, paga_path returns (axes, DataFrame of plotted values).
    # NOTE(review): use_raw is left at paga_path's default here, while the analysis of
    # populations 0-4 passes use_raw=False -- confirm this difference is intended.
    _, data = sc.pl.paga_path(
        adata, path, gene_names,
        show_node_names=False,
        ax=axs[ipath],
        ytick_fontsize=12,
        left_margin=0.15,
        n_avg=50,
        annotations=['distance'],
        show_yticks=(ipath == 0),  # gene labels only on the first panel
        show_colorbar=False,
        color_map='Blues',
        groups_key='clusters',
        color_maps_annotations={'distance': 'viridis'},
        title='{} path'.format(descr),
        return_data=True,
        show=False)
    # The CSV is named after this figure: '../output/paga_path_<descr>.csv' is also written
    # by later cells of this notebook, which would silently overwrite these values.
    data.to_csv('../output/paga_path_fibros_selected_markers_flipped_{}.csv'.format(descr))
fig.savefig('../output/paga_path_fibros_selected_markers_flipped.pdf')
plt.show()

## Analysis on populations 0, 1, 2, 3 and 4
We study the two trajectories: '4-3-0' and '4-3-1-2'. 

In [None]:
# Reload the full fibroblast object; it is restricted to populations 0-4 a few cells below.
adata = sc.read('../data/adata_fibroblast.h5')

In [None]:
# Fixed colour for each fibroblast subpopulation, so all figures share one palette.
# Written as (label, hex colour) pairs; the dict keeps them in this order.
dict_fbs_colors = dict([
    ('0A: FB Ptgs2$^+$Crispl2$^+$', '#67dcf1'),
    ('0B: FB Hilpda$^+$Mt2$^+$', '#702a17'),
    ('1: FB Msc$^+$Itga7$^+$', '#df7935'),
    ('2: FB Serpine2$^+$Col23a1$^+$', '#20668d'),
    ('3: FB Cxcl1$^+$Ccl2$^+$', '#a05a2c'),
    ('4: FB Rab37$^+$Col22a1$^+$', '#d40055'),
    ('5: FB Clec3b$^+$Comp$^+$', '#ff5599'),
    ('6: FB Cilp2$^+$Fbn1$^+$', '#8e0c6f'),
    ('7: FB Cxcl12$^+$C1s1$^+$', '#ff9955'),
    ('8: FB Ccn5$^+$Phldb2$^+$', '#d3bc5f'),
    ('9: FB Ltbp2$^+$Chodl$^+$', '#ae54b8'),
    ('10: FB Coch$^+$Emid1$^+$', '#4d0765'),
    ('11: FB Cfh$^+$Spp1$^+$', '#229eb2'),
])

In [None]:
# Store the palette under the '<obs column>_colors' keys for the three subtype annotations.
# NOTE(review): colours are matched to categories by position -- confirm the category order
# of each obs column follows dict_fbs_colors.
adata.uns['subtype_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_number_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_name_colors'] = list(dict_fbs_colors.values())

In [None]:
# Select populations 0-4
adata = adata[adata.obs['subtype_number'].isin(['4', '3', '1', '2', '0'])]
sc.pp.filter_genes(adata, min_cells=5)

In [None]:
sc.pl.umap(adata, color=['subtype_number', 'batch'], legend_loc='on data')

In [None]:
# Rerun preprocessing on the selected populations.
# Neighbourhood size: half the square root of the number of cells, truncated to an int.
n_neighbors_subset = int(len(adata) ** 0.5 * 0.5)


def _pca_harmony_neighbors(data, k):
    """PCA (50 components) -> Harmony over 'batch' -> cosine kNN graph on 'X_pca_harmony'."""
    sc.pp.pca(data, random_state=seed, n_comps=50)
    sce.pp.harmony_integrate(data, key='batch', max_iter_harmony=50, plot_convergence=True)
    sc.pp.neighbors(data, random_state=seed, n_neighbors=k, metric='cosine',
                    use_rep='X_pca_harmony')


_pca_harmony_neighbors(adata, n_neighbors_subset)
# triku feature selection runs between the two embedding passes.
tk.tl.triku(adata, use_raw=False)
_pca_harmony_neighbors(adata, n_neighbors_subset)
sc.tl.umap(adata)

In [None]:
# Selected populations on the recomputed UMAP.
sc.pl.umap(adata, color='subtype_number', legend_loc = 'on data', alpha=0.6)

In [None]:
# NOTE(review): `markers` is reassigned to scale_1 + scale_2 a few cells below without this
# list being used in between -- confirm whether this cell is still needed.
markers = ['Vim', 'Tgfb1', 'Tgfb2', 'Tgfbr1', 'Tgfbr3', 'Fap', 'Cd14', 'Ccn2', 'Spp1', 'Clu', 
           'Ecrg4', 'Tagln', 'Acta2', 'Cxcl1', 'Ccl2', 'Il6', 'Il1r1', 'Lif', 'Tnc', 
           'Ptgs2', 'Has1', 'Ly6c1', 'Pi16', 'Mmp2', 'Mmp3']

In [None]:
# Two marker sets with different mean expression, kept apart so that each set can be
# examined on its own.
scale_1 = [
    'Vim', 'Mmp2', 'Mmp14', 'Mmp3', 'Mmp13', 'Timp3', 'Serpine2',
    'Pdpn', 'Ptgs2', 'Ccn2', 'Tnc', 'Cxcl1', 'Ccl2',
]
scale_2 = [
    'Il6', 'Lif', 'Has1', 'Acta2', 'Tagln', 'Ly6c1',
    'Fap', 'Il1r1', 'Tgfb1', 'Tgfb2', 'Tgfbr1',
]

In [None]:
# Print the scale_2 genes, one per line.
for gene in scale_2: 
    print(gene)

In [None]:
# Combined marker list used by the heatmap and the path plots below.
markers = scale_1 + scale_2

In [None]:
sc.pl.umap(adata, color=['subtype_number'] + scale_1, use_raw=False, legend_loc='on data', legend_fontsize=14, legend_fontoutline=5)

In [None]:
# show=False leaves the figure open so that plt.savefig writes it.
# NOTE(review): this is the only output written to '../output/figures/' -- confirm the
# directory exists and that the location is intended.
sc.pl.heatmap(adata, var_names=markers, groupby='subtype_number', use_raw=False, show=False, vmax=4)
plt.savefig('../output/figures/Fibro_subtypes_markers_2023_02_03.png', dpi=400)

In [None]:
sc.pl.umap(adata, color=['subtype_number', 'Tgfbr3', 'Il1r1'], use_raw=False, legend_loc='on data', legend_fontsize=14, legend_fontoutline=5, show=False)
plt.savefig('../output/Il1r1_and_Tgfbr3.png', dpi=400)

### Generate Figure 2C
In Figure 2C, we show the PAGA graph superimposed on the scatter plot of the cells. We obtain the two plots as follows:

In [None]:
# NOTE(review): the neighbour graph above was built with use_rep='X_pca_harmony'; confirm
# whether this extra PCA (default parameters, no explicit random_state) feeds any later step.
sc.tl.pca(adata)

In [None]:
# Diffusion map of the cells (sc.tl.dpt is run further down in this section).
sc.tl.diffmap(adata)

In [None]:
# Force-directed graph layout of the cells.
sc.tl.draw_graph(adata)

In [None]:
sc.tl.paga(adata, groups='subtype_number')

In [None]:
sc.tl.draw_graph(adata, init_pos='paga')

In [None]:
# Cell embedding next to the PAGA graph for the selected populations.
# NOTE(review): show=False and both savefig lines are commented out, so this cell does not
# write the figure to disk -- confirm how the Figure 2C files are produced.
# fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sc.pl.paga_compare(
    adata, threshold=0.03, title='', right_margin=0.2, size=20, edge_width_scale=0.5,
    legend_fontsize=14, fontsize=12, frameon=False, edges=True, legend_fontoutline=3,show=False)
# plt.savefig('../output/paga_graph.png', dpi=400)
# plt.savefig('../output/paga_graph.svg')

In [None]:
# Root cell for the pseudotime: index of the first cell whose subtype_number is '4'.
adata.uns['iroot'] = np.flatnonzero(adata.obs['subtype_number']  == '4')[0]

In [None]:
# NOTE(review): same call as a few cells above -- looks redundant; confirm.
sc.tl.paga(adata, groups='subtype_number')

In [None]:
# Diffusion pseudotime; read below as obs['dpt_pseudotime'].
sc.tl.dpt(adata)

## Define trajectories

In [None]:
# Trajectories as (label, ordered list of subtype_number groups) pairs.
# paths[0] is '4-3-0' and paths[1] is '4-3-1-2'.
paths = [('4-3-0', ['4', '3', '0']),
         ('4-3-1-2', ['4', '3', '1', '2'])]

In [None]:
# Expose the pseudotime under the name used for the path annotation ('distance').
adata.obs['distance'] = adata.obs['dpt_pseudotime']

In [None]:
adata.obs['clusters'] = adata.obs['subtype_number']  # just a cosmetic change

In [None]:
# Give the 'clusters' alias the same colours as 'subtype_number'.
adata.uns['clusters_colors'] = adata.uns['subtype_number_colors']

In [None]:
# Heatmap of `markers` along each PAGA path (one panel per entry of `paths`), with the
# pseudotime ('distance') drawn as an annotation under each panel.
fig, axs = plt.subplots(ncols=2, figsize=(4, 8), gridspec_kw={'wspace': 0.05, 'left': 0.12})
fig.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
for ipath, (descr, path) in enumerate(paths):
    # With show=False and return_data=True, paga_path returns (axes, DataFrame of plotted values).
    _, data = sc.pl.paga_path(
        adata, path, markers,
        show_node_names=False,
        ax=axs[ipath],
        ytick_fontsize=12,
        left_margin=0.15,
        n_avg=50,
        annotations=['distance'],
        show_yticks=(ipath == 0),  # gene labels only on the first panel
        show_colorbar=False,
        color_map='Blues',
        groups_key='clusters',
        color_maps_annotations={'distance': 'viridis'},
        title='{} path'.format(descr),
        return_data=True,
        use_raw=False,
        show=False)
    # The CSV is named after this figure: '../output/paga_path_<descr>.csv' is also written
    # by plot_path_trajectory for a different gene set, which would overwrite these values.
    data.to_csv('../output/trajectories_{}.csv'.format(descr))
fig.savefig('../output/trajectories.pdf', bbox_inches = 'tight')
fig.savefig('../output/trajectories.png', dpi=400, bbox_inches = 'tight')

plt.show()

In [None]:
# Genes to plot for each trajectory, keyed by the trajectory label used in `paths`.
# From here on `gene_names` is a dict (label -> list of genes).
gene_names = {'4-3-1-2': ['Clec3b', 'C1s1', 'Cxcl12', 'Acta2', 'Ccl2', 'Cxcl1', 'Comp', 'Ccn2'],
              '4-3-0': ['Clec3b', 'Cxcl12', 'C1s1', 'Cxcl1', 'Acta2', 'Ccl2', 'Mt2', 'Hilpda', 'Il1rl1', 'Pdpn', 'Tnc', 'Ptgs2', 'Serpine2', 'Crispld2']}

In [None]:
# Print the genes of the '4-3-0' trajectory, one per line.
for g in gene_names['4-3-0']: 
    print(g)

In [None]:
def plot_path_trajectory(trajectory):
    """
    Plot a heatmap of marker expression along one PAGA path and save it.

    The heatmap is drawn with ``sc.pl.paga_path`` on the notebook-level ``adata``,
    grouped by ``obs['clusters']`` and annotated with ``obs['distance']``. The genes
    shown are ``gene_names[descr]``.

    Parameters
    ----------
    trajectory : tuple
        ``(descr, path)`` pair, such as an element of ``paths``. ``descr`` is the
        label used for the title, the ``gene_names`` lookup and the output file
        names; ``path`` is the ordered list of cluster names to traverse.

    Raises
    ------
    KeyError
        If ``gene_names`` has no entry for ``descr``.

    Notes
    -----
    Writes ``../output/paga_path_<descr>.csv`` (the plotted values) and the figure
    as ``../output/paga_path_<descr>.pdf`` and ``.png`` (400 dpi). Returns None.
    """
    descr, path = trajectory
    # Look the genes up before creating the figure, so an unknown label fails
    # without leaving an empty figure behind.
    genes = gene_names[descr]

    fig, ax = plt.subplots(1, 1, figsize=(2,4), gridspec_kw={'wspace': 0.05, 'left': 0.12})
    fig.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
    # With show=False and return_data=True, paga_path returns (axes, DataFrame).
    # The axes is not needed here, and `fig` must keep referring to the Figure.
    _, data = sc.pl.paga_path(
            adata, path, genes,
            show_node_names=False,
            ax=ax,
            ytick_fontsize=12,
            left_margin=0.15,
            n_avg=50,
            annotations=['distance'],
            show_colorbar=False,
            color_map='Blues',
            groups_key='clusters',
            color_maps_annotations={'distance': 'viridis'},
            title='{} path'.format(descr),
            return_data=True,
            use_raw=False,
            show=False)
    data.to_csv(f'../output/paga_path_{descr}.csv')
    fig.savefig(f'../output/paga_path_{descr}.pdf',bbox_inches='tight')
    fig.savefig(f'../output/paga_path_{descr}.png', dpi=400, bbox_inches='tight')

###  Trajectory 4-3-0

In [None]:
# paths[0] is the ('4-3-0', ['4', '3', '0']) entry defined under "Define trajectories".
plot_path_trajectory(paths[0])

###  Trajectory 4-3-1-2

In [None]:
# paths[1] is the ('4-3-1-2', ['4', '3', '1', '2']) entry defined under "Define trajectories".
plot_path_trajectory(paths[1])

# UMAPs for specific markers
Pdpn, Saa3, Il1rl1, C3, Mmp2, Acta2, Tagln, Vegfa, Pgf, Vegfc

In [None]:
# NOTE(review): this section reads a different file from the two sections above; the cells
# below expect a UMAP embedding and an obs['condition'] column with 'WT'/'KO' values -- confirm.
adata = sc.read('../data/adata_yanling_fbs.h5ad')

In [None]:
# Fixed colour for each fibroblast subpopulation, so all figures share one palette.
# Written as (label, hex colour) pairs; the dict keeps them in this order.
dict_fbs_colors = dict([
    ('0A: FB Ptgs2$^+$Crispl2$^+$', '#67dcf1'),
    ('0B: FB Hilpda$^+$Mt2$^+$', '#702a17'),
    ('1: FB Msc$^+$Itga7$^+$', '#df7935'),
    ('2: FB Serpine2$^+$Col23a1$^+$', '#20668d'),
    ('3: FB Cxcl1$^+$Ccl2$^+$', '#a05a2c'),
    ('4: FB Rab37$^+$Col22a1$^+$', '#d40055'),
    ('5: FB Clec3b$^+$Comp$^+$', '#ff5599'),
    ('6: FB Cilp2$^+$Fbn1$^+$', '#8e0c6f'),
    ('7: FB Cxcl12$^+$C1s1$^+$', '#ff9955'),
    ('8: FB Ccn5$^+$Phldb2$^+$', '#d3bc5f'),
    ('9: FB Ltbp2$^+$Chodl$^+$', '#ae54b8'),
    ('10: FB Coch$^+$Emid1$^+$', '#4d0765'),
    ('11: FB Cfh$^+$Spp1$^+$', '#229eb2'),
])

In [None]:
# Store the palette under the '<obs column>_colors' keys for the three subtype annotations.
# NOTE(review): colours are matched to categories by position -- confirm the category order
# of each obs column follows dict_fbs_colors.
adata.uns['subtype_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_number_colors'] = list(dict_fbs_colors.values())
adata.uns['subtype_name_colors'] = list(dict_fbs_colors.values())

In [None]:
# 25 genes; the next cell draws them on a 5x5 grid of UMAP panels.
markers = ['Pdpn', 'Saa3', 'Il1rl1', 'C3', 'Mmp2', 'Acta2', 'Tagln', 'Vegfa', 'Pgf', 'Vegfc',
           'Tnfrsf1b', 'Ifngr1', 'Il6ra', 'Tgfbr3', 'Pdgfra', 'Cd24a', 'Dcn', 'Il6', 
           'Cxcl1', 'Cxcl2', 'Ccl2', 'Cxcl12', 'Has1', 'Has2', 'Cd44']

In [None]:
# One UMAP panel per marker, five per row, filled row by row.
# `markers` is left as a list (it is not rebound to a reshaped array), so its type does not
# change when this cell runs and each gene is passed to scanpy as a plain str.
n_cols = 5
n_rows = -(-len(markers) // n_cols)  # ceiling division: 25 markers -> 5 rows
# 3 x 3 inches per panel, i.e. (15, 15) for the 5x5 grid; squeeze=False keeps axs 2-D.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False)
panels = axs.ravel()
for ax, gene in zip(panels, markers):
    sc.pl.umap(adata, color=gene, ax=ax, show=False, s=20, use_raw=False)
    ax.set_title(gene)
    ax.set_xlabel('')
    ax.set_ylabel('')
# Hide panels of the last row that have no gene (none when there are 25 markers).
for ax in panels[len(markers):]:
    ax.set_visible(False)
fig.tight_layout()
fig.savefig('../output/Fibroblasts_selected_markers_on_UMAP.pdf')

In [None]:
markers_sep = ['C3', 'Hif1a', 'Pgam1', 'Ndrg1', 'Ptgs2', 'Pdpn', 'Saa3', 'Il1rl1', 'Acta2', 'Tagln']

In [None]:
fig, axs = plt.subplots(2, 10, figsize=(20, 4))

for j in range(len(markers_sep)):
    # WT
    sc.pl.umap(adata[adata.obs['condition'] == 'WT'], color=markers_sep[j], ax=axs[0][j], show=False, s=20, use_raw=False)
    axs[0][j].set_title(markers_sep[j])
    axs[0][j].set_xlabel('')
    axs[0][j].set_ylabel('')
    
    # KO
    sc.pl.umap(adata[adata.obs['condition'] == 'KO'], color=markers_sep[j], ax=axs[1][j], show=False, s=20, use_raw=False)
    axs[1][j].set_title(markers_sep[j])
    axs[1][j].set_xlabel('')
    axs[1][j].set_ylabel('')
    
axs[0][0].set_ylabel('WT')  
axs[1][0].set_ylabel('KO')

plt.tight_layout()
plt.savefig('../output/Fibroblasts_selected_markers_on_UMAP_KO_and_WT_separated.pdf')

In [None]:
markers_sep = ['Tgfb1', 'Tgfbr3', 'Postn', 'Tnc', 'Ccn2', 'Cd14', 'Pi16', 'Clu']

In [None]:
fig, axs = plt.subplots(2, 8, figsize=(16, 4))

for j in range(len(markers_sep)):
    # WT
    sc.pl.umap(adata[adata.obs['condition'] == 'WT'], color=markers_sep[j], ax=axs[0][j], show=False, s=20, use_raw=False)
    axs[0][j].set_title(markers_sep[j])
    axs[0][j].set_xlabel('')
    axs[0][j].set_ylabel('')
    
    # KO
    sc.pl.umap(adata[adata.obs['condition'] == 'KO'], color=markers_sep[j], ax=axs[1][j], show=False, s=20, use_raw=False)
    axs[1][j].set_title(markers_sep[j])
    axs[1][j].set_xlabel('')
    axs[1][j].set_ylabel('')
    
axs[0][0].set_ylabel('WT')  
axs[1][0].set_ylabel('KO')

plt.tight_layout()
plt.savefig('../output/Fibroblasts_more_markers_on_UMAP_KO_and_WT_separated.pdf')