In [None]:
# Directory (relative to this notebook) that holds the h5ad file we load below
results_directory = "../data/"

In [None]:
# Load the packages used throughout the notebook.
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
from itertools import product

# Scanpy logging level: (0) errors, (1) warnings, (2) info, (3) hints
sc.settings.verbosity = 3

# Shared figure defaults for every scanpy plot in this notebook
sc.settings.set_figure_params(
    dpi=100,
    facecolor='white',
    fontsize=18,
    transparent=True,
)

We also define a custom colour map for visualisation of marker genes.

In [None]:
# Create the custom colourmap
import matplotlib as mpl
from copy import copy

# `mpl.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9, so the
# registered colormap is looked up through pyplot instead, which is available in
# both old and new releases. We work on a copy so that the globally registered
# 'hot_r' colormap is not modified by `set_under`.
colour_map = copy(plt.get_cmap('hot_r'))

# Values below `vmin` (see the UMAP calls that pass `vmin=0.0001`) are drawn in light gray
colour_map.set_under('lightgray')

In [None]:
# Read the integrated fibroblast AnnData object from the results directory
file_name = 'integratedfibroblastsdata.h5ad'
fibroblasts_merged = sc.read_h5ad(f'{results_directory}{file_name}')

In [None]:
# Sample labels in the order they are reported in the plots below
sample_order = [
    'UW P21',
    'UW P49',
    'SW PWD4',
    'SW PWD7',
    'LW PWD12',
    'LW PWD14',
    'LW FIB PWD18',
    'LW REG PWD18',
]

# Subcluster labels, in the order returned by `value_counts`
leiden_sub_counts = fibroblasts_merged.obs['leiden_sub'].value_counts()
fibroblast_clusters = leiden_sub_counts.index.tolist()

Plot the UMAP of clusters just to make sure colours have been initialised.

In [None]:
# Draw the subcluster UMAP; later cells read the cluster colours from
# `fibroblasts_merged.uns['leiden_sub_colors']`
sc.pl.umap(
    fibroblasts_merged,
    color='leiden_sub',
)

# Marker genes

Plot discriminatory marker genes using a version of a tracks plot, where we calculate the average binned expression (ten bins) per subcluster.

In [None]:
# Two discriminatory marker genes per fibroblast subcluster
fibroblast_marker_genes_tracksplot = {
    'FIB-I': ['Crabp1', 'Col7a1'],
    'FIB-II': ['Col14a1', 'Mgp'],
    'FIB-III': ['Pcolce2', 'Ndufa4l2'],
    'FIB-IV': ['Plac8', 'Ptx3'],
    'FIB-V': ['Tyrobp', 'Lyz2'],
    'FIB-VI': ['Col5a3', 'Prss23'],
    'FIB-VII': ['Igfbp2', 'Megf6'],
    'FIB-VIII': ['Igfbp3', 'Bmp4'],
    'FIB-IX': ['Coch', 'Dkk2'],
    'FIB-X': ['Lgals7', 'S100a14'],
    'FIB-XI': ['Nr2f2', 'Cldn1'],
}

# Columns to pull from the AnnData object: the cluster label first, followed by
# every marker gene once, in the order the genes appear in the dictionary above.
keys = ['leiden_sub']
keys += list(dict.fromkeys(
    marker
    for marker_list in fibroblast_marker_genes_tracksplot.values()
    for marker in marker_list
    if marker != 'leiden_sub'
))
            
# Per-cluster colours used to fill the tracks. If this lookup fails, the colours have
# presumably not been initialised yet: plot the UMAP coloured by 'leiden_sub' first.
groupby_colors = fibroblasts_merged.uns['leiden_sub_colors']

In [None]:
# Prepare the data for the tracksplot
# One row per cell: the cluster label followed by the raw expression of each marker gene
obs_tidy = sc.get.obs_df(fibroblasts_merged, keys=keys, use_raw=True)

# Cluster order used for the tracks. `categories` was previously never defined, which
# raised a NameError on a fresh kernel. It is taken from the categorical cluster labels
# so that position i lines up with `groupby_colors[i]`
# (NOTE(review): assumes `uns['leiden_sub_colors']` is stored in category order - confirm).
categories = fibroblasts_merged.obs['leiden_sub'].astype('category').cat.categories.tolist()

# Sort the cells so that each cluster occupies one contiguous block of rows
obs_tidy['leiden_sub'] = pd.Categorical(obs_tidy['leiden_sub'], categories=categories, ordered=True)
obs_tidy = obs_tidy.sort_values('leiden_sub', ascending=True)

# Every column after the cluster label is a gene to plot
tracksplot_genes = list(obs_tidy.columns[1:])

# Create the trackplot
# Each gene gets one horizontal track; within a track every cluster is drawn as a
# filled curve of `nbins` binned average expression values.
# NOTE: `categories` is not assigned in this part of the cell; it has to be defined
# before this code runs.
nbins = 10

# obtain the start and end of each category and make
# a list of ranges that will be used to plot a different
# color
# (presumably `value_counts(sort=False)` returns counts in category order for the
# categorical column, matching the sort order of `obs_tidy` - confirm)
cumsum = [0] + list(np.cumsum(obs_tidy['leiden_sub'].value_counts(sort=False)))
x_values = [(x, y) for x, y in zip(cumsum[:-1], cumsum[1:])]

# The first grid row is reserved for a dendrogram but is given zero height
dendro_height = 0

groupby_height = 0.24
# +2 because of dendrogram on top and categories at bottom
num_rows = len(tracksplot_genes) + 2
width = 12
track_height = 0.25

# One ratio per grid row: dendrogram, one per gene track, then the category row
height_ratios = [dendro_height] + [track_height] * len(tracksplot_genes) + [groupby_height]
height = 2*sum(height_ratios)

# After transposing, row 0 holds the cluster labels and rows 1.. hold one gene each;
# columns are the (sorted) cells
obs_tidy = obs_tidy.T

fig = plt.figure(figsize=(width, height))
axs = gridspec.GridSpec(
    ncols=2,
    nrows=num_rows,
    wspace=1.0 / width,
    hspace=0,
    height_ratios=height_ratios,
    width_ratios=[width, 0.14],
)
axs_list = []
first_ax = None
for idx, var in enumerate(tracksplot_genes):
    ax_idx = idx + 1  # this is because of the dendrogram
    # All tracks share the x axis of the first track
    if first_ax is None:
        ax = fig.add_subplot(axs[ax_idx, 0])
        first_ax = ax
    else:
        ax = fig.add_subplot(axs[ax_idx, 0], sharex=first_ax)
    axs_list.append(ax)
    for cat_idx, category in enumerate(categories):
        x_start, x_end = x_values[cat_idx]
        # Sorted expression of this gene (row idx + 1) over the cells of this cluster
        expression_values = np.sort(obs_tidy.iloc[idx + 1, x_start:x_end].to_numpy()) # Get the expression_values
        average_expressions = np.zeros(nbins)

        # Number of cells per bin; the last bin also takes the remainder.
        # NOTE: if the cluster has fewer than `nbins` cells, `num` is 0, the first
        # nbins - 1 slices are empty and their mean is NaN.
        num = int(np.floor(np.size(expression_values)/nbins))

        for ave_idx in range(nbins):
            if ave_idx < nbins - 1:
                average_expressions[ave_idx] = np.mean(expression_values[num*(ave_idx):num*(1 +ave_idx)])
            else:
                average_expressions[ave_idx] = np.mean(expression_values[num*(ave_idx):])

        # Each cluster occupies 10 x positions; the hard-coded 10 matches nbins = 10
        ax.fill_between(
            range(cat_idx*10, (cat_idx + 1)*10),
            0,
            average_expressions,
            lw=0.1,
            color=groupby_colors[cat_idx],
        )

    # remove the xticks labels except for the last processed plot.
    # Because the plots share the x axis it is redundant and less compact
    # to plot the axis for each plot
    if idx < len(tracksplot_genes) - 1:
        ax.tick_params(labelbottom=False, labeltop=False, bottom=False, top=False)
        ax.set_xlabel('')
#     if log:
#         ax.set_yscale('log')
    ax.spines['left'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.grid(False)
    # Show a single y tick at the upper y limit, truncated to an integer
    ymin, ymax = ax.get_ylim()
    ymax = int(ymax)
    ax.set_yticks([ymax])
    ax.set_yticklabels([str(ymax)], ha='left', va='top')
    ax.spines['right'].set_position(('axes', 1.01))
    ax.tick_params(
        axis='y',
        labelsize='x-small',
        right=True,
        left=False,
        length=2,
        which='both',
        labelright=True,
        labelleft=False,
        direction='in',
    )
    # The gene name is used as a horizontal label to the left of its track
    ax.set_ylabel(var, rotation=0, fontsize='small', ha='right', va='bottom')
    ax.yaxis.set_label_coords(-0.005, 0.1)
# ax.set_xlim(0, x_end
# `ax` is the last track here; the x limits cover 10 positions per cluster
ax.set_xlim(0, len(categories)*10)
ax.tick_params(axis='x', bottom=False, labelbottom=False)

# the ax to plot the groupby categories is split to add a small space
# between the rest of the plot and the categories
axs2 = gridspec.GridSpecFromSubplotSpec(
    2, 1, subplot_spec=axs[num_rows - 1, 0], height_ratios=[1, 1]
)

# Axis for the category row; nothing is drawn on it in this cell
groupby_ax = fig.add_subplot(axs2[1])


# Upper and lower dermis markers

Define the upper and lower dermis markers based on previous reports in the literature.

In [None]:
# Marker genes for upper-dermis fibroblasts
upper_markers = [
    'Dpp4',
    'Prdm1',
    'Lef1',
    'Prss35',
    'Runx1',
    'Crabp1',
    'Fabp5',
]

# Marker genes for lower-dermis fibroblasts
lower_markers = [
    'Dlk1',
    'Ly6a',
    'Fmo1',
    'Cnn1',
    'Mest',
]

In [None]:
# Score every cell for the upper and then the lower marker set; the scores are
# stored under the given `score_name` ('Upper' / 'Lower')
for position_name, position_markers in (('Upper', upper_markers), ('Lower', lower_markers)):
    sc.tl.score_genes(fibroblasts_merged, gene_list=position_markers, score_name=position_name)

Plot the split violin plot to see whether clusters can be classified as consisting predominantly of "upper" or "lower" fibroblasts.

In [None]:
plt.rcParams['figure.figsize'] = (8, 6)  # rescale figures

# Long-form table with one row per (cell, score): cluster label, which score
# ('Upper' or 'Lower'), and the score value
df = sc.get.obs_df(fibroblasts_merged, ['Upper', 'Lower', 'leiden_sub'])
df = df.set_index('leiden_sub').stack().reset_index()
df.columns = ['leiden_sub', 'Position', 'Score']

# `rotation` is not a violinplot parameter: depending on the seaborn version it is
# either silently ignored or forwarded to matplotlib, where it raises. The x tick
# labels are rotated on the returned Axes instead.
violin_ax = sns.violinplot(data=df, x='leiden_sub', y='Score', hue="Position",
                           split=True, inner="quartile", linewidth=1.5)
violin_ax.tick_params(axis='x', labelrotation=90)

Upper dermal markers.

In [None]:
# Square figures for the UMAP score plots
plt.rcParams['figure.figsize'] = (6, 6)

# Scores below `vmin` fall under the colour range of the custom colourmap
sc.pl.umap(
    fibroblasts_merged,
    color='Upper',
    size=10,
    cmap=colour_map,
    vmin=0.0001,
)

Lower dermal markers.

In [None]:
# Same plot as for the upper score, now coloured by the 'Lower' score
sc.pl.umap(
    fibroblasts_merged,
    color='Lower',
    size=10,
    cmap=colour_map,
    vmin=0.0001,
)

# Compositional changes over time

Plot the subcluster composition for each wound healing condition.

In [None]:
fibroblast_merged_df = fibroblasts_merged.obs

# Fraction of each sample's cells that fall in each subcluster (rows sum to 1),
# with the rows arranged in `sample_order`
tmp = pd.crosstab(
    index=fibroblast_merged_df['sample'],
    columns=fibroblast_merged_df['leiden_sub'],
    normalize='index',
).reindex(sample_order)

In [None]:
# Stacked bars of the per-sample subcluster fractions, coloured with the cluster colours
cluster_colours = fibroblasts_merged.uns['leiden_sub_colors'].tolist()
axis = tmp.plot.bar(
    stacked=True,
    width=0.9,
    grid=False,
    figsize=(6, 10),
    linewidth=1.0,
    color=cluster_colours,
)
# Put the legend outside the axes, to the right of the bars
axis.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
fig = axis.get_figure()

We now calculate the Bray-Curtis dissimilarity (0 = identical composition, 1 = no shared composition) to quantify the compositional differences between wound healing times.

In [None]:
# Square (sample x sample) matrix that the next cell fills in
n_samples = len(sample_order)
fibroblast_sample_braycurtis = np.zeros(shape=(n_samples, n_samples))

In [None]:
# Calculate the Bray-Curtis dissimilarities between samples
# The per-sample cluster counts do not depend on the pairing, so they are tabulated
# once here instead of re-subsetting the AnnData object for every pair of samples.
sample_labels = fibroblasts_merged.obs['sample']
cluster_labels = fibroblasts_merged.obs['leiden_sub']
cluster_counts_per_sample = {
    sample: cluster_labels[sample_labels.isin([sample])].value_counts()
    for sample in sample_order
}
num_cells_per_sample = {
    sample: int(counts.sum()) for sample, counts in cluster_counts_per_sample.items()
}

for (sample1_index, sample1), (sample2_index, sample2) in product(enumerate(sample_order), repeat=2):

    # Get the cluster counts for each sample
    sample1_counts = cluster_counts_per_sample[sample1]
    sample2_counts = cluster_counts_per_sample[sample2]

    num_cells_in_sample1 = num_cells_per_sample[sample1]
    num_cells_in_sample2 = num_cells_per_sample[sample2]

    # We take the sum of the minimum number of cells for each cluster present in both samples
    shared_clusters = sample1_counts.index.intersection(sample2_counts.index)
    num_cells_in_both = np.sum(np.minimum(sample1_counts.loc[shared_clusters].to_numpy(),
                                          sample2_counts.loc[shared_clusters].to_numpy()))

    # The row index is reversed so that, after plotting, UW P21 is the bottom row
    # and LW REG PWD18 is the top row.
    fibroblast_sample_braycurtis[len(sample_order) - 1 - sample1_index, sample2_index] = \
        1.0 - 2.0 * (num_cells_in_both / (num_cells_in_sample1 + num_cells_in_sample2))


Plot the Bray-Curtis dissimilarity matrix as a reflected lower triangular matrix. We don't need to plot above the diagonal, as the matrix is symmetric.

In [None]:
# Build the mask from a strict upper triangle, rotated by 90 degrees so that it
# follows the row-reversed layout of the matrix
upper_triangle = np.triu(np.ones_like(fibroblast_sample_braycurtis, dtype=bool), k=1)
mask = np.rot90(upper_triangle)

heatmap_options = dict(
    mask=mask,  # cells where the mask is True are not drawn
    vmin=0,
    vmax=1.0,
    yticklabels=sample_order[::-1],
    xticklabels=sample_order,
    square=True,
    cmap='RdYlBu',
    linewidths=0.0,
    cbar_kws={"shrink": 0.5},
)
sns.heatmap(fibroblast_sample_braycurtis, **heatmap_options)

Calculate the Jaccard similarities to measure the overlap between wound healing time labels and fibroblast subcluster labels.

In [None]:
# (sample x cluster) matrix that the next cell fills with Jaccard indices
jaccard_shape = (len(sample_order), len(fibroblast_clusters))
fibroblast_cluster_sample_jaccard = np.zeros(jaccard_shape)

In [None]:
# Calculate the Jaccard index and cluster proportions per sample
# `fibroblast_cluster_proportions` was previously written to without ever being
# created, which raised a NameError; it is initialised here. Every entry is
# overwritten in the loop below.
fibroblast_cluster_proportions = np.zeros((len(sample_order), len(fibroblast_clusters)))

# Membership masks are loop-invariant, so build them once per sample / cluster
# instead of subsetting the AnnData object three times per (sample, cluster) pair.
sample_masks = {
    sample: fibroblasts_merged.obs['sample'].isin([sample]) for sample in sample_order
}
cluster_masks = {
    cluster: fibroblasts_merged.obs['leiden_sub'].isin([cluster]) for cluster in fibroblast_clusters
}

for (sample_index, sample), (cluster_index, cluster) in product(enumerate(sample_order),
                                                                enumerate(fibroblast_clusters)):

    in_sample = sample_masks[sample]
    in_cluster = cluster_masks[cluster]

    num_cells_in_both = int((in_sample & in_cluster).sum())
    num_cells_in_sample = int(in_sample.sum())
    num_cells_in_cluster = int(in_cluster.sum())

    # Jaccard index: |sample AND cluster| / |sample OR cluster|
    fibroblast_cluster_sample_jaccard[sample_index, cluster_index] = num_cells_in_both \
                                                                / (num_cells_in_sample + num_cells_in_cluster - num_cells_in_both)

    # The row index is reversed so that UW P21 ends up in the bottom row and
    # LW REG PWD18 in the top row.
    fibroblast_cluster_proportions[len(sample_order) - 1 - sample_index, cluster_index] = num_cells_in_both / num_cells_in_cluster
    

Define the Jaccard similarity matrix as a dataframe to make it easier to plot using `sns.relplot`

In [None]:
# Rows are subclusters, columns are samples
fibroblast_jaccard_df = pd.DataFrame(
    data=fibroblast_cluster_sample_jaccard.transpose(),
    index=fibroblast_clusters,
    columns=sample_order,
)

In [None]:
sns.set_theme(style="ticks")

# Map every subcluster to its own colour by name. The columns of the plotted table
# follow `fibroblast_clusters` (value_counts order), so passing the colours as a plain
# list would pair lines with colours by position and could mismatch clusters
# (NOTE(review): assumes `uns['leiden_sub_colors']` is stored in category order - confirm).
leiden_sub_palette = dict(zip(fibroblasts_merged.obs['leiden_sub'].cat.categories,
                              fibroblasts_merged.uns['leiden_sub_colors']))

# One line per subcluster (columns), with the samples (index) along the x axis
ax = sns.relplot(data=fibroblast_jaccard_df.T,
            linewidth=2.5,
            palette=leiden_sub_palette,
            kind='line', alpha=0.75)
plt.ylim((0, 0.75))
plt.yticks([0, 0.25, 0.5, 0.75])
plt.xticks(rotation=45)