In [None]:
# Directory (relative to this notebook) that holds every input file read below:
# the integrated h5ad object, the cell cycle gene list and the CellChat CSV tables.
results_directory = '../data/'

In [None]:
# Load the relevant packages.
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

# Scanpy logging level. Possible values: (0) errors, (1) warnings, (2) info, (3) hints
sc.settings.verbosity = 3

# Figure defaults applied to the Scanpy plots drawn in this notebook
sc.settings.set_figure_params(
    dpi=100,
    facecolor='white',
    fontsize=18,
    transparent=True,
)

We also define a custom colour map for visualisation of marker genes.

In [1]:
# Create the custom colourmap
import matplotlib as mpl
from copy import copy
# `mpl.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9, so the
# colormap is looked up through `plt.get_cmap` (pyplot is imported with the other
# packages at the top of the notebook), which is available in old and new releases.
# The copy keeps the `set_under` change below local to `colour_map`.
colour_map = copy(plt.get_cmap('hot_r'))
# Values below the `vmin` passed at plotting time are drawn in light gray
colour_map.set_under('lightgray')

In [None]:
# Read the integrated fibroblast object (h5ad) from the results directory
file_name = 'integratedfibroblastsdata.h5ad'
fibroblasts_merged = sc.read_h5ad(f'{results_directory}{file_name}')

# Analysis of cell cycle phases

We follow the standard analysis for cell cycle phase inference as suggested by the Scanpy and Seurat developers [here](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.score_genes_cell_cycle.html). That is, we consider "known" markers of S phase and G2M phase progression and calculate gene scores for each fibroblast.

In [None]:
# Build the gene lists used to score the fibroblasts on cell cycle phase.
# The symbols are read from the first column of a header-less text file and
# converted (roughly) to mouse symbols: first character kept, the rest lower-cased.
cell_cycle_genes = [
    symbol[0] + symbol[1:].lower()
    for symbol in pd.read_csv(results_directory + 'regev_lab_cell_cycle_genes.txt', header=None)[0].tolist()
]

# Split taken from the Scanpy example: the first 43 entries form the S-phase
# list, everything after them the G2M-phase list.
s_genes, g2m_genes = cell_cycle_genes[:43], cell_cycle_genes[43:]

# Drop every gene that is not among the variables of the loaded dataset
cell_cycle_genes = list(filter(lambda gene: gene in fibroblasts_merged.var_names, cell_cycle_genes))
s_genes = list(filter(lambda gene: gene in fibroblasts_merged.var_names, s_genes))
g2m_genes = list(filter(lambda gene: gene in fibroblasts_merged.var_names, g2m_genes))

In [None]:
# Overall score across all retained cell cycle genes, saved under the name 'Cell cycle'
sc.tl.score_genes(
    fibroblasts_merged,
    gene_list=cell_cycle_genes,
    score_name='Cell cycle',
)

# Phase inference from the S and G2M gene lists; the cells below read the
# resulting 'phase' annotation.
sc.tl.score_genes_cell_cycle(
    fibroblasts_merged,
    s_genes=s_genes,
    g2m_genes=g2m_genes,
)

Plotting the cell cycle gene score shows that there are "patches" of high cell cycle gene expression, which may indicate proliferative activity.

In [None]:
# The key is case-sensitive and must match the `score_name` given to
# `sc.tl.score_genes` ('Cell cycle'); 'Cell Cycle' is not a stored annotation.
# `vmin` sits just above zero so that lower scores fall under the colour map's
# range and take its "under" colour.
sc.pl.umap(fibroblasts_merged, color='Cell cycle', size=10, cmap=colour_map, vmin=0.0001)

Plotting the phases as well suggests that cells with high cell cycle scores tend to be in G2M or S phase, i.e. these cells are progressing through the cell cycle.

In [None]:
# Define colours for the phase colours (for illustrative purposes).
# The same list is stored under uns['phase_colors'] and reused as the bar
# colours of the stacked phase plot further down.
# NOTE(review): presumably one colour per phase category, in category order — confirm.
cell_cycle_phase_colours = ['#3D96AD', '#FAEC5D', '#FA669A']
fibroblasts_merged.uns['phase_colors'] = cell_cycle_phase_colours

In [None]:
# UMAP coloured by the inferred cell cycle phase
sc.pl.umap(
    fibroblasts_merged,
    color='phase',
    size=10,
)

Plot the proportions of each fibroblast subcluster that are in each cell cycle phase

In [None]:
# Per-cell annotations of the integrated object
fibroblasts_merged_df = fibroblasts_merged.obs

# Order in which the samples are drawn. It is assigned here because this cell
# runs before the later cell that also assigns `sample_order`; on a fresh
# kernel the name would otherwise be undefined at this point.
sample_order = ['UW P21', 'UW P49', 'SW PWD4', 'SW PWD7', 'LW PWD12', 'LW PWD14', 'LW FIB PWD18', 'LW REG PWD18']

# Number of cells (raw counts, normalize=False) per sample (rows) and phase
# (columns), with the rows arranged in `sample_order`.
phase_counts_by_sample = pd.crosstab(
    fibroblasts_merged_df['sample'],
    fibroblasts_merged_df['phase'],
    normalize=False,
).reindex(sample_order)

# Stacked bars: one bar per sample, one segment per phase
axis = phase_counts_by_sample.plot.bar(stacked=True, width=0.9, grid=False, figsize=(6,10), linewidth=1.0, color=cell_cycle_phase_colours)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
fig = axis.get_figure()

Of the above subclusters, FIB-I, FIB-IV, FIB-VI, and FIB-X seem to contain proliferating subsets.

We now look at how much each subcluster contributes to proliferative activity over wound healing time.

In [None]:
# Subcluster labels, ordered from the most to the least frequent label.
# NOTE(review): the original read an undefined `leiden_clusters`; it is taken here
# from the 'leiden_sub' annotation that the following cells use — confirm.
# NOTE(review): the line plot further down pairs these labels positionally with
# uns['leiden_sub_colors']; verify that both orders agree.
fibroblast_clusters = fibroblasts_merged.obs['leiden_sub'].value_counts().index.tolist()

# Samples in the order they are plotted
sample_order = ['UW P21', 'UW P49', 'SW PWD4', 'SW PWD7', 'LW PWD12', 'LW PWD14', 'LW FIB PWD18', 'LW REG PWD18']

In [None]:
# One row per fibroblast subcluster, one column per sample; starts at zero
n_clusters, n_samples = len(fibroblast_clusters), len(sample_order)
cell_cycle_phase_contributions = np.zeros((n_clusters, n_samples))

In [None]:
# For every sample, store the fraction of that sample's G2M-phase cells that
# belongs to each subcluster (column `sample_index` of the matrix).
# NOTE(review): only 'G2M' cells are selected, although the original variable
# name mentioned S phase as well — confirm whether 'S' should be included.
for sample_index, sample in enumerate(sample_order):

    # Cells of this sample that were assigned to the G2M phase
    is_cycling_in_sample = (fibroblasts_merged.obs['sample'] == sample) \
                            & fibroblasts_merged.obs['phase'].isin(['G2M'])

    # `normalize` is a boolean flag: True converts the counts to fractions of
    # the selected cells. Reindexing aligns the result with the matrix rows.
    phase_contributions = (fibroblasts_merged.obs.loc[is_cycling_in_sample, 'leiden_sub']
                           .value_counts(normalize=True)
                           .reindex(fibroblast_clusters)
                           .to_numpy())

    # NaN entries (label absent from the counts, or no selected cells at all)
    # are skipped, so those matrix entries keep their initial value of zero.
    is_observed = ~np.isnan(phase_contributions)
    cell_cycle_phase_contributions[is_observed, sample_index] = phase_contributions[is_observed]
    

In [None]:
# Wrap the contribution matrix in a labelled dataframe for plotting:
# rows are subclusters, columns are samples.
cycling_contributions_df = pd.DataFrame(
    data=cell_cycle_phase_contributions,
    index=fibroblast_clusters,
    columns=sample_order,
)

In [None]:
sns.set_theme(style="ticks")

# One line per subcluster across the samples. The dataframe built above is
# named `cycling_contributions_df`; it is transposed so that the samples lie
# along the x-axis. `np.asarray(...).tolist()` accepts the stored colours
# whether they are held as a list or as an array.
ax = sns.relplot(data=cycling_contributions_df.T,
            linewidth=2.5,
            palette=np.asarray(fibroblasts_merged.uns['leiden_sub_colors']).tolist(),
            kind='line', alpha=0.75)
plt.ylim((0, 0.2))
plt.yticks([0, 0.05, 0.1, 0.15, 0.2])
plt.xticks(rotation=90);

# Plotting the results from CellChat inference

In [None]:
# Load the CCC activities.
# One entry per analysed time point: (label, file name suffix, target subcluster).
# The targets are the proliferative subclusters at the relevant time points.
# NOTE(review): the labels are derived from the file names; adjust them if
# different axis labels are wanted in the plots below.
communication_sources = [
    ('PWD4', 'swpwd4', 'FIB-IV'),
    ('PWD7', 'swpwd7', 'FIB-IX'),
    ('PWD12', 'lwpwd12', 'FIB-I'),
    ('PWD18', 'lwpwd18', 'FIB-VI'),
]

# Names used by the following cells (collation and tick labels); they were
# previously referenced without ever being assigned.
timepoints = [label for label, _, _ in communication_sources]
relevant_clusters = [target for _, _, target in communication_sources]

# Read each table and keep only the interactions that target the chosen subcluster
all_communications = []
for _, file_suffix, target_cluster in communication_sources:
    communications_table = pd.read_csv(results_directory + 'integratedfibroblasts_communications_dividing_' + file_suffix + '.csv')
    all_communications.append(communications_table[communications_table['target'] == target_cluster])

# Keep the per-time-point names available as before
pwd4_communications, pwd7_communications, pwd12_communications, pwd18_communications = all_communications


Collate all of the information together by pathway.

In [None]:
# Summed communication probability per pathway and time point:
# significant_pathways[pathway][i] is the total of the 'prob' column over all
# rows of all_communications[i] whose 'pathway_name' equals `pathway`.
significant_pathways = {}

for i, communications in enumerate(all_communications):

    # Only the pathway name and the probability are needed for the sums, so the
    # two columns are walked directly instead of materialising every row.
    for pathway_name, score_AB in zip(communications['pathway_name'], communications['prob']):

        # First time a pathway is seen: start with zero activity at every time point
        if pathway_name not in significant_pathways:
            significant_pathways[pathway_name] = np.zeros(len(all_communications))

        significant_pathways[pathway_name][i] += score_AB


We now classify which pathways are universally present and which are dynamically present, i.e. on and off at different times. 

In [None]:
# Pathways whose peak activity does not exceed this value are ignored
# (picked to get "cleaner" results).
activity_threshold = 0.05

universal_pathways, dynamic_pathways = [], []

for pathway, pathway_activities in significant_pathways.items():

    # Skip pathways that never rise above the threshold
    if not (pathway_activities.max() > activity_threshold):
        continue

    # Universal: non-zero activity at every time point; dynamic: zero at one or more
    active_everywhere = np.count_nonzero(pathway_activities) == len(all_communications)
    if active_everywhere:
        universal_pathways.append(pathway)
    else:
        dynamic_pathways.append(pathway)

# One column per pathway, one row per time point
universal_pathways_df = pd.DataFrame(data={name: significant_pathways[name] for name in universal_pathways})
dynamic_pathways_df = pd.DataFrame(data={name: significant_pathways[name] for name in dynamic_pathways})

Plot the results.

In [None]:
sns.set_theme(style="ticks")

# One line per universally present pathway; rows 0-3 of the dataframe are
# labelled with the time points on the x-axis.
ax = sns.relplot(
    data=universal_pathways_df,
    kind='line',
    linewidth=2.5,
    alpha=0.75,
);
plt.xticks(ticks=[0, 1, 2, 3], labels=timepoints, rotation=45);
plt.ylim(0, 0.2)
plt.yticks(ticks=np.arange(0, 0.25, 0.05))

In [None]:
sns.set_theme(style="ticks")

# One line per dynamically present pathway; rows 0-3 of the dataframe are
# labelled with the time points on the x-axis.
ax = sns.relplot(
    data=dynamic_pathways_df,
    kind='line',
    linewidth=2.5,
    alpha=0.75,
);
plt.xticks(ticks=[0, 1, 2, 3], labels=timepoints, rotation=45);
plt.ylim(0, 0.45)
plt.yticks(ticks=np.arange(0, 0.5, 0.1))