In [1]:
# Load the relevant packages.
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import anndata as ad
from scipy import sparse
import loompy as lp

# Scanpy logging level and figure defaults for the whole notebook.
sc.settings.verbosity = 3  # Possible values: (0) errors, (1) warnings, (2) info, (3) hints
# A single set_figure_params call is kept. The former follow-up call,
# set_figure_params('scvelo'), is scvelo's API: in scanpy the string is taken as the
# boolean `scanpy` flag and every other argument falls back to its default, which
# reset the dpi, fontsize and transparency chosen here.
sc.settings.set_figure_params(dpi=100, facecolor='white', fontsize=18, transparent=True)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [2]:
# Load a bunch of pyscenic modules
from pyscenic.export import add_scenic_metadata
from pyscenic.cli.utils import load_signatures
from pyscenic.utils import load_motifs
import operator as op
from cytoolz import compose
from pyscenic.transform import df2regulons
import operator as op
from IPython.display import HTML, display
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss
from adjustText import adjust_text
from pyscenic.utils import modules_from_adjacencies

import json
import zlib
import base64

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


In [3]:
# Create the custom colourmap
import matplotlib as mpl
from copy import copy

# `mpl.cm.get_cmap` is deprecated in recent Matplotlib releases and absent from the
# newest ones, so look the colourmap up in the `mpl.colormaps` registry first and
# only fall back to the old accessor on installs that predate the registry.
try:
    _hot_r = mpl.colormaps['hot_r']
except AttributeError:
    _hot_r = mpl.cm.get_cmap('hot_r')

colour_map = copy(_hot_r)  # work on a copy so the registered 'hot_r' map is left untouched
colour_map.set_under('lightgray')  # values below vmin are drawn in light gray
plt.rcParams['figure.figsize'] = (6, 6)  # rescale figures


In [4]:
# Directory (relative to this notebook) that the input files below are read from
results_directory = "../data/"

In [6]:
# Read the processed AnnData file; the SCENIC results are attached to this object further down
file_name = 'integratedfibroblastsdata.h5ad'
fibroblasts_merged_scenic = sc.read_h5ad(f'{results_directory}{file_name}')

In [None]:
# Load the functional gene sets because we'll need them later
def _read_gene_list(csv_name):
    """Return column '0' of the CSV `csv_name` in `results_directory` as a list."""
    return pd.read_csv(results_directory + csv_name)['0'].tolist()


ecm_synthesis_genes = _read_gene_list('fibroblast_ecm_structural_genes.csv')
ecm_modifying_genes = _read_gene_list('fibroblast_ecm_modifying_genes.csv')
signal_ligands = _read_gene_list('fibroblast_signal_ligands.csv')
signal_receptors = _read_gene_list('fibroblast_signal_receptors.csv')

# Union of the four gene sets with duplicates removed
functional_genes = list(set(
    ecm_synthesis_genes
    + ecm_modifying_genes
    + signal_ligands
    + signal_receptors
))

In [8]:
# Read the regulon signatures from 'reg_fib.csv' in the data directory
regulon_signatures = load_signatures(f'{results_directory}reg_fib.csv')

Create regulons from a dataframe of enriched features.
Additional columns saved: []


In [9]:
# Load the SCENIC output
scenic_output_file = 'fibroblasts_merged_scenic_output.loom'
# NOTE(review): the file is only read in this cell; read-only mode 'r' may be enough — confirm.
lf = lp.connect(results_directory + scenic_output_file, mode='r+', validate=False)
try:
    # Full matrix from the loom file, transposed so rows are cell IDs and columns are genes
    exprMat = pd.DataFrame(lf[:, :], index=lf.ra.Gene, columns=lf.ca.CellID).T
    # Per-cell regulon AUC values stored as a column attribute of the loom file
    auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)
finally:
    # Release the file handle even when building one of the DataFrames fails
    lf.close()

In [None]:
# Attach the SCENIC results (AUC matrix and regulon signatures) to the AnnData object
add_scenic_metadata(
    fibroblasts_merged_scenic,
    auc_mtx,
    regulon_signatures,
)

# Cluster the fibroblasts based on AUCell scores

In [None]:
# Build the neighbour graph on the 'X_aucell' representation, embed it with UMAP and
# colour the embedding by the existing 'leiden_sub' annotation.
# NOTE(review): this comment used to read "k = 30 looks about reasonable", but the call
# passes n_neighbors=15 — confirm which value is intended.
sc.pp.neighbors(fibroblasts_merged_scenic, n_neighbors=15, use_rep='X_aucell')
sc.tl.umap(fibroblasts_merged_scenic)
sc.pl.umap(fibroblasts_merged_scenic, color='leiden_sub')

In [None]:
# Leiden clustering; the labels are written to obs['leiden_scenic']
sc.tl.leiden(
    fibroblasts_merged_scenic,
    key_added='leiden_scenic',
    resolution=0.6,
)

In [None]:
# Relabel the 'leiden_scenic' categories as REG-1 ... REG-12.
# NOTE(review): assumes the clustering above produced exactly 12 categories — confirm.
new_scenic_cluster_names = [f'REG-{number}' for number in range(1, 13)]
fibroblasts_merged_scenic.rename_categories('leiden_scenic', new_scenic_cluster_names)

In [None]:
# Plot so that we store the new colours (read back later from uns['leiden_scenic_colors'])
sc.pl.umap(
    fibroblasts_merged_scenic,
    color='leiden_scenic',
    palette='Paired',
    size=10.0,
)

In [None]:
# Calculate the DEGs to work out the later intersections.
# The results are stored in uns under the same key as the grouping, 'leiden_scenic'.
sc.tl.rank_genes_groups(
    fibroblasts_merged_scenic,
    groupby='leiden_scenic',
    method='wilcoxon',
    key_added='leiden_scenic',
)

# Overlap between regulon-based clusters and original annotations

In [None]:
# Group labels used by the overlap analysis below
def _groups_by_count(obs_column):
    """List the distinct values of an obs column in the order given by value_counts()."""
    return fibroblasts_merged_scenic.obs[obs_column].value_counts().index.tolist()


regulon_clusters = _groups_by_count('leiden_scenic')
fibroblast_clusters = _groups_by_count('leiden_sub')
# Samples are listed by hand rather than derived from the data
fibroblast_samples = [
    'UW P21', 'UW P49',
    'SW PWD4', 'SW PWD7',
    'LW PWD12', 'LW PWD14',
    'LW FIB PWD18', 'LW REG PWD18',
]
fibroblast_positions = _groups_by_count('position')

In [None]:
# Pre-allocate the Jaccard score matrices: one row per regulon cluster, one column per
# fibroblast subcluster (first matrix) or per sample (second matrix)
fibroblast_jaccard_regulon_clusters = np.zeros(
    shape=(len(regulon_clusters), len(fibroblast_clusters))
)
fibroblast_jaccard_regulon_samples = np.zeros(
    shape=(len(regulon_clusters), len(fibroblast_samples))
)

In [None]:
from itertools import product

In [None]:
# Calculate the Jaccard index between every regulon cluster and every fibroblast subcluster.
# Cells are counted with boolean masks over `obs` instead of slicing the AnnData object,
# and the row/column positions come from enumerate rather than repeated list.index() calls.
cell_metadata = fibroblasts_merged_scenic.obs

for regulon_index, regulon in enumerate(regulon_clusters):

    # The regulon-cluster mask does not change in the inner loop, so build it once
    in_regulon = cell_metadata['leiden_scenic'].isin([regulon])
    num_cells_in_regulon = int(in_regulon.sum())

    for cluster_index, cluster in enumerate(fibroblast_clusters):

        in_cluster = cell_metadata['leiden_sub'].isin([cluster])
        num_cells_in_cluster = int(in_cluster.sum())
        num_cells_in_both = int((in_regulon & in_cluster).sum())

        # Size of the union: |A| + |B| - |A and B|
        num_cells_in_union = num_cells_in_regulon + num_cells_in_cluster - num_cells_in_both

        # Two empty groups have no defined overlap; store 0 instead of dividing by zero
        fibroblast_jaccard_regulon_clusters[regulon_index, cluster_index] = (
            num_cells_in_both / num_cells_in_union if num_cells_in_union else 0.0
        )


In [None]:
# Calculate the Jaccard index between every regulon cluster and every sample.
# Cells are counted with boolean masks over `obs` instead of slicing the AnnData object,
# and the row/column positions come from enumerate rather than repeated list.index() calls.
cell_metadata = fibroblasts_merged_scenic.obs

for regulon_index, regulon in enumerate(regulon_clusters):

    # The regulon-cluster mask does not change in the inner loop, so build it once
    in_regulon = cell_metadata['leiden_scenic'].isin([regulon])
    num_cells_in_regulon = int(in_regulon.sum())

    # `cluster` holds a sample name here; the variable name is kept from the original cell
    for cluster_index, cluster in enumerate(fibroblast_samples):

        in_sample = cell_metadata['sample'].isin([cluster])
        num_cells_in_cluster = int(in_sample.sum())
        num_cells_in_both = int((in_regulon & in_sample).sum())

        # Size of the union: |A| + |B| - |A and B|
        num_cells_in_union = num_cells_in_regulon + num_cells_in_cluster - num_cells_in_both

        # A sample with no cells and an empty cluster have no defined overlap; store 0
        # instead of dividing by zero
        fibroblast_jaccard_regulon_samples[regulon_index, cluster_index] = (
            num_cells_in_both / num_cells_in_union if num_cells_in_union else 0.0
        )
    

In [None]:
# Jaccard similarity of each regulon cluster (one line each) across the fibroblast subclusters
fibroblast_jaccard_df = pd.DataFrame(fibroblast_jaccard_regulon_clusters, index=regulon_clusters, columns=fibroblast_clusters)

sns.set_theme(style="ticks");

# Key the colours by cluster name. `regulon_clusters` follows value_counts() order while
# uns['leiden_scenic_colors'] follows the category order, so a positional list palette
# can attach a colour to the wrong cluster whenever the two orders differ.
leiden_scenic_palette = dict(zip(fibroblasts_merged_scenic.obs['leiden_scenic'].cat.categories,
                                 fibroblasts_merged_scenic.uns['leiden_scenic_colors']))

# NOTE: relplot returns a FacetGrid, not an Axes; the name `ax` is kept from the original cell
ax = sns.relplot(data=fibroblast_jaccard_df.T,
            linewidth=2.5,
            palette=leiden_scenic_palette,
            kind='line', alpha=0.75)
plt.xlabel('Fibroblast subcluster')
plt.ylabel('Jaccard similarity')
plt.ylim((0, 0.85))
plt.yticks([0, 0.2, 0.4, 0.6, 0.8])
plt.xticks(rotation=90);

In [None]:
# Jaccard similarity of each regulon cluster (one line each) across the samples
fibroblast_jaccard_df = pd.DataFrame(fibroblast_jaccard_regulon_samples, index=regulon_clusters, columns=fibroblast_samples)

sns.set_theme(style="ticks");

# Key the colours by cluster name. `regulon_clusters` follows value_counts() order while
# uns['leiden_scenic_colors'] follows the category order, so a positional list palette
# can attach a colour to the wrong cluster whenever the two orders differ.
leiden_scenic_palette = dict(zip(fibroblasts_merged_scenic.obs['leiden_scenic'].cat.categories,
                                 fibroblasts_merged_scenic.uns['leiden_scenic_colors']))

# NOTE: relplot returns a FacetGrid, not an Axes; the name `ax` is kept from the original cell
ax = sns.relplot(data=fibroblast_jaccard_df.T,
            linewidth=2.5,
            palette=leiden_scenic_palette,
            kind='line', alpha=0.75)
plt.xlabel('Wound healing time')
plt.ylabel('Jaccard similarity')
plt.ylim((0, 0.85))
plt.yticks([0, 0.2, 0.4, 0.6, 0.8])
plt.xticks(rotation=90);


# Calculate the intersection between the downstream targets of the top regulons and their DEGs

In [None]:
# For every SCENIC cluster, take its top-ranked DEGs and record which regulons list each
# DEG among their targets. DEGs found in `functional_genes` go into the "fib_functional"
# lists / DataFrame; all remaining DEGs go into the "other_functional" ones.
#
# NOTE(review): `fibroblast_scenic_clusters` and `regulon_targets_for_reg_clusters` are not
# defined in any cell shown above, so this cell raises NameError on a fresh kernel unless
# they are created elsewhere — confirm where they come from (presumably
# `fibroblast_scenic_clusters` holds the 'leiden_scenic' cluster names).
n_top_genes = 300

fib_functional_scenic_clusters = []
fib_functional_regulons_for_deg = []
fib_functional_scenic_degs = []

other_functional_scenic_clusters = []
other_functional_regulons_for_deg = []
other_functional_scenic_degs = []

# A set gives constant-time membership tests inside the loop
functional_gene_set = set(functional_genes)


def _regulons_targeting(gene, regulons_and_targets):
    """Return the sorted, comma-separated names of the regulons that list `gene` as a target.

    `regulons_and_targets` is indexed by regulon name; only the first entry stored for
    each regulon is consulted, through its `.genes` attribute.
    """
    return ', '.join(sorted(reg for reg in regulons_and_targets
                            if gene in regulons_and_targets[reg][0].genes))


# Ranked gene names, one column per cluster (the table is the same for every cluster)
ranked_genes_per_cluster = pd.DataFrame(fibroblasts_merged_scenic.uns['leiden_scenic']['names'])

for cluster in fibroblast_scenic_clusters:
    top_genes_per_cluster = ranked_genes_per_cluster[cluster][0:n_top_genes].tolist()
    # Remove the non-informative genes.
    # NOTE(review): the 'Rp' prefix also matches any other gene whose name starts with
    # 'Rp', not only Rpl*/Rps* — confirm this is intended.
    top_genes = [gene for gene in top_genes_per_cluster if not gene.startswith(('Rp', 'mt-'))]
    regulons_and_targets_per_cluster = regulon_targets_for_reg_clusters[cluster]

    for gene in top_genes:
        joined_regulons = _regulons_targeting(gene, regulons_and_targets_per_cluster)

        if gene in functional_gene_set:
            fib_functional_scenic_clusters.append(cluster)
            fib_functional_scenic_degs.append(gene)
            fib_functional_regulons_for_deg.append(joined_regulons)
        else:
            other_functional_scenic_clusters.append(cluster)
            other_functional_scenic_degs.append(gene)
            other_functional_regulons_for_deg.append(joined_regulons)

scenic_fib_functional_regulons_df = pd.DataFrame(data={'Cluster': fib_functional_scenic_clusters,
                                       'Regulon': fib_functional_regulons_for_deg,
                                       'Target': fib_functional_scenic_degs})

# Built from the `other_functional_*` lists filled above; the previous version referenced
# `fib_other_*` names that are never defined in this cell.
scenic_other_functional_regulons_df = pd.DataFrame(data={'Cluster': other_functional_scenic_clusters,
                                       'Regulon': other_functional_regulons_for_deg,
                                       'Target': other_functional_scenic_degs})


In [None]:
# Get the DEGs for a specific cluster
cluster = 'REG-3'
n_top_genes = 50  # number of rows to show; the slice below now uses it instead of a literal 50
scenic_fib_functional_regulons_df[scenic_fib_functional_regulons_df['Cluster'] == cluster][0:n_top_genes]

In [None]:
# Same view for the DEGs outside the functional gene sets; `cluster` and `n_top_genes`
# are the values set in the cell above
scenic_other_functional_regulons_df[scenic_other_functional_regulons_df['Cluster'] == cluster][0:n_top_genes]

In [None]:
# We need to check that the DEGs are actually specific to that cluster.
# The plots use `fibroblasts_merged_scenic`: the name `fibroblasts_merged` used before is
# not defined in any cell shown above, so the loop failed on a fresh kernel.
fib_functional_rows = scenic_fib_functional_regulons_df[scenic_fib_functional_regulons_df['Cluster'] == cluster]
for gene in fib_functional_rows[0:n_top_genes]['Target']:
    sc.pl.umap(fibroblasts_merged_scenic, color=gene, cmap=colour_map, vmin=0.0001, size=10)

In [None]:
# We need to check that the remaining (non-functional) DEGs are actually specific to that cluster.
# The plots use `fibroblasts_merged_scenic`: the name `fibroblasts_merged` used before is
# not defined in any cell shown above, so the loop failed on a fresh kernel.
other_functional_rows = scenic_other_functional_regulons_df[scenic_other_functional_regulons_df['Cluster'] == cluster]
for gene in other_functional_rows[0:n_top_genes]['Target']:
    sc.pl.umap(fibroblasts_merged_scenic, color=gene, cmap=colour_map, vmin=0.0001, size=10)