In [None]:
import sys
import os
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

import cell2location
import scvi

from matplotlib import rcParams
# Embed fonts as TrueType (Type 42) so text in exported PDFs is rendered correctly.
rcParams.update({'pdf.fonttype': 42})

We load the results generated in the previous script

In [None]:
# Anchor every path below to the directory the kernel was started in.
root_path = os.path.abspath(os.curdir)

In [None]:
# Every deconvolution artefact lives under <root_path>/deconvolution:
#   reference_signatures/  -> ref_run_name
#   cell2location_map/     -> run_name
results_folder = os.path.join(root_path, 'deconvolution')
ref_run_name, run_name = (
    os.path.join(results_folder, subfolder)
    for subfolder in ('reference_signatures', 'cell2location_map')
)

In [None]:
# Load the spatial AnnData object stored in the cell2location_map run folder.
# The path is built with os.path.join, like the other paths in this notebook,
# rather than with a hard-coded '/' separator.
adata_file = os.path.join(run_name, "sp.h5ad")
adata_vis = sc.read_h5ad(adata_file)
# Optional: reload the trained cell2location model as well (left disabled).
# mod = cell2location.models.Cell2location.load(f"{run_name}", adata_vis)

# Identifying discrete tissue regions by Leiden clustering

In [None]:
# Build the spot neighbourhood graph from the cell2location abundance
# estimates stored in adata_vis.obsm['q05_cell_abundance_w_sf'].
sc.pp.neighbors(
    adata_vis,
    n_neighbors=15,
    use_rep='q05_cell_abundance_w_sf',
)

# Leiden clustering of the spots on that graph (scanpy).
sc.tl.leiden(adata_vis, resolution=0.6)

# Expose the cluster labels under a descriptive name, as a categorical column.
leiden_labels = adata_vis.obs["leiden"]
adata_vis.obs["region_cluster"] = leiden_labels.astype("category")

In [None]:
# Inspect the per-spot metadata table; it now carries the "leiden" and
# "region_cluster" columns.
adata_vis.obs

In [None]:
# Embed the spots in 2-D using the KNN graph built on the cell2location output.
sc.tl.umap(adata_vis, min_dist=0.5, spread=1.5)

# One UMAP per obs annotation; only the cluster panel draws its legend on the data.
umap_panels = (
    ('region_cluster', {'legend_loc': 'on data'}),
    ('Sample_ID', {}),
    ('Gender', {}),
    ('Condition', {}),
)
shared_umap_kwargs = dict(size=30, color_map='RdPu', ncols=2, legend_fontsize=20)

with mpl.rc_context({'axes.facecolor': 'white', 'figure.figsize': [8, 8]}):
    for obs_key, panel_kwargs in umap_panels:
        sc.pl.umap(adata_vis, color=[obs_key], **shared_umap_kwargs, **panel_kwargs)

# Spatial-coordinate view of the same clusters (disabled).
# with mpl.rc_context({'axes.facecolor':  'black',
#                     'figure.figsize': [4.5, 5]}):
#   sc.pl.spatial(adata_vis, color=['region_cluster'],
#                 size=1.3, img_key='hires', alpha=0.5)

In [None]:
# Map every region-cluster label to its plotting colour.
# The labels are taken from the categorical column itself instead of a
# hard-coded range(20), so the mapping stays complete when Leiden produces
# more than 20 clusters.
# NOTE(review): adata_vis.uns["region_cluster_colors"] is presumably written by
# scanpy when region_cluster is plotted (the UMAP cell) and ordered like the
# categories; run that cell first and confirm the alignment.
region_categories = adata_vis.obs["region_cluster"].cat.categories
clusters_colors = dict(
    zip(region_categories.astype(str), adata_vis.uns["region_cluster_colors"])
)

In [None]:
# Draw the region clusters on the tissue image, one figure per sample.
for library in adata_vis.obs["Sample_ID"].unique().tolist():
    # Subset on the same column the loop iterates over, so the selected spots
    # always belong to `library`.
    ad = adata_vis[adata_vis.obs["Sample_ID"] == library, :].copy()
    print(library)
    # Prints the first condition found; assumes one condition per sample.
    print(ad.obs['Condition'].unique()[0])

    # Keep only the colours of clusters present in this sample, preserving
    # the order of clusters_colors; membership is computed once per sample.
    present_clusters = set(ad.obs["region_cluster"].unique().tolist())
    sample_palette = [
        colour
        for label, colour in clusters_colors.items()
        if label in present_clusters
    ]

    # NOTE(review): library_id=library assumes the Sample_ID values are also
    # the keys of adata_vis.uns['spatial'] — confirm.
    sc.pl.spatial(
        ad,
        img_key="hires",
        library_id=library,
        color="region_cluster",
        size=1.5,
        palette=sample_palette,
    )

In [None]:
# Spot counts per (sample, region cluster): rows are samples, columns are clusters.
# observed=False is spelled out so that categorical groupers keep producing
# every category combination (zero-filled) without relying on the deprecated
# implicit pandas default.
sample_cluster_counts_sample = (
    adata_vis.obs
    .groupby(['Sample_ID', 'region_cluster'], observed=False)
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Show the sample x region-cluster table of spot counts.
sample_cluster_counts_sample

In [None]:
# Stacked bars: one bar per sample, segments are spot counts per region cluster.
# The figure size is set directly on the figure that is created here.
fig, ax = plt.subplots(figsize=(10, 6))
sample_cluster_counts_sample.plot(kind='bar', stacked=True, ax=ax)
plt.show()

In [None]:
# Spot counts per (condition, region cluster): rows are conditions, columns are clusters.
# observed=False is spelled out so that categorical groupers keep producing
# every category combination (zero-filled) without relying on the deprecated
# implicit pandas default.
sample_cluster_counts_condition = (
    adata_vis.obs
    .groupby(['Condition', 'region_cluster'], observed=False)
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Show the condition x region-cluster table of spot counts.
sample_cluster_counts_condition

In [None]:
# Stacked bars: one bar per condition, segments are spot counts per region cluster.
# The figure size is set directly on the figure that is created here.
fig, ax = plt.subplots(figsize=(10, 6))
sample_cluster_counts_condition.plot(kind='bar', stacked=True, ax=ax)
plt.show()

In [None]:
# Spot counts per (Gender, region cluster): rows are Gender values, columns are clusters.
# observed=False is spelled out so that categorical groupers keep producing
# every category combination (zero-filled) without relying on the deprecated
# implicit pandas default.
sample_cluster_counts_sex = (
    adata_vis.obs
    .groupby(['Gender', 'region_cluster'], observed=False)
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Show the Gender x region-cluster table of spot counts.
sample_cluster_counts_sex

In [None]:
# Stacked bars: one bar per Gender value, segments are spot counts per region cluster.
# Plots the Gender table (sample_cluster_counts_sex); the per-condition table
# has its own figure in the preceding section.
fig, ax = plt.subplots(figsize=(10, 6))
sample_cluster_counts_sex.plot(kind='bar', stacked=True, ax=ax)
plt.show()

The variability between samples is too large. Use proportions rather than abundances?

In [None]:
# sc.pp.normalize_total(adata_vis, inplace=True)
# sc.pp.log1p(adata_vis)
# sc.tl.rank_genes_groups(adata_vis, groupby='region_cluster', method='t-test')

In [None]:
# sc.pl.rank_genes_groups(adata_vis, n_genes = 10, groups = ['5','10'])

# Identifying cellular compartments / tissue zones using matrix factorisation (NMF)

In [None]:
# adata_subset_treated = adata_vis[adata_vis.obs['CONDITION'] == 'FAP_LTBR'].copy()
# adata_subset_untreated = adata_vis[adata_vis.obs['CONDITION'] == 'Untreated'].copy()

In [None]:
# from cell2location import run_colocation
# res_dict_treated, adata_subset_treated = run_colocation(
#    adata_subset_treated,
#    model_name='CoLocatedGroupsSklearnNMF',
#    train_args={
#      'n_fact': np.arange(5, 21), # IMPORTANT: use a wider range of the number of factors (5-30)
#      'sample_name_col': 'readout_id', # columns in adata_vis.obs that identifies sample
#      'n_restarts': 3 # number of training restarts
#    },
    # the hyperparameters of NMF can be also adjusted:
#    model_kwargs={'alpha': 0.01, 'init': 'random', "nmf_kwd_args": {"tol": 0.000001}},
#    export_args={'path': f'{run_name}/CoLocatedComb_treated/'}
#)

In [None]:
# from cell2location import run_colocation
# res_dict_untreated, adata_subset_untreated = run_colocation(
#    adata_subset_untreated,
#    model_name='CoLocatedGroupsSklearnNMF',
#    train_args={
#      'n_fact': np.arange(5, 21), # IMPORTANT: use a wider range of the number of factors (5-30)
#      'sample_name_col': 'readout_id', # columns in adata_vis.obs that identifies sample
#      'n_restarts': 3 # number of training restarts
#    },
#    # the hyperparameters of NMF can be also adjusted:
#    model_kwargs={'alpha': 0.01, 'init': 'random', "nmf_kwd_args": {"tol": 0.000001}},
#    export_args={'path': f'{run_name}/CoLocatedComb_untreated/'}
#)

In [None]:
# res_dict_treated['n_fact5']['mod'].plot_cell_type_loadings()
# res_dict_untreated['n_fact5']['mod'].plot_cell_type_loadings()
# plt.show() 

In [None]:
# res_dict_treated['n_fact8']['mod'].plot_cell_type_loadings()
# res_dict_untreated['n_fact8']['mod'].plot_cell_type_loadings()
# plt.show() 

In [None]:
# res_dict_treated['n_fact12']['mod'].plot_cell_type_loadings()
# res_dict_untreated['n_fact12']['mod'].plot_cell_type_loadings()
# plt.show() 

In [None]:
# res_dict_treated['n_fact16']['mod'].plot_cell_type_loadings()
# res_dict_untreated['n_fact16']['mod'].plot_cell_type_loadings()
# plt.show() 

In [None]:
# res_dict_treated['n_fact20']['mod'].plot_cell_type_loadings()
# res_dict_untreated['n_fact20']['mod'].plot_cell_type_loadings()
# plt.show() 

In [None]:
# Export a notebook to HTML with nbconvert. The file name is hard-coded
# (presumably this notebook's own name), so it must exist in the kernel's
# working directory.
! jupyter nbconvert --to html 21_01_Deconvolution_C2L_Downstream.ipynb