In [None]:
import os
import sys

# Record interpreter version, working directory and executable for reproducibility.
# The working directory is printed explicitly: a bare `os.getcwd()` in the middle
# of a cell is evaluated but never displayed.
print("Python version: " + sys.version)
print("Working directory: " + os.getcwd())
print(sys.executable)

In [None]:
import numpy as np
np.random.seed(123)
import pandas as pd
import scipy
import itertools

import umap
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import scanpy as sc
import anndata as ad
import scvelo as scv
from tqdm.notebook import tqdm

from pathlib import Path

In [3]:
import cellrank as cr

In [None]:
# Show the installed CellRank version for the record.
cr.__version__

In [5]:
from cellrank.kernels import PseudotimeKernel

In [6]:
from cellrank.kernels import CytoTRACEKernel

In [7]:
import scanpy.external as sce

In [None]:
# Scanpy session setup: logging verbosity, dependency/version header, figure defaults.
sc.settings.verbosity = 1
sc.logging.print_header()
sc.settings.set_figure_params(dpi=300, facecolor='white')

In [9]:
# Override global matplotlib defaults: no axes grid (otherwise visible in the
# scvelo plots), a moderate figure resolution, and no tick marks on the bottom/left axes.
plt.rcParams.update({
    'axes.grid': False,
    'figure.dpi': 150,
    'xtick.bottom': False,
    'ytick.left': False,
})


In [10]:
# Folder holding the processed input data, relative to the notebook's working directory.
new_data_folder = '../processed_data'

In [11]:
# Load the annotated AnnData object used throughout this notebook.
# NOTE(review): the file name suggests it already carries Numbat, CCISM, inferCNV and iCMS annotations — confirm.
adata_epi = sc.read(Path(new_data_folder)/'CB_epi_Numbat_CCISM_inferCNV_iCMS.h5')

In [12]:
# Derive one consensus label per cell from the sample origin and two per-cell calls.
# Conditions are evaluated in priority order (first match wins), exactly like an
# if/elif chain; cells matching none of them get 'no confident assignment'.
# Vectorised instead of a per-row loop with chained-indexing assignment
# (`obs[col][i] = ...`), which is slow and is not guaranteed to write back into
# the DataFrame (no-op under pandas copy-on-write).
# NOTE(review): this cell reads `scitcem_call` while later cells use `CCISM_call` —
# presumably two names for the same caller; confirm both columns exist in the file.
epi_obs = adata_epi.obs
from_normal_sample = (epi_obs['sample_origin'] == '\nnormal\nsample\n').to_numpy()
both_call_tumour = (
    (epi_obs['numbat'] == 'tumour\n(tumour sample)')
    & (epi_obs['scitcem_call'] == 'tumour\n(tumour sample)')
).to_numpy()
both_call_normal = (
    (epi_obs['numbat'] == 'normal\n(tumour sample)')
    & (epi_obs['scitcem_call'] == 'normal\n(tumour sample)')
).to_numpy()

consensus_label = np.select(
    [from_normal_sample, both_call_tumour, both_call_normal],
    ['normal_sample', 'genomically_tumour', 'genomically_normal'],
    default='no confident assignment',
)
adata_epi.obs['tumour_normal_normal'] = pd.Categorical(consensus_label)


In [13]:
# Fix the display order of the consensus categories (used for plot axes and colours).
adata_epi.obs['tumour_normal_normal'] = (
    adata_epi.obs['tumour_normal_normal']
    .cat.reorder_categories(
        [
            'genomically_tumour',
            'no confident assignment',
            'genomically_normal',
            'normal_sample',
        ]
    )
)

In [14]:
# One colour per `tumour_normal_normal` category (four entries), stored under the
# `<obs key>_colors` key in `uns`.
adata_epi.uns['tumour_normal_normal_colors'] = ['#ff7f0e', '#9b1ee3','#1f77b4','#808080']

In [15]:
# Diffusion map with 15 components; later cells read the result from obsm['X_diffmap'].
# NOTE(review): no neighbour graph is computed in this notebook before this call —
# presumably it is already stored in the loaded file; confirm.
sc.tl.diffmap(adata_epi, n_comps=15)

In [16]:
# Work on a copy so that `adata_epi` is not modified by the cells below.
adata = adata_epi.copy()

### CytoTRACE

In [None]:
# Format hack: provide "spliced"/"unspliced" layers by reusing X for both, so that
# scv.pp.moments can be run on this object.
# NOTE(review): both layers reference the same object as adata.X (no copy is made here);
# confirm that nothing downstream modifies a layer in place.
adata.layers["spliced"] = adata.X
adata.layers["unspliced"] = adata.X

# Compute moments using 30 PCs and 30 neighbours.
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

In [18]:
# Build the CytoTRACE kernel and compute the CytoTRACE score.
# NOTE(review): `ct_pseudotime`, plotted in later cells, is presumably written to
# adata.obs by this call — confirm against the CellRank documentation.
ctk = CytoTRACEKernel(adata).compute_cytotrace()

In [None]:
# Compute the kernel's transition matrix with the "soft" threshold scheme (nu = 0.5).
ctk.compute_transition_matrix(threshold_scheme="soft", nu=0.5)

In [21]:
# Relabel the inferCNV categories for display in figure legends.
adata.obs['inferCNV_result'] = adata.obs['inferCNV_result'].cat.rename_categories(
    {
        'failed_sample': 'no detectable CNA',
        'CNA\n(tumour sample)': 'CNA calls\n(tumour sample)',
        'CNN\n(tumour sample)': 'CNN calls\n(tumour sample)',
    }
)

In [None]:
# UMAP coloured by the inferCNV result.
scv.pl.scatter(
    adata,
    basis='umap',
    color=['inferCNV_result'],
    ncols=1,
    dpi=150,
    legend_loc='right margin',
    size=2,
    title='copy number - inferCNV',
)

In [23]:
# Relabel the CCISM categories for display in figure legends.
adata.obs['CCISM_call'] = adata.obs['CCISM_call'].cat.rename_categories(
    {
        'tumour\n(tumour sample)': 'tumour calls\n(tumour sample)',
        'normal\n(tumour sample)': 'normal calls\n(tumour sample)',
    }
)

In [None]:
# UMAP coloured by the CCISM call.
scv.pl.scatter(
    adata,
    basis='umap',
    color=['CCISM_call'],
    ncols=1,
    dpi=150,
    legend_loc='right margin',
    size=2,
    title='somatic variant - CCISM',
)

In [25]:
# Relabel the iCMS "normal" category for display in figure legends.
adata.obs['iCMS_scANVI'] = adata.obs['iCMS_scANVI'].cat.rename_categories(
    {'normal\n(tumour sample)': 'normal calls\n(tumour sample)'}
)

In [None]:
# UMAP coloured by the iCMS label. The plot is created with an empty title and
# `show=False` so the returned axes can receive a horizontally shifted title.
ax = scv.pl.scatter(adata, basis='umap', color=['iCMS_scANVI'],
                    ncols=1, dpi=150, legend_loc='right margin', size=2,
                    show=False, title='')
# Trailing semicolon suppresses the Text repr; a `;` on a line of its own is not
# a valid Python statement.
ax.set_title('transcriptomic signatures - iCMS', x=0.65);

In [27]:
# Relabel the Numbat categories for display in figure legends.
adata.obs['numbat'] = adata.obs['numbat'].cat.rename_categories(
    {
        'tumour\n(tumour sample)': 'tumour calls\n(tumour sample)',
        'normal\n(tumour sample)': 'normal calls\n(tumour sample)',
    }
)

In [None]:
# UMAP coloured by the Numbat call. The plot is created with an empty title and
# `show=False` so the returned axes can receive a horizontally shifted title.
ax = scv.pl.scatter(adata, basis='umap', color=['numbat'],
                    ncols=1, dpi=150, legend_loc='right margin', size=2,
                    show=False, title='')
# Trailing semicolon suppresses the Text repr; a `;` on a line of its own is not
# a valid Python statement.
ax.set_title('haplotype-aware copy number - Numbat', x=0.8);

In [None]:
# UMAP of the genomically normal and normal-sample cells only, coloured by cell type.
scv.pl.scatter(
    adata[adata.obs['tumour_normal_normal'].isin(['genomically_normal', 'normal_sample'])],
    basis='umap',
    color=['Uhlitz_scANVI'],
    ncols=1,
    dpi=150,
    legend_loc='right margin',
    size=2,
    title='Epithelial cell type\n(GN+N)',
)

In [None]:
# UMAP coloured by the CytoTRACE pseudotime.
scv.pl.scatter(
    adata,
    basis='umap',
    color=['ct_pseudotime'],
    ncols=1,
    dpi=150,
    legend_loc='right margin',
    size=2,
)

In [None]:
# Diffusion map (computed on adata_epi before the copy), components 1 vs 2,
# coloured by cell type.
scv.pl.scatter(
    adata,
    basis='diffmap',
    color=['Uhlitz_scANVI'],
    components=['1,2'],
    ncols=2,
    dpi=300,
    legend_loc='right',
    size=2,
    title=['DC 1,2'],
    frameon=True,
)

In [None]:
# Diffusion map (computed on adata_epi before the copy): several component pairs,
# coloured by cell type, without legend.
scv.pl.scatter(
    adata,
    basis='diffmap',
    color=['Uhlitz_scANVI'],
    components=['1,2', '2,3', '1,3', '2,4', '1,4'],
    ncols=2,
    dpi=300,
    legend_loc=None,
    size=2,
    title=['DC 1,2', 'DC 2,3', 'DC 1,3', 'DC 2,4', 'DC 1,4'],
)

In [None]:
# Diffusion map (computed on adata_epi before the copy): several component pairs,
# coloured by the tumour/normal consensus assignment, without legend.
scv.pl.scatter(
    adata,
    basis='diffmap',
    color=['tumour_normal_normal'],
    components=['1,2', '2,3', '1,3', '2,4', '1,4'],
    ncols=2,
    dpi=300,
    legend_loc=None,
    size=2,
    title=['DC 1,2', 'DC 2,3', 'DC 1,3', 'DC 2,4', 'DC 1,4'],
)

In [34]:
# Display order of the `Uhlitz_scANVI` cell-type labels: used as facet order in the
# violin plots and as column order in the heatmaps below.
new_order = ['TC1', 'TC2', 'TC3', 'TC4', 'Stem', 'Stem/TA',
             'Immature Goblet', 'Goblet',
             'Enterocyte progenitor', 'Enterocytes', 'Tuft']

### Check the CytoTRACE assumption that the number of expressed genes decreases as cells differentiate

In [None]:
# Violin plots of "n_genes_by_counts" (the number of genes with at least 1 count
# in a cell), one facet per cell type, split by tumour/normal assignment.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(adata.obs, col='Uhlitz_scANVI', hue='tumour_normal_normal',
                      height=4, aspect=5/9, despine=False, sharey=True,
                      col_order=new_order, palette=adata.uns['tumour_normal_normal_colors'])
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='n_genes_by_counts', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Shared caption in figure coordinates (`figure` replaces the deprecated `fig` alias).
    g.figure.text(x=0.5, y=-1, horizontalalignment='center', s='Cell identity', size=24)

    # Break the long facet titles over several lines. Facets follow `new_order`,
    # so look the panels up by label rather than by hard-coded position.
    axes = g.axes.flatten()
    axes[new_order.index('Enterocyte progenitor')].set_title('Enterocyte\nprogenitor\n')
    axes[new_order.index('Enterocytes')].set_title('Enterocyte\n')
    axes[new_order.index('Immature Goblet')].set_title('Immature\ngoblet\n')

In [None]:
# Violin plots of the G2M score, one facet per cell type, split by tumour/normal
# assignment (author's observation: no trend in cell cycle).
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(adata.obs, col='Uhlitz_scANVI', hue='tumour_normal_normal',
                      height=4, aspect=5/9, despine=False, sharey=True,
                      col_order=new_order, palette=adata.uns['tumour_normal_normal_colors'])
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='G2M_score', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Shared caption in figure coordinates (`figure` replaces the deprecated `fig` alias).
    g.figure.text(x=0.5, y=-1, horizontalalignment='center', s='Cell identity', size=24)

    # Break the long facet titles over several lines. Facets follow `new_order`,
    # so look the panels up by label rather than by hard-coded position.
    axes = g.axes.flatten()
    axes[new_order.index('Enterocyte progenitor')].set_title('Enterocyte\nprogenitor\n')
    axes[new_order.index('Enterocytes')].set_title('Enterocyte\n')
    axes[new_order.index('Immature Goblet')].set_title('Immature\ngoblet\n')

In [None]:
# Violin plots of the CytoTRACE pseudotime, one facet per cell type, split by
# tumour/normal assignment.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(adata.obs, col='Uhlitz_scANVI', hue='tumour_normal_normal',
                      height=4, aspect=5/9, despine=False, sharey=True,
                      col_order=new_order, palette=adata.uns['tumour_normal_normal_colors'])
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='ct_pseudotime', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set(ylim=(0, 1))
    g.set_xlabels('')
    # The plotted quantity comes from the CytoTRACE kernel (the label previously
    # read "Cytoscape", which is an unrelated tool).
    g.set_ylabels('CytoTRACE pseudotime\n', loc='top')

    # Shared caption in figure coordinates (`figure` replaces the deprecated `fig` alias).
    g.figure.text(x=0.5, y=-1, horizontalalignment='center', s='Cell identity', size=24)

    # Break the long facet titles over several lines. Facets follow `new_order`,
    # so look the panels up by label rather than by hard-coded position.
    axes = g.axes.flatten()
    axes[new_order.index('Enterocyte progenitor')].set_title('Enterocyte\nprogenitor\n')
    axes[new_order.index('Enterocytes')].set_title('Enterocyte\n')
    axes[new_order.index('Immature Goblet')].set_title('Immature\ngoblet\n')

In [None]:
# Violin plots of diffusion component 2, one facet per cell type, split by
# tumour/normal assignment.
with plt.style.context('./plt_style'):
    # The `DC2` obs column is only created in a later cell of this notebook, so on a
    # fresh top-to-bottom run it does not exist yet. Build the plotting frame here
    # from the diffusion map instead (X_diffmap column 2, the same column the later
    # cell names DC2).
    violin_df = adata.obs.assign(DC2=adata.obsm['X_diffmap'][:, 2])
    g = sns.FacetGrid(violin_df, col='Uhlitz_scANVI', hue='tumour_normal_normal',
                      height=4, aspect=5/9, despine=False, sharey=True,
                      col_order=new_order, palette=adata.uns['tumour_normal_normal_colors'])
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='DC2', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')
    g.set_ylabels('DC2 pseudotime\n', loc='top')

    # Shared caption in figure coordinates (`figure` replaces the deprecated `fig` alias).
    g.figure.text(x=0.5, y=-1, horizontalalignment='center', s='Cell identity', size=24)

    # Break the long facet titles over several lines. Facets follow `new_order`,
    # so look the panels up by label rather than by hard-coded position.
    axes = g.axes.flatten()
    axes[new_order.index('Enterocyte progenitor')].set_title('Enterocyte\nprogenitor\n')
    axes[new_order.index('Enterocytes')].set_title('Enterocyte\n')
    axes[new_order.index('Immature Goblet')].set_title('Immature\ngoblet\n')

In [None]:
# CytoTRACE pseudotime violins restricted to cells with a confident assignment
# (the 'no confident assignment' cells are removed).
with plt.style.context('./plt_style'):
    # Subset through AnnData (not adata.obs directly) and take `.obs` of the result,
    # as in the original cell, so the frame passed to seaborn is unchanged.
    confident_obs = adata[adata.obs['tumour_normal_normal'].isin(['genomically_tumour',
                                                                  'genomically_normal',
                                                                  'normal_sample'])].obs
    g = sns.FacetGrid(confident_obs,
                      col='Uhlitz_scANVI', hue='tumour_normal_normal',
                      height=4, aspect=1/2, despine=False, sharey=True,
                      col_order=new_order, palette=['#ff7f0e', '#1f77b4', '#808080'])
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='ct_pseudotime', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set(ylim=(0, 1))
    g.set_xlabels('')
    # The plotted quantity comes from the CytoTRACE kernel (the label previously
    # read "Cytoscape", which is an unrelated tool).
    g.set_ylabels('CytoTRACE pseudotime\n', loc='top')

    # Shared caption in figure coordinates (`figure` replaces the deprecated `fig` alias).
    g.figure.text(x=0.5, y=-1, horizontalalignment='center', s='Cell identity', size=24)

    # Break the long facet titles over several lines. Facets follow `new_order`,
    # so look the panels up by label rather than by hard-coded position.
    axes = g.axes.flatten()
    axes[new_order.index('Enterocyte progenitor')].set_title('Enterocyte\nprogenitor\n')
    axes[new_order.index('Enterocytes')].set_title('Enterocyte\n')
    axes[new_order.index('Immature Goblet')].set_title('Immature\ngoblet\n')

In [None]:
# Project the kernel's transition matrix onto the UMAP as a stream plot, coloured by cell type.
ctk.plot_projection(basis="umap", color="Uhlitz_scANVI", legend_loc="right", dpi=200,
                    recompute=False, density = 1, 
                   stream = True,
                   size = 2, alpha = 1)

In [None]:
# Simulate 10 random walks on the transition matrix, started from cells labelled
# "Stem", and draw them on the UMAP. The seed is fixed for reproducibility.
ctk.plot_random_walks(
    n_sims=10,
    start_ixs={"Uhlitz_scANVI": "Stem"},
    basis="umap",
    color="Uhlitz_scANVI",
    legend_loc="right",
    seed=1, size = 2, alpha = 1, dpi = 200
)

In [None]:
# Per-cell heatmap of the CytoTRACE pseudotime, with cells grouped by the
# tumour/normal consensus assignment (axes swapped so groups run along x).
with plt.rc_context({'ytick.major.size':16}):
    sc.pl.heatmap(adata, ['ct_pseudotime'],
                  groupby=['tumour_normal_normal'], swap_axes=True, figsize=[16,4])


In [44]:
# Integer indicator column: 1 for cells labelled 'normal_sample', 0 otherwise.
adata.obs['is_normal_sample'] = (
    (adata.obs['tumour_normal_normal'] == 'normal_sample').to_numpy().astype('int')
)


In [None]:
# Per-cell heatmap of the CytoTRACE pseudotime and the normal-sample indicator,
# with cells grouped by cell type (axes swapped so groups run along x).
with plt.rc_context({'ytick.major.size':16}):
    sc.pl.heatmap(adata, ['ct_pseudotime', 'is_normal_sample'],
                  groupby=['Uhlitz_scANVI'], swap_axes=True, figsize=[16,4])

In [None]:
# Median CytoTRACE pseudotime per (assignment, cell type):
#   rows    = tumour/normal consensus assignment
#   columns = cell type, in `new_order`
#   values  = median ct_pseudotime
with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
    plt.figure(figsize=(10, 5))
    # `observed=False` pins the current grouping behaviour for categorical keys and
    # avoids the pandas warning about the changing default.
    median_ct = adata.obs.pivot_table(index='tumour_normal_normal',
                                      columns='Uhlitz_scANVI',
                                      values='ct_pseudotime',
                                      aggfunc='median',
                                      observed=False)[new_order]
    sns.heatmap(median_ct, square=True, cmap='viridis_r')
    # The values come from the CytoTRACE kernel (the title previously read "Cytoscape").
    plt.title('CytoTRACE pseudotime (median)\n')
    # NOTE(review): "Scitcem" presumably refers to the caller stored as `CCISM_call`
    # elsewhere in this notebook — confirm the label.
    plt.ylabel('Numbat and Scitcem assignment\n', size=18)
    plt.xlabel('Cell type', size=18)

In [None]:
# Mean CytoTRACE pseudotime per (assignment, cell type); rows are the tumour/normal
# consensus assignment, columns the cell types in `new_order`.
with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
    plt.figure(figsize=(10, 5))
    # `observed=False` pins the current grouping behaviour for categorical keys and
    # avoids the pandas warning about the changing default.
    mean_ct = adata.obs.pivot_table(index='tumour_normal_normal',
                                    columns='Uhlitz_scANVI',
                                    values='ct_pseudotime',
                                    aggfunc='mean',
                                    observed=False)[new_order]
    sns.heatmap(mean_ct, square=True, cmap='viridis_r')
    # The values come from the CytoTRACE kernel (the title previously read "Cytoscape").
    plt.title('CytoTRACE pseudotime (mean)\n')
    # NOTE(review): "Scitcem" presumably refers to the caller stored as `CCISM_call`
    # elsewhere in this notebook — confirm the label.
    plt.ylabel('Numbat and Scitcem assignment\n', size=18)
    plt.xlabel('Cell type', size=18)

### use DC as pseudotime

In [48]:
# Expose diffusion-map columns 1-4 as obs columns DC1-DC4 for the plots below.
# NOTE(review): column 0 of X_diffmap is skipped — presumably the uninformative
# first component; confirm.
adata.obs[['DC1', 'DC2', 'DC3', 'DC4']]= adata.obsm['X_diffmap'][:,1:5]

In [49]:
# Custom continuous colormap blended through five colours (used for the DC2 heatmaps).
cmap = sns.color_palette("blend:#701f57,#ad1759,#d5d5d5,#f6b48f,bisque", as_cmap=True)

In [None]:
# Mean and median of diffusion component 2 per (assignment, cell type), one
# heatmap each, on a fixed symmetric colour scale.
with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
    for agg_name in ('mean', 'median'):
        plt.figure(figsize=(10, 5))
        # `observed=False` pins the current grouping behaviour for categorical keys
        # and avoids the pandas warning about the changing default.
        dc2_table = adata.obs.pivot_table(index='tumour_normal_normal',
                                          columns='Uhlitz_scANVI',
                                          values='DC2',
                                          aggfunc=agg_name,
                                          observed=False)[new_order]
        sns.heatmap(dc2_table, cmap=cmap, vmin=-0.015, vmax=0.015, square=True)
        plt.title(f'Diffusion time axis 2 ({agg_name})\n')
        plt.ylabel('Numbat and Scitcem assignment\n', size=18)
        plt.xlabel('Cell type', size=18)

In [54]:
# Map each cell-type category to its colour, pairing the categories of
# `Uhlitz_scANVI` with the colour list stored in `uns` position by position.
sample_color_map = dict(zip(adata_epi.obs['Uhlitz_scANVI'].cat.categories, adata_epi.uns['Uhlitz_scANVI_colors']))

In [55]:
# Alternative cell-type order starting with 'Stem' and ending with TC1-TC4;
# used as hue order (and to order the palette) in the histograms below.
order_start_with_stem = [
 'Stem',
 'Stem/TA',
 'Immature Goblet',
 'Goblet',
 'Enterocyte progenitor',
 'Enterocytes',
 'Tuft',
 'TC1',
 'TC2',
 'TC3',
 'TC4']

In [None]:
# Stacked histograms of diffusion components 1-4 (one figure each), coloured by cell type.
# One loop replaces four copy-pasted figure blocks that differed only in the component number.
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(adata.obs, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="stack",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')

In [None]:
# Same histograms as above, but scaled: within each bin the cell types are
# normalised to fill the full bar height (multiple="fill").
# One loop replaces four copy-pasted figure blocks that differed only in the component number.
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(adata.obs, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="fill",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')
        plt.ylabel('Normalised count')

In [None]:
# Stacked histograms of diffusion components 1-4 (one figure each), coloured by the
# tumour/normal consensus assignment.
# One loop replaces four copy-pasted figure blocks that differed only in the component number.
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(adata.obs, x=f'DC{dc}', hue='tumour_normal_normal', binwidth=0.001, stat='count',
                          legend='right', multiple="stack",
                          palette=adata.uns['tumour_normal_normal_colors'],
                          hue_order=adata.obs['tumour_normal_normal'].cat.categories, edgecolor='black')
        # The hue here is the tumour/normal assignment, not the cell type, so the
        # legend title is corrected accordingly.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.45, 1.0), title='Assignment')
        plt.title(f'Diffusion time axis {dc} distribution\n')

In [None]:
# Stacked histograms of diffusion components 1-4 (one figure each), coloured by `normal_vs_MS`.
# One loop replaces four copy-pasted figure blocks that differed only in the component number.
# NOTE(review): `normal_vs_MS` and `uns['normal_vs_MS_colors']` are not created in the
# visible part of this notebook — presumably they come with the loaded file; confirm.
# NOTE(review): the legend is titled 'Cell type' although the hue is `normal_vs_MS`;
# the title is kept as is — confirm the intended wording.
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(adata.obs, x=f'DC{dc}', hue='normal_vs_MS', binwidth=0.001, stat='count',
                          legend='right', multiple="stack",
                          palette=adata.uns['normal_vs_MS_colors'].tolist(),
                          hue_order=adata.obs['normal_vs_MS'].cat.categories, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')

In [None]:
# Diffusion components 1-4 for the genomically normal cells only, stacked by cell type.
# One loop replaces four copy-pasted figure blocks; the subset is taken once
# instead of once per figure.
obs_genomically_normal = adata[adata.obs['tumour_normal_normal'] == 'genomically_normal'].obs
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(obs_genomically_normal, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="stack",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')

In [None]:
# Diffusion components 1-4 for the genomically normal cells only; within each bin
# the cell types are normalised to fill the full bar height (multiple="fill").
# One loop replaces four copy-pasted figure blocks; the subset is taken once
# instead of once per figure.
obs_genomically_normal = adata[adata.obs['tumour_normal_normal'] == 'genomically_normal'].obs
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(obs_genomically_normal, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="fill",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')
        plt.ylabel('Normalised count')

In [None]:
# Diffusion components 1-4 for the normal-sample cells only, stacked by cell type.
# One loop replaces four copy-pasted figure blocks; the subset is taken once
# instead of once per figure.
obs_normal_sample = adata[adata.obs['tumour_normal_normal'] == 'normal_sample'].obs
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(obs_normal_sample, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="stack",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')

In [None]:
# Diffusion components 1-4 for the normal-sample cells only; within each bin the
# cell types are normalised to fill the full bar height (multiple="fill").
# One loop replaces four copy-pasted figure blocks; the subset is taken once
# instead of once per figure.
obs_normal_sample = adata[adata.obs['tumour_normal_normal'] == 'normal_sample'].obs
for dc in (1, 2, 3, 4):
    with plt.rc_context({'ytick.major.size': 16, 'axes.titlesize': 18}):
        plt.figure(figsize=(8, 5))
        ax = sns.histplot(obs_normal_sample, x=f'DC{dc}', hue='Uhlitz_scANVI', binwidth=0.001, stat='count',
                          legend='right', multiple="fill",
                          palette=[sample_color_map[key] for key in order_start_with_stem],
                          hue_order=order_start_with_stem, edgecolor='black')
        # Move the legend outside the axes, to the right.
        sns.move_legend(ax, "upper right", bbox_to_anchor=(1.4, 1.0), title='Cell type')
        plt.title(f'Diffusion time axis {dc} distribution\n')
        plt.ylabel('Normalised count')

### dpt pseudotime

In [120]:
# Cell-type label per cell as a DataFrame with a fresh positional index;
# the former obs index is kept as a regular column.
cellltype_df = adata.obs.loc[:, 'Uhlitz_scANVI'].reset_index()

In [141]:
# Root cell for pseudotime: the 'Stem' cell with the smallest value in
# column 2 of adata.obsm['X_diffmap'].
# argmin() over the Stem-only slice returns a position *inside that slice*,
# so it is mapped back through `stem_positions` to the cell's position in
# `adata`. Using the slice-relative value directly would point at an
# unrelated cell.
stem_positions = np.flatnonzero((adata.obs['Uhlitz_scANVI'] == 'Stem').to_numpy())
root_ixs = stem_positions[adata.obsm['X_diffmap'][stem_positions, 2].argmin()]

In [None]:
# Diffusion-map scatter with two colourings: the cell-type annotation and the
# candidate root cell index (presumably highlighted as a single cell — confirm
# against the scVelo docs).
scv.pl.scatter(
    adata,
    basis="diffmap",
    components=["1, 2"],
    c=["Uhlitz_scANVI", root_ixs],
    legend_loc="right",
)

In [143]:
# Record the chosen root cell index in adata.uns under "iroot"
# (presumably the key sc.tl.dpt reads its root from — confirm in the scanpy docs).
adata.uns["iroot"] = root_ixs

In [None]:
# Run scanpy's diffusion pseudotime on adata with n_branchings=1.
# The cells below plot 'dpt_pseudotime' and 'dpt_groups', presumably written by this call.
sc.tl.dpt(adata, n_branchings=1)

In [None]:
# Diffusion-map embedding coloured by DPT pseudotime, DPT groups and the
# 'ct_pseudotime' column, one panel each.
sc.pl.embedding(
    adata,
    basis="diffmap",
    color_map="gnuplot2",
    color=["dpt_pseudotime", "dpt_groups", "ct_pseudotime"],
)


In [None]:
# UMAP embedding coloured by DPT pseudotime, DPT groups and the
# 'ct_pseudotime' column, one panel each.
sc.pl.embedding(
    adata,
    basis="umap",
    color_map="gnuplot2",
    color=["dpt_pseudotime", "dpt_groups", "ct_pseudotime"],
)


### Expression of a gene on diffusion components 1 and 2

In [None]:
# GPA33 expression on the diffusion map, restricted to cells labelled
# 'genomically_tumour' or 'no confident assignment'.
# The title previously misspelled "confident" as "condident".
scv.pl.scatter(
    adata[adata.obs['tumour_normal_normal'].isin(['genomically_tumour', 'no confident assignment'])],
    basis="diffmap",
    c=['GPA33'],
    legend_loc="right",
    components=["1, 2"],
    size = 5,
    title = 'GPA33 expression mapped on DC1-2 of \ngenomically tumour and no confident assignment cells'
)

### plot some gene expression in our group

In [182]:
# adata.var with a fresh positional index; the former var index becomes a
# regular column (referred to as 'index' by the next cell).
gene_df = adata.var.reset_index()

In [None]:
# Dense per-cell expression table for a fixed panel of marker genes:
# one row per cell (positional index, same order as adata.obs) and one
# column per gene.
# The genes are selected *by name*, so the columns come back in the order of
# `marker_genes` and each label matches the data it holds. Selecting by
# positions taken from adata.var returns columns in var order, which would
# not in general match a hard-coded label list. A gene missing from
# adata.var_names raises a KeyError here instead of silently shifting labels.
marker_genes = ['EHF', 'VIL1', 'GPA33', 'KRT20', 'CDX1', 'ELF3', 'ISX']
# .toarray() assumes adata.X is a SciPy sparse matrix, as in the original cell.
gene_exp = pd.DataFrame(adata[:, marker_genes].X.toarray(), columns=marker_genes)

In [None]:
# Attach per-cell annotations to the expression table. Both frames carry a
# positional index, so the join on index lines rows up by cell position.
obs_annotations = adata.obs[['Uhlitz_scANVI', 'tumour_normal_normal', 'MS_status']].reset_index()
plot_gene_df = pd.merge(gene_exp, obs_annotations, left_index=True, right_index=True)

In [None]:
# epi specific TF: EHF and CDX1, expression lost in poorly differentiated CRC
# EHF expression as violins: one facet per cell type (in `new_order`),
# one violin per tumour_normal_normal group within each facet.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(
        data=plot_gene_df,
        col='Uhlitz_scANVI',
        col_order=new_order,
        hue='tumour_normal_normal',
        palette=adata.uns['tumour_normal_normal_colors'],
        height=4,
        aspect=5/9,
        sharey=True,
        despine=False,
    )
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='EHF', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Single shared x-axis caption for the whole grid.
    g.figure.text(0.5, -1, 'Cell identity', horizontalalignment='center', size=24)

    # Override the long facet titles with line-broken versions; the indices
    # are positions in the flattened axes array (i.e. they depend on new_order).
    axes = g.axes.flatten()
    for panel_idx, panel_title in ((8, 'Enterocyte\nprogenitor\n'),
                                   (9, 'Enterocyte\n'),
                                   (6, 'Immature\ngoblet\n')):
        axes[panel_idx].set_title(panel_title)

In [None]:
# epi specific TF: EHF and CDX1, expression lost in poorly differentiated CRC
# CDX1 expression as violins: one facet per cell type (in `new_order`),
# one violin per tumour_normal_normal group within each facet.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(
        data=plot_gene_df,
        col='Uhlitz_scANVI',
        col_order=new_order,
        hue='tumour_normal_normal',
        palette=adata.uns['tumour_normal_normal_colors'],
        height=4,
        aspect=5/9,
        sharey=True,
        despine=False,
    )
    g.map_dataframe(sns.violinplot, x='tumour_normal_normal', y='CDX1', width=0.7)
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Single shared x-axis caption for the whole grid.
    g.figure.text(0.5, -1, 'Cell identity', horizontalalignment='center', size=24)

    # Override the long facet titles with line-broken versions; the indices
    # are positions in the flattened axes array (i.e. they depend on new_order).
    axes = g.axes.flatten()
    for panel_idx, panel_title in ((8, 'Enterocyte\nprogenitor\n'),
                                   (9, 'Enterocyte\n'),
                                   (6, 'Immature\ngoblet\n')):
        axes[panel_idx].set_title(panel_title)

In [None]:
# epi specific TF: EHF and CDX1, expression lost in poorly differentiated CRC
# CDX1 expression as violins: one facet per cell type (in `new_order`),
# one violin per MS_status group within each facet.
# NOTE(review): the palette is the tumour_normal_normal colour list although
# the hue here is MS_status — confirm this is intended.
with plt.style.context('./plt_style'):
    g=sns.FacetGrid(plot_gene_df, col = 'Uhlitz_scANVI', hue = 'MS_status', 
                    height = 4, aspect = 5/9, despine = False, sharey = True, 
                    col_order = new_order, palette = adata.uns['tumour_normal_normal_colors'])
    g.map_dataframe(sns.violinplot, x = 'MS_status', y = 'CDX1', width = 0.7)
    # A stray "]" after this call made the whole cell a SyntaxError.
    g.figure.subplots_adjust(wspace=0)
    
    g.set_xticklabels(rotation = 90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')
    
    # Single shared x-axis caption; uses `g.figure` like the rest of the cell
    # (seaborn documents `g.fig` as the deprecated spelling).
    g.figure.text(x = 0.5, y = -1, horizontalalignment = 'center', s = 'Cell identity', size = 24)
    
    # Override the long facet titles with line-broken versions; the indices
    # are positions in the flattened axes array (i.e. they depend on new_order).
    axes = g.axes.flatten()
    axes[8].set_title('Enterocyte\nprogenitor\n')
    axes[9].set_title('Enterocyte\n')
    axes[6].set_title('Immature\ngoblet\n')
;

In [None]:
# epi specific TF: EHF and CDX1, expression lost in poorly differentiated CRC
# CDX1 expression as split violins: one facet per cell type (in `new_order`),
# x = tumour_normal_normal group, the two halves of each violin split by MS_status.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(
        data=plot_gene_df,
        col='Uhlitz_scANVI',
        col_order=new_order,
        palette=adata.uns['tumour_normal_normal_colors'],
        height=4,
        aspect=5/9,
        sharey=True,
        despine=False,
    )
    g.map_dataframe(
        sns.violinplot,
        x='tumour_normal_normal',
        y='CDX1',
        hue="MS_status",
        split=True,
        width=0.7,
        palette=adata.uns['tumour_normal_normal_colors'],
    )
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Single shared x-axis caption for the whole grid.
    g.figure.text(0.5, -1, 'Cell identity', horizontalalignment='center', size=24)

    # Override the long facet titles with line-broken versions; the indices
    # are positions in the flattened axes array (i.e. they depend on new_order).
    axes = g.axes.flatten()
    for panel_idx, panel_title in ((8, 'Enterocyte\nprogenitor\n'),
                                   (9, 'Enterocyte\n'),
                                   (6, 'Immature\ngoblet\n')):
        axes[panel_idx].set_title(panel_title)
;

In [None]:
# epi specific TF: EHF and CDX1, expression lost in poorly differentiated CRC
# EHF expression as split violins: one facet per cell type (in `new_order`),
# x = tumour_normal_normal group, the two halves of each violin split by MS_status.
with plt.style.context('./plt_style'):
    g = sns.FacetGrid(
        data=plot_gene_df,
        col='Uhlitz_scANVI',
        col_order=new_order,
        palette=adata.uns['tumour_normal_normal_colors'],
        height=4,
        aspect=5/9,
        sharey=True,
        despine=False,
    )
    g.map_dataframe(
        sns.violinplot,
        x='tumour_normal_normal',
        y='EHF',
        hue="MS_status",
        split=True,
        width=0.7,
        palette=adata.uns['tumour_normal_normal_colors'],
    )
    g.figure.subplots_adjust(wspace=0)

    g.set_xticklabels(rotation=90)
    g.set_titles('{col_name}\n')
    g.set_xlabels('')

    # Single shared x-axis caption for the whole grid.
    g.figure.text(0.5, -1, 'Cell identity', horizontalalignment='center', size=24)

    # Override the long facet titles with line-broken versions; the indices
    # are positions in the flattened axes array (i.e. they depend on new_order).
    axes = g.axes.flatten()
    for panel_idx, panel_title in ((8, 'Enterocyte\nprogenitor\n'),
                                   (9, 'Enterocyte\n'),
                                   (6, 'Immature\ngoblet\n')):
        axes[panel_idx].set_title(panel_title)
;

In [None]:
# One heatmap per marker gene: mean expression for every combination of
# tumour_normal_normal group (rows) and cell type (columns, in `new_order`).
# The seven figures share everything except the gene, so they are produced
# in a loop; the list order is the order in which the figures are drawn.
with plt.rc_context({'ytick.major.size':16, 'axes.titlesize':18}):
    for gene in ['EHF', 'VIL1', 'GPA33', 'KRT20', 'ISX', 'CDX1', 'ELF3']:
        plt.figure(figsize = (10,5))
        mean_expression = plot_gene_df.pivot_table(index='tumour_normal_normal',
                                                   columns='Uhlitz_scANVI',
                                                   values=gene,
                                                   aggfunc='mean')[new_order]
        sns.heatmap(mean_expression, cmap=cmap, square=True)
        plt.title(f'{gene} (mean)\n')



In [260]:
# Marker gene panels keyed by a descriptive group label; passed to
# sc.pl.dotplot below, which presumably uses the keys as group brackets
# over the genes — confirm against the scanpy docs.
colonic_markers = {'normal stem cells': ['LGR5', 'ASCL2', 'SOX9', 'OLFM4', 'SMOC2', 'EPHB2'],
           'enterocytic differentiation': ['VIL1', 'GPA33', 'KRT20', 'CDH17', 'LGALS4'],
           'TF enrich at colonic epi': ['CDX1', 'CDX2', 'ELF3', 'GATA6', 'ISX', 'EHF', 'HNF1A']
}

In [None]:
# Dot plot of the colonic marker panels, with cells grouped by the
# combination of tumour_normal_normal label and cell type.
with plt.style.context('./plt_style'):
    sc.pl.dotplot(
        adata,
        var_names=colonic_markers,
        groupby=['tumour_normal_normal', 'Uhlitz_scANVI'],
        dendrogram=False,
    )
;    

### run fit and prediction

In [None]:
# compute initial & terminal states
# Build a GPCCA estimator on the kernel `ctk` (defined earlier in the notebook)
# and print its summary. Note that this rebinds `g`, which the plotting cells
# above used for seaborn FacetGrids.
g = cr.estimators.GPCCA(ctk)
print(g)

In [None]:
# Compute the Schur decomposition on the estimator with default settings
# (presumably a prerequisite for fitting macrostates — confirm in the CellRank docs).
g.compute_schur()

In [None]:
# Plot the eigenvalue spectrum, showing real parts only (real_only=True).
g.plot_spectrum(real_only=True)

In [None]:
# Fit the estimator, associating macrostates with the Uhlitz_scANVI clusters.
# n_states is a two-element list — presumably a [min, max] range from which
# the number of macrostates is selected; confirm in the CellRank docs.
g.fit(cluster_key="Uhlitz_scANVI", n_states=[5,33])

In [None]:
# Show all macrostates as discrete cell assignments, legend on the right.
g.plot_macrostates(which="all", discrete=True, legend_loc="right", s=100)

In [None]:
# Predict terminal states using the estimator's default settings.
g.predict_terminal_states()

In [None]:
# Show only the macrostates selected as terminal states.
g.plot_macrostates(which="terminal", legend_loc="right", s=100)

In [None]:
# Terminal states again, this time with discrete=False
# (presumably continuous membership values instead of hard assignments — confirm).
g.plot_macrostates(which="terminal", discrete=False)

In [None]:
# Predict initial states; allow_overlap=True presumably permits a macrostate
# to be both initial and terminal — confirm in the CellRank docs.
g.predict_initial_states(allow_overlap=True)

In [None]:
# Show only the macrostates selected as initial states.
g.plot_macrostates(which="initial", legend_loc="right", s=100)

In [None]:
# Composition of each macrostate in terms of the Uhlitz_scANVI cell types.
g.plot_macrostate_composition(key="Uhlitz_scANVI", figsize=(7, 4))

In [None]:
# probability of leaving the state once entered
# (plots the estimator's coarse-grained transition matrix — presumably
# macrostate-to-macrostate transition probabilities; confirm in the CellRank docs)
g.plot_coarse_T()

In [None]:
# For cells not assigned to a terminal state, estimate the probability of
# reaching each terminal state (intuition: random walks > arrival frequency).
# The results are read back below through g.fate_probabilities.
g.compute_fate_probabilities()

In [None]:
# One panel per terminal state (same_plot=False), three panels per row,
# with the colour scale fixed to [0, 1] so panels are comparable.
g.plot_fate_probabilities(legend_loc="right", same_plot=False, vmin = 0, vmax = 1, ncols = 3)

In [None]:
# Circular projection plot coloured by cell type
# (presumably arranges cells by their fate probabilities — confirm in the CellRank docs).
cr.pl.circular_projection(adata, keys="Uhlitz_scANVI", legend_loc="right")

In [None]:
# Violin plot of the fate probability towards the "Stem/TA" lineage,
# one violin per listed cell-type cluster.
cr.pl.aggregate_fate_probabilities(
    adata,
    lineages=["Stem/TA"],
    cluster_key="Uhlitz_scANVI",
    clusters=[
        'Stem', 'Stem/TA', 'Immature Goblet', 'Goblet',
        'Enterocyte progenitor', 'Enterocytes', 'Tuft',
    ],
    mode="violin",
)

In [None]:
# Violin plot of the fate probability towards the "Enterocytes" lineage,
# one violin per listed cell-type cluster.
cr.pl.aggregate_fate_probabilities(
    adata,
    lineages=["Enterocytes"],
    cluster_key="Uhlitz_scANVI",
    clusters=[
        'Stem', 'Stem/TA', 'Immature Goblet', 'Goblet',
        'Enterocyte progenitor', 'Enterocytes', 'Tuft',
    ],
    mode="violin",
)

In [None]:
# Violin plot of the fate probability towards the "Goblet" lineage,
# one violin per listed cell-type cluster.
cr.pl.aggregate_fate_probabilities(
    adata,
    lineages=["Goblet"],
    cluster_key="Uhlitz_scANVI",
    clusters=[
        'Stem', 'Stem/TA', 'Immature Goblet', 'Goblet',
        'Enterocyte progenitor', 'Enterocytes', 'Tuft',
    ],
    mode="violin",
)

In [None]:
# Violin plot of the fate probability towards the "Tuft" lineage,
# one violin per listed cell-type cluster.
cr.pl.aggregate_fate_probabilities(
    adata,
    lineages=["Tuft"],
    cluster_key="Uhlitz_scANVI",
    clusters=[
        'Stem', 'Stem/TA', 'Immature Goblet', 'Goblet',
        'Enterocyte progenitor', 'Enterocytes', 'Tuft',
    ],
    mode="violin",
)

In [None]:
# Violin plot of the fate probability towards the "TC4" lineage,
# one violin per listed cell-type cluster.
cr.pl.aggregate_fate_probabilities(
    adata,
    lineages=["TC4"],
    cluster_key="Uhlitz_scANVI",
    clusters=[
        'Stem', 'Stem/TA', 'Immature Goblet', 'Goblet',
        'Enterocyte progenitor', 'Enterocytes', 'Tuft',
    ],
    mode="violin",
)

In [None]:
# Display the fate-probability object computed above; the next cell indexes it by lineage name.
g.fate_probabilities

In [None]:
# Copy each lineage's fate probabilities into adata.obs as a flat per-cell
# column named "fate_probabilities_<lineage>". The tuple order fixes the
# order in which the columns are added.
for lineage in ("Enterocytes", "Stem/TA", "Goblet", "TC4", "Tuft"):
    adata.obs["fate_probabilities_" + lineage] = g.fate_probabilities[lineage].X.flatten()


In [None]:
# Display the category labels of tumour_normal_normal (used for filtering in the cells below).
adata.obs['tumour_normal_normal'].cat.categories

In [None]:
# Enterocyte fate probability per cell type, comparing cells from normal
# samples with genomically normal cells.
with plt.style.context('./plt_style'):

    normal_like = adata.obs['tumour_normal_normal'].isin(['normal_sample', 'genomically_normal'])
    ax = sns.violinplot(
        data=adata[normal_like].obs,
        x='Uhlitz_scANVI',
        y='fate_probabilities_Enterocytes',
        hue='tumour_normal_normal',
        width=0.9,
    )
    # Push the legend outside the axes so it does not cover the violins.
    sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell identity')

    plt.xticks(rotation=90)
;

In [None]:
# Goblet fate probability per cell type, comparing cells from normal
# samples with genomically normal cells.
with plt.style.context('./plt_style'):

    normal_like = adata.obs['tumour_normal_normal'].isin(['normal_sample', 'genomically_normal'])
    ax = sns.violinplot(
        data=adata[normal_like].obs,
        x='Uhlitz_scANVI',
        y='fate_probabilities_Goblet',
        hue='tumour_normal_normal',
        width=0.9,
    )
    # Push the legend outside the axes so it does not cover the violins.
    sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell identity')

    plt.xticks(rotation=90)
;

In [None]:
# Stem/TA fate probability per cell type, comparing cells from normal
# samples with genomically normal cells.
with plt.style.context('./plt_style'):

    normal_like = adata.obs['tumour_normal_normal'].isin(['normal_sample', 'genomically_normal'])
    ax = sns.violinplot(
        data=adata[normal_like].obs,
        x='Uhlitz_scANVI',
        y='fate_probabilities_Stem/TA',
        hue='tumour_normal_normal',
        width=0.9,
    )
    # Push the legend outside the axes so it does not cover the violins.
    sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell identity')

    plt.xticks(rotation=90)
;

In [None]:
# TC4 fate probability per cell type, comparing cells from normal
# samples with genomically normal cells.
with plt.style.context('./plt_style'):

    normal_like = adata.obs['tumour_normal_normal'].isin(['normal_sample', 'genomically_normal'])
    ax = sns.violinplot(
        data=adata[normal_like].obs,
        x='Uhlitz_scANVI',
        y='fate_probabilities_TC4',
        hue='tumour_normal_normal',
        width=0.9,
    )
    # Push the legend outside the axes so it does not cover the violins.
    sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell identity')

    plt.xticks(rotation=90)
;

In [None]:
# Tuft fate probability per cell type, comparing cells from normal
# samples with genomically normal cells.
with plt.style.context('./plt_style'):

    normal_like = adata.obs['tumour_normal_normal'].isin(['normal_sample', 'genomically_normal'])
    ax = sns.violinplot(
        data=adata[normal_like].obs,
        x='Uhlitz_scANVI',
        y='fate_probabilities_Tuft',
        hue='tumour_normal_normal',
        width=0.9,
    )
    # Push the legend outside the axes so it does not cover the violins.
    sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1.0), title='Cell identity')

    plt.xticks(rotation=90)
;

In [None]:
# Driver genes for the Enterocytes lineage, computed only on the clusters
# listed below; the cell displays the first ten rows of the result.
enterocyte_path_clusters = ['Stem', 'Stem/TA', 'Enterocyte progenitor', "Enterocytes"]
ent_df = g.compute_lineage_drivers(
    lineages=["Enterocytes"],
    cluster_key="Uhlitz_scANVI",
    clusters=enterocyte_path_clusters,
)
ent_df.head(10)

In [None]:
# UMAP panels: the Enterocytes fate probability followed by the first eight
# genes of ent_df, three panels per row.
sc.pl.embedding(
    adata,
    basis="umap",
    color=["fate_probabilities_Enterocytes", *ent_df.index[:8]],
    color_map="viridis",
    ncols=3,
    s=2,
)

In [None]:
# Lineage drivers with default arguments (no lineage or cluster restriction given);
# later cells read columns named "<lineage>_corr" from the result.
driver_df = g.compute_lineage_drivers()

In [None]:
# Per-gene mean of adata.X over all cells, stored as a var column for use as
# a colour key in the driver-correlation plot.
# The mean is taken directly on adata.X: the `.A` shorthand was removed from
# SciPy sparse matrices, and converting the whole matrix to dense just to
# average it is unnecessary. np.asarray(...).ravel() flattens the 1 x n_genes
# result a sparse matrix returns and is a no-op for a dense array.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

In [None]:
# Display the eight genes with the highest 'Goblet_corr' value.
driver_df.sort_values('Goblet_corr', ascending=False)['Goblet_corr'].index[:8]

In [None]:
# Genes of interest per lineage: the eight genes with the highest
# "<lineage>_corr" value in driver_df.
genes_oi = {
    lineage: driver_df.sort_values(lineage + '_corr', ascending=False)[lineage + '_corr'].index[:8]
    for lineage in ("Enterocytes", "Goblet", "Stem/TA")
}

# green, purple, blue

In [None]:
# Scatter of driver correlations for the Enterocytes lineage (x) against the
# Tuft lineage (y), coloured by mean expression, with the genes_oi sets annotated.
g.plot_lineage_drivers_correlation(
    lineage_x="Enterocytes",
    lineage_y="Tuft",
    gene_sets=genes_oi,
    adjust_text=True,
    color="mean expression",
    legend_loc="none",
    size=50,
    fontsize=9,
    figsize=(5, 5),
    dpi=150,
)

In [None]:
# GAMR model (CellRank) with 6 knots and a smoothing penalty of 10.0,
# passed to the heatmaps below.
# NOTE(review): GAMR presumably needs an R backend to be available — confirm in the CellRank docs.
model = cr.models.GAMR(adata, n_knots=6, smoothing_penalty=10.0)

In [None]:
# Heatmap along ct_pseudotime for the 40 genes with the highest
# 'Enterocytes_corr' value, for the Enterocytes lineage.
enterocyte_top_drivers = driver_df.sort_values('Enterocytes_corr', ascending=False)['Enterocytes_corr'].index[:40]
cr.pl.heatmap(
    adata,
    model=model,
    genes=enterocyte_top_drivers,
    lineages="Enterocytes",
    time_key="ct_pseudotime",
    cluster_key="Uhlitz_scANVI",
    show_fate_probabilities=True,
    show_all_genes=True,
    weight_threshold=(1e-3, 1e-3),
    figsize=(12, 10),
)


In [None]:
# Heatmap along ct_pseudotime for the 40 genes with the highest
# 'TC4_corr' value, for the TC4 lineage.
tc4_top_drivers = driver_df.sort_values('TC4_corr', ascending=False)['TC4_corr'].index[:40]
cr.pl.heatmap(
    adata,
    model=model,
    genes=tc4_top_drivers,
    lineages="TC4",
    time_key="ct_pseudotime",
    cluster_key="Uhlitz_scANVI",
    show_fate_probabilities=True,
    show_all_genes=True,
    weight_threshold=(1e-3, 1e-3),
    figsize=(12, 10),
)
