## imports

In [None]:
%load_ext autoreload
%autoreload

In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import triku as tk
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm.notebook import tqdm
import scipy.sparse as spr
import networkx as nx
from matplotlib import pylab

In [None]:
!pip install cellassign --upgrade

In [None]:
# local imports and imports from other notebooks
from cellassign import assign_cats
from fb_functions import make_gene_scoring_with_expr
from fb_functions import plot_score_graph
# The names below are restored from IPython's %store database, so the notebooks
# that ran `%store <name>` for them must have been executed beforehand.
# dict_colors_mouse: cluster label -> colour, used for the `cluster_robust` palettes and graph nodes.
%store -r dict_colors_mouse
# seed: shared random seed (NOTE(review): not read directly in the cells of this notebook shown here).
%store -r seed
# magma: colormap passed as `cmap` to the scanpy plots.
%store -r magma
# data_dir: root folder that contains one sub-folder per dataset.
%store -r data_dir

In [None]:
%store -r plot_params

pylab.rcParams.update(plot_params)
pd.set_option('display.max_columns', None)
pd.options.display.float_format = "{:,.2f}".format

**IMPORTANT: I am running this analysis on a computer with ~500 GB of RAM. I will load many datasets at once, which might be too much for some computers. I took this decision consciously, to have as much information available at any time as possible. If you cannot run the whole analysis at once, you can run it in parts.**

## Anndata loading

In [None]:
# Abassi 2020: dataset folder and processed fibroblast AnnData.
# NOTE(review): the variable is spelled `abbasi_...` while the folder and the object use
# `abassi_...`; the name is kept because later cells reference it.
abbasi_2020_dir = f"{data_dir}/abassi_2020"
abassi_2020_ctrl_mouse_fb = sc.read(abbasi_2020_dir + '/abassi_2020_ctrl_mouse_fb_processed.h5')

In [None]:
# Boothby 2021: dataset folder and processed fibroblast AnnData.
boothby_2021_dir = f"{data_dir}/boothby_2021"
boothby_2021_ctrl_mouse_fb = sc.read(f"{boothby_2021_dir}/boothby_2021_ctrl_mouse_fb_processed.h5")

In [None]:
# Buechler 2021: dataset folder and processed fibroblast AnnData.
buechler_2021_dir = f"{data_dir}/buechler_2021"
buechler_2021_ctrl_mouse_fb = sc.read(f"{buechler_2021_dir}/buechler_2021_ctrl_mouse_fb_processed.h5")

In [None]:
# Haensel 2021 (wounding dataset): dataset folder and processed fibroblast AnnData.
haensel_2021_dir = f"{data_dir}/haensel_2021"
haensel_2021_ctrl_mouse_wounding_fb = sc.read(f"{haensel_2021_dir}/haensel_2021_ctrl_mouse_wounding_fb_processed.h5")

In [None]:
# Joost 2020: dataset folder and processed fibroblast AnnData.
joost_2020_dir = f"{data_dir}/joost_2020"
joost_2020_ctrl_mouse_fb = sc.read(f"{joost_2020_dir}/joost_2020_ctrl_mouse_fb_processed.h5")

In [None]:
# Phan 2020 (21d sample): dataset folder and processed fibroblast AnnData.
phan_2020_dir = f"{data_dir}/phan_2020"
phan_2020_ctrl_mouse_21d_fb = sc.read(phan_2020_dir + '/phan_2020_ctrl_mouse_21d_fb_processed.h5')

In [None]:
# Shin 2020: dataset folder and processed fibroblast AnnData.
shin_2020_dir = f"{data_dir}/shin_2020"
shin_2020_ctrl_mouse_fb = sc.read(shin_2020_dir + '/shin_2020_ctrl_mouse_fb_processed.h5')

In [None]:
# Shook 2020: dataset folder and processed fibroblast AnnData.
shook_2020_dir = f"{data_dir}/shook_2020"
shook_2020_ctrl_mouse_fb = sc.read(shook_2020_dir + '/shook_2020_ctrl_mouse_fb_processed.h5')

In [None]:
# Vorstandlechner 2021: dataset folder (capitalised on disk) and processed fibroblast AnnData.
vorstandlechner_2021_dir = f"{data_dir}/Vorstandlechner_2021"
vorstandlechner_2021_ctrl_mouse_fb = sc.read(vorstandlechner_2021_dir + '/vorstandlechner_2021_ctrl_mouse_fb_processed.h5')

In [None]:
# Show how the 'Condition' annotation is distributed over the Haensel 2021 UMAP.
sc.pl.umap(haensel_2021_ctrl_mouse_wounding_fb, color='Condition', s=3)

## Evaluate marker score for adatas
All the **commented-out anndatas are discarded** for marker selection, either because they do not show enough marker consistency and might bias the result, because they belong to datasets from non-control/non-healthy conditions, or because they show a UMAP with a cluster distribution we do not trust.

In [None]:
# Datasets used to derive the robust marker sets; commented-out entries are excluded.
list_datasets_mouse = [
    abassi_2020_ctrl_mouse_fb,
    boothby_2021_ctrl_mouse_fb,
    # buechler_2021_ctrl_mouse_fb,
    haensel_2021_ctrl_mouse_wounding_fb,
    joost_2020_ctrl_mouse_fb,
    phan_2020_ctrl_mouse_21d_fb,
    shin_2020_ctrl_mouse_fb,
    shook_2020_ctrl_mouse_fb,
    vorstandlechner_2021_ctrl_mouse_fb,
]

# Cluster labels and axis labels considered in the marker scoring.
list_accepted_clusters_mouse = [
    'x1', 'x2', 'x/y',
    'y1', 'y2', 'y3', 'y4', 'y5',
    'z1', 'z2',
    'w/x', 'w1', 'w2', 'w3', 'w4', 'w5',
    'v1',
]
list_accepted_axis_mouse = ['x', 'y', 'z', 'w', 'v']

# manual_axis is to create a robust set of markers:
# two-character labels such as 'x1' collapse to their first character ('x'),
# any other label (e.g. 'x/y') is kept unchanged.
for adata in list_datasets_mouse:
    axis_labels = [label[0] if len(label) == 2 else label for label in adata.obs['cluster']]
    adata.obs['manual_axis'] = pd.Categorical(axis_labels)

#### 

In [None]:
# Gene scoring per cluster label across the selected datasets (p-value threshold 0.01).
dict_make_gene_scoring = make_gene_scoring_with_expr(
    list_datasets=list_datasets_mouse,
    calculate_DEGs=True,
    group_name='cluster',
    value_ref='scores',
    select_method='pval',
    select_thres=0.01,
    list_clusters=list_accepted_clusters_mouse,
)

In [None]:
# First 25 rows of the scoring table for cluster w5.
dict_make_gene_scoring['w5'].head(25)

In [None]:
# First 25 rows of the scoring table for cluster v1.
dict_make_gene_scoring['v1'].head(25)

In [None]:
# Same scoring at axis level (grouping by `manual_axis`), with a looser p-value threshold of 0.05.
dict_make_gene_scoring_axis = make_gene_scoring_with_expr(
    list_datasets=list_datasets_mouse,
    calculate_DEGs=True,
    group_name='manual_axis',
    list_clusters=list_accepted_axis_mouse,
    value_ref='scores',
    select_method='pval',
    select_thres=0.05,
)

## Recalculating clusters in the datasets

In [None]:
# Marker set per cluster / axis: the first 30 index entries (gene names) of each scoring table.
dict_cats_clusters_robust = {
    name: np.array(scores.index[:30]) for name, scores in dict_make_gene_scoring.items()
}
dict_cats_axes_robust = {
    name: np.array(scores.index[:30]) for name, scores in dict_make_gene_scoring_axis.items()
}

In [None]:
# Alias of the cluster marker dict (same object, not a copy), persisted with %store
# under a distinct name so other notebooks can restore it with `%store -r`.
# NOTE(review): the meaning of the `_3M` suffix is not visible in this notebook — confirm.
dict_cats_clusters_robust_3M = dict_cats_clusters_robust
%store dict_cats_clusters_robust_3M 

In [None]:
# Number of marker genes kept for each cluster.
print(list(map(len, dict_cats_clusters_robust.values())))

In [None]:
# Marker genes side by side, one column per cluster.
# NOTE(review): building the frame requires all marker arrays to have the same length.
pd.DataFrame(dict_cats_clusters_robust)

In [None]:
# Display the marker genes selected for each axis.
dict_cats_axes_robust

### Abassi 2020

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    abassi_2020_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.5,
    quantile_gene_sel=0.95,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    abassi_2020_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
abassi_2020_ctrl_mouse_fb.obs['cluster_robust'] = abassi_2020_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
abassi_2020_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in abassi_2020_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    abassi_2020_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in abassi_2020_ctrl_mouse_fb.obs.columns:
        del abassi_2020_ctrl_mouse_fb.obs[key]
plot_score_graph(abassi_2020_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in abassi_2020_ctrl_mouse_fb.var_names]
    sc.pl.umap(abassi_2020_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
abassi_2020_ctrl_mouse_fb.write_h5ad(f"{abbasi_2020_dir}/abassi_2020_ctrl_mouse_fb_robust.h5")

### Boothby 2021

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    boothby_2021_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.5,
    quantile_gene_sel=0.8,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    boothby_2021_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
boothby_2021_ctrl_mouse_fb.obs['cluster_robust'] = boothby_2021_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
boothby_2021_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in boothby_2021_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    boothby_2021_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in boothby_2021_ctrl_mouse_fb.obs.columns:
        del boothby_2021_ctrl_mouse_fb.obs[key]
plot_score_graph(boothby_2021_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in boothby_2021_ctrl_mouse_fb.var_names]
    sc.pl.umap(boothby_2021_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
boothby_2021_ctrl_mouse_fb.write_h5ad(f"{boothby_2021_dir}/boothby_2021_ctrl_mouse_fb_robust.h5")

### Buechler 2021

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    buechler_2021_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.5,
    quantile_gene_sel=0.9,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    buechler_2021_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
buechler_2021_ctrl_mouse_fb.obs['cluster_robust'] = buechler_2021_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
buechler_2021_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in buechler_2021_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    buechler_2021_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in buechler_2021_ctrl_mouse_fb.obs.columns:
        del buechler_2021_ctrl_mouse_fb.obs[key]
plot_score_graph(buechler_2021_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in buechler_2021_ctrl_mouse_fb.var_names]
    sc.pl.umap(buechler_2021_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
buechler_2021_ctrl_mouse_fb.write_h5ad(f"{buechler_2021_dir}/buechler_2021_ctrl_mouse_fb_robust.h5")

### Haensel 2021

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    haensel_2021_ctrl_mouse_wounding_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.4,
    quantile_gene_sel=0.55,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    haensel_2021_ctrl_mouse_wounding_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
haensel_2021_ctrl_mouse_wounding_fb.obs['cluster_robust'] = haensel_2021_ctrl_mouse_wounding_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
haensel_2021_ctrl_mouse_wounding_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in haensel_2021_ctrl_mouse_wounding_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    haensel_2021_ctrl_mouse_wounding_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in haensel_2021_ctrl_mouse_wounding_fb.obs.columns:
        del haensel_2021_ctrl_mouse_wounding_fb.obs[key]
plot_score_graph(haensel_2021_ctrl_mouse_wounding_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in haensel_2021_ctrl_mouse_wounding_fb.var_names]
    sc.pl.umap(haensel_2021_ctrl_mouse_wounding_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
haensel_2021_ctrl_mouse_wounding_fb.write_h5ad(f"{haensel_2021_dir}/haensel_2021_ctrl_mouse_wounding_fb_robust.h5")

### Joost 2020

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    joost_2020_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.45,
    quantile_gene_sel=0.9,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    joost_2020_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
joost_2020_ctrl_mouse_fb.obs['cluster_robust'] = joost_2020_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
joost_2020_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in joost_2020_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    joost_2020_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in joost_2020_ctrl_mouse_fb.obs.columns:
        del joost_2020_ctrl_mouse_fb.obs[key]
plot_score_graph(joost_2020_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in joost_2020_ctrl_mouse_fb.var_names]
    sc.pl.umap(joost_2020_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
joost_2020_ctrl_mouse_fb.write_h5ad(f"{joost_2020_dir}/joost_2020_ctrl_mouse_fb_robust.h5")

### Phan 2020

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    phan_2020_ctrl_mouse_21d_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.4,
    quantile_gene_sel=0.99,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    phan_2020_ctrl_mouse_21d_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
phan_2020_ctrl_mouse_21d_fb.obs['cluster_robust'] = phan_2020_ctrl_mouse_21d_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
phan_2020_ctrl_mouse_21d_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in phan_2020_ctrl_mouse_21d_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    phan_2020_ctrl_mouse_21d_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in phan_2020_ctrl_mouse_21d_fb.obs.columns:
        del phan_2020_ctrl_mouse_21d_fb.obs[key]
plot_score_graph(phan_2020_ctrl_mouse_21d_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in phan_2020_ctrl_mouse_21d_fb.var_names]
    sc.pl.umap(phan_2020_ctrl_mouse_21d_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
phan_2020_ctrl_mouse_21d_fb.write_h5ad(f"{phan_2020_dir}/phan_2020_ctrl_mouse_21d_fb_robust.h5")

### Shin 2020

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    shin_2020_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.5,
    quantile_gene_sel=0.9,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    shin_2020_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
shin_2020_ctrl_mouse_fb.obs['cluster_robust'] = shin_2020_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
shin_2020_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in shin_2020_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    shin_2020_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in shin_2020_ctrl_mouse_fb.obs.columns:
        del shin_2020_ctrl_mouse_fb.obs[key]
plot_score_graph(shin_2020_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in shin_2020_ctrl_mouse_fb.var_names]
    sc.pl.umap(shin_2020_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
shin_2020_ctrl_mouse_fb.write_h5ad(f"{shin_2020_dir}/shin_2020_ctrl_mouse_fb_robust.h5")

In [None]:
# Plot the `genes_w1` gene list next to the robust clusters for Joost 2020 and Shin 2020.
genes_w1 = ['Crabp1', 'Notum', 'Pappa2', 'Sostdc1', 'Corin', 'Nrg2', 'Cntn1']
for adata_w1 in (joost_2020_ctrl_mouse_fb, shin_2020_ctrl_mouse_fb):
    sc.pl.umap(adata_w1, color=['cluster_robust'] + genes_w1, legend_loc='on data',
               cmap=magma, use_raw=False, ncols=2)


In [None]:
# Joost 2020: annotations plus Notum and Frzb expression.
sc.pl.umap(
    joost_2020_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust', 'Notum', 'Frzb'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Joost 2020: annotations plus an extended gene panel.
sc.pl.umap(
    joost_2020_ctrl_mouse_fb,
    color=[
        'Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust',
        'Corin', 'Nrg2', 'Cntn1', 'Crabp1', 'Notum', 'Pappa2',
        'Acta2', 'Tagln', 'Grem2', 'Abi3bp', 'Ramp1', 'Mylk',
    ],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Shin 2020: annotations plus a gene panel.
sc.pl.umap(
    shin_2020_ctrl_mouse_fb,
    color=[
        'Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust',
        'Notum', 'Frzb', 'Sostdc1', 'Cyr61', 'Mgp', 'Ednrb', 'Cdk1',
    ],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

### Shook 2020

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    shook_2020_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.5,
    quantile_gene_sel=0.9,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    shook_2020_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
shook_2020_ctrl_mouse_fb.obs['cluster_robust'] = shook_2020_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
shook_2020_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in shook_2020_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    shook_2020_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in shook_2020_ctrl_mouse_fb.obs.columns:
        del shook_2020_ctrl_mouse_fb.obs[key]
plot_score_graph(shook_2020_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in shook_2020_ctrl_mouse_fb.var_names]
    sc.pl.umap(shook_2020_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
shook_2020_ctrl_mouse_fb.write_h5ad(f"{shook_2020_dir}/shook_2020_ctrl_mouse_fb_robust.h5")

### Vorstandlechner 2021

In [None]:
# First pass writes `cluster_robust` from the per-cluster marker sets; the second pass
# writes `axis_robust` from the per-axis marker sets, grouped by `cluster_robust`.
assign_cats(
    vorstandlechner_2021_ctrl_mouse_fb,
    dict_cats=dict_cats_clusters_robust,
    min_score=0.45,
    quantile_gene_sel=0.99,
    key_added='cluster_robust',
    others_name='U',
    verbose=False,
)
assign_cats(
    vorstandlechner_2021_ctrl_mouse_fb,
    column_groupby='cluster_robust',
    dict_cats=dict_cats_axes_robust,
    min_score=0.4,
    quantile_gene_sel=0.8,
    key_added='axis_robust',
    intermediate_states=True,
    diff=0.05,
    others_name='U',
    verbose=False,
)
vorstandlechner_2021_ctrl_mouse_fb.obs['cluster_robust'] = vorstandlechner_2021_ctrl_mouse_fb.obs['cluster_robust'].astype('category')
# One colour per category; labels without an entry in dict_colors_mouse fall back to grey.
vorstandlechner_2021_ctrl_mouse_fb.uns['cluster_robust_colors'] = [
    dict_colors_mouse.get(label, '#bcbcbc')
    for label in vorstandlechner_2021_ctrl_mouse_fb.obs['cluster_robust'].cat.categories
]
sc.pl.umap(
    vorstandlechner_2021_ctrl_mouse_fb,
    color=['Internal sample identifier', 'leiden', 'axis_robust', 'cluster_robust'],
    legend_loc='on data',
    cmap=magma,
    use_raw=False,
    ncols=2,
)

In [None]:
# Remove the .obs columns named after the axes before drawing the score graph.
# NOTE(review): these columns are presumably written by the axis-level assign_cats call — confirm.
# The membership check keeps the cell re-runnable; an unguarded `del` raises KeyError
# once the columns are gone.
for key in dict_cats_axes_robust:
    if key in vorstandlechner_2021_ctrl_mouse_fb.obs.columns:
        del vorstandlechner_2021_ctrl_mouse_fb.obs[key]
plot_score_graph(vorstandlechner_2021_ctrl_mouse_fb, cluster_column='cluster_robust')

In [None]:
# Per cluster: UMAPs of its marker genes (only those present in this dataset) next to the cluster labels.
for cluster_name, marker_genes in dict_cats_clusters_robust.items():
    print(cluster_name)
    genes_present = [gene for gene in marker_genes if gene in vorstandlechner_2021_ctrl_mouse_fb.var_names]
    sc.pl.umap(vorstandlechner_2021_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', cmap=magma, use_raw=False, ncols=4)

In [None]:
# Save the annotated object in the dataset folder.
vorstandlechner_2021_ctrl_mouse_fb.write_h5ad(f"{vorstandlechner_2021_dir}/vorstandlechner_2021_ctrl_mouse_fb_robust.h5")

## Reevaluate the presence of clusters for each dataset

In [None]:
from fb_functions import plot_adata_cluster_properties

In [None]:
# Every loaded mouse dataset, including those left out of the marker selection.
list_all_datasets_mouse = [
    abassi_2020_ctrl_mouse_fb,
    boothby_2021_ctrl_mouse_fb,
    buechler_2021_ctrl_mouse_fb,
    haensel_2021_ctrl_mouse_wounding_fb,
    joost_2020_ctrl_mouse_fb,
    phan_2020_ctrl_mouse_21d_fb,
    shin_2020_ctrl_mouse_fb,
    shook_2020_ctrl_mouse_fb,
    vorstandlechner_2021_ctrl_mouse_fb,
]

# From here on `list_datasets_mouse` holds the same nine datasets (a separate list object).
list_datasets_mouse = list(list_all_datasets_mouse)

# Display name per dataset: "<Author> <Year>", read from the first cell of each object.
list_names_mouse = [
    f"{dataset.obs['Author'].values[0]} {int(dataset.obs['Year'].values[0])}"
    for dataset in list_datasets_mouse
]

In [None]:
# Persist the dataset lists and label lists so other notebooks can restore them with `%store -r`.
# NOTE(review): the first two entries are lists of full AnnData objects, so storing them may be slow and large — confirm this is intended.
%store list_all_datasets_mouse
%store list_datasets_mouse

%store list_accepted_clusters_mouse
%store list_accepted_axis_mouse
%store list_names_mouse

In [None]:
# Cluster summary across datasets, mode 'presence'.
plot_adata_cluster_properties(
    dict_cats_clusters=dict_cats_clusters_robust,
    list_datasets=list_datasets_mouse,
    what='presence',
    cluster_name='cluster_robust',
    axis_name='axis_robust',
    list_clusters=list_accepted_clusters_mouse,
)

In [None]:
# Cluster summary across datasets, mode 'percentage'.
plot_adata_cluster_properties(
    dict_cats_clusters=dict_cats_clusters_robust,
    list_datasets=list_datasets_mouse,
    what='percentage',
    cluster_name='cluster_robust',
    axis_name='axis_robust',
    list_clusters=list_accepted_clusters_mouse,
)

In [None]:
# Cluster summary across datasets, mode 'axis'.
plot_adata_cluster_properties(
    dict_cats_clusters=dict_cats_clusters_robust,
    list_datasets=list_datasets_mouse,
    what='axis',
    cluster_name='cluster_robust',
    axis_name='axis_robust',
    list_clusters=list_accepted_clusters_mouse,
)

## Plotting all Adatas

In [None]:
# 3x3 grid with one UMAP of the robust clusters per dataset.
fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
flat_axes = axs.ravel()

# Switch off only the panels that have no dataset. The slice starts at the number of
# datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
# and also hit panels that are drawn on below.
for ax in flat_axes[len(list_datasets_mouse):]:
    ax.set_axis_off()

# `name` already holds "<Author> <Year>" for each dataset, in the same order.
for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
    sc.pl.umap(adata, color=['cluster_robust'], legend_loc='on data', show=False, ax=flat_axes[idx],
               title=f"{name} mouse", size=15, cmap=magma, frameon=False)


In [None]:
# One 3x3 figure per gene, showing its expression on the UMAP of every dataset.
list_genes = ['Col1a1', 'Sparc']

for gene in list_genes:
    fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
    flat_axes = axs.ravel()

    # Switch off only the panels that have no dataset. The slice starts at the number of
    # datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
    # and also hit panels that are drawn on below.
    for ax in flat_axes[len(list_datasets_mouse):]:
        ax.set_axis_off()

    # `name` already holds "<Author> <Year>" for each dataset, in the same order.
    for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
        sc.pl.umap(adata, color=[gene], legend_loc='on data', show=False, ax=flat_axes[idx],
                   title=f"{name} mouse {gene}", size=15, cmap=magma, frameon=False)


In [None]:
# One 3x3 figure per gene, showing its expression on the UMAP of every dataset.
# Genes of the incipient population + common genes.
list_genes = ['Il13ra1', 'Gap43', 'Anxa3'] + ['Sfrp2', 'Pi16', 'Sema3c', 'Ccl11', 'Fgf18']

for gene in list_genes:
    fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
    flat_axes = axs.ravel()

    # Switch off only the panels that have no dataset. The slice starts at the number of
    # datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
    # and also hit panels that are drawn on below.
    for ax in flat_axes[len(list_datasets_mouse):]:
        ax.set_axis_off()

    # `name` already holds "<Author> <Year>" for each dataset, in the same order.
    for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
        sc.pl.umap(adata, color=[gene], legend_loc='on data', show=False, ax=flat_axes[idx],
                   title=f"{name} mouse {gene}", size=15, cmap=magma, frameon=False)


## PAGA 

In [None]:
# 3x3 grid with the PAGA graph of the robust clusters for every dataset.
fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
flat_axes = axs.ravel()

# Switch off only the panels that have no dataset. The slice starts at the number of
# datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
# and also hit panels that are drawn on below.
for ax in flat_axes[len(list_datasets_mouse):]:
    ax.set_axis_off()

# sc.tl.paga stores its result in adata.uns['paga'], which the combined-PAGA cells read.
for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
    sc.tl.paga(adata, groups='cluster_robust')
    sc.pl.paga(adata, ax=flat_axes[idx], frameon=False, show=False,
               title=f"{name} mouse")

In [None]:
# Same PAGA grid, drawing the solid edges from 'connectivities_tree'.
fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
flat_axes = axs.ravel()

# Switch off only the panels that have no dataset. The slice starts at the number of
# datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
# and also hit panels that are drawn on below.
for ax in flat_axes[len(list_datasets_mouse):]:
    ax.set_axis_off()

# PAGA is recomputed here so this cell does not depend on the previous one having run.
for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
    sc.tl.paga(adata, groups='cluster_robust')
    sc.pl.paga(adata, ax=flat_axes[idx], frameon=False, show=False, solid_edges='connectivities_tree',
               title=f"{name} mouse")

## Combined PAGA 

### Using connectivities_tree

In [None]:
# Sum the PAGA tree adjacency of every dataset into one cluster-by-cluster matrix.
# The frame starts with float zeros because float connectivities are added into it;
# starting from integer zeros makes the assignment below an incompatible-dtype write.
clusters_with_unassigned = list_accepted_clusters_mouse + ['U']
df_all = pd.DataFrame(0.0, index=clusters_with_unassigned, columns=clusters_with_unassigned)

for adata in list_datasets_mouse:
    categories = adata.obs['cluster_robust'].cat.categories
    dfsub = pd.DataFrame(adata.uns['paga']['connectivities_tree'].todense(),
                         columns=categories,
                         index=categories)

    df_all.loc[dfsub.index, dfsub.index] = df_all.loc[dfsub.index, dfsub.index] + dfsub

df_all = df_all.drop(index='U', columns='U')  # To remove U (the last row and column)

# NOTE(review): clusters without any edge are not filtered out here; a bare
# `df_all.loc[row_mask, col_mask]` expression has no effect unless its result is assigned.
# If filtering is wanted, use one shared mask for rows and columns so the matrix stays
# square for networkx.

# Zero out weak summed connections, then square the rest (the values are used as edge widths).
df_all[df_all < 0.6] = 0
df_all = df_all ** 2

In [None]:
# Draw the combined PAGA tree with ten spring-layout seeds to pick a readable layout.
# CHOSEN SEED 5
# The loop variable is `layout_seed` so the global `seed` restored with `%store -r`
# is not overwritten by this cell.
G = nx.convert_matrix.from_pandas_adjacency(df_all)

# Graph, edge widths and node colours do not depend on the layout seed.
edges = G.edges()
weights = [G[u][v]['weight'] for u, v in edges]
node_colors = [dict_colors_mouse[i] for i in df_all.index]

for layout_seed in range(10):
    display(layout_seed)
    fig, ax = plt.subplots(1, 1, figsize=(5, 4))
    pos = nx.spring_layout(G, seed=layout_seed)

    nx.draw_networkx(G, pos, width=weights, node_color=node_colors, ax=ax)
    ax.axis('off')
    plt.show()

### Using connectivities

In [None]:
# Sum the full PAGA connectivities of every dataset into one cluster-by-cluster matrix.
# The frame starts with float zeros because float connectivities are added into it;
# starting from integer zeros makes the assignment below an incompatible-dtype write.
clusters_with_unassigned = list_accepted_clusters_mouse + ['U']
df_all = pd.DataFrame(0.0, index=clusters_with_unassigned, columns=clusters_with_unassigned)

for adata in list_datasets_mouse:
    categories = adata.obs['cluster_robust'].cat.categories
    dfsub = pd.DataFrame(adata.uns['paga']['connectivities'].todense(),
                         columns=categories,
                         index=categories)

    df_all.loc[dfsub.index, dfsub.index] = df_all.loc[dfsub.index, dfsub.index] + dfsub

df_all = df_all.drop(index='U', columns='U')  # To remove U (the last row and column)

# Zero out weak summed connections and raise the rest to the power 1.1
# (the values are used as edge widths).
df_all[df_all < 0.6] = 0
df_all = df_all ** 1.1

# Average with the transpose so the adjacency matrix is symmetric.
df_all = (df_all + df_all.transpose()) / 2


In [None]:
# Draw the combined PAGA connectivity graph with ten spring-layout seeds to pick a readable layout.
# CHOSEN SEED 9
# The loop variable is `layout_seed` so the global `seed` restored with `%store -r`
# is not overwritten by this cell.
G = nx.convert_matrix.from_pandas_adjacency(df_all)

# Graph, edge widths and node colours do not depend on the layout seed.
edges = G.edges()
weights = [G[u][v]['weight'] for u, v in edges]
node_colors = [dict_colors_mouse[i] for i in df_all.index]

for layout_seed in range(10):
    fig, ax = plt.subplots(1, 1, figsize=(5, 4))
    pos = nx.spring_layout(G, seed=layout_seed)

    nx.draw_networkx(G, pos, width=weights, node_color=node_colors, ax=ax)
    ax.axis('off')
    print(layout_seed)
    plt.show()

## Mapping known pops to clusters

In [None]:
# Known fibroblast populations and genes used to locate them on the UMAPs.
# NOTE(review): the trailing cluster labels (a2, b2, d2/d3, ...) do not match the
# x/y/z/w/v labels used elsewhere in this notebook — confirm which naming they refer to.
dict_mapping = {
    'Reticular fb': ['Dpt', 'Dlk1'],                                  # b2
    'Papillary fb': ['Dpt', 'Dpp4'],                                  # a2
    'Dermal Papilla': ['Wif1', 'Corin'],                              # d2/d3
    'Dermal Sheath': ['Col11a1', 'Acta2', 'Acan'],                    # d4
    'Arrector pili': ['Cpe', 'Pcdh9', 'Ppp1r1a'],                     # Not defined
    'Fascia': ['Gpx3', 'Thbs2', 'Dcn', 'Osmr', 'Gfra1', 'Pde10a'],    # a2, b/c, b3, b4???
}

In [None]:
# Plot the genes of each known population on the Boothby 2021 UMAP.
# Genes absent from this dataset are skipped (and reported) instead of raising a KeyError,
# matching the `var_names` filter used in the per-dataset marker plots.
for cat, genes in dict_mapping.items():
    print(cat)
    genes_present = [gene for gene in genes if gene in boothby_2021_ctrl_mouse_fb.var_names]
    genes_missing = [gene for gene in genes if gene not in boothby_2021_ctrl_mouse_fb.var_names]
    if genes_missing:
        print(f"  not in dataset, skipped: {genes_missing}")
    sc.pl.umap(boothby_2021_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', frameon=False, cmap=magma)

## Joost2020 markers

In [None]:
# Gene lists per Joost 2020 population label.
# Each gene appears once per population: the aDP list previously repeated 'Nrg2' and
# 'Cntn1', which only produced duplicate UMAP panels.
# NOTE(review): the trailing cluster labels (a, b4, c1, d1, ...) do not match the
# x/y/z/w/v labels used elsewhere in this notebook — confirm which naming they refer to.
# NOTE(review): 'a' in tDP is presumably the mouse nonagouti gene symbol — confirm.
dict_mapping_joost = {
    'FIB1': ['Col1a1', 'Sparc', 'Creb3', 'Creb3l3', 'Mxd4', 'Cldn10'],             # c1, c2 > c3
    'FIB2': ['Dcn', 'Lum', 'Mfap4', 'Igfbp7', 'Cd63', 'Ccl19', 'Fth1'],            # c3, b/c > c1, c2
    'FIB3': ['Cxcl12', 'Gpx3', 'Cygb', 'F3', 'Myoc'],                              # b4 > b1 > b3 > b2 > a
    'FIB4': ['Gpx3', 'Mfap5', 'Plac8', 'Anxa3', 'Akr1c18', 'Pla1a', 'Ifi205'],     # a > b4
    'tDP': ['Crabp1', 'Notum', 'Pappa2', 'Rasd1', 'Ramp3', 'a', 'Slc26a7'],        # d1
    'aDP': ['Corin', 'Nrg2', 'Cntn1', 'Ptprz1'],                                   # d2
    'DS1': ['Abi3bp', 'Ramp1', 'Mylk', 'Prelid2', 'Dusp14', 'Enpp2'],              # d3
    'DS2': ['Acta2', 'Tagln', 'Grem2', 'Lrrc15', 'Wif1'],                          # d3
}

In [None]:
# Plot the genes of each Joost 2020 population on the Boothby 2021 UMAP.
# Genes absent from this dataset are skipped (and reported) instead of raising a KeyError,
# matching the `var_names` filter used in the per-dataset marker plots.
for cat, genes in dict_mapping_joost.items():
    print(cat)
    genes_present = [gene for gene in genes if gene in boothby_2021_ctrl_mouse_fb.var_names]
    genes_missing = [gene for gene in genes if gene not in boothby_2021_ctrl_mouse_fb.var_names]
    if genes_missing:
        print(f"  not in dataset, skipped: {genes_missing}")
    sc.pl.umap(boothby_2021_ctrl_mouse_fb, color=['cluster_robust'] + genes_present,
               legend_loc='on data', frameon=False, cmap=magma)

In [None]:
# 3x3 grid with one UMAP of the robust clusters per dataset.
fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
flat_axes = axs.ravel()

# Switch off only the panels that have no dataset. The slice starts at the number of
# datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
# and also hit panels that are drawn on below.
for ax in flat_axes[len(list_datasets_mouse):]:
    ax.set_axis_off()

# `name` already holds "<Author> <Year>" for each dataset, in the same order.
for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
    sc.pl.umap(adata, color=['cluster_robust'], legend_loc='on data', show=False, ax=flat_axes[idx],
               title=f"{name} mouse", size=15, cmap=magma, frameon=False)


In [None]:
# One 3x3 figure per DS1 gene, showing its expression on the UMAP of every dataset.
list_genes = dict_mapping_joost['DS1']
for gene in list_genes:
    fig, axs = plt.subplots(3, 3, figsize=(3 * 4, 3 * 4))
    flat_axes = axs.ravel()

    # Switch off only the panels that have no dataset. The slice starts at the number of
    # datasets; starting at `len(list_datasets_mouse) - len(axs)` would use the row count (3)
    # and also hit panels that are drawn on below.
    for ax in flat_axes[len(list_datasets_mouse):]:
        ax.set_axis_off()

    # `name` already holds "<Author> <Year>" for each dataset, in the same order.
    for idx, (adata, name) in enumerate(zip(list_datasets_mouse, list_names_mouse)):
        sc.pl.umap(adata, color=[gene], legend_loc='on data', show=False, ax=flat_axes[idx],
                   title=f"{name} mouse {gene}", size=15, cmap=magma, frameon=False)