# Detecting kranocyte populations in the datasets

In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm.notebook import tqdm
import ray
import subprocess
from scipy.sparse import csr_matrix
from IPython.display import display, HTML

from tqdm.notebook import tqdm

from bokeh.io import show, output_notebook, reset_output

# Reset Bokeh's output state, then send Bokeh output to the notebook.
reset_output()
output_notebook()

In [None]:
# `assign_cats` is called below with the A/B marker lists to add a `krano_type` annotation.
from cellassign import assign_cats

# Print its documentation for reference.
help(assign_cats)

In [None]:
# Palettes for UMAP gene expression

# Sample 80 colours from matplotlib's 'magma' colormap.
magma = [plt.get_cmap('magma')(i) for i in np.linspace(0,1, 80)]
# Replace the first (lowest) colour with light grey.
magma[0] = (0.88, 0.88, 0.88, 1)
# Build the colormap from the first 65 sampled colours only; it is passed as `cmap=magma` to the plots.
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma[:65])

# Seed passed as `random_state` to PCA, neighbours, UMAP and Leiden.
seed = 0

In [None]:
# Default resolution (dots per inch) for matplotlib figures.
mpl.rcParams['figure.dpi'] = 150

In [None]:
# Marker genes for the two kranocyte populations; `A_markers` and `B_markers` are the
# lists passed to `assign_cats` as categories 'A' and 'B'.
A_markers = ['6030408B16Rik', 'Smim41', 'Col9a2', 'Dlk1', 'Shisa3',  'Saa1',  'Nipal1']
# Additional A candidates, each annotated with the reason noted by the author.
# They are not passed to `assign_cats` in this notebook.
A_markers_extra = ['Kcnk2',  # Not specific enough
                   'Adamtsl2',  # Not specific enough
                   'Cst6',  # Teno marker
                   'Sorcs2',  # Not specific enough
                   'Susd5',  # Not specific enough
                   'Rgs17',  # Not specific enough
                   'Gfra2']  # Marks immune population
B_markers = ['Lypd2', 'Wnt6', 'Cldn1', 'Moxd1', 'Mansc4', 'Dleu7', 'Efnb3', 'Stra6', 'Sbspon', 'Ace2', 'Hcn4', 'Cldn22', 'Wnt10a', 'Ocln']
# Additional B candidates with the author's annotation. 'Ctxn3' used to be listed twice
# (once as "Myonuclei", once as "Tenocytes"); it is kept once with both annotations.
B_markers_extra = ['Itgb4',  # Expressed by immune
                   'Ralgps2',  # Expressed by immune
                   'Ctxn3',  # Myonuclei / tenocytes
                   'Tenm2',  # Krano A
                   'Sfrp5',  # Glial
                   'Perp',  # Glial + peris
                   'Krt19',  # APC
                   'Gpm6a',  # Immune
                   'Crabp2',  # Pericytes / teno
                   'Slc6a13',  # APC
                   'Itga7']  # Immune

In [None]:
# Colours stored in `.uns['krano_type_colors']` of every dataset below.
# Presumably one colour each for the '-', 'A' and 'B' categories — TODO confirm the category order.
list_colors = ['#bcbcbc', '#900C3F', '#286e87']

## Oprescu

In [None]:
# Load the processed Oprescu d0 dataset.
adata_oprescu_d0 = sc.read('data/processed/oprescu_d0.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_oprescu_d0.obs['id'] = adata_oprescu_d0.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_oprescu_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.6, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_oprescu_d0.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP coloured by kranocyte assignment, cell type and Mpzl2 expression.
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type', 'Mpzl2'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_oprescu_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_oprescu_d0.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_oprescu_d0_sub = adata_oprescu_d0[adata_oprescu_d0.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Glial cell', 'Neural cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_oprescu_d0_sub, min_counts=1)

sc.pp.pca(adata_oprescu_d0_sub, random_state=seed, n_comps=30)
# Number of neighbours: floor(sqrt(n_cells) / 4).
sc.pp.neighbors(adata_oprescu_d0_sub, random_state=seed, n_neighbors=int(len(adata_oprescu_d0_sub) ** 0.5 // 4), metric='cosine')
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_oprescu_d0_sub)

sc.tl.umap(adata_oprescu_d0_sub, min_dist=0.7, random_state=seed)
sc.tl.leiden(adata_oprescu_d0_sub, resolution=3, random_state=seed)

# Re-annotate the kranocyte types on the subset (its own `min_score`).
assign_cats(adata_oprescu_d0_sub, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.7, others_name='-', key_added='krano_type')

sc.pl.umap(adata_oprescu_d0_sub, color=['leiden', 'n_counts', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/oprescu_d0.h5'.
adata_oprescu_d0_sub.write_h5ad('data/processed/oprescu_d0_sub.h5')
adata_oprescu_d0.write_h5ad('data/processed/oprescu_d0.h5')

## Scott

In [None]:
# Load the processed Scott d0 dataset.
adata_scott_d0 = sc.read('data/processed/scott_d0.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_scott_d0.obs['id'] = adata_scott_d0.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_scott_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.4, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_scott_d0.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_scott_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_scott_d0.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_scott_d0_sub = adata_scott_d0[adata_scott_d0.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_scott_d0_sub, min_counts=1)

sc.pp.pca(adata_scott_d0_sub, random_state=seed, n_comps=30)
# Number of neighbours: floor(sqrt(n_cells) / 4).
sc.pp.neighbors(adata_scott_d0_sub, random_state=seed, n_neighbors=int(len(adata_scott_d0_sub) ** 0.5 // 4), metric='cosine')
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_scott_d0_sub)

sc.tl.umap(adata_scott_d0_sub, min_dist=0.5, random_state=seed)
sc.tl.leiden(adata_scott_d0_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset.
assign_cats(adata_scott_d0_sub, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.4, others_name='-', key_added='krano_type')

sc.pl.umap(adata_scott_d0_sub, color=['leiden', 'n_counts', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/scott_d0.h5'.
adata_scott_d0_sub.write_h5ad('data/processed/scott_d0_sub.h5')
adata_scott_d0.write_h5ad('data/processed/scott_d0.h5')

## De Micheli mouse

In [None]:
# Load the processed De Micheli mouse d0 dataset.
adata_de_micheli_mouse_d0 = sc.read('data/processed/de_micheli_mouse_d0.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_de_micheli_mouse_d0.obs['id'] = adata_de_micheli_mouse_d0.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_de_micheli_mouse_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.55, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_de_micheli_mouse_d0.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_de_micheli_mouse_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_de_micheli_mouse_d0.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_de_micheli_mouse_d0_sub = adata_de_micheli_mouse_d0[adata_de_micheli_mouse_d0.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_de_micheli_mouse_d0_sub, min_counts=1)

sc.pp.pca(adata_de_micheli_mouse_d0_sub, random_state=seed, n_comps=30)
# Batch-balanced neighbour graph (3 neighbours within each batch) instead of `sc.pp.neighbors`.
sce.pp.bbknn(adata_de_micheli_mouse_d0_sub, metric='angular', neighbors_within_batch=3)
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_de_micheli_mouse_d0_sub)

sc.tl.umap(adata_de_micheli_mouse_d0_sub, min_dist=0.7, random_state=seed)
sc.tl.leiden(adata_de_micheli_mouse_d0_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset (its own `min_score`).
assign_cats(adata_de_micheli_mouse_d0_sub, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.5, others_name='-', key_added='krano_type')

sc.pl.umap(adata_de_micheli_mouse_d0_sub, color=['leiden', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/de_micheli_mouse_d0.h5'.
adata_de_micheli_mouse_d0_sub.write_h5ad('data/processed/de_micheli_mouse_d0_sub.h5')
adata_de_micheli_mouse_d0.write_h5ad('data/processed/de_micheli_mouse_d0.h5')

## Giordani

In [None]:
# Load the processed Giordani dataset.
adata_giordani = sc.read('data/processed/giordani.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_giordani.obs['id'] = adata_giordani.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_giordani, column_groupby='id', 
            dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.35, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_giordani.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_giordani.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_giordani.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_giordani_sub = adata_giordani[adata_giordani.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_giordani_sub, min_counts=1)

sc.pp.pca(adata_giordani_sub, random_state=seed, n_comps=30)
# Batch-balanced neighbour graph (3 neighbours within each batch) instead of `sc.pp.neighbors`.
sce.pp.bbknn(adata_giordani_sub, metric='angular', neighbors_within_batch=3)
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_giordani_sub)

sc.tl.umap(adata_giordani_sub, min_dist=0.5, random_state=seed)
sc.tl.leiden(adata_giordani_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset.
assign_cats(adata_giordani_sub, column_groupby='id', 
            dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.35, 
            others_name='-', key_added='krano_type')

sc.pl.umap(adata_giordani_sub, color=['leiden', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/giordani.h5'.
adata_giordani_sub.write_h5ad('data/processed/giordani_sub.h5')
adata_giordani.write_h5ad('data/processed/giordani.h5')

## Proietti

In [None]:
# Load the processed Proietti dataset.
adata_proietti = sc.read('data/processed/proietti.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_proietti.obs['id'] = adata_proietti.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_proietti, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.4, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_proietti.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_proietti.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_proietti.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_proietti_sub = adata_proietti[adata_proietti.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_proietti_sub, min_counts=1)

sc.pp.pca(adata_proietti_sub, random_state=seed, n_comps=30)
# Batch-balanced neighbour graph (3 neighbours within each batch), with `obs['condition']` as the batch.
sce.pp.bbknn(adata_proietti_sub, metric='angular', neighbors_within_batch=3, batch_key='condition')
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_proietti_sub)

sc.tl.umap(adata_proietti_sub, min_dist=0.5, random_state=seed)
sc.tl.leiden(adata_proietti_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset (its own `min_score`).
assign_cats(adata_proietti_sub, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.45, others_name='-', key_added='krano_type')

sc.pl.umap(adata_proietti_sub, color=['leiden', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/proietti.h5'.
adata_proietti_sub.write_h5ad('data/processed/proietti_sub.h5')
adata_proietti.write_h5ad('data/processed/proietti.h5')

## Ronzoni

In [None]:
# Load the processed Ronzoni dataset.
adata_ronzoni = sc.read('data/processed/ronzoni.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_ronzoni.obs['id'] = adata_ronzoni.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_ronzoni, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.3, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_ronzoni.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_ronzoni.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_ronzoni.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest. `.copy()` turns the AnnData view into an independent
# object, so the in-place steps below do not rely on an implicit view-to-copy conversion.
adata_ronzoni_sub = adata_ronzoni[adata_ronzoni.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_ronzoni_sub, min_counts=1)

sc.pp.pca(adata_ronzoni_sub, random_state=seed, n_comps=30)
# Number of neighbours: floor(sqrt(n_cells) / 4).
sc.pp.neighbors(adata_ronzoni_sub, random_state=seed, n_neighbors=int(len(adata_ronzoni_sub) ** 0.5 // 4), metric='cosine')
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_ronzoni_sub)

sc.tl.umap(adata_ronzoni_sub, min_dist=0.5, random_state=seed)
sc.tl.leiden(adata_ronzoni_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset (its own `min_score`).
assign_cats(adata_ronzoni_sub, column_groupby='id', 
            dict_cats={'A': A_markers, 'B': B_markers}, 
            min_score=0.25, others_name='-', key_added='krano_type')

sc.pl.umap(adata_ronzoni_sub, color=['leiden', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/ronzoni.h5'.
adata_ronzoni_sub.write_h5ad('data/processed/ronzoni_sub.h5')
adata_ronzoni.write_h5ad('data/processed/ronzoni.h5')

## Dell'Orso

In [None]:
# Load the processed Dell'Orso dataset.
adata_dellorso = sc.read('data/processed/dellorso.h5')

In [None]:
# Expose the cell names as an `id` column; it is the grouping column given to `assign_cats`.
adata_dellorso.obs['id'] = adata_dellorso.obs_names
# Annotate with the A/B marker lists; the result is stored in `obs['krano_type']`
# and '-' is the name given to the remaining cells (`others_name`).
assign_cats(adata_dellorso, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.4, 
            others_name='-', key_added='krano_type')
# Fixed colours for the `krano_type` categories in scanpy plots.
adata_dellorso.uns['krano_type_colors'] = list_colors

In [None]:
# UMAP coloured by kranocyte assignment and by cell type.
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
# UMAP of the A markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_dellorso.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
# UMAP of the B markers; markers absent from this dataset's var_names are skipped.
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_dellorso.var_names], 
           cmap=magma, alpha=0.6)

## Subsample of populations

In [None]:
# Keep only the cell types of interest (this dataset also includes 'Epcam+'). `.copy()` turns the
# AnnData view into an independent object, so the in-place steps below do not rely on an
# implicit view-to-copy conversion.
adata_dellorso_sub = adata_dellorso[adata_dellorso.obs['cell_type'].isin(['Lum+ FAP', 'Prg4+ FAP', 'Epcam+', 'Endothelial', 'Perivascular', 'Tenocyte', 'Neural cell', 'Glial cell'])].copy()

# Remove genes with fewer than 1 total count in the subset.
sc.pp.filter_genes(adata_dellorso_sub, min_counts=1)

sc.pp.pca(adata_dellorso_sub, random_state=seed, n_comps=30)
# Batch-balanced neighbour graph (3 neighbours within each batch) instead of `sc.pp.neighbors`.
sce.pp.bbknn(adata_dellorso_sub, metric='angular', neighbors_within_batch=3)
# NOTE(review): triku runs after PCA/neighbours and neither is recomputed afterwards, so its
# feature selection does not appear to feed into the UMAP/Leiden below — confirm this is intended.
tk.tl.triku(adata_dellorso_sub)

sc.tl.umap(adata_dellorso_sub, min_dist=0.8, random_state=seed)
sc.tl.leiden(adata_dellorso_sub, resolution=3.5, random_state=seed)

# Re-annotate the kranocyte types on the subset (its own `min_score`).
assign_cats(adata_dellorso_sub, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.2, others_name='-', key_added='krano_type')

sc.pl.umap(adata_dellorso_sub, color=['leiden', 'cell_type', 'krano_type'], legend_loc='on data')

In [None]:
# Save the subset to its own file and write the annotated full dataset to 'data/processed/dellorso.h5'.
adata_dellorso_sub.write_h5ad('data/processed/dellorso_sub.h5')
adata_dellorso.write_h5ad('data/processed/dellorso.h5')

# Analysis of *interesting* kranocyte populations

## Analysis of Krano A population in Giordani et al.
It is common to see two separate kranocyte populations in the full datasets (De Micheli, Giordani and Dell'Orso, for instance), but when the populations of interest are subset and re-analysed they usually merge into one population, or into two very similar ones. In Giordani, however, the two populations remain separated after subsetting: one of them is Nipal1$^+$ and the other one is Saa1$^+$. We compare these two populations to check whether the same pattern replicates in the other datasets.

**This part is not reproducible. Set the clusters to match the populations by hand.**

### Giordani

In [None]:
# Giordani subset: kranocyte assignment, Leiden clusters, cell types and the A markers.
# The markers are filtered against the subset's own var_names, because `filter_genes`
# may have removed genes from the subset that are still present in the full dataset.
sc.pl.umap(adata_giordani_sub, color=['krano_type', 'leiden', 'cell_type'] + [i for i in A_markers if i in adata_giordani_sub.var_names], 
           cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Split the A kranocytes of the Giordani subset into two groups by hand-picked Leiden clusters.
cells_A_1 = adata_giordani_sub[(adata_giordani_sub.obs['krano_type'] == 'A') & (adata_giordani_sub.obs['leiden'].isin(['19', '27']))].obs_names
cells_A_2 = adata_giordani_sub[(adata_giordani_sub.obs['krano_type'] == 'A') & (adata_giordani_sub.obs['leiden'].isin(['40']))].obs_names

# Label every cell: 'A1', 'A2' or '-' for all the others.
df = pd.Series('-', index=adata_giordani_sub.obs_names)
df.loc[cells_A_1] = 'A1'
df.loc[cells_A_2]  ='A2'

adata_giordani_sub.obs['kranos_A'] = df

In [None]:
# Differential expression of A1 against A2 only.
sc.tl.rank_genes_groups(adata_giordani_sub, groupby='kranos_A', groups=['A1'], reference='A2')
# First 150 ranked genes are taken as A1 genes; the last 150, reversed, as A2 genes.
# NOTE(review): the tail is only the A2-enriched end if the ranking contains all genes —
# confirm the `n_genes` default of the installed scanpy version.
A1_genes_giordani = list(adata_giordani_sub.uns['rank_genes_groups']['names']['A1'][:150]) 
A2_genes_giordani = list(adata_giordani_sub.uns['rank_genes_groups']['names']['A1'][-150:][::-1]) 

### Dell'Orso

In [None]:
# Full Dell'Orso dataset: kranocyte assignment, Leiden clusters, cell types and the A markers.
# The markers are filtered against the var_names of the object that is actually plotted.
sc.pl.umap(adata_dellorso, color=['krano_type', 'leiden', 'cell_type'] + [i for i in A_markers if i in adata_dellorso.var_names], 
           cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Split the A kranocytes of the full Dell'Orso dataset into two groups by hand-picked Leiden clusters.
cells_A_1 = adata_dellorso[(adata_dellorso.obs['krano_type'] == 'A') & (adata_dellorso.obs['leiden'].isin(['1', '13', '8']))].obs_names
cells_A_2 = adata_dellorso[(adata_dellorso.obs['krano_type'] == 'A') & (adata_dellorso.obs['leiden'].isin(['20']))].obs_names

# Label every cell: 'A1', 'A2' or '-' for all the others.
df = pd.Series('-', index=adata_dellorso.obs_names)
df.loc[cells_A_1] = 'A1'
df.loc[cells_A_2]  ='A2'

adata_dellorso.obs['kranos_A'] = df

In [None]:
# Differential expression of A1 against A2 only.
sc.tl.rank_genes_groups(adata_dellorso, groupby='kranos_A', groups=['A1'], reference='A2')
# First 150 ranked genes are taken as A1 genes; the last 150, reversed, as A2 genes.
# NOTE(review): the tail is only the A2-enriched end if the ranking contains all genes —
# confirm the `n_genes` default of the installed scanpy version.
A1_genes_dellorso = list(adata_dellorso.uns['rank_genes_groups']['names']['A1'][:150]) 
A2_genes_dellorso = list(adata_dellorso.uns['rank_genes_groups']['names']['A1'][-150:][::-1]) 

### Merge and selection of genes
We will now select the A1 and A2 genes based on some criteria:
* The gene is clearly expressed in the cell type
* The gene is reasonably *specific* to kranocytes. If the gene is clearly overexpressed, this criterion can be relaxed.

In [None]:
# Genes found in both the Dell'Orso and the Giordani lists (np.intersect1d returns sorted unique values).
A1_genes_both = list(np.intersect1d(A1_genes_dellorso, A1_genes_giordani))
A2_genes_both = list(np.intersect1d(A2_genes_dellorso, A2_genes_giordani))

In [None]:
# De Micheli: kranocyte assignment, cell type and Ngf / Ngfr expression.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] + ['Ngf', 'Ngfr'], cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Dell'Orso: expression of the shared A1 genes.
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + A1_genes_both, cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Dell'Orso: expression of the shared A2 genes.
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + A2_genes_both, cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Giordani subset: expression of the shared A2 genes.
sc.pl.umap(adata_giordani_sub, color=['krano_type', 'cell_type'] + A2_genes_both, cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

## De Micheli A krano differences are due to stress

In [None]:
# De Micheli: kranocyte assignment, Leiden clusters, cell types and the A markers.
# The markers are filtered against the De Micheli var_names (the dataset being plotted),
# not against another dataset's gene list.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'leiden', 'cell_type'] + [i for i in A_markers if i in adata_de_micheli_mouse_d0.var_names], 
           cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# Split the A kranocytes of the De Micheli dataset into two groups by hand-picked Leiden clusters.
cells_A_1 = adata_de_micheli_mouse_d0[(adata_de_micheli_mouse_d0.obs['krano_type'] == 'A') & (adata_de_micheli_mouse_d0.obs['leiden'].isin(['29']))].obs_names
cells_A_2 = adata_de_micheli_mouse_d0[(adata_de_micheli_mouse_d0.obs['krano_type'] == 'A') & (adata_de_micheli_mouse_d0.obs['leiden'].isin(['28']))].obs_names

# Label every cell: 'A1', 'A2' or '-' for all the others.
df = pd.Series('-', index=adata_de_micheli_mouse_d0.obs_names)
df.loc[cells_A_1] = 'A1'
df.loc[cells_A_2]  ='A2'

adata_de_micheli_mouse_d0.obs['kranos_A'] = df

In [None]:
# Differential expression of A1 against A2 only.
sc.tl.rank_genes_groups(adata_de_micheli_mouse_d0, groupby='kranos_A', groups=['A1'], reference='A2')
# First 35 ranked genes are taken as A1 genes; the last 35, reversed, as A2 genes.
# NOTE(review): the tail is only the A2-enriched end if the ranking contains all genes —
# confirm the `n_genes` default of the installed scanpy version.
A1_genes_demicheli = list(adata_de_micheli_mouse_d0.uns['rank_genes_groups']['names']['A1'][:35]) 
A2_genes_demicheli = list(adata_de_micheli_mouse_d0.uns['rank_genes_groups']['names']['A1'][-35:][::-1]) 

In [None]:
# De Micheli: expression of the A1 genes.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'leiden', 'cell_type'] +
           list(A1_genes_demicheli), 
           cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

In [None]:
# De Micheli: expression of the A2 genes.
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'leiden', 'cell_type'] +
           list(A2_genes_demicheli), 
           cmap=magma, alpha=0.6, ncols=3, legend_loc='on data')

# Running datasets against markers

## Kumar et al 2017 PC1 and PC2

In [None]:
# One row per key; columns are De Micheli, Oprescu and Scott.
# NOTE(review): the cells above store the annotation as 'krano_type' (lower case); the
# 'Krano_type' row is only drawn if that column already exists in the loaded objects — confirm.
list_genes = ['Krano_type'] + ['Rgs5', 'Acta2', 'Cxcl1', 'Cxcl2', 'Cxcl5', 'Il6', 'Il1b']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for col_idx, adata_plot in enumerate((adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        # Each panel is attempted on its own so that a key missing from one dataset
        # does not blank the remaining panels of the row, and the failure is reported.
        try:
            sc.pl.umap(adata_plot, color=gene, cmap=magma, ax=axs[idx][col_idx], show=False, legend_loc='on data')
        except Exception as err:
            print('Skipping {!r} (row {}, column {}): {}'.format(gene, idx, col_idx, err))

## Kumar et al 2017 Capillary proinflammatory/capillary and contractile/arteriolar PCs

In [None]:
# One row per key; columns are De Micheli, Oprescu and Scott.
# NOTE(review): the cells above store the annotation as 'krano_type' (lower case); the
# 'Krano_type' row is only drawn if that column already exists in the loaded objects — confirm.
list_genes = ['Krano_type'] + ['Cd274', 'Dlk1', 'Nt5e'] # Cd73 = Nt5e

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for col_idx, adata_plot in enumerate((adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        # Each panel is attempted on its own so that a key missing from one dataset
        # does not blank the remaining panels of the row, and the failure is reported.
        try:
            sc.pl.umap(adata_plot, color=gene, cmap=magma, ax=axs[idx][col_idx], show=False, legend_loc='on data')
        except Exception as err:
            print('Skipping {!r} (row {}, column {}): {}'.format(gene, idx, col_idx, err))

## Birbrair PCs

In [None]:
# One row per key; columns are De Micheli, Oprescu and Scott.
# NOTE(review): the cells above store the annotation as 'krano_type' (lower case); the
# 'Krano_type' row is only drawn if that column already exists in the loaded objects — confirm.
list_genes = ['Krano_type'] + ['Pdgfrb', 'Mcam', 'Cspg4', 'Nes'] # Cd146 = Mcam, Ng2 = Cspg4

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for col_idx, adata_plot in enumerate((adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        # Each panel is attempted on its own so that a key missing from one dataset
        # does not blank the remaining panels of the row, and the failure is reported.
        try:
            sc.pl.umap(adata_plot, color=gene, cmap=magma, ax=axs[idx][col_idx], show=False, legend_loc='on data')
        except Exception as err:
            print('Skipping {!r} (row {}, column {}): {}'.format(gene, idx, col_idx, err))

## Camps ISC

* ISC1: Ly6c1, Cd55
* ISC2: Gdf10, Meox2, F3/Cd142
* ISC3: Thbs4, Fbln7, Sdc1

In [None]:
# One row per key; columns are De Micheli, Oprescu and Scott.
# NOTE(review): the cells above store the annotation as 'krano_type' (lower case); the
# 'Krano_type' row is only drawn if that column already exists in the loaded objects — confirm.
list_genes = ['Krano_type'] + ['Cd55', 'F3', 'Sdc1']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for col_idx, adata_plot in enumerate((adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        # Each panel is attempted on its own so that a key missing from one dataset
        # does not blank the remaining panels of the row, and the failure is reported.
        try:
            sc.pl.umap(adata_plot, color=gene, cmap=magma, ax=axs[idx][col_idx], show=False, legend_loc='on data')
        except Exception as err:
            print('Skipping {!r} (row {}, column {}): {}'.format(gene, idx, col_idx, err))

# Beautiful figs

In [None]:
# Create the output folder for the cluster figures (no error if it already exists).
# NOTE(review): `fig_dir` is not defined anywhere in the visible notebook; it must be set before
# this cell, with a trailing path separator because paths are built by string concatenation.
os.makedirs(fig_dir + 'clusters/', exist_ok=True)

In [None]:
# MPL config
# 'normal' is not a font family matplotlib can resolve (it only triggers "font family not found"
# warnings and a fallback), so the generic 'sans-serif' family is requested explicitly.
font = {'family' : 'sans-serif',
        'weight' : 'light',
        'size'   : 15}

mpl.rc('font', **font)

In [None]:
def makefig(list_genes, name_order=None, adata_list=[adata_oprescu_d0, adata_scott_d0, adata_de_micheli_mouse_d0, adata_giordani], 
           list_datasets = ['Oprescu', 'Scott', 'De Micheli', 'Giordani']):
    """Draw a grid of UMAPs (one row per key, one column per dataset) and save it.

    Parameters
    ----------
    list_genes
        Keys passed one by one as `color` to `sc.pl.umap`; one row per key. They are also
        joined with '-' to build the output file name.
    name_order
        Optional order of the labels in the merged legend. Labels that are not listed
        in `name_order` are left out of the legend. With `None`, labels appear in the
        order in which they are found across the row.
    adata_list
        Datasets to plot, one column each. The default list is evaluated once, when the
        function is defined; it is only read, never modified.
    list_datasets
        Column titles, shown on the first row.

    Returns
    -------
    None. The figure is written to `fig_dir + 'clusters/'` as PNG (500 dpi) and PDF.

    Raises
    ------
    Whatever `sc.pl.umap` raises, e.g. when a key is missing from one of the datasets.
    """
    n_cols = len(adata_list)
    # squeeze=False keeps `axs` two-dimensional even with a single row or a single column.
    fig, axs = plt.subplots(len(list_genes), n_cols, figsize=(6 * n_cols, 4 * len(list_genes)), squeeze=False)

    for idx, gene in enumerate(list_genes):
        for adata_idx, adata in enumerate(adata_list):
            sc.pl.umap(adata, color=gene, cmap=magma, ax=axs[idx][adata_idx], show=False, legend_fontsize=11)

    for ax_idx, ax in enumerate(axs.ravel()):
        row_idx, col_idx = divmod(ax_idx, n_cols)
        is_first_col = col_idx == 0
        is_last_col = col_idx == n_cols - 1

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        if is_first_col:
            ax.spines['left'].set_visible(False)

        # The key name labels the row (first column only); dataset names label the first row.
        ax.set_xlabel('')
        ax.set_ylabel(list_genes[row_idx] if is_first_col else '')
        if row_idx == 0 and col_idx < len(list_datasets):
            ax.set_title(list_datasets[col_idx])
        else:
            ax.set_title('')

        # Legend unification: collect label -> handle over the whole row, remove the
        # per-panel legends and draw a single merged legend on the last panel of the row.
        if is_first_col:
            dict_legends = {}

        legend = ax.get_legend()
        if legend is None:
            # No legend on this panel, so there is nothing to merge.
            continue

        # `legend_handles` replaces `legendHandles` in recent matplotlib releases; support both.
        handles = getattr(legend, 'legend_handles', None)
        if handles is None:
            handles = legend.legendHandles
        for text, handle in zip(legend.get_texts(), handles):
            dict_legends[str(text.get_text())] = handle

        if not is_last_col:
            legend.remove()
        else:
            if name_order is None:
                names = list(dict_legends.keys())
            else:
                names = [i for i in name_order if i in dict_legends]
            ax.legend([dict_legends[i] for i in names], names,
                      bbox_to_anchor=(1.05, 1), frameon=False, prop={'size': 11})

    fig.tight_layout()
    out_base = fig_dir + 'clusters/' + '-'.join(list_genes)
    fig.savefig(out_base + '.png', dpi=500)
    fig.savefig(out_base + '.pdf')
    


In [None]:
# Order in which labels are listed in the merged legends drawn by `makefig`.
name_order = [
    'Endothelial',
    'Pericyte',
    'Fibroblast',
    'FAP',
    'Tenocyte',
    'Neural cell',
    'Myonuclei',
    'MuSC',
    'Immune',
    'APC / Proliferative ICs',
    'Monocyte',
    'Neutrophil',
    'Myeloid',
    'B cell',
    'T cell',
    'A',
    'B',
    'C',
    'Other',
]

In [None]:
# Overview figure: cell types and kranocyte annotation for the default datasets.
makefig(['cell_type', 'Krano_type'], name_order=name_order)

In [None]:
# Cluster 7
# Annotations plus five genes, one row each.
makefig(['cell_type', 'Krano_type', 'Cxcl14', 'G0s2', 'Adamtsl2', 'Saa1', 'Thrsp'], name_order=name_order)

In [None]:
# One figure per gene pair, each with the `Krano_type` annotation as its first row.
for _gene_pair in (['Cd34', 'S100b'],
                   ['Pdgfrb', 'Sox10'],
                   ['Ngfr', 'Cspg4'],
                   ['Col9a2', 'Shisa3']):
    makefig(['Krano_type'] + _gene_pair, name_order=name_order)

In [None]:
# One figure per gene pair, each with the `Krano_type` annotation as its first row.
for _gene_pair in (['6030408B16Rik', 'Col18a1'],
                   ['Col9a2', 'Cldn1'],
                   ['Dlk1', 'Fetub'],
                   ['Gpld1', 'Grin2b'],
                   ['Kcnb2', 'Lypd2'],
                   ['Mansc4', 'Nipal1'],
                   ['Saa1', 'Shisa3'],
                   ['Tenm2', 'Trpm6']):
    makefig(['Krano_type'] + _gene_pair, name_order=name_order)

In [None]:
# Four genes across six Oprescu objects, with the columns titled D0 to D21.
# NOTE(review): `adata_oprescu_d2`, `_d35`, `_d5`, `_d10` and `_d21` are not loaded anywhere
# in the visible notebook — they must be defined before this cell runs.
makefig(['Tnc', 'Tnmd', 'Nipal1', 'Dlk1'], name_order=name_order, 
        adata_list=[adata_oprescu_d0, adata_oprescu_d2, adata_oprescu_d35, adata_oprescu_d5, adata_oprescu_d10, adata_oprescu_d21], 
        list_datasets=['D0', 'D2', 'D3.5', 'D5', 'D10', 'D21'])