# Detecting kranocyte populations in the datasets

In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm.notebook import tqdm
import ray
import subprocess
from scipy.sparse import csr_matrix
from IPython.display import display, HTML

from tqdm.notebook import tqdm

from bokeh.io import show, output_notebook, reset_output

# Reset Bokeh's output state, then route Bokeh output to the notebook cells.
reset_output()
output_notebook()

In [None]:
# Helper used in the per-dataset sections below with the A / B marker-gene lists.
from cellassign import assign_cats

# Print the helper's documentation for reference while working in the notebook.
help(assign_cats)

In [None]:
# Palettes for UMAP gene expression

magma = [plt.get_cmap('magma')(i) for i in np.linspace(0,1, 80)]
magma[0] = (0.88, 0.88, 0.88, 1)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma[:65])

seed = 0

In [None]:
mpl.rcParams['figure.dpi'] = 150

In [None]:
# Marker genes for the two candidate populations, passed to assign_cats as
# categories 'A' and 'B' in the per-dataset sections below.
# NOTE(review): 'Mpzl2' appears in both lists — confirm this is intentional.
A_markers = [
    '6030408B16Rik', 'Smim41', 'Adamtsl2', 'Cdh19', 'Cdkn2b',
    'Col18a1', 'Col26a1', 'Col9a2', 'Dlk1', 'Fetub',
    'Gfra2', 'Gm11681', 'Gpld1', 'Greb1', 'Gria1',
    'Kcnb2', 'Kcnk2', 'Mpzl2', 'Ngfr', 'Plppr4',
    'Ptgfr', 'Rgs17', 'Saa1', 'Saa2', 'Shisa3',
    'Sipa1l1', 'Sorcs2', 'Sox9', 'Sphkap', 'Syndig1',
    'Trpm6',
]
B_markers = [
    'Cldn1', 'Crabp2', 'Dleu7', 'Efnb3', 'Gjb5',
    'Grin2b', 'Itgb4', 'Kcnj13', 'Kcnj2', 'Lgals7',
    'Lypd2', 'Mansc4', 'Moxd1', 'Mpzl2', 'Perp',
    'Prodh', 'Ptch1', 'Slc6a13', 'Stra6', 'Tec',
    'Tenm2', 'Wnt10a', 'Wnt6',
]  # 'Sox10',

In [None]:
# Palette stored in uns['krano_type_colors'] for every dataset below
# (presumably matched, in order, to the '-', 'A', 'B' categories — confirm).
list_colors = [
    '#bcbcbc',
    '#900C3F',
    '#286e87',
]

## Oprescu

In [None]:
adata_oprescu_d0 = sc.read('data/processed/oprescu_d0.h5')

In [None]:
adata_oprescu_d0.obs['id'] = adata_oprescu_d0.obs_names
assign_cats(adata_oprescu_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.7, 
            others_name='-', key_added='krano_type')
adata_oprescu_d0.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type', 'Mpzl2'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_oprescu_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_oprescu_d0.var_names], 
           cmap=magma, alpha=0.6)

## Scott

In [None]:
adata_scott_d0 = sc.read('data/processed/scott_d0.h5')

In [None]:
adata_scott_d0.obs['id'] = adata_scott_d0.obs_names
assign_cats(adata_scott_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.55, 
            others_name='-', key_added='krano_type')
adata_scott_d0.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_scott_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_scott_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_scott_d0.var_names], 
           cmap=magma, alpha=0.6)

## De Micheli mouse

In [None]:
adata_de_micheli_mouse_d0 = sc.read('data/processed/de_micheli_mouse_d0.h5')

In [None]:
adata_de_micheli_mouse_d0.obs['id'] = adata_de_micheli_mouse_d0.obs_names
assign_cats(adata_de_micheli_mouse_d0, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.55, 
            others_name='-', key_added='krano_type')
adata_de_micheli_mouse_d0.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_de_micheli_mouse_d0.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_de_micheli_mouse_d0.var_names], 
           cmap=magma, alpha=0.6)

## Giordani

In [None]:
adata_giordani = sc.read('data/processed/giordani.h5')

In [None]:
adata_giordani.obs['id'] = adata_giordani.obs_names
assign_cats(adata_giordani, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.55, 
            others_name='-', key_added='krano_type')
adata_giordani.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_giordani.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_giordani, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_giordani.var_names], 
           cmap=magma, alpha=0.6)

## Proietti

In [None]:
adata_proietti = sc.read('data/processed/proietti.h5')

In [None]:
adata_proietti.obs['id'] = adata_proietti.obs_names
assign_cats(adata_proietti, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.5, 
            others_name='-', key_added='krano_type')
adata_proietti.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_proietti.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_proietti, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_proietti.var_names], 
           cmap=magma, alpha=0.6)

## Ronzoni

In [None]:
adata_ronzoni = sc.read('data/processed/ronzoni.h5')

In [None]:
adata_ronzoni.obs['id'] = adata_ronzoni.obs_names
assign_cats(adata_ronzoni, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.5, 
            others_name='-', key_added='krano_type')
adata_ronzoni.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_ronzoni.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_ronzoni, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_ronzoni.var_names], 
           cmap=magma, alpha=0.6)

## Dell'Orso

In [None]:
adata_dellorso = sc.read('data/processed/dellorso.h5')

In [None]:
adata_dellorso.obs['id'] = adata_dellorso.obs_names
assign_cats(adata_dellorso, column_groupby='id', dict_cats={'A': A_markers, 'B': B_markers}, min_score=0.5, 
            others_name='-', key_added='krano_type')
adata_dellorso.uns['krano_type_colors'] = list_colors

In [None]:
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] , cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + [i for i in A_markers if i in adata_dellorso.var_names], 
           cmap=magma, alpha=0.6)

In [None]:
sc.pl.umap(adata_dellorso, color=['krano_type', 'cell_type'] + [i for i in B_markers if i in adata_dellorso.var_names], 
           cmap=magma, alpha=0.6)

# Running datasets against markers

## Kumar et al 2017 PC1 and PC2

In [None]:
# Grid of UMAPs: one row per key in list_genes, one column per dataset
# (De Micheli, Oprescu, Scott).
# NOTE(review): 'Krano_type' differs in case from the 'krano_type' key passed to
# assign_cats earlier in this notebook; if no 'Krano_type' column exists in a
# dataset, that panel is reported as unavailable below — confirm the intended key.
list_genes = ['Krano_type'] + ['Rgs5', 'Acta2', 'Cxcl1', 'Cxcl2', 'Cxcl5', 'Il6', 'Il1b']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

panel_adatas = (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)

for idx, gene in enumerate(list_genes):
    for col, adata_panel in enumerate(panel_adatas):
        # Handle each panel on its own so a key missing from one dataset does
        # not blank the remaining datasets of the row, and report the problem
        # instead of silently swallowing every exception.
        try:
            sc.pl.umap(adata_panel, color=gene, cmap=magma, ax=axs[idx][col], show=False, legend_loc='on data')
        except (KeyError, ValueError) as err:
            print(f'Skipping {gene!r} (row {idx}, column {col}): {err}')
            axs[idx][col].set_title(f'{gene} (not available)')

## Kumar et al 2017 Capillary proinflammatory/capillary and contractile/arteriolar PCs

In [None]:
# Grid of UMAPs: one row per key in list_genes, one column per dataset
# (De Micheli, Oprescu, Scott).
# NOTE(review): 'Krano_type' differs in case from the 'krano_type' key passed to
# assign_cats earlier in this notebook; if no 'Krano_type' column exists in a
# dataset, that panel is reported as unavailable below — confirm the intended key.
list_genes = ['Krano_type'] + ['Cd274', 'Dlk1', 'Nt5e'] # Cd73 = Nt5e

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

panel_adatas = (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)

for idx, gene in enumerate(list_genes):
    for col, adata_panel in enumerate(panel_adatas):
        # Handle each panel on its own so a key missing from one dataset does
        # not blank the remaining datasets of the row, and report the problem
        # instead of silently swallowing every exception.
        try:
            sc.pl.umap(adata_panel, color=gene, cmap=magma, ax=axs[idx][col], show=False, legend_loc='on data')
        except (KeyError, ValueError) as err:
            print(f'Skipping {gene!r} (row {idx}, column {col}): {err}')
            axs[idx][col].set_title(f'{gene} (not available)')

## Birbrair PCs

In [None]:
# Grid of UMAPs: one row per key in list_genes, one column per dataset
# (De Micheli, Oprescu, Scott).
# NOTE(review): 'Krano_type' differs in case from the 'krano_type' key passed to
# assign_cats earlier in this notebook; if no 'Krano_type' column exists in a
# dataset, that panel is reported as unavailable below — confirm the intended key.
list_genes = ['Krano_type'] + ['Pdgfrb', 'Mcam', 'Cspg4', 'Nes'] # Cd146 = Mcam, Ng2 = Cspg4

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

panel_adatas = (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)

for idx, gene in enumerate(list_genes):
    for col, adata_panel in enumerate(panel_adatas):
        # Handle each panel on its own so a key missing from one dataset does
        # not blank the remaining datasets of the row, and report the problem
        # instead of silently swallowing every exception.
        try:
            sc.pl.umap(adata_panel, color=gene, cmap=magma, ax=axs[idx][col], show=False, legend_loc='on data')
        except (KeyError, ValueError) as err:
            print(f'Skipping {gene!r} (row {idx}, column {col}): {err}')
            axs[idx][col].set_title(f'{gene} (not available)')

## Camps ISC

* ISC1: Ly6c1, Cd55
* ISC2: Gdf10, Meox2, F3/Cd142
* ISC3: Thbs4, Fbln7, Sdc1

In [None]:
# Grid of UMAPs: one row per key in list_genes, one column per dataset
# (De Micheli, Oprescu, Scott).
# NOTE(review): 'Krano_type' differs in case from the 'krano_type' key passed to
# assign_cats earlier in this notebook; if no 'Krano_type' column exists in a
# dataset, that panel is reported as unavailable below — confirm the intended key.
list_genes = ['Krano_type'] + ['Cd55', 'F3', 'Sdc1']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

panel_adatas = (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)

for idx, gene in enumerate(list_genes):
    for col, adata_panel in enumerate(panel_adatas):
        # Handle each panel on its own so a key missing from one dataset does
        # not blank the remaining datasets of the row, and report the problem
        # instead of silently swallowing every exception.
        try:
            sc.pl.umap(adata_panel, color=gene, cmap=magma, ax=axs[idx][col], show=False, legend_loc='on data')
        except (KeyError, ValueError) as err:
            print(f'Skipping {gene!r} (row {idx}, column {col}): {err}')
            axs[idx][col].set_title(f'{gene} (not available)')

# Export adatas

In [None]:
os.makedirs(data_dir + '/processed', exist_ok=True)

In [None]:
adata_de_micheli_mouse_d0.write_h5ad(data_dir + '/processed/de_micheli_mouse_D0.h5ad')

In [None]:
adata_oprescu_d0.write_h5ad(data_dir + '/processed/oprescu_D0.h5ad')

In [None]:
adata_giordani.write_h5ad(data_dir + '/processed/giordani_D0.h5ad')

In [None]:
adata_scott_d0.write_h5ad(data_dir + '/processed/scott_D0.h5ad')

# Beautiful figs

In [None]:
# Create the output folder for the cluster figures if it is missing.
# exist_ok=True replaces the separate exists() check (same idiom as the
# data export folder above) and avoids the check-then-create race.
os.makedirs(fig_dir + 'clusters/', exist_ok=True)

In [None]:
# MPL config
# 'normal' is not a font family name: matplotlib cannot resolve it, logs
# "findfont: Font family ['normal'] not found" and falls back to a default font.
# Request the generic 'sans-serif' family explicitly instead.
font = {'family': 'sans-serif',
        'weight': 'light',
        'size': 15}

mpl.rc('font', **font)

In [None]:
def _legend_entries(legend):
    """Return an ordered ``{label: handle}`` mapping for a matplotlib legend."""
    # ``legendHandles`` was renamed to ``legend_handles`` in matplotlib 3.7 and
    # the old spelling was later removed, so try the new name first.
    handles = getattr(legend, 'legend_handles', None)
    if handles is None:
        handles = legend.legendHandles
    labels = [str(text.get_text()) for text in legend.get_texts()]
    return dict(zip(labels, handles))


def makefig(list_genes, name_order=None, adata_list=[adata_oprescu_d0, adata_scott_d0, adata_de_micheli_mouse_d0, adata_giordani], 
           list_datasets = ['Oprescu', 'Scott', 'De Micheli', 'Giordani']):
    """Draw a grid of UMAPs (rows = keys, columns = datasets) and save it.

    Parameters
    ----------
    list_genes : list of str
        Keys passed one by one as ``color`` to ``sc.pl.umap``. Each key is one
        row of the grid and labels the y axis of the first column. The keys are
        also joined with '-' to build the output file name.
    name_order : list of str, optional
        Order of the entries in the merged categorical legend. Names absent
        from a row's legends are skipped. If None, first-seen order is used.
    adata_list : list
        Datasets to plot, one per column. The default list is evaluated once,
        when this function is defined.
    list_datasets : list of str
        Column titles, shown on the first row only.

    Notes
    -----
    For every row, the legends of all panels are merged into a single legend
    placed next to the last column; the other panels' legends are removed.
    A row's legend is only rebuilt when the last panel itself has a legend.
    The figure is written to ``fig_dir + 'clusters/'`` as PNG (500 dpi) and
    PDF; ``fig_dir`` and ``magma`` are read from the notebook namespace.
    Errors from ``sc.pl.umap`` (e.g. an unknown key) propagate to the caller.
    """
    n_rows, n_cols = len(list_genes), len(adata_list)
    # squeeze=False keeps ``axs`` two-dimensional even with a single gene or a
    # single dataset, so ``axs[row][col]`` is always valid.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 4 * n_rows), squeeze=False)

    for row, gene in enumerate(list_genes):
        for col, adata in enumerate(adata_list):
            sc.pl.umap(adata, color=gene, cmap=magma, ax=axs[row][col], show=False, legend_fontsize=11)

    for row, gene in enumerate(list_genes):
        row_legend = {}  # label -> handle, accumulated over the panels of this row
        for col in range(n_cols):
            ax = axs[row][col]
            first_col = col == 0

            for side in ('top', 'right', 'bottom'):
                ax.spines[side].set_visible(False)
            if first_col:
                ax.spines['left'].set_visible(False)

            ax.set_xlabel('')
            ax.set_ylabel(gene if first_col else '')
            # Dataset names belong on the first row only, even when
            # list_datasets is longer than the number of columns.
            show_title = row == 0 and col < len(list_datasets)
            ax.set_title(list_datasets[col] if show_title else '')

            # Legend unification: panels coloured by a continuous value have no
            # legend and need no further handling.
            legend = ax.get_legend()
            if legend is None:
                continue
            row_legend.update(_legend_entries(legend))

            if col != n_cols - 1:
                legend.remove()
                continue

            if name_order is None:
                labels = list(row_legend)
            else:
                labels = [name for name in name_order if name in row_legend]
            ax.legend([row_legend[name] for name in labels], labels,
                      bbox_to_anchor=(1.05, 1), frameon=False, prop={'size': 11})

    fig.tight_layout()
    out_base = fig_dir + 'clusters/' + '-'.join(list_genes)
    fig.savefig(out_base + '.png', dpi=500)
    fig.savefig(out_base + '.pdf')
    


In [None]:
# Display order for the merged categorical legends drawn by makefig.
name_order = [
    'Endothelial', 'Pericyte', 'Fibroblast', 'FAP', 'Tenocyte', 'Neural cell',
    'Myonuclei', 'MuSC', 'Immune', 'APC / Proliferative ICs',
    'Monocyte', 'Neutrophil', 'Myeloid', 'B cell', 'T cell',
    'A', 'B', 'C', 'Other',
]

In [None]:
makefig(['cell_type', 'Krano_type'], name_order=name_order)

In [None]:
# Cluster 7
makefig(['cell_type', 'Krano_type', 'Cxcl14', 'G0s2', 'Adamtsl2', 'Saa1', 'Thrsp'], name_order=name_order)

In [None]:
makefig(['Krano_type', 'Cd34', 'S100b'], name_order=name_order)
makefig(['Krano_type', 'Pdgfrb', 'Sox10'], name_order=name_order)
makefig(['Krano_type', 'Ngfr', 'Cspg4'], name_order=name_order)
makefig(['Krano_type', 'Col9a2', 'Shisa3'], name_order=name_order)

In [None]:
makefig(['Krano_type', '6030408B16Rik', 'Col18a1'], name_order=name_order)
makefig(['Krano_type', 'Col9a2', 'Cldn1'], name_order=name_order)
makefig(['Krano_type', 'Dlk1', 'Fetub'], name_order=name_order)
makefig(['Krano_type', 'Gpld1', 'Grin2b'], name_order=name_order)
makefig(['Krano_type', 'Kcnb2', 'Lypd2'], name_order=name_order)
makefig(['Krano_type', 'Mansc4', 'Nipal1'], name_order=name_order)
makefig(['Krano_type', 'Saa1', 'Shisa3'], name_order=name_order)
makefig(['Krano_type', 'Tenm2', 'Trpm6'], name_order=name_order)

In [None]:
# Same marker grid across six Oprescu objects, with columns titled D0 ... D21.
# NOTE(review): adata_oprescu_d2, _d35, _d5, _d10 and _d21 are not loaded anywhere
# in the visible part of this notebook (only adata_oprescu_d0 is) — confirm they
# are read before this cell runs, otherwise it raises NameError.
makefig(['Tnc', 'Tnmd', 'Nipal1', 'Dlk1'], name_order=name_order, 
        adata_list=[adata_oprescu_d0, adata_oprescu_d2, adata_oprescu_d35, adata_oprescu_d5, adata_oprescu_d10, adata_oprescu_d21], 
        list_datasets=['D0', 'D2', 'D3.5', 'D5', 'D10', 'D21'])