# Transcriptomic analysis of endothelial cells

In [None]:
import os
import numpy as np
import pandas as pd
import scipy

from anndata import AnnData
import scanpy as sc
import scanpy.external as sce
import scvelo as scv

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm

from scripts.batch_process import batch_process, preprocess_adata_sub
from cellassign import assign_cats

In [None]:
# Random seed shared by the per-day preprocessing calls further down.
seed = 10
# Figure resolution: 200 dpi for rendered figures, 300 dpi for saved ones.
sc.set_figure_params(dpi=200, dpi_save=300)

In [None]:
from scipy.stats import spearmanr

In [None]:
# Selection of palettes for cluster coloring, and scatter values

# Continuous colormap for scatter values, derived from matplotlib's 'magma':
# sample it at 80 evenly spaced positions ...
magma = list(map(plt.get_cmap('magma'), np.linspace(0, 1, 80)))
# ... replace the first (lowest) sample with light grey ...
magma[0] = (0.88, 0.88, 0.88, 1)
# ... and build the final colormap from the first 65 samples only, which drops
# the upper end of the original scale.
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma[:65])

# Qualitative palettes (11 colours each).
bold = [
    '#7F3C8D', '#11A579', '#3969AC', '#F2B701', '#E73F74', '#80BA5A',
    '#E68310', '#008695', '#CF1C90', '#f97b72', '#4b4b8f',
]
vivid = [
    '#E58606', '#5D69B1', '#52BCA3', '#99C945', '#CC61B0', '#24796C',
    '#DAA51B', '#2F8AC4', '#764E9F', '#ED645A', '#CC3A8E',
]
safe = [
    '#88CCEE', '#CC6677', '#DDCC77', '#117733', '#332288', '#AA4499',
    '#44AA99', '#999933', '#882255', '#661100', '#6699CC',
]

# Prism palette, reordered so that the even-indexed colours come first and the
# odd-indexed colours follow.
prism = [
    '#5F4690', '#1D6996', '#38A6A5', '#0F8554', '#73AF48', '#EDAD08',
    '#E17C05', '#CC503E', '#94346E', '#6F4070', '#994E95',
]
prism = [*prism[::2], *prism[1::2]]

# Discrete palette [Combination of BOLD and VIVID from carto colors]:
# the 11 BOLD colours followed by the 11 VIVID colours.
bold_and_vivid = bold + vivid

# Diverging palettes
temps = [
    '#009392', '#39b185', '#9ccb86', '#e9e29c', '#eeb479', '#e88471',
    '#cf597e',
]

# Continuous palettes
teal = [
    '#d1eeea', '#a8dbd9', '#85c4c9', '#68abb8', '#4f90a6', '#3b738f',
    '#2a5674',
]

In [None]:
# Colour -> endothelial subpopulation labels sharing that colour. Labels are
# the subpopulation names used in the per-day marker dictionaries below.
dict_colors_pops = {
    '#7F3C8D': ['EC1D0', 'EC1D2', 'EC1D4'],
    '#FF9800': ['EC2D0', 'EC2D2', 'EC2D4'],
    '#80BA5A': ['EC3D0', 'EC3D2', 'EC3AD4'],
    '#4CAF50': ['EC3BD4'],
    '#00BCD4': ['EC4AD0', 'EC4D2', 'EC4D4'],
    '#3969AC': ['EC4BD0'],
    '#E73F74': ['EC5D0', 'EC5D2', 'EC5D4'],
}

# Inverse lookup: subpopulation label -> colour.
dict_pops_colors = {
    population: colour
    for colour, populations in dict_colors_pops.items()
    for population in populations
}

# AnnData loading

In [None]:
# Directory the preprocessed .h5ad files are read from, resolved against the
# current working directory. The value ends with a path separator.
dir_adata_save = os.getcwd() + '/data/preprocessed/'

In [None]:
# Load the D0 dataset. os.path.join is used instead of string concatenation so
# the resulting path has exactly one separator between directory and file name.
adata_D0 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_0.h5ad'))

In [None]:
# Bare expression: shows the AnnData summary as the notebook cell output.
adata_D0

In [None]:
# Overview UMAP of the D0 dataset, coloured by batch and by annotated cell
# type, with the two panels side by side.
sc.pl.umap(adata_D0, color=['batch', 'cell_types'], ncols=2)

In [None]:
# Load the D2 dataset. os.path.join is used instead of string concatenation so
# the resulting path has exactly one separator between directory and file name.
adata_D2 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_2.h5ad'))

In [None]:
# Overview UMAP of the D2 dataset, coloured by batch and by annotated cell
# type, with the two panels side by side.
sc.pl.umap(adata_D2, color=['batch', 'cell_types'], ncols=2)

In [None]:
# Load the D4 dataset. os.path.join is used instead of string concatenation so
# the resulting path has exactly one separator between directory and file name.
adata_D4 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_4.h5ad'))

In [None]:
# Overview UMAP of the D4 dataset, coloured by batch and by annotated cell
# type, with the two panels side by side.
sc.pl.umap(adata_D4, color=['batch', 'cell_types'], ncols=2)

In [None]:
# Normalise the cell barcodes of all three datasets by collapsing every '-1-'
# in an observation name to a single '-'.
for adata in (adata_D0, adata_D2, adata_D4):
    adata.obs_names = [
        barcode.replace('-1-', '-')
        for barcode in adata.obs_names
    ]

# Discovering cell subtypes

In [None]:
# Restrict each dataset to the cells annotated as `cell_type`.
# Boolean indexing of an AnnData returns a view of the parent object. The
# subsets are processed in place by the following cells, so each one is copied
# explicitly here instead of relying on anndata's implicit copy-on-modify
# (which emits an ImplicitModificationWarning).
cell_type = 'Endothelial'

D0_sub = adata_D0[adata_D0.obs['cell_types'] == cell_type].copy()
D2_sub = adata_D2[adata_D2.obs['cell_types'] == cell_type].copy()
D4_sub = adata_D4[adata_D4.obs['cell_types'] == cell_type].copy()

## Preprocessing of day 0

In [None]:
# Run the project preprocessing helper on the D0 endothelial subset with the
# parameters chosen for this day.
# NOTE(review): presumably this selects HVGs, clusters (leiden) and computes the
# UMAP in place — confirm against scripts/batch_process.py.
preprocess_adata_sub(D0_sub, resolution=0.7, n_HVG=800, min_dist=0.4, seed=seed)
# UMAP coloured by batch and by leiden cluster, using the bold+vivid palette.
sc.pl.umap(D0_sub, color=['batch', 'leiden'], palette=bold_and_vivid)

In [None]:
# Marker genes per D0 endothelial subpopulation. The trailing comments carry
# the labels left by the original author (presumably earlier cluster names).
dict_endo_D0 = {
    'EC1D0': ['KCNJ2', 'FN1', 'SEMA3G', 'PELI1'],  # E5D0
    'EC2D0': ['NTHL1', 'TSPAN2', 'C19orf33', 'EDN1'],  # E3D0
    'EC3D0': ['RGS5', 'TPM2', 'MT1M', 'ABCC9'],  # E1D0
    'EC4AD0': ['ACKR1', 'NRN1', 'SELE', 'CNKSR3', 'HSPB8'],  # E4D0
    'EC4BD0': ['PRCP', 'C2CD4B', 'CCL23', 'ATF3'],  # E2D0
    'EC5D0': ['CSF3', 'SOD2', 'IFIT1', 'HMOX1'],  # E0D0
}

# Label the cells with the marker sets; the result is read back from
# obs['cell_subcats'] just below.
assign_cats(
    D0_sub,
    dict_cats=dict_endo_D0,
    key_added='cell_subcats',
    min_score=0.5,
    quantile_gene_sel=0.99,
)

# Fix one colour per subpopulation so plots are consistent across days.
# NOTE(review): assumes every label present is a key of dict_pops_colors and
# that the sorted label order matches the category order scanpy uses — confirm.
D0_sub.uns['cell_subcats_colors'] = [
    dict_pops_colors[subcat]
    for subcat in sorted(set(D0_sub.obs['cell_subcats']))
]

sc.pl.umap(D0_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Rank genes for leiden cluster '5' against all remaining cells (Wilcoxon test).
sc.tl.rank_genes_groups(D0_sub, groupby='leiden', groups=['5'], reference='rest', method='wilcoxon')
# Tracks plot of the top 50 ranked genes, without a dendrogram.
# NOTE(review): unlike the equivalent D2 and D4 calls, use_raw=False is not
# passed here — confirm whether that difference is intended.
sc.pl.rank_genes_groups_tracksplot(D0_sub, dendrogram=False, n_genes=50)

## Preprocessing of day 2

In [None]:
# Run the project preprocessing helper on the D2 endothelial subset with the
# parameters chosen for this day.
# NOTE(review): presumably this selects HVGs, clusters (leiden) and computes the
# UMAP in place — confirm against scripts/batch_process.py.
preprocess_adata_sub(D2_sub, resolution=0.7, n_HVG=1500, min_dist=0.25, seed=seed)
# UMAP coloured by batch and by leiden cluster, using the bold+vivid palette.
sc.pl.umap(D2_sub, color=['batch', 'leiden'], palette=bold_and_vivid)

In [None]:
# Marker genes per D2 endothelial subpopulation. The trailing comments carry
# the labels left by the original author (presumably earlier cluster names).
dict_endo_D2 = {
    'EC1D2': ['KCNJ2', 'FN1', 'SEMA3G', 'PELI1'],  # E4D2
    'EC2D2': ['DUSP23', 'IL1R1', 'CCL14', 'TSPAN7'],  # E2D2
    'EC3D2': ['RGS5', 'EDNRB', 'RBP7', 'STEAP4'],  # E0D2
    'EC4D2': ['SLC9A3R2', 'ADGRF5', 'MIDN', 'SOS1'],  # E3D2
    'EC5D2': ['CSF3', 'SOD2', 'CYP1B1', 'VCAM1'],  # E1D2
}

# Label the cells with the marker sets; the result is read back from
# obs['cell_subcats'] just below.
assign_cats(
    D2_sub,
    dict_cats=dict_endo_D2,
    key_added='cell_subcats',
    min_score=0.5,
    quantile_gene_sel=0.99,
)

# Fix one colour per subpopulation so plots are consistent across days.
# NOTE(review): assumes every label present is a key of dict_pops_colors and
# that the sorted label order matches the category order scanpy uses — confirm.
D2_sub.uns['cell_subcats_colors'] = [
    dict_pops_colors[subcat]
    for subcat in sorted(set(D2_sub.obs['cell_subcats']))
]

sc.pl.umap(D2_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Rank genes for leiden cluster '1' against all remaining cells (Wilcoxon test).
sc.tl.rank_genes_groups(D2_sub, groupby='leiden', groups=['1'], reference='rest', method='wilcoxon')
# Tracks plot of the top 50 ranked genes, without a dendrogram; expression
# values are taken from the main matrix rather than from .raw (use_raw=False).
sc.pl.rank_genes_groups_tracksplot(D2_sub, dendrogram=False, n_genes=50, use_raw=False)

## Preprocessing of day 4

In [None]:
# Run the project preprocessing helper on the D4 endothelial subset with the
# parameters chosen for this day.
# NOTE(review): presumably this selects HVGs, clusters (leiden) and computes the
# UMAP in place — confirm against scripts/batch_process.py.
preprocess_adata_sub(D4_sub, resolution=1.2, n_HVG=1000, min_dist=0.2, seed=seed)
# UMAP coloured by batch and by leiden cluster, using the bold+vivid palette.
sc.pl.umap(D4_sub, color=['batch', 'leiden'], palette=bold_and_vivid)

In [None]:
# Marker genes per D4 endothelial subpopulation. The trailing comments carry
# the labels left by the original author (presumably earlier cluster names).
dict_endo_D4 = {
    'EC1D4': ['KCNJ2', 'FN1', 'SEMA3G', 'PELI1'],  # E4D4
    'EC2D4': ['ADAMTS6', 'JAG2', 'EDN1'],  # E2D4
    'EC3AD4': ['RGS5', 'TPM2', 'MT1M', 'ABCC9'],  # E1D4
    'EC3BD4': ['RCAN1', 'RGS16', 'CCND2'],  # E5D4
    'EC4D4': ['ATF3', 'TNFSF10', 'RND1', 'C2CD4B'],  # E3D4
    'EC5D4': ['CSF3', 'SOD2', 'IFIT1', 'HMOX1'],  # E0D4
}

# Label the cells with the marker sets; the result is read back from
# obs['cell_subcats'] just below.
assign_cats(
    D4_sub,
    dict_cats=dict_endo_D4,
    key_added='cell_subcats',
    min_score=0.5,
    quantile_gene_sel=0.99,
)

# Fix one colour per subpopulation so plots are consistent across days.
# NOTE(review): assumes every label present is a key of dict_pops_colors and
# that the sorted label order matches the category order scanpy uses — confirm.
D4_sub.uns['cell_subcats_colors'] = [
    dict_pops_colors[subcat]
    for subcat in sorted(set(D4_sub.obs['cell_subcats']))
]

sc.pl.umap(D4_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Rank genes for the 'EC3BD4' subpopulation against all remaining cells
# (Wilcoxon test). Groups come from the assigned subcategories here, not from
# the leiden clusters.
sc.tl.rank_genes_groups(D4_sub, groupby='cell_subcats', groups=['EC3BD4'], reference='rest', method='wilcoxon')
# Tracks plot of the top 50 ranked genes, without a dendrogram; expression
# values are taken from the main matrix rather than from .raw (use_raw=False).
sc.pl.rank_genes_groups_tracksplot(D4_sub, dendrogram=False, n_genes=50, use_raw=False)