# Transcriptomic analysis of perivascular cells

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
from anndata import AnnData
import scanpy.external as sce
import matplotlib as mpl
import seaborn as sns
import scipy
from tqdm.notebook import tqdm

import scvelo as scv

from scripts.batch_process import batch_process, preprocess_adata_sub
from cellassign import assign_cats

In [None]:
# Random seed passed to the preprocessing helper calls in this notebook.
seed = 10

# Figure resolution: DPI for inline display and DPI for saved figures.
sc.set_figure_params(
    dpi=200,
    dpi_save=300,
)

In [None]:
from scipy.stats import spearmanr

In [None]:
# Palettes for cluster coloring and for continuous scatter values.

# Continuous colormap: magma sampled at 80 evenly spaced points, keeping only
# the first 65 samples and replacing the very first one with light grey so
# that the lowest values are drawn in grey instead of black.
magma = mpl.colors.LinearSegmentedColormap.from_list(
    "",
    [(0.88, 0.88, 0.88, 1)]
    + [plt.get_cmap('magma')(frac) for frac in np.linspace(0, 1, 80)[1:65]],
)

# Discrete palettes (carto colors), 11 colors each.
bold = [
    '#7F3C8D', '#11A579', '#3969AC', '#F2B701', '#E73F74', '#80BA5A',
    '#E68310', '#008695', '#CF1C90', '#f97b72', '#4b4b8f',
]
vivid = [
    '#E58606', '#5D69B1', '#52BCA3', '#99C945', '#CC61B0', '#24796C',
    '#DAA51B', '#2F8AC4', '#764E9F', '#ED645A', '#CC3A8E',
]
safe = [
    '#88CCEE', '#CC6677', '#DDCC77', '#117733', '#332288', '#AA4499',
    '#44AA99', '#999933', '#882255', '#661100', '#6699CC',
]

# Combined 22-color discrete palette: BOLD followed by VIVID.
bold_and_vivid = bold + vivid

# PRISM, reordered so the even-indexed colors come first and the odd-indexed
# colors follow.
prism = [
    '#5F4690', '#1D6996', '#38A6A5', '#0F8554', '#73AF48', '#EDAD08',
    '#E17C05', '#CC503E', '#94346E', '#6F4070', '#994E95',
]
prism = [*prism[0::2], *prism[1::2]]

# Diverging palette.
temps = [
    '#009392', '#39b185', '#9ccb86', '#e9e29c', '#eeb479', '#e88471',
    '#cf597e',
]

# Continuous palette.
teal = [
    '#d1eeea', '#a8dbd9', '#85c4c9', '#68abb8', '#4f90a6', '#3b738f',
    '#2a5674',
]

# AnnData loading

In [None]:
# Directory holding the preprocessed .h5ad files, relative to the current
# working directory. The value ends with a trailing '/'.
dir_adata_save = f"{os.getcwd()}/data/preprocessed/"

In [None]:
# Load the 'A+M_0.h5ad' dataset. os.path.join is used so the path does not
# get a doubled '/' when the directory string already ends with a separator.
adata_D0 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_0.h5ad'))

In [None]:
# Bare expression: as the last statement of a notebook cell it shows the
# object's summary representation.
adata_D0

In [None]:
# UMAP overview of the day-0 data: one panel colored by batch, one by the
# annotated cell type, side by side.
sc.pl.umap(
    adata_D0,
    color=['batch', 'cell_types'],
    ncols=2,
    palette=bold_and_vivid,
)

In [None]:
# Load the 'A+M_2.h5ad' dataset. os.path.join is used so the path does not
# get a doubled '/' when the directory string already ends with a separator.
adata_D2 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_2.h5ad'))

In [None]:
# UMAP overview of the day-2 data: one panel colored by batch, one by the
# annotated cell type, side by side.
sc.pl.umap(
    adata_D2,
    color=['batch', 'cell_types'],
    ncols=2,
    palette=bold_and_vivid,
)

In [None]:
# Load the 'A+M_4.h5ad' dataset. os.path.join is used so the path does not
# get a doubled '/' when the directory string already ends with a separator.
adata_D4 = sc.read_h5ad(os.path.join(dir_adata_save, 'A+M_4.h5ad'))

In [None]:
# UMAP overview of the day-4 data: one panel colored by batch, one by the
# annotated cell type, side by side.
sc.pl.umap(
    adata_D4,
    color=['batch', 'cell_types'],
    ncols=2,
    palette=bold_and_vivid,
)

In [None]:
# Normalize the cell names of all three datasets: every '-1-' inside a name
# is collapsed to a single '-'.
# NOTE(review): presumably the '-1' is a barcode suffix left over from
# concatenating samples — confirm against the upstream preprocessing.
for adata in (adata_D0, adata_D2, adata_D4):
    adata.obs_names = [
        cell_name.replace('-1-', '-') for cell_name in adata.obs_names
    ]

# Discovering cell subtypes

In [None]:
# Cell type to isolate from each time point.
cell_type = 'Perivascular'

# Boolean indexing on an AnnData returns a view of the parent object. The
# subsets are modified below (and by later preprocessing), so take explicit
# copies instead of relying on anndata's implicit copy-on-write, which emits
# an ImplicitModificationWarning.
D0_sub = adata_D0[adata_D0.obs['cell_types'] == cell_type].copy()
D2_sub = adata_D2[adata_D2.obs['cell_types'] == cell_type].copy()
D4_sub = adata_D4[adata_D4.obs['cell_types'] == cell_type].copy()

for adata in [D0_sub, D2_sub, D4_sub]:
    # The donor label is the first character of each cell's batch label.
    adata.obs['donor'] = [i[0] for i in adata.obs['batch']]

## Preprocessing of day 0

In [None]:
# Run the project preprocessing helper on the day-0 perivascular subset.
# NOTE(review): the helper is defined in scripts.batch_process; it presumably
# computes the UMAP and the 'leiden' clustering plotted below — confirm there.
preprocess_adata_sub(
    D0_sub,
    n_HVG=700,
    resolution=0.7,
    min_dist=0.5,
    seed=seed,
)

# Inspect the result colored by batch and by Leiden cluster.
sc.pl.umap(
    D0_sub,
    color=['batch', 'leiden'],
    palette=bold_and_vivid,
)

In [None]:
# Marker genes defining each day-0 perivascular subtype. The trailing
# comments are the original author's labels (presumably earlier cluster
# names — confirm with the author).
dict_peri_D0 = {'PC1D0': ['PCP4', 'CNN1', 'DES'],   # P5D0
                'PC2D0': ['RERGL', 'BCAM', 'ADIRF'],   # P3D0
                'PC3D0': ['CYCS', 'CREM', 'HSPD1'],   # P0D0
                # Gene symbol corrected from the misspelled 'KCJN8' to 'KCNJ8'.
                'PC4D0': ['RGS5', 'POSTN', 'KCNJ8'],   # P2D0
                'PC5D0': ['SSTR2', 'FGF7', 'EDNRB'],   # P4D0
                'PC6D0': ['CCL19', 'CXCL12', 'CCL2'],}   # P1D0

# Assign each cell a subtype from the marker dictionary; the labels are
# stored under the 'cell_subcats' key, which the plot below colors by.
assign_cats(D0_sub, dict_cats=dict_peri_D0, key_added='cell_subcats', min_score=0.5, quantile_gene_sel=0.8)

# Compare the Leiden clusters with the assigned subtypes.
sc.pl.umap(D0_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Differential expression of day-0 Leiden cluster '4' against all other cells.
sc.tl.rank_genes_groups(
    D0_sub,
    groupby='leiden',
    groups=['4'],
    reference='rest',
)

# Tracks plot of the 50 top-ranked genes.
sc.pl.rank_genes_groups_tracksplot(
    D0_sub,
    n_genes=50,
    use_raw=False,
    dendrogram=False,
)

## Preprocessing of day 2

In [None]:
# Run the project preprocessing helper on the day-2 perivascular subset.
# NOTE(review): the helper is defined in scripts.batch_process; it presumably
# computes the UMAP and the 'leiden' clustering plotted below — confirm there.
preprocess_adata_sub(
    D2_sub,
    n_HVG=1000,
    resolution=2,
    min_dist=0.2,
    seed=seed,
)

# Inspect the result colored by batch and by Leiden cluster.
sc.pl.umap(
    D2_sub,
    color=['batch', 'leiden'],
    palette=bold_and_vivid,
)

In [None]:
# Differential expression of day-2 Leiden cluster '4' against all other cells.
sc.tl.rank_genes_groups(
    D2_sub,
    groupby='leiden',
    groups=['4'],
    reference='rest',
)

# Tracks plot of the 40 top-ranked genes.
sc.pl.rank_genes_groups_tracksplot(
    D2_sub,
    dendrogram=False,
    use_raw=False,
    n_genes=40,
)

In [None]:
# Marker genes defining each day-2 perivascular subtype. The trailing
# comments are the original author's labels (presumably earlier cluster
# names — confirm with the author).
dict_peri_D2 = dict(
    PC1D2=['PCP4', 'CNN1', 'DES'],  # P4D2
    PC2D2=['RERGL', 'BCAM', 'ADIRF'],  # P1D2
    PC3D2=['CYCS', 'CREM', 'HSPD1'],  # P3D2
    PC4D2=['RGS5', 'ABCC9', 'NCKAP5'],  # P0D2
    PC5D2=['SOD2', 'GJA4', 'SSTR2'],  # Didn't exist
    PC6D2=['CCL19', 'CXCL12', 'CCL2'],  # P2D2
)

# Assign each cell a subtype from the marker dictionary; the labels are
# stored under the 'cell_subcats' key, which the plot below colors by.
assign_cats(
    D2_sub,
    dict_cats=dict_peri_D2,
    key_added='cell_subcats',
    min_score=0.5,
    quantile_gene_sel=0.99,
)

# Compare the Leiden clusters with the assigned subtypes.
sc.pl.umap(D2_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Day-2 UMAP panels: Leiden clusters followed by the expression of selected
# genes, drawn with the custom magma colormap and without using `.raw`.
sc.pl.umap(
    D2_sub,
    color=['leiden', 'RGS5', 'RERGL', 'CCL19', 'CYCS', 'DES', 'SSTR2', 'FGF7'],
    use_raw=False,
    cmap=magma,
)

In [None]:
# Differential expression of day-2 Leiden cluster '5' against all other cells.
sc.tl.rank_genes_groups(
    D2_sub,
    groupby='leiden',
    groups=['5'],
    reference='rest',
)

# Tracks plot of the 50 top-ranked genes.
sc.pl.rank_genes_groups_tracksplot(
    D2_sub,
    n_genes=50,
    use_raw=False,
    dendrogram=False,
)

## Preprocessing of day 4

In [None]:
# Run the project preprocessing helper on the day-4 perivascular subset.
# NOTE(review): the helper is defined in scripts.batch_process; it presumably
# computes the UMAP and the 'leiden' clustering plotted below — confirm there.
preprocess_adata_sub(
    D4_sub,
    n_HVG=1500,
    resolution=1,
    min_dist=0.2,
    seed=seed,
)

# Inspect the result colored by batch and by Leiden cluster.
sc.pl.umap(
    D4_sub,
    color=['batch', 'leiden'],
    palette=bold_and_vivid,
)

In [None]:
# Marker genes defining each day-4 perivascular subtype. The trailing
# comments are the original author's labels (presumably earlier cluster
# names — confirm with the author).
dict_peri_D4 = dict(
    PC1D4=['TPM1', 'DES', 'PCP4'],  # P4D4
    PC2D4=['RERGL', 'MUSTN1', 'SDF4'],  # P1D4
    PC3AD4=['CYCS', 'CREM', 'MT1A'],  # P0D4
    PC3BD4=['IL6', 'INHBA', 'PDK4'],  # P6D4
    PC4D4=['RGS5', 'ABCC9', 'NDUFA4L2'],  # P2D4
    PC5D4=['FGF7', 'SSTR2', 'SGIP1'],  # P5D4
    PC6D4=['CCL19', 'CCL2', 'CCL21'],  # P3D4
)

# Assign each cell a subtype from the marker dictionary; the labels are
# stored under the 'cell_subcats' key, which the plot below colors by.
assign_cats(
    D4_sub,
    dict_cats=dict_peri_D4,
    key_added='cell_subcats',
    min_score=0.5,
    quantile_gene_sel=0.99,
)

# Compare the Leiden clusters with the assigned subtypes.
sc.pl.umap(D4_sub, color=['leiden', 'cell_subcats'])

In [None]:
# Differential expression of the day-4 'PC3BD4' subtype against all other
# cells, grouping by the assigned subtype labels rather than Leiden clusters.
sc.tl.rank_genes_groups(
    D4_sub,
    groupby='cell_subcats',
    groups=['PC3BD4'],
    reference='rest',
)

# Tracks plot of the 50 top-ranked genes.
sc.pl.rank_genes_groups_tracksplot(
    D4_sub,
    n_genes=50,
    use_raw=False,
    dendrogram=False,
)

In [None]:
# Day-4 UMAP panels: Leiden clusters, assigned subtypes, and the expression
# of the three 'PC3BD4' marker genes, without using `.raw`.
sc.pl.umap(
    D4_sub,
    color=['leiden', 'cell_subcats', 'IL6', 'INHBA', 'PDK4'],
    use_raw=False,
    cmap=magma,
)

In [None]:
# Day-4 UMAP panels: Leiden clusters, assigned subtypes, and the expression
# of further selected genes.
# NOTE(review): unlike the other gene-expression UMAPs in this notebook, this
# call does not pass use_raw=False — confirm whether that is intended.
sc.pl.umap(
    D4_sub,
    color=['leiden', 'cell_subcats', 'SOD2', 'GJA4', 'TNFAIP2', 'SSTR2', 'MEDAG'],
    cmap=magma,
)