In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import shutil
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
import ray
import subprocess
from scipy.sparse import csr_matrix
from IPython.display import display, HTML
import mygene as mg

from tqdm import tqdm
# from tqdm.notebook import tqdm

from bokeh.io import show, output_notebook, reset_output

from scipy.sparse import csr_matrix, csc_matrix

# Clear any previous Bokeh output configuration, then direct Bokeh output to the notebook.
reset_output()
output_notebook()

In [None]:
# Custom colormap for expression plots: take 80 evenly spaced samples of 'magma',
# replace the first sample with light grey, and build the map from the first 65 samples only.
magma_samples = list(map(plt.get_cmap('magma'), np.linspace(0, 1, 80)))
magma_samples[0] = (0.88, 0.88, 0.88, 1)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_samples[:65])

# Random seed for the notebook.
seed = 0

In [None]:
# Render every matplotlib figure at 250 dpi.
mpl.rcParams.update({'figure.dpi': 250})

## Create custom db

The database from CellPhoneDB is somewhat limited, so we are going to extend its set of interactions with external sources. To build the custom database we first create a blank database and then add the extra annotations with pandas. Lastly, we regenerate the database with the `cellphonedb database generate` command, passing it the customised gene, protein and interaction files.

In [None]:
# Start from a clean slate: remove any database folder left over from a previous run.
# ignore_errors=True keeps a fresh checkout (no 'dbsample' folder yet) from raising FileNotFoundError.
shutil.rmtree('dbsample', ignore_errors=True)

In [None]:
# Generate the CellPhoneDB database files into ./dbsample (no user-supplied tables yet);
# the cells below read protein_generated.csv, gene_generated.csv and interaction_input.csv from there.
!cellphonedb database generate  --result-path dbsample

In [None]:
# Create from biomart a dataset of mouse info with columns "UniProtKB Gene Name symbol" and "UniProtKB Gene Name ID"

In [None]:
# Tables found in ./dbsample after the generate command above.
df_proteins = pd.read_csv('dbsample/protein_generated.csv')
df_genes = pd.read_csv('dbsample/gene_generated.csv')
df_interactions = pd.read_csv('dbsample/interaction_input.csv')
# Tab-separated BioMart export; must contain a 'UniProtKB Gene Name symbol' column and
# four columns in total (they are renamed positionally in the next cell).
df_biomart = pd.read_csv('data/mart_export.txt', sep='\t')

In [None]:
# Turn gene_generated into gene_input_custom by appending the mouse genes from BioMart.
# Keep one row per gene symbol (the last occurrence wins) and renumber the rows from 0.
df_biomart = df_biomart.drop_duplicates(subset='UniProtKB Gene Name symbol', keep='last', ignore_index=True)
# Shift the index so the new rows continue after the existing gene rows.
df_biomart.index += len(df_genes)
# Positional rename to the column names used by df_genes; the next cell relies on these names.
df_biomart.columns = ['gene_name', 'hgnc_symbol', 'uniprot', 'ensembl']
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_genes = pd.concat([df_genes, df_biomart])

df_genes.to_csv('dbsample/gene_input_custom.csv', index=None)

In [None]:
# Turn protein_generated into protein_input_custom by appending the mouse proteins from BioMart.
# Keep one row per UniProt accession (the last occurrence wins) and renumber the rows from 0.
df_biomart = df_biomart.drop_duplicates(subset='uniprot', keep='last', ignore_index=True)
df_biomart.index += len(df_proteins)
# .copy() makes the two-column selection an independent frame, so the column
# assignments below do not act on a view of the previous df_biomart.
df_biomart = df_biomart[['gene_name', 'uniprot']].copy()
df_biomart.columns = ['protein_name', 'uniprot']
# Protein names are the upper-cased gene name with a '_MOUSE' suffix.
df_biomart['protein_name'] = [str(i).upper() + '_MOUSE' for i in df_biomart['protein_name']]

# Remember where the appended rows start so they can be addressed by position.
n_existing_proteins = len(df_proteins)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_proteins = pd.concat([df_proteins, df_biomart])

# set transmembrane, peripheral and secreted to True for the appended rows
df_proteins.iloc[n_existing_proteins:, [2, 3, 4]] = True

df_proteins.to_csv('dbsample/protein_input_custom.csv', index=None)

In [None]:
# Download mouse and human pairs from http://tcm.zju.edu.cn/celltalkdb/download.php

In [None]:
# Lookup table from protein_name to the 'uniprot' value of the extended protein table.
dict_proteins_uniprot = {
    name: accession
    for name, accession in zip(df_proteins['protein_name'].values, df_proteins['uniprot'].values)
}

In [None]:
# Tab-separated ligand-receptor pair tables (see the CellTalkDB download note above);
# the next cells use their 'ligand_gene_symbol' and 'receptor_gene_symbol' columns.
human_new_pairs_celltalkdb = pd.read_csv('data/human_lr_pair.txt', sep='\t')
mouse_new_pairs_celltalkdb = pd.read_csv('data/mouse_lr_pair.txt', sep='\t')

In [None]:
# Convert the human CellTalkDB pairs to the interaction-table layout and append them.
# .copy() so the renaming and suffixing below act on an independent frame.
human_new_pairs_celltalkdb = human_new_pairs_celltalkdb[['ligand_gene_symbol', 'receptor_gene_symbol']].copy()
human_new_pairs_celltalkdb.columns = ['protein_name_a', 'protein_name_b']
# Protein names in df_proteins carry a species suffix, so add it before the lookup.
human_new_pairs_celltalkdb += '_HUMAN'
# Names without an entry in dict_proteins_uniprot become NaN (np.nan; the np.NaN alias was removed in NumPy 2.0).
human_new_pairs_celltalkdb['partner_a'] = [dict_proteins_uniprot.get(i, np.nan) for i in human_new_pairs_celltalkdb['protein_name_a']]
human_new_pairs_celltalkdb['partner_b'] = [dict_proteins_uniprot.get(i, np.nan) for i in human_new_pairs_celltalkdb['protein_name_b']]
# Keep only pairs where both partners were resolved, then continue the index after the existing interactions.
human_new_pairs_celltalkdb = human_new_pairs_celltalkdb.dropna().reset_index(drop=True)
human_new_pairs_celltalkdb.index += len(df_interactions)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_interactions = pd.concat([df_interactions, human_new_pairs_celltalkdb])

In [None]:
# Convert the mouse CellTalkDB pairs to the interaction-table layout and append them.
# .copy() so the renaming below acts on an independent frame.
mouse_new_pairs_celltalkdb = mouse_new_pairs_celltalkdb[['ligand_gene_symbol', 'receptor_gene_symbol']].copy()
mouse_new_pairs_celltalkdb.columns = ['protein_name_a', 'protein_name_b']
# Mouse protein names were stored upper-cased with a '_MOUSE' suffix, so build the same key here.
mouse_new_pairs_celltalkdb = mouse_new_pairs_celltalkdb.apply(lambda x: x.astype(str).str.upper()) + '_MOUSE'
# Names without an entry in dict_proteins_uniprot become NaN (np.nan; the np.NaN alias was removed in NumPy 2.0).
mouse_new_pairs_celltalkdb['partner_a'] = [dict_proteins_uniprot.get(i, np.nan) for i in mouse_new_pairs_celltalkdb['protein_name_a']]
mouse_new_pairs_celltalkdb['partner_b'] = [dict_proteins_uniprot.get(i, np.nan) for i in mouse_new_pairs_celltalkdb['protein_name_b']]
# Keep only pairs where both partners were resolved, then continue the index after the existing interactions.
mouse_new_pairs_celltalkdb = mouse_new_pairs_celltalkdb.dropna().reset_index(drop=True)
mouse_new_pairs_celltalkdb.index += len(df_interactions)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_interactions = pd.concat([df_interactions, mouse_new_pairs_celltalkdb])

In [None]:
# Label every interaction (original and appended) as 'curated' and write the custom interaction table.
# NOTE(review): unlike the gene and protein files, this one is written with its index column - confirm that is intended.
df_interactions = df_interactions.assign(annotation_strategy='curated')
df_interactions.to_csv('dbsample/interaction_input_custom.csv')

In [None]:
# Regenerate the database in ./dbsample, this time from the custom gene, protein and interaction tables written above.
!cellphonedb database generate  --result-path dbsample  --user-gene dbsample/gene_input_custom.csv --user-protein dbsample/protein_input_custom.csv --user-interactions dbsample/interaction_input_custom.csv

In [None]:
# Give the generated database a fixed name: take the first entry of ./dbsample whose
# name ends in 'db' and rename it to customdb.db.
list_files = [fname for fname in os.listdir('dbsample') if fname.endswith('db')]
os.rename(f'dbsample/{list_files[0]}', 'dbsample/customdb.db')

# Check VWF and ANGPT pathways on krano and endo

We are going to select datasets with decent endothelial and krano representation (Giordani, Dell'Orso, and De Micheli).

In [None]:
# Joined dataset and its subset.
# NOTE(review): adata_joined_sub is not used in the cells visible here - confirm it is still needed.
adata_joined = sc.read('data/processed/joined.h5ad')
adata_joined_sub = sc.read('data/processed/joined_sub.h5ad')

In [None]:
# UMAP of the joined data coloured by dataset, kranocyte annotation and the genes of interest.
sc.pl.umap(
    adata_joined,
    color=['dataset', 'krano_type', 'Cldn5', 'Pecam1', 'Vwf', 'Angpt2'],
    cmap=magma,
    ncols=3,
)

In [None]:
# Load the Giordani subset and cluster it with Leiden.
# The cluster labels used further down ('9', '15', '19', '20') come from this clustering, so the
# random state is written out explicitly (seed is 0, which is also the scanpy default).
adata_giordani_sub = sc.read('data/processed/giordani_sub.h5')
sc.tl.leiden(adata_giordani_sub, resolution=1.2, random_state=seed)

In [None]:
# Load the Dell'Orso subset and cluster it with Leiden, using the same settings as for Giordani
# (seed is 0, which is also the scanpy default random state).
adata_dellorso_sub = sc.read('data/processed/dellorso_sub.h5')
sc.tl.leiden(adata_dellorso_sub, resolution=1.2, random_state=seed)

## DEG analysis on VWF$^+$/SELP$^+$ endothelial cells

We are going to focus on VWF$^+$SELP$^+$ endothelial cells. These two molecules are stored within Weibel-Palade bodies, and are therefore the ones that might be interacting with the kranocyte. Although the De Micheli dataset shows positive expression of Vwf, it does not express Selp, whereas the Giordani and Dell'Orso datasets do. To be safe, we will run the analysis on these two datasets only.

### Giordani

In [None]:
# Overview of the Giordani subset: annotations, Leiden clusters and the two marker genes.
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'leiden', 'krano_type', 'Vwf', 'Selp'],
    cmap=magma,
    ncols=2,
    legend_loc='on data',
)

#### Ligand - receptor pairs 

In [None]:
# Scratch cluster table: keep the Leiden labels 9, 20, 19 and 15 and overwrite every other row with 0.
df = adata_giordani_sub.obs['leiden'].to_frame().astype(str)
is_cluster_of_interest = df['leiden'].isin(['9', '20', '19', '15'])
df.loc[~is_cluster_of_interest] = 0

In [None]:
# Write the scratch cluster table as a tab-separated file ("prueba" is Spanish for "test").
# NOTE(review): no visible cell reads this file back - confirm it is still needed.
df.to_csv('results/cpdb/giordani_prueba.txt', sep='\t')

In [None]:
# Translate the gene names of the Giordani subset into the identifiers used by the database.
# Keys come from the third column of df_genes and values from the fourth
# (presumably hgnc_symbol -> ensembl; verify against the gene table's column order).
dict_human_symbol_to_gene = dict(zip(df_genes.iloc[:, 2], df_genes.iloc[:, 3]))

# Genes missing from the table get a 'gene_<name>' placeholder. dict.get replaces the
# former bare `except:`, which would also have hidden unrelated errors.
list_genes = [dict_human_symbol_to_gene.get(i, f'gene_{i}') for i in adata_giordani_sub.var_names]

In [None]:
# Counts file for CellPhoneDB: transpose the expression table and label its rows
# with the translated gene identifiers from list_genes.
df_counts = adata_giordani_sub.to_df().T
df_counts.index = list_genes
df_counts.to_csv('results/cpdb/giordani_counts.txt', sep='\t')

In [None]:
# Meta file for CellPhoneDB: one Leiden label per cell, with every cluster other than
# 9, 20, 19 and 15 collapsed into a single '0' group. Written without a header row.
df_leiden = adata_giordani_sub.obs['leiden'].astype(str).to_frame()
other_cells = ~df_leiden['leiden'].isin(['9', '20', '19', '15'])
df_leiden.loc[other_cells, 'leiden'] = '0'
df_leiden.to_csv('results/cpdb/giordani_meta.txt', sep='\t', header=None)

In [None]:
# p-value cut-off: passed to cellphonedb via --pvalue and reused below to filter pvalues.txt.
p_threshold = 0.001

In [None]:
# Run the CellPhoneDB statistical analysis on the Giordani meta/counts files against the custom database.
!cellphonedb method statistical_analysis results/cpdb/giordani_meta.txt results/cpdb/giordani_counts.txt --pvalue {p_threshold} --threads 32 --iterations 500 --database dbsample/customdb.db --output-path results/cpdb/out_giordani

In [None]:
# p-values written by the analysis above; the cells below use its 'interacting_pair'
# column and the '<cluster>|<cluster>' columns.
df_results = pd.read_csv('results/cpdb/out_giordani/pvalues.txt', sep='\t')

In [None]:
# Interacting pairs with p < p_threshold between clusters 19 and 9 (either column order),
# printed as quoted, comma-separated names so they can be pasted into a colour list.
significant_9_19 = df_results[(df_results['19|9'] < p_threshold) | (df_results['9|19'] < p_threshold)]
quoted_pairs_9_19 = ["'" + "', '".join(pair.split('_')) + "'" for pair in significant_9_19['interacting_pair']]
print(',      '.join(sorted(quoted_pairs_9_19)))

In [None]:
# CellTalkDB pairs (9|19) (KRANO A)
# Consecutive gene names form one interacting pair, in the same layout that the preceding cell
# prints (presumably pasted from its output and adapted to mouse gene capitalisation).
# With ncols=4, every row after the first shows two pairs side by side.
sc.pl.umap(adata_giordani_sub, color=['cell_type','krano_type',
                                      'Vwf', 'Selp',
                                      'Ackr1', 'Ccl2',      'Ackr1', 'Cxcl1',      'Ackr1', 'Cxcl2',      'Ackr1', 'Cxcl5',
                                      'Adam10', 'Axl',      'Adam10', 'Lrp1',      'Adam10', 'Tspan5',      'Adam9', 'Itga6',
                                      'Adam9', 'Itgav',      'Anxa2', 'Cd44',      'Apoe', 'Sdc4',      'Bsg', 'Sele',
                                      'Cd36', 'Thbs1',      'Cd44', 'Bgn',      'Cd44', 'Bsg',      'Cd44', 'Mif',
                                      'Cd47', 'Thbs1',      'Cd63', 'Timp1',      'Cd81', 'Gpc3',      'Cd9', 'Hbegf',
                                      'Cd93', 'Col1a1',      'Col18a1', 'Gpc4',      'Col1a2', 'Cd44',      'Col1a2', 'Cd93',
                                      'Cxcl10', 'Sdc4',      'Flt1', 'Pgf',      'Flt1', 'Vegfa',      'Hbegf', 'Egfr',
                                      'Hspg2', 'Itgb1',      'Il6', 'Egfr',      'Il6', 'Il6st',      'Insr', 'Igf1',
                                      'Itga5', 'Cxcl12',      'Itga5', 'Thbs1',      'Itga6', 'Lamb1',      'Itgav', 'Adam10',
                                      'Itgav', 'Adam15',      'Itgav', 'Vwf',      'Itgb1', 'Col1a1',      'Itgb1', 'Col1a2',
                                      'Itgb1', 'Cxcl12',      'Itgb1', 'Lamb1',      'Itgb1', 'Mdk',      'Itgb1', 'Mmp2',
                                      'Itgb1', 'Thbs1',      'Lama2', 'Itgb1',      'Lama2', 'Itgb4',      'Ldlr', 'Apoe',
                                      'Lif', 'Il6st',      'Lifr', 'Lif',      'Lrp1', 'Apoe',      'Lrp1', 'Serpine1',
                                      'Lrp1', 'Thbs1',      'Lrp1', 'Vwf',      'Mfge8', 'Itgav',      'Mmp14', 'Flt1',
                                      'Ntn1', 'Adora2b',      'Pdgfra', 'Pdgfa',      'Pgf', 'Nrp2',      'Plau', 'Igf2r',
                                      'Plau', 'Lrp1',      'Plaur', 'Plau',      'Plaur', 'Serpine1',      'Plaur', 'Srpx2',
                                      'Plaur', 'Vtn',      'Pros1', 'Axl',      'Ptprb', 'Vegfa',      'Sema3b', 'Nrp2',
                                      'Serpine2', 'Plaur',      'Spp1', 'Cd44',      'Spp1', 'Itgb1',      'Tgm2', 'Sdc4',
                                      'Vcam1', 'Itgb1',      'Vcan', 'Selp',      'Vcan', 'Tlr2',      'Vegfa', 'Npr1',
                                      'Vegfa', 'Nrp2',
                                      # The closing bracket and keyword arguments used to sit behind a '#' comment,
                                      # which left this call unterminated (SyntaxError).
                                      ], cmap=magma, ncols=4, legend_loc='on data')

In [None]:
# Interacting pairs with p < p_threshold between clusters 19 and 20 (either column order),
# printed as quoted, comma-separated names so they can be pasted into a colour list.
significant_19_20 = df_results[(df_results['19|20'] < p_threshold) | (df_results['20|19'] < p_threshold)]
quoted_pairs_19_20 = ["'" + "', '".join(pair.split('_')) + "'" for pair in significant_19_20['interacting_pair']]
print(',      '.join(sorted(quoted_pairs_19_20)))

In [None]:
# CellphoneDB pairs (20|19) (KRANO A)
# Consecutive gene names form one interacting pair (same layout as the output of the preceding cell).
lr_genes_20_19 = [
    'Ackr1', 'Ccl2',      'Ackr1', 'Cxcl1',      'Ackr1', 'Cxcl2',      'Ackr1', 'Cxcl5',      'Adam10', 'Axl',      'Adam10', 'Lrp1',      'Adam10', 'Tspan5',      'Anxa2', 'Cd44',      'Apoe', 'Sdc4',      'App', 'Cd47',      'App', 'Cd74',      'App', 'Lrp1',      'App', 'Lrp4',      'App', 'Notch2',      'Cav1', 'Bsg',      'Cav1', 'Igfbp5',      'Cd36', 'Thbs1',      'Cd44', 'Bgn',      'Cd44', 'Bsg',      'Cd44', 'Mif',      'Cd47', 'Thbs1',      'Cd63', 'Timp1',      'Cd81', 'Gpc3',      'Cd93', 'Col1a1',      'Col1a2', 'Cd93',      'Dll1', 'Notch2',      'Egfl7', 'Notch2',      'Ephb4', 'Efnb2',      'F3', 'Tfpi',      'Gas6', 'Axl',      'Hras', 'Cav1',      'Hspg2', 'Col13a1',      'Hspg2', 'Itgb1',      'Il6', 'Il6st',      'Insr', 'Igf1',      'Itga5', 'Ccn2',      'Itga5', 'Cxcl12',      'Itga6', 'Lamb1',      'Itgav', 'Adam10',      'Itgav', 'Adam15',      'Itgav', 'Egfl7',      'Itgav', 'Hmgb1',      'Itgav', 'Vwf',      'Itgb1', 'Col1a2',      'Itgb1', 'Cxcl12',      'Itgb1', 'Mdk',      'Itgb1', 'Mmp2',      'Itgb1', 'Thbs1',      'Lama2', 'Itgb1',      'Lama2', 'Itgb4',      'Ldlr', 'Apoe',      'Lif', 'Il6st',      'Lifr', 'Lif',      'Lrp1', 'Apoe',      'Lrp1', 'Ctsd',      'Lrp1', 'Psap',      'Lrp1', 'Vwf',      'Mcam', 'Ntn1',      'Mfge8', 'Itgav',      'Ntn1', 'Adora2b',      'Ntrk2', 'Ntf3',      'Pdgfb', 'Lrp1',      'Pgf', 'Nrp2',      'Psen2', 'Notch2',      'Ptprb', 'Vegfa',      'Scarb2', 'Thbs1',      'Sdc1', 'Thbs1',      'Sema3b', 'Nrp2',      'Spp1', 'Itgb1',      'Tgm2', 'Sdc4',      'Tnfrsf1a', 'Grn',      'Vcan', 'Selp',      'Vegfa', 'Npr1',      'Vegfa', 'Nrp2',      'Vim', 'Cd44'
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + lr_genes_20_19,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Interacting pairs with p < p_threshold between clusters 9 and 15 (either column order),
# printed as quoted, comma-separated names so they can be pasted into a colour list.
significant_9_15 = df_results[(df_results['9|15'] < p_threshold) | (df_results['15|9'] < p_threshold)]
quoted_pairs_9_15 = ["'" + "', '".join(pair.split('_')) + "'" for pair in significant_9_15['interacting_pair']]
print(',      '.join(sorted(quoted_pairs_9_15)))

In [None]:
# CellphoneDB pairs (9|15) (KRANO B)
# Consecutive gene names form one interacting pair (same layout as the output of the preceding cell).
lr_genes_9_15 = [
    'Abca1', 'Pltp',      'Ackr1', 'Ccl2',      'Ackr1', 'Cxcl1',      'Ackr1', 'Cxcl2',      'Adam10', 'Axl',      'Adam10', 'Lrp1',      'Adam10', 'Tspan17',      'Adam10', 'Tspan5',      'Adam9', 'Itga6',      'Adam9', 'Itgav',      'Adam9', 'Itgb5',      'Anxa2', 'Cd44',      'Apoe', 'Abca1',      'Apoe', 'Sdc4',      'App', 'Lrp1',      'App', 'Lrp4',      'App', 'Notch2',      'Bcam', 'Lama5',      'Bsg', 'Sele',      'Calr', 'Lrp1',      'Cav1', 'Igfbp5',      'Cd14', 'Cd55',      'Cd36', 'Thbs1',      'Cd44', 'Bgn',      'Cd44', 'Bsg',      'Cd47', 'Thbs1',      'Cd63', 'Timp1',      'Cd74', 'Mif',      'Cd9', 'Hbegf',      'Col1a2', 'Cd93',      'Col3a1', 'Itgb1',      'Cxcl10', 'Sdc4',      'Dll1', 'Notch2',      'Eng', 'Tgfb3',      'Fbln1', 'Itgb1',      'Gas6', 'Axl',      'Hbegf', 'Egfr',      'Hmgb1', 'Ly96',      'Hmgb1', 'Thbd',      'Hmgb1', 'Tlr2',      'Hras', 'Tlr2',      'Hsp90b1', 'Tlr2',      'Il6', 'Egfr',      'Il6', 'Il6st',      'Insr', 'Igf1',      'Itga5', 'Cxcl12',      'Itga5', 'Postn',      'Itga5', 'Thbs1',      'Itga6', 'Lamb1',      'Itgav', 'Adam10',      'Itgav', 'Adam15',      'Itgav', 'Vwf',      'Itgb1', 'Col1a2',      'Itgb1', 'Itgb1bp1',      'Itgb1', 'Lama4',      'Itgb1', 'Mdk',      'Itgb1', 'Mmp2',      'Itgb1', 'Postn',      'Itgb1', 'Thbs1',      'Itgb4', 'Lama5',      'Itgb5', 'Col18a1',      'Lama2', 'Itgb1',      'Lama2', 'Itgb4',      'Ldlr', 'Apoe',      'Lgals3', 'Itgb1',      'Lgals3', 'Itgb4',      'Lrp1', 'Apoe',      'Lrp1', 'Ctsd',      'Lrp1', 'Plat',      'Lrp1', 'Serpine1',      'Lrp1', 'Thbs1',      'Lrp1', 'Timp1',      'Lrp1', 'Vwf',      'Lrp6', 'Apoe',      'Lrp6', 'App',      'Ly96', 'S100a8',      'Mfge8', 'Itgav',      'Mmp14', 'Flt1',      'Notch2', 'Jag1',      'Pdgfra', 'Pdgfa',      'Pdgfrb', 'Pdgfa',      'Plau', 'Lrp1',      'Plaur', 'Plau',      'Plaur', 'Serpine1',      'Plaur', 'Vtn',      'Pros1', 'Axl',      'Psen2', 'Notch2',      'Ptprb', 'Ptn',
    'Sema3c', 'Nrp2',      'Sema3c', 'Plxnd1',      'Serpine2', 'Plaur',      'Spp1', 'Cd44',      'Spp1', 'Itgb1',      'Tek', 'Angptl1',      'Tgfb1', 'Eng',      'Tgfbr3', 'Tgfb3',      'Tgm2', 'Sdc4',      'Thy1', 'Fyn',      'Tlr2', 'Bgn',      'Tnfrsf1a', 'Grn',      'Tnfrsf1b', 'Grn',      'Traf1', 'Tnfsf9',      'Vcam1', 'Itgb1',      'Vcan', 'Selp',      'Vcan', 'Tlr2',      'Vim', 'Cd44'
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + lr_genes_9_15,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Interacting pairs with p < p_threshold between clusters 20 and 15 (either column order),
# printed as quoted, comma-separated names so they can be pasted into a colour list.
significant_20_15 = df_results[(df_results['20|15'] < p_threshold) | (df_results['15|20'] < p_threshold)]
quoted_pairs_20_15 = ["'" + "', '".join(pair.split('_')) + "'" for pair in significant_20_15['interacting_pair']]
print(',      '.join(sorted(quoted_pairs_20_15)))

In [None]:
# Pairs whose p-value exceeds 0.3 in at least one of the two 15/20 columns.
above_03_15_20 = (df_results['15|20'] > 0.3) | (df_results['20|15'] > 0.3)
df_results.loc[above_03_15_20, 'interacting_pair']

In [None]:
# CellphoneDB pairs (20|15) (KRANO B)
# Consecutive gene names form one interacting pair (same layout as the output of the print cell above).
lr_genes_20_15 = [
    'Abca1', 'Pltp',      'Ackr1', 'Ccl2',      'Ackr1', 'Cxcl1',      'Ackr1', 'Cxcl2',      'Adam10', 'Axl',      'Adam10', 'Lrp1',      'Adam10', 'Tspan17',      'Adam10', 'Tspan5',      'Adam9', 'Itga6',      'Apoe', 'Abca1',      'Apoe', 'Sdc4',      'App', 'Cd47',      'App', 'Cd74',      'App', 'Lrp1',      'App', 'Lrp4',      'App', 'Notch2',      'Cav1', 'Bsg',      'Cav1', 'Igfbp5',      'Cd14', 'Cd55',      'Cd36', 'Thbs1',      'Cd44', 'Bgn',      'Cd44', 'Bsg',      'Cd47', 'Thbs1',      'Cd63', 'Timp1',      'Cd81', 'Gpc3',      'Cd9', 'Hbegf',      'Cd93', 'Col1a1',      'Col1a2', 'Cd93',      'Col3a1', 'Itgb1',      'Col4a2', 'Cd93',      'Dll1', 'Notch2',      'Egfl7', 'Notch2',      'Eng', 'Tgfb3',      'F3', 'Tfpi',      'Fbln1', 'Itgb1',      'Gas6', 'Axl',      'Hras', 'Cav1',      'Hspg2', 'Col13a1',      'Il6', 'Il6st',      'Insr', 'Igf1',      'Itga6', 'Lamb1',      'Itgav', 'Adam10',      'Itgav', 'Adam15',      'Itgav', 'Egfl7',      'Itgav', 'Hmgb1',      'Itgav', 'Vwf',      'Itgb1', 'Adam15',      'Itgb1', 'Mdk',      'Itgb1', 'Postn',      'Itgb1', 'Thbs1',      'Jam2', 'Itgb1',      'Lama2', 'Itgb1',      'Lama2', 'Itgb4',      'Ldlr', 'Apoe',      'Lgals3', 'Itgb1',      'Lgals3', 'Itgb4',      'Lrp1', 'Apoe',      'Lrp1', 'Ccn2',      'Lrp1', 'Ctsd',      'Lrp1', 'Hsp90b1',      'Lrp1', 'Psap',      'Lrp1', 'Vwf',      'Lrp6', 'Apoe',      'Lrp6', 'App',      'Mcam', 'Ntn1',      'Mcfd2', 'Lman1',      'Mfge8', 'Itgav',      'Mfge8', 'Itgb1',      'Mfge8', 'Itgb5',      'Mfge8', 'Pdgfrb',      'Mmp14', 'Flt1',      'Notch2', 'Jag1',      'Ntrk2', 'Ntf3',      'Pdgfb', 'Lrp1',      'Pdgfb', 'Pdgfrb',      'Pdgfra', 'Pdgfb',      'Plau', 'Lrp1',      'Psen2', 'Notch2',      'Ptprb', 'Ptn',      'Scarb2', 'Thbs1',      'Sdc1', 'Ptn',      'Sdc1', 'Thbs1',      'Sdc2', 'Ptn',      'Sema3c', 'Nrp2',      'Sema3c', 'Plxnd1',      'Spp1', 'Itgb1',      'Tek', 'Angptl1',      'Tgfb1', 'Eng',      'Tgm2',
    'Sdc4',      'Tnfrsf1a', 'Grn',      'Vcam1', 'Itgb1',      'Vcan', 'Selp',      'Vim', 'Cd44'
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type', 'Vwf', 'Selp'] + lr_genes_20_15,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Pairs whose p-value exceeds 0.3 in at least one of the two 15/19 columns.
above_03_15_19 = (df_results['15|19'] > 0.3) | (df_results['19|15'] > 0.3)
df_results.loc[above_03_15_19, 'interacting_pair']

#### VWF+ population differences

In [None]:
# Differentially expressed genes for Leiden clusters 9 and 20 (the VWF populations, per the section heading).
sc.tl.rank_genes_groups(
    adata_giordani_sub,
    groupby='leiden',
    groups=['9', '20'],
)

In [None]:
# First 200 ranked genes for cluster 9.
ranked_9_first_200 = list(adata_giordani_sub.uns['rank_genes_groups']['names']['9'][:200])
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + ranked_9_first_200,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# First 200 ranked genes for cluster 20.
ranked_20_first_200 = list(adata_giordani_sub.uns['rank_genes_groups']['names']['20'][:200])
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + ranked_20_first_200,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Differentially expressed genes for Leiden clusters 9 and 20, each compared against the rest of the cells.
sc.tl.rank_genes_groups(
    adata_giordani_sub,
    groupby='leiden',
    groups=['9', '20'],
    reference='rest',
)

In [None]:
# Ranked genes 200-399 for cluster 9.
ranked_9_next_200 = list(adata_giordani_sub.uns['rank_genes_groups']['names']['9'][200:400])
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + ranked_9_next_200,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Ranked genes 200-399 for cluster 20.
ranked_20_next_200 = list(adata_giordani_sub.uns['rank_genes_groups']['names']['20'][200:400])
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + ranked_20_next_200,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Hand-picked DEGs overexpressed in the large VWF population; the list is reused for the Dell'Orso plots.
large_VWF_genes = ['Adamts9', 'Adgrl2', 'Arid5b', 'Bcr', 'Cemip2', 'Ch25h', 'Chd7', 'Cpne8', 'Crim1', 'Csf2rb2', 'Csf3', 'Cxcl10', 'Cxcl16', 'Ehd3', 'Emp1', 'Entpd1', 'Erg', 'Etv6', 'Flnb', 'Frmd4a', 'Gabpb1', 'Gcnt2', 'Git1', 'Il13ra1', 'Il1rl1', 'Il6', 'Insr', 'Itprid2', 'Jak2', 'Lcn2', 'Man1a', 'Mapkapk2', 'Nav2', 'Ndst1', 'Nt5e', 'Odc1', 'Olr1', 'Plaur', 'Plscr1', 'Ptpn14', 'Ptpre', 'Rab8b', 'Ralgds', 'Rcan1', 'Rfk', 'Rnf213', 'Sele', 'Selp', 'Sema3f', 'Sema6a', 'Shb', 'Slco2a1', 'Slfn2', 'St3gal4', 'Stom', 'Tifa', 'Tll1', 'Tm4sf1', 'Tmem51', 'Tnfrsf11a', 'Tnip1', 'Traf1', 'Trim30a', 'Tspan5', 'Upp1', 'Zfp46', 'Zfp703',]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + large_VWF_genes,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Hand-picked DEGs overexpressed in the small VWF population; the list is reused for the Dell'Orso plots.
small_VWF_genes = ['Amigo2', 'Ctnnal1', 'Ctsh', 'Ehd4', 'Eln', 'Lhx6', 'Nuak1', 'Plvap', 'Prpf40b', 'Prss23', 'Rasa4', 'Sorbs2', 'Tmem176b', 'Vamp5', ]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + small_VWF_genes,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Hand-picked DEGs with similar expression in both VWF populations; the list is reused for the Dell'Orso plots.
similar_VWF_genes = ['Acer3', 'Ackr1', 'Clu', 'Cpe', 'Ctnnal1', 'Fam174b', 'Il6st', 'Lifr', 'Lmo2', 'Lrg1', 'Nr2f2', 'Nt5e', 'Pdia5', 'Rbp1', 'Sncg', 'Spint2', 'Tagln', 'Vcam1', 'Vwf',]

sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + similar_VWF_genes,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Large-VWF genes in the Dell'Orso subset, skipping genes that are not among its variables.
large_VWF_in_dellorso = [gene for gene in large_VWF_genes if gene in adata_dellorso_sub.var_names]
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type'] + large_VWF_in_dellorso,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Small-VWF genes in the Dell'Orso subset, skipping genes that are not among its variables.
small_VWF_in_dellorso = [gene for gene in small_VWF_genes if gene in adata_dellorso_sub.var_names]
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type'] + small_VWF_in_dellorso,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Genes shared by both VWF populations in the Dell'Orso subset, skipping genes that are not among its variables.
similar_VWF_in_dellorso = [gene for gene in similar_VWF_genes if gene in adata_dellorso_sub.var_names]
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type'] + similar_VWF_in_dellorso,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

#### Check GO terms and markers

In [None]:
# "negative angiogenic regulators" (GO: 0016525)
go_0016525_genes = [
    'Adamts1', 'Adgrb1', 'Adrb2', 'Ago1', 'Agt', 'Amot', 'Angpt2', 'Angpt4', 'Apoh', 'Atf2',
    'Atp2b4', 'Ccn6', 'Cd160', 'Cd36',
    'Cldn5', 'Cnmd', 'Col4a2', 'Col4a3', 'Creb3l1', 'Cx3cr1', 'Cxcl10', 'Cxcr3', 'Dab2ip',
    'Dcn', 'E2f2', 'Ecscr', 'Efna3', 'Epha2', 'Epn1', 'Epn2', 'Flt1', 'Foxj2', 'Foxo4',
    'Gpr4', 'Gtf2i', 'Hgs', 'Hoxa5', 'Hrg', 'Il17f', 'Klf2', 'Klf4', 'Krit1', 'Lif', 'Mecp2',
    'Naxe', 'Nf1', 'Ngfr', 'Optc', 'Pde3b', 'Pgk1', 'Pik3cb',  'Pml',
    'Pparg', 'Prl7d1', 'Ptn', 'Ptprm', 'Rgcc', 'Rock1', 'Rock2', 'Sars', 'Sema6a', 'Serpine1',
    'Serpinf1', 'Sh2b3', 'Shc1', 'Sparc', 'Spred1', 'Spry2', 'Stab1', 'Stard13', 'Stat1', 'Sulf1',
    'Synj2bp', 'Tafa5', 'Tcf4', 'Tek', 'Tgfb2', 'Thbs1', 'Thbs2', 'Tie1', 'Tnf', 'Tnmd', 'Vash1',
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + go_0016525_genes,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# "blood vessel remodeling genes" (GO: 0001974)
go_0001974_genes = [
    'Adra1b', 'Atg5', 'Atp7a', 'Axl', 'Bak1', 'Bax', 'Cbs', 'Chd7', 'Ednra', 'Eln', 'Epas1', 'Erg', 'Ext1', 'Fgf10', 'Fgf8', 'Foxc2', 'Hoxa3', 'Hrg', 'Igf1', 'Il18', 'Itga4', 'Jag1', 'Lif', 'Mdm2', 'Mef2c', 'Nfatc3', 'Nos2', 'Sema3c', 'Tbx1', 'Tgfb2', 'Tgfbr3', 'Vegfa',
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + go_0001974_genes,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)
                                      

In [None]:
# Markers that, according to Ander, should turn out negative (Giordani subset).
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type', 'Lif', 'Vegfa', 'Fgf2', 'Pdgfb', 'Dll4', 'Kdr', 'Angpt2'],
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Markers that, according to Ander, should turn out negative (Dell'Orso subset).
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type', 'Lif', 'Vegfa', 'Fgf2', 'Pdgfb', 'Dll4', 'Kdr', 'Angpt2'],
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# Markers that, according to Ander, should turn out negative (De Micheli subset).
# The De Micheli subset is loaded here because this is its first use; the cell used to depend
# on a variable that is only defined further down, so it failed on a fresh kernel.
adata_de_micheli_sub = sc.read('data/processed/de_micheli_mouse_d0_sub.h5')
sc.pl.umap(adata_de_micheli_sub, color=['cell_type','krano_type', 'Lif', 'Vegfa', 'Fgf2', 'Pdgfb', 'Dll4', 'Kdr', 'Angpt2']
                                      , cmap=magma, ncols=4, legend_loc='on data')

#### Analysis of Cd82+ population (on all datasets)
It is expressed in satellite cells, T cells, glial cells and **krano B**.

In [None]:
# Oprescu (day 0): Cd82 next to Mpz, Plp1 and Mbp.
adata_oprescu = sc.read('data/processed/oprescu_d0.h5')
sc.pl.umap(
    adata_oprescu,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Oprescu (day 0) subset: Cd82 next to Mpz, Plp1 and Mbp.
adata_oprescu_sub = sc.read('data/processed/oprescu_d0_sub.h5')
sc.pl.umap(
    adata_oprescu_sub,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# De Micheli (mouse, day 0): Cd82 next to Mpz, Plp1 and Mbp.
adata_de_micheli = sc.read('data/processed/de_micheli_mouse_d0.h5')
sc.pl.umap(
    adata_de_micheli,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# De Micheli (mouse, day 0) subset: Cd82 next to Mpz, Plp1 and Mbp.
adata_de_micheli_sub = sc.read('data/processed/de_micheli_mouse_d0_sub.h5')
sc.pl.umap(
    adata_de_micheli_sub,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Dell'Orso: Cd82 next to Mpz, Plp1 and Mbp.
adata_dellorso = sc.read('data/processed/dellorso.h5')
sc.pl.umap(
    adata_dellorso,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Dell'Orso subset: Cd82 next to Mpz, Plp1 and Mbp.
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Giordani: Cd82 next to Mpz, Plp1 and Mbp.
adata_giordani = sc.read('data/processed/giordani.h5')
sc.pl.umap(
    adata_giordani,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Giordani subset: Cd82 next to Mpz, Plp1 and Mbp.
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type', 'Cd82', 'Mpz', 'Plp1', 'Mbp'],
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

#### Analysis of Giordani secondary Cd82+ population [they are satellite cells]

In [None]:
# Genes plotted in the next two cells to inspect the secondary Cd82+ population in Giordani.
giordani_CD82_markers = ['Cd82', 'Asb5', 'Arl4d', 'Myod1', 'Slc39a14', 'Clcf1', 'Cdh15']

In [None]:
# Cd82 population markers on the full Giordani dataset.
sc.pl.umap(
    adata_giordani,
    color=['cell_type', 'krano_type'] + giordani_CD82_markers,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

In [None]:
# Cd82 population markers on the Giordani subset.
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + giordani_CD82_markers,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
)

### Common pairs

In [None]:
# Markers from Starke and Scheppke
starke_scheppke_markers = [
    'Vwf', 'Selp',
    'Itgav', 'Itgb3', 'Jag1',
    'Flt1',  # VEGFR1
    'Kdr',  # VEGFR2
    'Angpt2',
]
sc.pl.umap(
    adata_giordani_sub,
    color=['cell_type', 'krano_type'] + starke_scheppke_markers,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)

In [None]:
# DEGs of the Vwf population.
# 'Ehd3' used to be listed twice on the last row, which drew the same panel twice; it now appears once.
sc.pl.umap(adata_giordani_sub, color=['cell_type','krano_type',
                                      'Selp', 'Lrg1', 'Csf3', 'Ackr1', 'Upp1', 'Spint2', 'Vcam1', 'Plvap', 'Vwf', 'Hs3st1',
                                      'Il1r1', 'Ch25h', 'Stom', 'Sncg', 'Calcrl', 'Slco2a1', 'Sele', 'Csf2rb2', 'Chd7', 'Sema6a', 'Atp8b1',
                                      'Olr1', 'Tll1', 'Tnfrsf11a', 'Zfp46', 'Tagln', 'Hif1a', 'Ptpn14', 'Tspan5', 'Adgrl2', 'Ptgs1', 'Traf1', 'Acer3', 'Rfk',
                                      'Tmem51', 'Entpd1', 'Ehd3', 'Insr', 'Nt5e', 'Csf2rb', 'Lifr', 'Pdia5', 'Ralgds', 'Chp2', 'Pip4k2a', 'Hrh1'
                                      ], cmap=magma, ncols=4, legend_loc='on data')

In [None]:
# Interesting DEGs (Dell'Orso subset)
interesting_degs = ['Cpe', 'Bcam', 'Nr2f2', 'Cp', 'Plscr1']
sc.pl.umap(
    adata_dellorso_sub,
    color=['cell_type', 'krano_type'] + interesting_degs,
    cmap=magma,
    ncols=4,
    legend_loc='on data',
)