In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm.notebook import tqdm
import ray
import subprocess

In [None]:
seed = 10

In [None]:
data_dir = 'data/'
fig_dir = 'figures/'

In [None]:
# Palettes for UMAP gene expression
# Custom colormap derived from matplotlib's 'magma': sample 80 evenly spaced colours,
# replace the first (lowest) one with light grey, and build the map from the first 65 samples only.

magma = [plt.get_cmap('magma')(i) for i in np.linspace(0,1, 80)]
magma[0] = (0.88, 0.88, 0.88, 1)  # lowest value drawn in light grey (presumably so non-expressing cells fade into the background)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma[:65])

# Oprescu 2020

In [None]:
link = 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE138nnn/GSE138826/suppl/GSE138826%5Fexpression%5Fmatrix%2Etxt%2Egz'

In [None]:
!wget {link} -P {data_dir}/oprescu

In [None]:
# Read the GEO expression matrix and transpose it
# (presumably the file is genes x cells while AnnData expects cells x genes -- TODO confirm)
adata_oprescu = sc.read_text(data_dir+'/oprescu/GSE138826_expression_matrix.txt.gz').transpose()

In [None]:
# Cache the parsed matrix as a loom file
adata_oprescu.write_loom(data_dir+'/oprescu/adata_oprescu.loom')

In [None]:
# Reload from the loom cache.
# NOTE(review): the later `.X.todense()` call suggests X comes back sparse from this round trip -- confirm before removing it
adata_oprescu = sc.read_loom(data_dir+'/oprescu/adata_oprescu.loom')

In [None]:
# Batch label = the part of each observation name that precedes the first underscore
adata_oprescu.obs['batch'] = list(adata_oprescu.obs_names.str.split('_').str[0])

In [None]:
# Basic QC filtering
adata_oprescu.var['mt'] = adata_oprescu.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_oprescu, qc_vars=['mt'], percent_top=None, inplace=True)

In [None]:
sc.pl.violin(adata_oprescu, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_oprescu, x='total_counts', y='pct_counts_mt')
sc.pl.scatter(adata_oprescu, x='total_counts', y='n_genes_by_counts', color='batch')

In [None]:
sc.pp.filter_cells(adata_oprescu, min_genes=250)
sc.pp.filter_genes(adata_oprescu, min_cells=1)

In [None]:
# Convert the sparse matrix into a plain dense ndarray
adata_oprescu.X = adata_oprescu.X.toarray()

In [None]:
adata_oprescu_d0 = adata_oprescu[adata_oprescu.obs['batch'] == 'Noninjured'].copy()
adata_oprescu_d05 = adata_oprescu[adata_oprescu.obs['batch'] == 'X0.5.DPI'].copy()
adata_oprescu_d2 = adata_oprescu[adata_oprescu.obs['batch'] == 'X2.DPI'].copy()
adata_oprescu_d35 = adata_oprescu[adata_oprescu.obs['batch'] == 'X3.5.DPI'].copy()
adata_oprescu_d5 = adata_oprescu[adata_oprescu.obs['batch'] == 'X5.DPI'].copy()
adata_oprescu_d10 = adata_oprescu[adata_oprescu.obs['batch'] == 'X10.DPI'].copy()
adata_oprescu_d21 = adata_oprescu[adata_oprescu.obs['batch'] == 'X21.DPI'].copy()

In [None]:
adata_oprescu_d0.X = np.array(adata_oprescu_d0.X).copy()
adata_oprescu_d05.X = np.array(adata_oprescu_d05.X).copy()
adata_oprescu_d2.X = np.array(adata_oprescu_d2.X).copy()
adata_oprescu_d35.X = np.array(adata_oprescu_d35.X).copy()
adata_oprescu_d5.X = np.array(adata_oprescu_d5.X).copy()
adata_oprescu_d10.X = np.array(adata_oprescu_d10.X).copy()
adata_oprescu_d21.X = np.array(adata_oprescu_d21.X).copy()

In [None]:
set(adata_oprescu.obs['batch'].values)

In [None]:
# Drop genes with fewer than 1 count, then log-transform and scale each cell.
# NOTE(review): log1p runs *before* normalize_per_cell here; the usual scanpy order is the reverse
# (normalise, then log1p). Left untouched because it changes every downstream result -- confirm it is intentional.
sc.pp.filter_genes(adata_oprescu, min_counts=1)
sc.pp.log1p(adata_oprescu)
sc.pp.normalize_per_cell(adata_oprescu)

In [None]:
# Run triku, a 30-component PCA and the cosine-distance neighbour graph, all with the shared seed.
# NOTE(review): scanpy documents `knn` as a boolean switch and `n_neighbors` as the neighbour count, so the
# sqrt(n)/2 value passed below is probably not used as the neighbourhood size -- confirm. Only flagged, not changed,
# because switching to `n_neighbors=` would alter the graph and every clustering derived from it.
tk.tl.triku(adata_oprescu, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu, random_state=seed, knn=len(adata_oprescu) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_oprescu, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu, resolution=1, random_state=seed)
sc.pl.umap(adata_oprescu, color=['leiden', 'batch', 'n_counts'], legend_loc='on data')

## Oprescu Day 0

In [None]:
sc.pp.filter_cells(adata_oprescu_d0, min_genes=200)

In [None]:
# Same preprocessing as for the full dataset, applied to the day-0 (non-injured) subset.
# NOTE(review): log1p runs before normalize_per_cell (the usual scanpy order is the reverse) -- confirm it is intentional.
sc.pp.filter_genes(adata_oprescu_d0, min_counts=1)
sc.pp.log1p(adata_oprescu_d0)
sc.pp.normalize_per_cell(adata_oprescu_d0)

In [None]:
# NOTE(review): scanpy documents `knn` as a boolean switch and `n_neighbors` as the neighbour count, so the
# sqrt(n)/2 value is probably not used as the neighbourhood size -- confirm. Not changed here: the Leiden
# cluster ids hard-coded in the annotation cells below depend on the graph exactly as it is built now.
tk.tl.triku(adata_oprescu_d0, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu_d0, random_state=seed, knn=len(adata_oprescu_d0) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_oprescu_d0, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d0, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
adata_oprescu_d0

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0, groupby='leiden', groups=['24'], reference='rest')
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0, dendrogram=False, n_genes=100)

### Subpopulation characterization (based on paper markers)

In the paper they describe the following set of cells:
* APCs: H2-Eb1, H2-Ab1
* M1 MACs: Fabp5, Pf4
* M2 MACs: Cxcl3, Ccl6 (lower in neutrophils and M1)
* EC: Pecam1, Kdr
* FAPs: Ly6a, Pdgfra
* Fibroblasts: Col3a1, Col6a1
* Monocytes: Csf1r, Adgre1 (similar to myeloid)
* MuSC: Pax7, Myod1
* Myeloid: Clec12a, Acp5 (Mixture of APC, M1, M2, Monocytes and ICs)
* Neutrophils: S100a8, S100a9 
* Pericytes: Rgs5, Notch3
* Proliferative ICs: Mki67, Top2a
* Myonuclei: Tnnc2, Myh4
* T-cells: Ccr7
* Tenocytes: Scx, Tnmd

We are going to use these markers to characterize the populations as they do, and then we will "redo" the analysis.

In [None]:
adata_oprescu_d0.obs['cell_type'] = 'Other'

In [None]:
df = adata_oprescu_d0.obs['cell_type'].copy()

In [None]:
# APCS
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'H2-Eb1', 'H2-Ab1'], legend_loc='on data', ncols=3, cmap=magma)
# NOTE(review): cluster 26 is relabelled 'APC / Proliferative ICs' in the "Proliferative ICs" cell further down,
# so this plain 'APC' label does not survive into the final annotation.
df[adata_oprescu_d0.obs['leiden'].isin(['26'])] = 'APC'

In [None]:
# M1
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Fabp5', 'Pf4'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# M2
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Cxcl3', 'Ccl6'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# EC
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Pecam1', 'Kdr'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['19'])] = 'Endothelial'

In [None]:
# FAPs
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Ly6a', 'Pdgfra', 'Lum'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['23', '0', '5', '3', '15', '4', '7', '9', '14', '8', '10', '11', '12', '30', '27'])] = 'FAP'

In [None]:
# Fibros
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Col3a1', 'Col6a1'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['24'])] = 'Fibroblast'

In [None]:
# Monocytes
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Csf1r', 'Adgre1'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['25'])] = 'Monocyte'

In [None]:
# MuSC
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Pax7', 'Myod1'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['32', '16'])] = 'MuSC'

In [None]:
# Myeloid
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Clec12a', 'Acp5'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['21'])] = 'Myeloid'

In [None]:
# Neutrophils
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'S100a8', 'S100a9'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['20', '31'])] = 'Neutrophil'

In [None]:
# Pericytes
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Rgs5', 'Notch3', 'Myl9'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['29'])] = 'Pericyte'

In [None]:
# Proliferative ICs
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Mki67', 'Top2a'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['22', '26'])] = 'APC / Proliferative ICs'

In [None]:
# Myonuclei
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Tnnc2', 'Myh4'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['1', '2', '6', '13', '17', '28'])] = 'Myonuclei'

In [None]:
# T-cells
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Ccr7'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# Tenocytes
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Scx', 'Tnmd', 'Mkx', 'Col12a1'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_oprescu_d0.obs['leiden'].isin(['18'])] = 'Tenocyte'

In [None]:
adata_oprescu_d0.obs['cell_type'] = df

In [None]:
# Fixed palette for the 'cell_type' annotation (11 colours).
# NOTE(review): presumably one colour per category, matched in category order -- confirm the number of
# distinct labels is still 11 whenever the annotation cells above are edited.
adata_oprescu_d0.uns['cell_type_colors'] = ['#87ceeb', '#c70039', '#8b008b', '#663399', '#1e90ff', '#006400', '#4169e1', '#2e8b57', 
                                            '#6495ed', '#8B0000', '#ff7f50']

The analysis of populations is quite poor. Populations 22, 26, 25, 31, 20 and 21 are not entirely well characterized. FAP/Fibro and Myonuclei populations have to be further characterized. We will get DEGs of some of the populations to see if we can further narrow down each of the populations. We will also apply, for the fibroblasts, the Axis/Cluster genes previously obtained to map them to previous populations.

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Dpp4', 'Sfrp2', 'Pi16', 'Wisp2', 'Slpi', 'Col18a1', 'Apcdd1', 'Comp'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['Apoe', 'Ccl2', 'Itm2a', 'Myc', 'Gpc3', 'Sod2', 'Igfbp3'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['Col11a1', 'Postn', 'Crabp1', 'Coch', 'Apod', 'Sfrp4'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# Schwann + Endo + Peri Markers
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Sox10', 'S100b', 'Mpz', 'Ptn', 'Scx', 'Rgs5', 'Myl9', 'Vwf', 'Pecam1'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0, groupby='leiden', groups=['19'], reference='rest', method='wilcoxon')
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0, dendrogram=False, n_genes=100)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden'
                                   ], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Cd34', 'Pdgfrb', 'Ngfr', 'Sox10', 'S100b', 'Mcam', 'Mpz', 'Mbp'
                                   , 'Egr2', 'Pdgfra', 'Rgs5', 'Glb1'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# Cluster overview. The `color` argument had been left empty, which is a SyntaxError;
# default to the Leiden labels so the cell runs.
sc.pl.umap(adata_oprescu_d0, color=['leiden'], 
           legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0, groupby='leiden', groups=['29'], reference='rest')
sc.pl.rank_genes_groups(adata_oprescu_d0)

### Analysis of FAPs

In [None]:
# We will include populations 23, 27 and 30, although they might not be FAPs in reality. Population 29 will be kept out because it
# distorts the UMAP

# .copy() materialises the subset: the next cell modifies it in place, which should not be done on a view of adata_oprescu_d0
adata_oprescu_d0_FAPs = adata_oprescu_d0[adata_oprescu_d0.obs['cell_type'].isin(['FAP'])].copy()

In [None]:
# Remove genes detected in no cell of the FAP subset, then run triku, a 30-component PCA and the cosine neighbour graph.
# NOTE(review): `knn` is documented by scanpy as a boolean switch (the neighbour count is `n_neighbors`) -- confirm.
# The value is also computed from len(adata_oprescu_d0), i.e. the parent object, not from this subset.
sc.pp.filter_genes(adata_oprescu_d0_FAPs, min_cells=1)
tk.tl.triku(adata_oprescu_d0_FAPs, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0_FAPs, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu_d0_FAPs, random_state=seed, knn=len(adata_oprescu_d0) ** 0.5 // 2, metric='cosine')

In [None]:
# del adata_oprescu_d0_FAPs.uns['leiden_colors']
sc.tl.umap(adata_oprescu_d0_FAPs, min_dist=0.05, random_state=seed)
sc.tl.leiden(adata_oprescu_d0_FAPs, resolution=0.4, random_state=seed)
sc.pl.umap(adata_oprescu_d0_FAPs, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
# Project the FAP sub-clustering back onto the full day-0 object.
# Cells outside the FAP subset get the '-' placeholder. (Before, the placeholder was overwritten by the
# index-aligned assignment and those cells ended up labelled with the string 'nan'.)
adata_oprescu_d0.obs['leiden_FAP'] = (
    adata_oprescu_d0_FAPs.obs['leiden']
    .astype(str)
    .reindex(adata_oprescu_d0.obs_names, fill_value='-')
)

sc.pl.umap(adata_oprescu_d0, color=['leiden', 'leiden_FAP'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0_FAPs, groupby='leiden', method='wilcoxon', n_genes=150)
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0_FAPs, n_genes=150, dendrogram=False)

In [None]:
# 0
genes_0 = ['Mme', 'Fbln7', 'Crlf1', 'Nmb', 'Gfra1', 'Col6a6', 'Sept4', 'P2ry1', 
           'Fscn1', 'Rgs10', 'Tspan9', 'Hlf', 'Cldn15', 'Spry4']

sc.pl.umap(adata_oprescu_d0, color=['leiden'] + genes_0, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 1
genes_1 = ['Fn1', 'Anxa3', 'Dpp4', 'Sema3c', 'Efhd1', 'Stmn4', 'Procr', 'Limch1', 'Has1', 
           'Cmah', 'Il18', 'Nhsl1', 'Car8', 'Fez1', 'Mgll', 'Lurap1l', 'Aif1l', 'Sytl2', 'Rorb', 
           'Uchl1', 'Fam167a', 'Wnt2']

sc.pl.umap(adata_oprescu_d0, color=['leiden'] + genes_1, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 2
genes_2 = ['Cfh', 'Prg4', 'Gdf10', 'Fmo2', 'Fbln1', 'Cilp', 'Tmem176b', 'Icam1', 'Clu', 
           'Fxyd6', 'Hmcn1', 'Cdh11', 'C2', 'Tmem176a', 'Arhgdib', 'Klf5']

sc.pl.umap(adata_oprescu_d0, color=['leiden'] + genes_2, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 3
genes_3 = ['Sox9', 'Col9a2', 'Shisa3', 'Gfra2', 'Mpzl1', 'Dlk1', 'Sipa1l1', 'Trpm6', 
           'Rgs17', 'Cdh19', 'Nkd1', 'Plxdc1', 'Saa1', 'Cd300lg', 'Gpld1', 'Sphkap', 
           'Ngfr', 'S100b', 'Nipal1']

sc.pl.umap(adata_oprescu_d0, color=['leiden'] + genes_3, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 4
genes_4 = ['Ckm', 'Pgam2', 'Eno3', 'Tcap', 'Atp2a1', 'Pvalb', 'Cox6a2', 'Myoz1', 'Car3', 'Actn3', 
           'Neb', 'Ankrd23', 'Myh4', 'Mybpc2', 'Ldb3', 'Tmod4', 'Ttn', 'Ak1', 'Cox7a1', 'Sh3bgr', 
           'Myot', 'Adssl1', 'Mylk4', 'Eef1a2', 'Apobec2', 'Txlnb', 'Nrap', 'Jsrp1', 
           'Trim54', 'Trdn', 'Fabp3', 'Ckmt2', 'Mlf1', 'Asb2', 'Atcayos', 'Casq1', 'Pdlim3', 
           'Jph1', 'Fitm1', 'Mylk2', 'Mettl21c']

sc.pl.umap(adata_oprescu_d0, color=['leiden'] + genes_4, legend_loc='on data', ncols=3, cmap=magma)

### Analysis of Myonuclei

In [None]:
# Myonuclei subset. .copy() materialises it: the next cell modifies it in place, which should not be done on a view
adata_oprescu_d0_Myonuclei = adata_oprescu_d0[adata_oprescu_d0.obs['cell_type'].isin(['Myonuclei'])].copy()

In [None]:
# Remove genes detected in no cell of the Myonuclei subset, then run triku, a 30-component PCA and the cosine neighbour graph.
# NOTE(review): `knn` is documented by scanpy as a boolean switch (the neighbour count is `n_neighbors`) -- confirm.
# The value is also computed from len(adata_oprescu_d0), i.e. the parent object, not from this subset.
sc.pp.filter_genes(adata_oprescu_d0_Myonuclei, min_cells=1)
tk.tl.triku(adata_oprescu_d0_Myonuclei, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0_Myonuclei, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu_d0_Myonuclei, random_state=seed, knn=len(adata_oprescu_d0) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_oprescu_d0_Myonuclei, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d0_Myonuclei, resolution=0.2, random_state=seed)
sc.pl.umap(adata_oprescu_d0_Myonuclei, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
# Project the Myonuclei sub-clustering back onto the full day-0 object.
# Cells outside the subset get the '-' placeholder. (Before, the placeholder was overwritten by the
# index-aligned assignment and those cells ended up labelled with the string 'nan'.)
adata_oprescu_d0.obs['leiden_Myonuclei'] = (
    adata_oprescu_d0_Myonuclei.obs['leiden']
    .astype(str)
    .reindex(adata_oprescu_d0.obs_names, fill_value='-')
)

sc.pl.umap(adata_oprescu_d0, color=['leiden', 'leiden_Myonuclei'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0_Myonuclei, groupby='leiden', method='wilcoxon')
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0_Myonuclei, n_genes=100, dendrogram=False)

In [None]:
# 0 + 1
genes_0_1 = ['Ankrd2', 'Lmod2', 'Pdk4', 'Perm1', 'Ptpn3', 'Csrp3', 'Smtnl1', 
           'Mtfp1', 'Gm15543',]

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + genes_0_1, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 2 
genes_2 = ['Mpeg1', 'Lcp1', 'Plek', 'H2-Eb1', 'Ucp2', 'Bst2', 'Il1b', 'Cotl1', 'C1qc', 'Cd68', 
           'Ms4a7', 'Wfdc17']

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + genes_2, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 2 + 3 + 4
genes_2_3_4 = ['Fcer1g', 'Ctss', 'Tyrobp', 'Lgals3', 'Arpc1b', 'Btg1', 'Cyba', 'H2-K1', 'Nfkbia', ]

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + genes_2_3_4, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 2 + 4
genes_2_4 = ['Slfn2', 'C1qa', 'Msn', 'Spp1', 'S100a4', 'Laptm5', 'Tagln2', 'Tgfbi', 'Basp1',
             'Tmbim6']

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + genes_2_4, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 5
genes_5 = ['Serping1', 'Col4a1', 'Col6a2', 'Mmp2', 'Pcolce', 'Tnxb', 'Plxdc2', 'Fbln2', 'Col4a2', 'Rhoj', 
           'Clec3b', 'Sptbn1', 'Lhfp', 'Ltbp4', 'Ebf1', 'Rarres2', 'Itih5', 'Lamc1', 'Plpp3', 'Tuba1a', 
           'Col5a3', 'Selm', 'Pmp22', 'Nid1', 'Ccl11', 'Col6a3', 'Cd34', 'Cald1', 'Igfbp6', 'Camk2n1', 
           'Cd302', 'Adamts5', 'Abca8a', 'Col15a1', 'Dpt', 'Cpq', 'Entpd2', 'Smoc2', 'Htra3', 'Cygb']

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + genes_5, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# I had doubts about cluster 28 because it expressed FAP markers. But if we extract the DEGs directly from cluster 28, they appear similar.

sc.tl.rank_genes_groups(adata_oprescu_d0, groupby='leiden', method='wilcoxon', n_genes=200, groups=['28'], 
                        reference='rest')

sc.pl.rank_genes_groups(adata_oprescu_d0)

sc.pl.umap(adata_oprescu_d0, color=['leiden_Myonuclei'] + ['Cxcl14', 'Col15a1', 'F3', 'Myh4', 'Eno3'], legend_loc='on data', ncols=3, cmap=magma)

### Analysis of dispersed clusters

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden'], legend_loc='on data')

In [None]:
# Subset of the scattered (non-FAP, non-myonuclei) clusters.
# .copy() materialises it: the next cell modifies it in place, which should not be done on a view
adata_oprescu_d0_dispersed = adata_oprescu_d0[adata_oprescu_d0.obs['leiden'].isin(
    ['19', '22', '26', '25', '31', '20', '21', '32', '16'])].copy()

In [None]:
# Remove genes detected in no cell of the subset, then run triku, a 30-component PCA and the cosine neighbour graph.
# NOTE(review): `knn` is documented by scanpy as a boolean switch (the neighbour count is `n_neighbors`) -- confirm.
# The value is also computed from len(adata_oprescu_d0), i.e. the parent object, not from this subset.
sc.pp.filter_genes(adata_oprescu_d0_dispersed, min_cells=1)
tk.tl.triku(adata_oprescu_d0_dispersed, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0_dispersed, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu_d0_dispersed, random_state=seed, knn=len(adata_oprescu_d0) ** 0.5 // 2, metric='cosine')

In [None]:
# Discard the colour list stored under 'leiden_colors' before re-clustering.
# pop() with a default instead of `del`, so the cell does not raise KeyError when the key is absent.
adata_oprescu_d0_dispersed.uns.pop('leiden_colors', None)
sc.tl.umap(adata_oprescu_d0_dispersed, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d0_dispersed, resolution=0.2, random_state=seed)
sc.pl.umap(adata_oprescu_d0_dispersed, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
# Project the sub-clustering of the dispersed clusters back onto the full day-0 object.
# Cells outside the subset get the '-' placeholder. (Before, the placeholder was overwritten by the
# index-aligned assignment and those cells ended up labelled with the string 'nan'.)
adata_oprescu_d0.obs['leiden_dispersed'] = (
    adata_oprescu_d0_dispersed.obs['leiden']
    .astype(str)
    .reindex(adata_oprescu_d0.obs_names, fill_value='-')
)

sc.pl.umap(adata_oprescu_d0, color=['leiden', 'leiden_dispersed'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0_dispersed, groupby='leiden', method='wilcoxon')
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0_dispersed, n_genes=100, dendrogram=False)

In [None]:
# 0  
genes_0 = ['Col1a2', 'Col1a1', 'Dcn', 'Ebf1', 'Nid1', 'Igfbp6', 'Smoc2', 'Rnase4', 'Clec3b', 'Serpinf1', 
           'Pcolce', 'Col6a3', 'Col6a2', 'Lum', 'Fbn1', 'Mfap5', 'Plpp3', 'Mgp', 'Col5a3', 'Timp3', 'Apod',
           'Aspn', 'Ccl11', 'Cxcl14', 'Adamts5', 'C1s1', 'Vcan', 'Dpep1', 'Ly6c1', 'Prss23', 'Abi3bp', 'Fndc1']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_0, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 1  Satellitle / MuscSC
genes_1 = ['Chodl', 'Pdlim4', 'Peg3', 'Pax7', 'Crlf1', 'Gpx3', 'Dag1', 'Cnn3', 'Des', 'Fxyd6', 'Sdpr', 
           'Fgfr4', 'Tln2', 'Map1b', 'Clmn', 'Vcam1', 'Cdh15', 'Edn3', 'Nppc', 'Myf5', 'Ncam1', 'Maff', 
           'Sema6a', 'Rapsn', 'Pde10a', 'Sytl2', 'Jsrp1', 'Asb5', 'Dmd', 'Pxdc1', 'Heyl', 'Cfl2', 'Prox1', 
           'Ank1', 'Myod1', 'Flnc', 'Gxylt2', 'Ank3', 'Bmp4', 'Tnik', 'Chrdl2', 'Slc10a6', 'Palmd', 'Ptprz1']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_1, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 2 Monocyte?
genes_2 = ['Cd14', 'Cd74', 'H2-Eb1', 'H2-Aa', 'H2-Ab1', 'H2-DMa', 'H2-DMb1', 'Naaa', 'Plekho1', 'Clec4b1', 
           'Eps8', 'Mgl2', 'Shtn1', 'Tep1', 'Fgd2', 'Pepd', 'Mrc1', 'Slamf9', 'Fcrls', 'March1', 'Batf3', 
           'Slamf7', 'Tnip3', 'Cbfa2t3', 'Clec10a', 'Tbxas1', 'Sdc3', 'Retnla', 'P2ry6']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_2, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 3 Neutrophil
genes_3 = ['S100a9', 'Msrb1', 'S100a8', 'Csf3r', 'Il1b', 'Clec4d', 'Lst1', 'Slpi', 'Cd9', 'Litaf', 'Ccl6', 
           'Gmfg', 'Il1r2', 'Slc16a3', 'Ccr1', 'Clec4e', 'Spi1', 'Pglyrp1', 'Sorl1', 'Hp', 'Mxd1', 'Grina', 
           'Gsr', 'Trem1', 'C5ar1', 'Ncf2', 'Slc7a11', 'Retnlg', 'Mmp9', 'Entpd1', 'Cxcr4', 'Lmnb1', 'Cxcr2']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_3, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 4  T-cells
genes_4 = ['Il7r', 'Ets1', 'H2-Q7', 'Itk', 'Ctla2a', 'Skap1', 'Ptprcap', 'Saraf', 'Cd69', 'Gata3', 'Cd3g', 
           'Fkbp3', 'Arg1', 'Sept1', 'Tnfrsf18', 'Rora', 'Uhrf2', 'Tmem64', 'Inpp4b', 'Bcl11b']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_4, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 5 Immune
genes_5 = ['Plac8', 'Pou2f2', 'Gngt2', 'Napsa', 'Ifitm6', 'Clec4a3', 'Clec4a1', 'Klra2', 'Ms4a4c', 'Ceacam1']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_5, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 6  Mast cell-related
genes_6 = ['Cpa3', 'Serpinb1a', 'Mcpt4', 'Cma1', 'Cyp11a1', 'Ccl2', 'Ccl7', 'Gata2', 'Kit', 'Lat2',
           'Mrgprb1']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_6, legend_loc='on data', ncols=3, cmap=magma)

In [None]:
# 7  NK cells 
genes_7 = ['Nkg7', 'Ccl5', 'Klrk1', 'Txk', 'Ms4a4b', 'Klre1', 'Klrc2', 'Ctsw', 'Klrd1', 'Gzma', 'Il2rb', 
           'Klrb1c', 'Ncr1', 'Fasl', 'Serpinb6b', 'Gimap5', 'Gimap9', 'Xcl1', 'Ctla2b', 'Gimap7', 'Gzmb', 
           'Serpinb9', 'Eomes', 'Prf1', 'Ifng', 'Cst7', 'Arap2', 'Klra7', 'Klra1', 'Car2']

sc.pl.umap(adata_oprescu_d0, color=['leiden_dispersed'] + genes_7, legend_loc='on data', ncols=3, cmap=magma)

### Focusing on the possible kranocyte clusters

In [None]:
# Candidate kranocyte clusters. .copy() materialises the subset: the next cell modifies it in place,
# which should not be done on a view of adata_oprescu_d0
adata_oprescu_d0_putative = adata_oprescu_d0[adata_oprescu_d0.obs['leiden'].isin(['23', '24', '29', '19'])].copy()

In [None]:
# Remove genes detected in no cell of the subset, then run triku, a 30-component PCA and the cosine neighbour graph.
# NOTE(review): `knn` is documented by scanpy as a boolean switch (the neighbour count is `n_neighbors`) -- confirm.
# The value is also computed from len(adata_oprescu_d0), i.e. the parent object, not from this subset.
sc.pp.filter_genes(adata_oprescu_d0_putative, min_cells=1)
tk.tl.triku(adata_oprescu_d0_putative, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0_putative, random_state=seed, n_comps=30)
sc.pp.neighbors(adata_oprescu_d0_putative, random_state=seed, knn=len(adata_oprescu_d0) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_oprescu_d0_putative, min_dist=0.2, random_state=seed)
sc.tl.leiden(adata_oprescu_d0_putative, resolution=1, random_state=seed)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden', 'Cd34', 'S100b', 'Sox10'], 
           legend_loc='on data', cmap=magma, ncols=2)

In [None]:
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden', 'S100b', 'Mpz', 'Mbp', 
                                             'Sox10', 'Ngfr', 'Egr2', 'Cd34', 'Pdgfrb', 'Mcam', 'Pdgfra', 
                                             'Glb1', 'Rgs5', 'Nipal1', 'Trpm6'], 
           legend_loc='on data', cmap=magma)

In [None]:
adata_oprescu_d0.obs['leiden_putative'].values[0]

In [None]:
adata_oprescu_d0.obs['leiden_putative']

In [None]:
adata_oprescu_d0.obs['leiden_putative'] = '-'
adata_oprescu_d0.obs['leiden_putative'] = adata_oprescu_d0_putative.obs['leiden']
adata_oprescu_d0.obs['leiden_putative'] = adata_oprescu_d0.obs['leiden_putative'].astype(str)
adata_oprescu_d0.obs['leiden_putative'] = ['-1' if i == 'nan' else i for i in adata_oprescu_d0.obs['leiden_putative']]

sc.pl.umap(adata_oprescu_d0, color=['leiden', 'leiden_putative'],  ncols=2, cmap=magma, legend_loc='on data')
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'], legend_loc='on data', cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0, n_genes=150, groupby='leiden_putative')
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0, n_genes=50, dendrogram=False)

In [None]:
# 0 + 3 (+1)
# Marker panel for putative clusters 0 and 3 (and partly 1).
# 'Hmcn2' was listed twice, which drew the same panel twice; the duplicate is removed.
genes = ['Adamts5','Angptl1','Bmp4','Cmklr1','Col6a6','Cygb','Dpt','Fbln7','Gpm6b','Gstm1','Hmcn2',
'Htra3','Lbp','Loxl2','Mme','Nid2','Nmb','Nrep','Pdgfrl','Plau','Prss23','Tnxb',]

# First pair: write the figures to disk without displaying them
sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='0+3+_1_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='0+3+_1_zoom.png', show=False)

# Second pair: display the same figures inline
sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 0 (+3) + 1
# Marker panel for putative clusters 0 and 1 (and partly 3).
# A stray "Untitled 2.pdf" had been pasted in front of the comment above and made the cell a SyntaxError.
genes = ['Aldh1a1','Ank2','Ccl11','Cdk8','Col15a1','Col1a1','Col6a3','Col8a1','Crispld2',
'Cxcl14','Dpep1','Dpysl3','Fkbp10','Hspg2','Itih5','Lamc1','Lhfp','Medag',
'Ndn','Pcdh7','Pdgfra','Pdlim4','Podn','Smoc2','Spon1','Tmed1','Tmem45a',]

# First pair: write the figures to disk without displaying them
sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='0+_3+1_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='0+_3+1_zoom.png', show=False)

# Second pair: display the same figures inline
sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 1
genes = [
'6030408B16Rik','Adamtsl2','Bmp7','Capn1','Capn6','Cd300lg','Col18a1','Col9a2','Cpxm2',
'Crispld1','Cst6','Dlk1','Fgfrl1','Gfra2','Gpld1','Inhba','Matn2','Mdk',
'Nipal1','Nkd1','Rasgrp2','Rgs17','Saa1','Scml4','Sfrp1','Shisa3',
'Sipa1l1','Sorcs2','Sorl1','Sox9','Sphkap','Srpx','Thrsp','Trf',]

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='1_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='1_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 2+4
genes = ['Adgre1', 'Aif1', 'Aprt', 'Arl4c', 'Bach1', 'Btg1', 'C1qb', 'Ccl3', 'Ccl4', 'Ccl6', 'Ccl9', 
'Ccrl2', 'Cd53', 'Cd74', 'Cd83', 'Coro1a', 'Ctsc', 'Ctss', 'Fabp5', 'Fcer1g', 'H2-Aa', 'H2-Ab1', 
'Id2', 'Lgals3', 'Ly86', 'Lyz2', 'Postn', 'Ptgs2', 'Rel', 'Rhog', 'Rplp1', 'Rps11', 'Rps14', 'Rps24', 
'Sirpa', 'Srgn', 'Tgfb1', 'Tyrobp', 'Ucp2', ]

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='2+4_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='2+4_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 5
genes = ['Adgrf5', 'Bcam', 'Cd200', 'Cd93', 'Cdh5', 'Ceacam1', 'Cldn5', 'Clu', 
'Cracr2b', 'Cyyr1', 'Ecscr', 'Efna1', 'Egfl7', 'F11r', 'Fabp4', 'Fam101b', 'Flt1', 'Ggta1', 
'Gper1', 'Icam2', 'Kank3', 'Ldb2', 'Mecom', 'Mmrn2', 'Myct1', 'Pdgfb', 'Pecam1', 'Podxl', 
'Ptprb', 'Rasip1', 'S1pr1', 'Sema3g', 'She', 'Sox17', 'Sox18', 'Stmn2', 'Tie1', 'Tinagl1', 'Tspan13', 'Tspan7', 
'Vegfc', ]

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='5_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='5_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 7
genes = ['Abcc9', 'Adcy6', 'Casq2', 'Cox4i2', 'Cxcl12','Gucy1a3', 'Hopx', 'Kcnj8', 
'Myh11', 'Myl9', 'Myo1b', 'Ndufa4l2', 'Notch3', 'Pdgfrb', 'Rasl11a', 'Rgs4', 'Rgs5', 'Tbc1d1', ]

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='7_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='7_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 8
genes = ['Aatk','Arhgap19','Bcas1','Cadm4','Cldn19','Cmtm5','Ddn','Drp2','Dusp15','Elovl7',
'Ephb6','Fa2h','Fxyd3','Gjc3','Kcna1','Mag','Mal','Mboat1','Mbp','Mpz',
'Mt3','Otop1','Pllp','Plp1','Pou3f1','Prx','Ptprf','S100b','Sema3b','Slc36a2','Smco3','Snca',
'Sostdc1','Sox10','Stmn1','Tspan15','Ugt8a','Vat1l',]

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='8_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='8_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# 9
genes = ['Adgrg3', 'Ccl21a', 'Fgd6', 'Flt4', 'Gm21541', 'Il7', 'Klhl4', 
'Lyve1', 'Mmrn1', 'Mrc1', 'Pard6g', 'Slc45a3', 'Tspan18']

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma, 
           save='9_all.png', show=False)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma, 
           save='9_zoom.png', show=False)

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

In [None]:
# Possible lymph node (cluster 9)
genes = ['Cldn5', 'Vwf', 'Lyve1']

sc.pl.umap(adata_oprescu_d0, color=['leiden_putative'] + genes, cmap=magma)
sc.pl.umap(adata_oprescu_d0_putative, color=['leiden'] + genes, legend_loc='on data', cmap=magma)

### Filtering kranocyte genes

In [None]:
sc.tl.rank_genes_groups(adata_oprescu_d0, groupby='leiden', n_genes=1500)

We are interested in finding genes specific to cluster 23. To do that, we are going to filter out genes whose expression in the clusters most similar to it (0, 3, 5, 15, 4, 24) is comparable to their expression in cluster 23.

In [None]:
# For every gene ranked for cluster 23, compute the ratio between its mean expression in cluster 23
# and its highest mean expression among the reference clusters.
target_cluster = '23'
reference_clusters = ['0', '3', '4', '5', '15', '24']

# Loop-invariant pieces, built once instead of once per gene and per cluster.
cluster_labels = adata_oprescu_d0.obs['leiden']
target_mask = (cluster_labels == target_cluster).to_numpy()
reference_masks = [(cluster_labels == cluster).to_numpy() for cluster in reference_clusters]

genes_pos, ratio = [], []
for gene in tqdm(adata_oprescu_d0.uns['rank_genes_groups']['names'][target_cluster]):
    # Column of this gene in X (index lookup instead of comparing every var name)
    expression = adata_oprescu_d0.X[:, adata_oprescu_d0.var_names.get_loc(gene)]
    highest_reference_mean = max(np.mean(expression[mask]) for mask in reference_masks)

    # A reference mean of 0 gives inf under numpy division, i.e. the gene is absent from every reference cluster
    ratio.append(np.mean(expression[target_mask]) / highest_reference_mean)
    genes_pos.append(gene)

In [None]:
selected = np.array(ratio) > 10**0.6

In [None]:
# Jitter plot of log10(ratio): x positions are random and carry no meaning, blue points passed the threshold.
# The jitter comes from a generator seeded with the notebook seed so the figure is identical between runs.
jitter = np.random.default_rng(seed).random(len(ratio))
plt.scatter(jitter, np.log10(ratio), color=['#007ab7' if i else '#ababab' for i in selected])
plt.xlabel('random jitter')
plt.ylabel('log10(ratio)');

In [None]:
# Keep the genes whose ratio passed the threshold, then display them in alphabetical order
passed_threshold = np.asarray(selected, dtype=bool)
selected_genes_krano_oprescu = np.asarray(genes_pos)[passed_threshold]
np.array(sorted(selected_genes_krano_oprescu))

In [None]:
sc.pl.umap(adata_oprescu_d0, color=selected_genes_krano_oprescu, legend_loc='on data', cmap=magma, ncols=2)

### Selecting putative clusters at injury states

In [None]:
for adata in [adata_oprescu_d05, adata_oprescu_d2, adata_oprescu_d35, adata_oprescu_d5, adata_oprescu_d10,
              adata_oprescu_d21]:
    sc.pp.filter_cells(adata, min_genes=200)

In [None]:
for adata in [adata_oprescu_d05, adata_oprescu_d2, adata_oprescu_d35, adata_oprescu_d5, adata_oprescu_d10,
              adata_oprescu_d21]:
    sc.pp.filter_genes(adata, min_counts=1)
    sc.pp.log1p(adata)
    sc.pp.normalize_per_cell(adata)

In [None]:
# Run triku, a 30-component PCA and the cosine neighbour graph on each injury time point.
# NOTE(review): scanpy documents `knn` as a boolean switch and `n_neighbors` as the neighbour count, so the
# sqrt(n)/2 value is probably not used as the neighbourhood size -- confirm.
for adata in [adata_oprescu_d05, adata_oprescu_d2, adata_oprescu_d35, adata_oprescu_d5, adata_oprescu_d10,
              adata_oprescu_d21]:
    tk.tl.triku(adata, n_procs=1, random_state=seed)
    sc.pp.pca(adata, random_state=seed, n_comps=30)
    sc.pp.neighbors(adata, random_state=seed, knn=len(adata) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_oprescu_d05, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d05, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d05, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.tl.umap(adata_oprescu_d2, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d2, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d2, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.tl.umap(adata_oprescu_d35, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d35, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d35, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.tl.umap(adata_oprescu_d5, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d5, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d5, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.tl.umap(adata_oprescu_d10, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d10, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d10, color=['leiden', 'n_counts'], legend_loc='on data')

In [None]:
sc.tl.umap(adata_oprescu_d21, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_oprescu_d21, resolution=1.8, random_state=seed)
sc.pl.umap(adata_oprescu_d21, color=['leiden', 'n_counts'], legend_loc='on data')

#### Determining the dynamics of kranocytes / tenocytes during healing

From the literature, kranocytes are known to express Tnc at 0.5 and 2 DPI, whereas at day 0 Tnc is a marker of tenocytes. Therefore, we map Tnc+ clusters alongside the putative kranocyte clusters at every time point to follow how the expression of these clusters evolves.

In [None]:
teno_markers = ['Tnc', 'Tnmd', 'Scx', 'Mkx', 'Col12a1', 'Fmod', 'Comp']
krano_markers_d0 = ['Shisa3', 'Saa1', '6030408B16Rik', 'Sox9', 'Gpld1', 'Dlk1', 'Nipal1']

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d0, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d05, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d05, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d2, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d2, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d35, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d35, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d5, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d5, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d10, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d10, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d21, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_oprescu_d21, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

# Scott 2019

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976778/suppl/GSM2976778%5Fqsnt%5Fbarcodes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976778/suppl/GSM2976778%5Fqsnt%5Fgenes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976778/suppl/GSM2976778%5Fqsnt%5Fmatrix%2Emtx%2Egz -P {data_dir}/scott

In [None]:
adata_scott_d0 = sc.read_mtx(data_dir+'scott/GSM2976778_qsnt_matrix.mtx.gz').transpose()
barcodes = pd.read_csv(data_dir+'scott/GSM2976778_qsnt_barcodes.tsv.gz', sep='\t', header=None)[0].values
features = pd.read_csv(data_dir+'scott/GSM2976778_qsnt_genes.tsv.gz', sep='\t', header=None)[1].values

adata_scott_d0.var_names = features
adata_scott_d0.obs_names = barcodes

In [None]:
adata_scott_d0.var_names_make_unique()

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976779/suppl/GSM2976779%5Fd1%5Fbarcodes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976779/suppl/GSM2976779%5Fd1%5Fgenes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976779/suppl/GSM2976779%5Fd1%5Fmatrix%2Emtx%2Egz -P {data_dir}/scott

In [None]:
adata_scott_d1 = sc.read_mtx(data_dir+'scott/GSM2976779_d1_matrix.mtx.gz').transpose()
barcodes = pd.read_csv(data_dir+'scott/GSM2976779_d1_barcodes.tsv.gz', sep='\t', header=None)[0].values
features = pd.read_csv(data_dir+'scott/GSM2976779_d1_genes.tsv.gz', sep='\t', header=None)[1].values

adata_scott_d1.var_names = features
adata_scott_d1.obs_names = barcodes

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976780/suppl/GSM2976780%5Fd2%5Fbarcodes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976780/suppl/GSM2976780%5Fd2%5Fgenes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976780/suppl/GSM2976780%5Fd2%5Fmatrix%2Emtx%2Egz -P {data_dir}/scott

In [None]:
adata_scott_d2 = sc.read_mtx(data_dir+'scott/GSM2976780_d2_matrix.mtx.gz').transpose()
barcodes = pd.read_csv(data_dir+'scott/GSM2976780_d2_barcodes.tsv.gz', sep='\t', header=None)[0].values
features = pd.read_csv(data_dir+'scott/GSM2976780_d2_genes.tsv.gz', sep='\t', header=None)[1].values

adata_scott_d2.var_names = features
adata_scott_d2.obs_names = barcodes

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976781/suppl/GSM2976781%5Fd4%5Fbarcodes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976781/suppl/GSM2976781%5Fd4%5Fgenes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976781/suppl/GSM2976781%5Fd4%5Fmatrix%2Emtx%2Egz -P {data_dir}/scott

In [None]:
adata_scott_d4 = sc.read_mtx(data_dir+'scott/GSM2976781_d4_matrix.mtx.gz').transpose()
barcodes = pd.read_csv(data_dir+'scott/GSM2976781_d4_barcodes.tsv.gz', sep='\t', header=None)[0].values
features = pd.read_csv(data_dir+'scott/GSM2976781_d4_genes.tsv.gz', sep='\t', header=None)[1].values

adata_scott_d4.var_names = features
adata_scott_d4.obs_names = barcodes

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976782/suppl/GSM2976782%5Fd14%5Fbarcodes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976782/suppl/GSM2976782%5Fd14%5Fgenes%2Etsv%2Egz -P {data_dir}/scott
!wget https://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2976nnn/GSM2976782/suppl/GSM2976782%5Fd14%5Fmatrix%2Emtx%2Egz -P {data_dir}/scott

In [None]:
adata_scott_d14 = sc.read_mtx(data_dir+'scott/GSM2976782_d14_matrix.mtx.gz').transpose()
barcodes = pd.read_csv(data_dir+'scott/GSM2976782_d14_barcodes.tsv.gz', sep='\t', header=None)[0].values
features = pd.read_csv(data_dir+'scott/GSM2976782_d14_genes.tsv.gz', sep='\t', header=None)[1].values

adata_scott_d14.var_names = features
adata_scott_d14.obs_names = barcodes

## Adata Scott D0

In [None]:
adata_scott_d0.X = np.asarray(adata_scott_d0.X.todense())

In [None]:
# Basic QC filtering
# Annotate mitochondrial genes as 'mt'. The gene symbols used throughout this section are
# mouse-style (Fabp4, Tnc, ...), where the mitochondrial prefix is lower-case 'mt-'; the symbols
# are upper-cased before matching so that both 'mt-' and 'MT-' prefixes are recognised.
adata_scott_d0.var['mt'] = adata_scott_d0.var_names.str.upper().str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata_scott_d0, qc_vars=['mt'], percent_top=None, inplace=True)

In [None]:
sc.pl.violin(adata_scott_d0, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_scott_d0, x='total_counts', y='pct_counts_mt')


In [None]:
# adata_oprescu = adata_oprescu[adata_oprescu.obs.n_genes_by_counts < 5000, :]
# adata_oprescu = adata_oprescu[adata_oprescu.obs.pct_counts_mt < 30, :]

In [None]:
sc.pp.filter_cells(adata_scott_d0, min_genes=250)

In [None]:
sc.pp.filter_genes(adata_scott_d0, min_counts=1)
sc.pp.log1p(adata_scott_d0)
sc.pp.normalize_per_cell(adata_scott_d0)

In [None]:
# Run triku, compute a 30-component PCA and build the neighbor graph for the Scott D0 sample.
tk.tl.triku(adata_scott_d0, n_procs=1, random_state=seed)
sc.pp.pca(adata_scott_d0, random_state=seed, n_comps=30)
# NOTE(review): a number (sqrt(n_cells) // 2) is passed as `knn`. In scanpy's documented API
# `knn` is a boolean flag and the neighborhood size is `n_neighbors` — presumably the value was
# meant as the neighborhood size; confirm. Changing it would alter the leiden cluster numbering
# that the annotation cells below hard-code.
sc.pp.neighbors(adata_scott_d0, random_state=seed, knn=len(adata_scott_d0) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_scott_d0, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_scott_d0, resolution=2, random_state=seed)
sc.pl.umap(adata_scott_d0, color=['leiden', 'n_counts'], legend_loc='on data')

### Subpopulation characterization

In [None]:
adata_scott_d0.obs['cell_type'] = 'Other'

In [None]:
df = adata_scott_d0.obs['cell_type'].copy()

In [None]:
# Endothelial
sc.pl.umap(adata_scott_d0, color=['leiden', 'Fabp4', 'Vwf'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_scott_d0.obs['leiden'].isin(['16'])] = 'Endothelial'

In [None]:
# LUM+ FAP
sc.pl.umap(adata_scott_d0, color=['leiden', 'Apod', 'Lum'], legend_loc='on data', ncols=3, cmap=magma)

# PRG4+ FAP
sc.pl.umap(adata_scott_d0, color=['leiden', 'Prg4', 'Fbn1'], legend_loc='on data', ncols=3, cmap=magma)

df[adata_scott_d0.obs['leiden'].isin(['17', '6', '3', '4', '13', '8', '0', '7', '11', '15', '1', '9', '2', '5'])] = 'FAP'

In [None]:
# Pericyte
sc.pl.umap(adata_scott_d0, color=['leiden', 'Rgs5', 'Ndufa4l2'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_scott_d0.obs['leiden'].isin(['10'])] = 'Pericyte'

In [None]:
# Tenocyte
sc.pl.umap(adata_scott_d0, color=['leiden', 'Tnc', 'Tnmd'], legend_loc='on data', ncols=3, cmap=magma)
df[adata_scott_d0.obs['leiden'].isin(['12', '14'])] = 'Tenocyte'

In [None]:
sc.tl.rank_genes_groups(adata_scott_d0, groupby='leiden', method='wilcoxon')
sc.pl.rank_genes_groups_tracksplot(adata_scott_d0, dendrogram=False, n_genes=50)

In [None]:
adata_scott_d0.obs['cell_type'] = df

In [None]:
adata_scott_d0.uns['cell_type_colors'] = ['#c70039', '#8b008b', '#8B0000', '#ff7f50']

In [None]:
sc.pl.umap(adata_scott_d0, color=['leiden', 'cell_type'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_scott_d0, color=['leiden', 'Pdgfra', 'Lum', 'Dcn', 'Vim', 'Dpp4', 'Sfrp2', 'Apoe', 'Ccl19', 
                                    'Wisp2', 'Wif1', 'Col18a1', 'Postn', 'Col11a1', 'Crabp1', 'Coch'], legend_loc='on data', ncols=2, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_scott_d0, groupby='leiden', method='wilcoxon', groups=['6', '17'], reference='rest', n_genes=2000)
sc.pl.rank_genes_groups_tracksplot(adata_scott_d0, dendrogram=False, n_genes=100)

In [None]:
# For every candidate gene, compute the ratio between its mean expression in the target clusters
# ('5' and '13' pooled) and its highest per-cluster mean among the comparison clusters listed below.
# `genes_pos` and `ratio` are filled in the same order, so they can be indexed together afterwards.
# NOTE(review): the rank_genes_groups call in the preceding cell is restricted to
# groups=['6', '17'], while this cell reads the '5' and '13' result columns and uses clusters
# 5/13 as the target (with '6' among the comparison clusters). The cluster ids look out of sync
# with that cell — confirm which ids are intended.
genes_pos, ratio = [], []
for gene in tqdm(set(list(adata_scott_d0.uns['rank_genes_groups']['names']['5']) + 
                 list(adata_scott_d0.uns['rank_genes_groups']['names']['13']))):
    # Mean expression of this gene in each comparison cluster.
    list_means = []
    for group in ['6', '8', '2', '10', '11', '7', '9', '1', '12', '3', '4']:
        list_means.append(np.mean(adata_scott_d0.X[adata_scott_d0.obs['leiden'].isin(
        [group]), adata_scott_d0.var_names == gene].ravel()))
    
    # Target mean divided by the largest comparison mean (a zero denominator is not guarded).
    r = np.mean(adata_scott_d0.X[adata_scott_d0.obs['leiden'].isin(['5', '13']), 
                   adata_scott_d0.var_names == gene].ravel()) / max(list_means)
    
    ratio.append(r)
    genes_pos.append(gene)

In [None]:
selected = np.array(ratio) > 10**0.25

In [None]:
# Strip plot of log10(ratio) for every candidate gene; genes flagged in `selected` are drawn in blue.
# The x coordinate is only jitter to spread the points, so it is drawn from a generator seeded
# with the notebook-wide `seed` to make the figure identical on every run.
jitter_x = np.random.default_rng(seed).random(len(ratio))
plt.scatter(jitter_x, np.log10(ratio), color=['#007ab7' if i else '#ababab' for i in selected])

In [None]:
selected_genes_9_scott = np.array(genes_pos)[np.array(selected).astype(bool)]
np.array(sorted(selected_genes_9_scott))

In [None]:
# Genes selected in both the Scott and the Oprescu analyses.
# NOTE(review): `selected_genes_18_oprescu` is not defined in the visible part of this notebook;
# the Oprescu D0 selection above is stored as `selected_genes_krano_oprescu`. Confirm which name
# is intended, otherwise this cell may only work with leftover kernel state.
selected_genes = np.intersect1d(selected_genes_9_scott, selected_genes_18_oprescu)

In [None]:
selected_genes

In [None]:
sc.pl.umap(adata_scott_d0, color=selected_genes, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=selected_genes, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
# Genes that are exclusive of cluster 18 in Oprescu
filtered_genes = ['Bricd5', 'Col9a2', 'Dlk1', 'Grin2b', 'Mpzl2', 'Saa1', 'Shisa3', 'Tenm2']


sc.pl.umap(adata_oprescu_d0, color=['leiden'] + filtered_genes, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_scott_d0, color=['leiden'] + filtered_genes, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_oprescu_d0, color=['Nipal1', 'Trpm6', 'S100b', 'Gpld1', 'Plxdc1', 'Gfra2',
                                  'Cd38', 'Cd300lg'], legend_loc='on data', cmap=magma, ncols=3)

# De Micheli et al 2020 (mouse)

## Adata download and preprocessing

In [None]:
!wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE143nnn/GSE143437/suppl/GSE143437%5FDeMicheli%5FMuSCatlas%5Frawdata%2Etxt%2Egz -P {data_dir}/demicheli_mouse
!wget https://ftp.ncbi.nlm.nih.gov/geo/series/GSE143nnn/GSE143437/suppl/GSE143437%5FDeMicheli%5FMuSCatlas%5Fmetadata%2Etxt%2Egz -P {data_dir}/demicheli_mouse

In [None]:
adata_de_micheli_mouse = sc.read_text(data_dir+'demicheli_mouse/GSE143437_DeMicheli_MuSCatlas_rawdata.txt.gz').transpose()
adata_de_micheli_mouse.obs['batch'] = ['_'.join(i.split('_')[:2]) for i in adata_de_micheli_mouse.obs_names]
adata_de_micheli_mouse.obs_names = [i.split('_')[-1] for i in adata_de_micheli_mouse.obs_names]
adata_de_micheli_mouse.obs_names_make_unique()

In [None]:
adata_de_micheli_mouse.write_loom(data_dir+'demicheli_mouse/adata_demicheli_mouse.loom')

In [None]:
adata_de_micheli_mouse = sc.read_loom(data_dir+'demicheli_mouse/adata_demicheli_mouse.loom')

In [None]:
# Keep only the genes whose symbol contains neither 'Rps' nor 'Rpl'
# (i.e. RPS / RPL genes are excluded from the list).
not_RPS = [
    gene_name
    for gene_name in adata_de_micheli_mouse.var_names
    if 'Rps' not in gene_name and 'Rpl' not in gene_name
]

In [None]:
# Restrict the object to the retained genes. Slicing an AnnData returns a view, and the QC cell
# that follows writes a new column into `.var`, so the subset is materialised with `.copy()`
# instead of relying on an implicit copy-on-write of the view.
adata_de_micheli_mouse = adata_de_micheli_mouse[:, not_RPS].copy()

In [None]:
# Basic QC filtering
adata_de_micheli_mouse.var['mt'] = adata_de_micheli_mouse.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_de_micheli_mouse, qc_vars=['mt'], percent_top=None, inplace=True)

In [None]:
sc.pl.violin(adata_de_micheli_mouse, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_de_micheli_mouse, x='n_genes_by_counts', y='pct_counts_mt')
sc.pl.scatter(adata_de_micheli_mouse, x='total_counts', y='n_genes_by_counts')

In [None]:
# Keep the cells that are below all three upper QC thresholds (detected genes, total counts,
# mitochondrial percentage). The three conditions are combined into a single mask and the result
# is materialised with `.copy()`, so the in-place gene filtering further down operates on a real
# object rather than on a view of a view of a view.
qc_obs = adata_de_micheli_mouse.obs
keep_cells = (
    (qc_obs.n_genes_by_counts < 6000)
    & (qc_obs.total_counts < 30000)
    & (qc_obs.pct_counts_mt < 10)
)
adata_de_micheli_mouse = adata_de_micheli_mouse[keep_cells.to_numpy(), :].copy()

In [None]:
sc.pl.violin(adata_de_micheli_mouse, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_de_micheli_mouse, x='n_genes_by_counts', y='pct_counts_mt')
sc.pl.scatter(adata_de_micheli_mouse, x='total_counts', y='n_genes_by_counts')

In [None]:
sc.pp.filter_genes(adata_de_micheli_mouse, min_counts=1)

In [None]:
adata_de_micheli_mouse.X = np.array(adata_de_micheli_mouse.X.todense())

In [None]:
set(adata_de_micheli_mouse.obs.batch)

In [None]:
adata_de_micheli_mouse_d0 = adata_de_micheli_mouse[adata_de_micheli_mouse.obs['batch'].isin(['D0_A', 'D0_B', 'D0_Cv3'])].copy()
adata_de_micheli_mouse_d2 = adata_de_micheli_mouse[adata_de_micheli_mouse.obs['batch'].isin(['D2_C', 'D2_D'])].copy()
adata_de_micheli_mouse_d5 = adata_de_micheli_mouse[adata_de_micheli_mouse.obs['batch'].isin(['D5_A', 'D5_B', 'D5_C'])].copy()
adata_de_micheli_mouse_d7 = adata_de_micheli_mouse[adata_de_micheli_mouse.obs['batch'].isin(['D7_C', 'D7_D'])].copy()

In [None]:
for adata in [adata_de_micheli_mouse_d0, adata_de_micheli_mouse_d2, 
              adata_de_micheli_mouse_d5, adata_de_micheli_mouse_d7]:

    sc.pp.filter_genes(adata, min_counts=1)
    sc.pp.log1p(adata)
    sc.pp.normalize_total(adata)

In [None]:
for adata in [adata_de_micheli_mouse_d0, adata_de_micheli_mouse_d2, 
              adata_de_micheli_mouse_d5, adata_de_micheli_mouse_d7]:
    
    sc.pp.pca(adata, random_state=seed, n_comps=30)
    sce.pp.bbknn(adata, metric='angular')
    tk.tl.triku(adata, n_procs=1, random_state=seed)

In [None]:
for adata in [adata_de_micheli_mouse_d0, adata_de_micheli_mouse_d2, 
              adata_de_micheli_mouse_d5, adata_de_micheli_mouse_d7]:

    sc.tl.umap(adata, min_dist=0.2, random_state=seed)
    sc.tl.leiden(adata, resolution=1, random_state=seed)
    sc.pl.umap(adata, color=['leiden', 'batch', 'total_counts'], legend_loc='on data', ncols=2)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d2, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_de_micheli_mouse_d2, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d5, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_de_micheli_mouse_d5, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d7, color=['leiden'] + teno_markers, legend_loc='on data', cmap=magma, ncols=3)
sc.pl.umap(adata_de_micheli_mouse_d7, color=['leiden'] + krano_markers_d0, legend_loc='on data', cmap=magma, ncols=3)

## Population characterization

In [None]:
sc.tl.rank_genes_groups(adata_de_micheli_mouse_d0, groupby='leiden', method='wilcoxon')

In [None]:
sc.pl.rank_genes_groups_tracksplot(adata_de_micheli_mouse_d0, dendrogram=False, n_genes=30)

In [None]:
sc.tl.rank_genes_groups(adata_de_micheli_mouse_d0, groupby='leiden', method='wilcoxon', groups=['8'], reference='rest')
sc.pl.rank_genes_groups_tracksplot(adata_de_micheli_mouse_d0, dendrogram=False, n_genes=100)

In [None]:
adata_de_micheli_mouse_d0.obs['cell_type'] = 'Other'

In [None]:
df = adata_de_micheli_mouse_d0.obs['cell_type'].copy()

In [None]:
# FAPs: leiden clusters 2, 6, 3, 7 and 11 (genes plotted to support the call: Pdgfra, Col1a2, Mfap5)
genes = ['Pdgfra', 'Col1a2', 'Mfap5']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=2)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['2', '6', '3', '7', '11'])] = 'FAP'

In [None]:
# Tenocytes: leiden cluster 13 (genes plotted to support the call: Tnmd, Fmod, Comp)
genes = ['Tnmd', 'Fmod', 'Comp']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=2)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['13'])] = 'Tenocyte'

In [None]:
# Endothelial cells: leiden clusters 0, 1, 5, 8, 9, 14 and 19 (genes plotted to support the call: Cav1, Pecam1)
genes = ['Cav1', 'Pecam1']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['0', '1', '5', '8', '9', '14', '19'])] = 'Endothelial'

In [None]:
# Pericytes: leiden cluster 16 (genes plotted to support the call: Rgs5, Myl9)
genes = ['Rgs5', 'Myl9']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['16'])] = 'Pericyte'

In [None]:
# Skeletal muscle (labelled 'Myonuclei'): leiden clusters 4 and 10 (genes plotted to support the call: Acta1, Ckm)
genes = ['Acta1', 'Ckm']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['4', '10'])] = 'Myonuclei'

In [None]:
# Neural cells: leiden clusters 15 and 17 (genes plotted to support the call: Mpz, Ptn)
genes = ['Mpz', 'Ptn']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['15', '17'])] = 'Neural cell'

In [None]:
# Immune cells: leiden cluster 12 (genes plotted to support the call: H2-Aa, Cd74)
genes = ['H2-Aa', 'Cd74']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['12'])] = 'Immune'

In [None]:
# Satellite cells (labelled 'MuSC'): leiden cluster 18 (genes plotted to support the call: Chodl, Pax7)
genes = ['Chodl', 'Pax7']

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + genes, legend_loc='on data', cmap=magma, ncols=3)
df[adata_de_micheli_mouse_d0.obs['leiden'].isin(['18'])] = 'MuSC'

In [None]:
adata_de_micheli_mouse_d0.obs['cell_type'] = df

In [None]:
adata_de_micheli_mouse_d0.uns['cell_type_colors'] = ['#c70039', '#8b008b', '#4682b4', '#006400', '#2e8b57', '#ffd700', '#8B0000', '#ff7f50']

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=[ 'cell_type'])

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden', 'cell_type'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden'] + ['Col9a2', 'Shisa3', 'Plxdc1', 'Nipal1', '6030408B16Rik'], legend_loc='on data', cmap=magma, ncols=3)

### Characterization of FAPs in D0

In [None]:
# Subset the D0 cells annotated as 'FAP' in the population characterization above.
# The previous selection used leiden cluster '0', which that annotation labels 'Endothelial';
# selecting on `cell_type` keeps this subset in sync with the annotation. `.copy()` materialises
# the subset because the following cells modify it in place (gene filtering, PCA, neighbors).
adata_de_micheli_mouse_d0_FAPs = adata_de_micheli_mouse_d0[
    adata_de_micheli_mouse_d0.obs['cell_type'] == 'FAP'
].copy()

In [None]:
sc.pp.filter_genes(adata_de_micheli_mouse_d0_FAPs, min_cells=1)
tk.tl.triku(adata_de_micheli_mouse_d0_FAPs, n_procs=1, random_state=seed)
sc.pp.pca(adata_de_micheli_mouse_d0_FAPs, random_state=seed, n_comps=30)
sce.pp.bbknn(adata_de_micheli_mouse_d0_FAPs, metric='angular')

In [None]:
# del adata_oprescu_d0_FAPs.uns['leiden_colors']
sc.tl.umap(adata_de_micheli_mouse_d0_FAPs, min_dist=0.05, random_state=seed)
sc.tl.leiden(adata_de_micheli_mouse_d0_FAPs, resolution=1, random_state=seed)
sc.pl.umap(adata_de_micheli_mouse_d0_FAPs, color=['leiden', 'batch'], legend_loc='on data')

In [None]:
sc.pl.umap(adata_de_micheli_mouse_d0_FAPs, color=['leiden', 'Col9a2', 'Shisa3', 
                                                         'Plxdc1', 'Nipal1', '6030408B16Rik'], cmap=magma, legend_loc='on data')

In [None]:
adata_de_micheli_mouse_d0.uns['leiden_FAP_colors'] = adata_de_micheli_mouse_d0_FAPs.uns['leiden_colors']


In [None]:
# Transfer the FAP sub-cluster labels onto the full D0 object.
# Every cell starts with the '-' placeholder and only the cells present in the FAP subset are
# overwritten. (Assigning the subset's Series to the whole column, as before, replaced the
# placeholder with missing values that then became the string 'nan'.)
fap_barcodes = adata_de_micheli_mouse_d0_FAPs.obs_names
adata_de_micheli_mouse_d0.obs['leiden_FAP'] = '-'
adata_de_micheli_mouse_d0.obs.loc[fap_barcodes, 'leiden_FAP'] = (
    adata_de_micheli_mouse_d0_FAPs.obs['leiden'].astype(str).to_numpy()
)

sc.pl.umap(adata_de_micheli_mouse_d0, color=['leiden', 'leiden_FAP'], legend_loc='on data', ncols=3, cmap=magma)
# Second panel: only the cells of the FAP subset, selected by barcode so the plot always shows
# exactly the cells that were sub-clustered, whichever way the subset was defined.
sc.pl.umap(adata_de_micheli_mouse_d0[fap_barcodes], color=['leiden_FAP'], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.tl.rank_genes_groups(adata_de_micheli_mouse_d0_FAPs, groupby='leiden', method='wilcoxon', n_genes=150)
sc.pl.rank_genes_groups_tracksplot(adata_de_micheli_mouse_d0_FAPs, n_genes=150, dendrogram=False)

# De Micheli et al 2020 (human)

This is a human dataset consisting of 10 patients. To remove batch effects, we will run kallisto directly on all samples at once, and then run the analysis on the combined output.

In [None]:
de_micheli_dir = os.getcwd() + '/data/demicheli'
os.makedirs(de_micheli_dir, exist_ok=True)

In [None]:
SRA_list = """
SRR10897760
SRR10897761
SRR10897762
SRR10897763
SRR10897764
SRR10897765
SRR10897766
SRR10897767
SRR10897768
SRR10897769
"""

with open(de_micheli_dir + '/accession.txt', 'w') as f:
    f.write(SRA_list)
    
df = pd.DataFrame({'name': ['de_micheli'], 'technology': ['10xv3'], 'targetnumcells': [5000]})

df.to_csv(de_micheli_dir + '/metadata.tab', sep='\t', index=None)

In [None]:
!fastq-dump SRR10897768 --split-files --gzip  # This is just one. Run from 60 to 69

In [None]:
from time import sleep

In [None]:
# Block for 4 hours before renaming — presumably to give the fastq-dump downloads time to finish
# (TODO confirm; on a plain re-run this cell simply waits 4 h).
sleep(3600 * 4)
# Rename the per-run FASTQ files to lane-style names: the N-th accession in SRA_list becomes lane
# L00N, with <SRA>_2.fastq.gz -> R1 and <SRA>_3.fastq.gz -> R2. The [1:-1] slice drops the empty
# strings produced by the leading and trailing newline of SRA_list.
# NOTE(review): the files are expected inside de_micheli_dir, while the fastq-dump cell above is
# invoked without an output directory — confirm where the downloads actually land.
for SRA_idx, SRA in enumerate(SRA_list.split('\n')[1:-1]):
    os.rename(de_micheli_dir + f'/{SRA}_2.fastq.gz', 
              de_micheli_dir + f'/de_micheli_L00{SRA_idx+1}_R1_001.fastq.gz')
    os.rename(de_micheli_dir + f'/{SRA}_3.fastq.gz', 
              de_micheli_dir + f'/de_micheli_L00{SRA_idx+1}_R2_001.fastq.gz')

In [None]:
!loompy fromfq {de_micheli_dir}/de_micheli.loom de_micheli /media/seth/SETH_DATA/SETH_Alex/Programs/human_GRCh38_gencode.v31.600 {de_micheli_dir}/metadata.tab \
{de_micheli_dir}/de_micheli_L001_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L001_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L002_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L002_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L003_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L003_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L004_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L004_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L005_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L005_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L006_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L006_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L007_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L007_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L008_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L008_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L009_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L009_R2_001.fastq.gz \
{de_micheli_dir}/de_micheli_L0010_R1_001.fastq.gz {de_micheli_dir}/de_micheli_L0010_R2_001.fastq.gz 

We will also load the matrix file because its cell names contain the batch of each cell. This lets us
label the cells by batch and map the batches later on, which will help us remove possible small spurious clusters.

In [None]:
link = 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE143nnn/GSE143704/suppl/GSE143704%5FDeMicheli%5FHumanMuscleAtlas%5Frawdata%2Etxt%2Egz'

In [None]:
!wget {link} -P {data_dir}/demicheli

## Adata load and preprocessing

In [None]:
adata_de_micheli_batch = sc.read_text(data_dir+'/demicheli/GSE143704_DeMicheli_HumanMuscleAtlas_rawdata.txt.gz').transpose()

In [None]:
adata_de_micheli_batch.obs['batch'] = [i.split('_')[1] for i in adata_de_micheli_batch.obs_names]

In [None]:
adata_de_micheli = sc.read_loom(de_micheli_dir + '/de_micheli.loom')
adata_de_micheli.var_names_make_unique()

In [None]:
# Basic QC filtering
adata_de_micheli.var['mt'] = adata_de_micheli.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_de_micheli, qc_vars=['mt'], percent_top=None, inplace=True)

In [None]:
sc.pl.violin(adata_de_micheli, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_de_micheli, x='n_genes_by_counts', y='pct_counts_mt')
sc.pl.scatter(adata_de_micheli, x='total_counts', y='n_genes_by_counts')

In [None]:
# Upper bounds on detected genes, total counts and mitochondrial percentage.
adata_de_micheli = adata_de_micheli[adata_de_micheli.obs.n_genes_by_counts < 3500, :]
adata_de_micheli = adata_de_micheli[adata_de_micheli.obs.total_counts < 15000, :]
adata_de_micheli = adata_de_micheli[adata_de_micheli.obs.pct_counts_mt < 80, :]
# NOTE(review): both clauses below test pct_counts_mt, so the `< 400` clause adds nothing beyond
# `< 10` (any value below 10 is also below 400) and this line drops every cell with
# pct_counts_mt < 10. Presumably the second clause was meant to test a different QC column
# (e.g. n_genes_by_counts or total_counts) — confirm the intended filter before relying on it.
adata_de_micheli = adata_de_micheli[~ ((adata_de_micheli.obs.pct_counts_mt < 10) & 
                                       (adata_de_micheli.obs.pct_counts_mt < 400)), :]

In [None]:
sc.pl.violin(adata_de_micheli, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_de_micheli, x='n_genes_by_counts', y='pct_counts_mt')
sc.pl.scatter(adata_de_micheli, x='total_counts', y='n_genes_by_counts')

In [None]:
sc.pp.filter_genes(adata_de_micheli, min_counts=1)
sc.pp.log1p(adata_de_micheli)
sc.pp.normalize_total(adata_de_micheli)

In [None]:
# Run triku, compute a 30-component PCA and build the neighbor graph for the human dataset.
tk.tl.triku(adata_de_micheli, n_procs=1, random_state=seed)
sc.pp.pca(adata_de_micheli, random_state=seed, n_comps=30)
# NOTE(review): a number (sqrt(n_cells) // 2) is passed as `knn`. In scanpy's documented API
# `knn` is a boolean flag and the neighborhood size is `n_neighbors` — presumably the value was
# meant as the neighborhood size; confirm.
sc.pp.neighbors(adata_de_micheli, random_state=seed, knn=len(adata_de_micheli) ** 0.5 // 2, metric='cosine')

In [None]:
sc.tl.umap(adata_de_micheli, min_dist=0.1, random_state=seed)
sc.tl.leiden(adata_de_micheli, resolution=1, random_state=seed)
sc.pl.umap(adata_de_micheli, color=['leiden', 'total_counts'], legend_loc='on data', ncols=1)

In [None]:
sc.pl.umap(adata_de_micheli, color=['SHISA3', 'NIPAL1', 'SAA1', 'COL9A2', 'S100B', 'GFRA2', 'CD300LG'], 
           legend_loc='on data', ncols=2, cmap=magma)

In [None]:
adata_de_micheli.obs_names = [i.split('_')[-1] for i in adata_de_micheli.obs_names]
adata_de_micheli_batch.obs_names = [i.split('_')[-1] for i in adata_de_micheli_batch.obs_names]
micheli_both = list(set(adata_de_micheli.obs_names) & set(adata_de_micheli_batch.obs_names))

In [None]:
len(adata_de_micheli), len(adata_de_micheli_batch), len(micheli_both)

In [None]:
adata_de_micheli_batch

In [None]:
# Annotate every cell with the batch recorded in the matrix-file object, matching on barcode.
# Cells whose barcode is absent from that object keep the 'nan' placeholder.
# A barcode -> batch lookup is used instead of the chained `.obs['batch'].loc[...] = ...`
# assignment, which may write to a temporary copy and leave `.obs` unchanged.
df_batches = adata_de_micheli_batch.obs['batch'].astype(str)
# Barcodes can repeat once the sample prefix has been stripped; keep the first occurrence.
df_batches = df_batches[~df_batches.index.duplicated(keep='first')]
adata_de_micheli.obs['batch'] = (
    pd.Series(adata_de_micheli.obs_names, index=adata_de_micheli.obs_names)
    .map(df_batches)
    .fillna('nan')
    .to_numpy()
)

In [None]:
sc.pl.umap(adata_de_micheli, color=['leiden', 'batch'], ncols=1)

In [None]:
sc.pl.umap(adata_de_micheli, color=['SHISA3', 'NIPAL1', 'SAA1', 'COL9A2', 'S100B', 'GFRA2', 'CD300LG'], 
           legend_loc='on data', ncols=2, cmap=magma)

In [None]:
sc.pp.pca(adata_de_micheli, random_state=seed, n_comps=30)
sce.pp.bbknn(adata_de_micheli, metric='angular')
tk.tl.triku(adata_de_micheli, n_procs=1, random_state=seed)

In [None]:
sc.tl.umap(adata_de_micheli, min_dist=0.1, random_state=seed)

In [None]:
sc.pl.umap(adata_de_micheli, color=['leiden', 'batch'], ncols=1, legend_loc='on data')

In [None]:
sc.tl.leiden(adata_de_micheli, resolution=2, random_state=seed)
sc.pl.umap(adata_de_micheli, color=['leiden', 'batch', 'total_counts'], ncols=1)

In [None]:
sc.pl.umap(adata_de_micheli, color=['CSPG4', 'CD34', 'PDGFRA', 'PDGFRB', 'HLA-B', 'RGS5', 'MYL9', 'NDUFA4L2', 
                                    'PAX7', 'ACTA1', 'AIF1'], 
           legend_loc='on data', ncols=1, cmap=magma)

# Giordani 2020

In [None]:
giordani_dir = os.getcwd() + '/data/giordani'
os.makedirs(giordani_dir, exist_ok=True)

In [None]:
SRA_list = """
SRR8352705
SRR8352706
"""

with open(giordani_dir + '/accession.txt', 'w') as f:
    f.write(SRA_list)

df = pd.DataFrame({'name': ['bamtofastq'], 'technology': ['10xv2'], 'targetnumcells': [5000]})

df.to_csv(giordani_dir + '/metadata.tab', sep='\t', index=None)

In [None]:
import time

In [None]:
# NOTE(review): blocks the kernel for 7200 s (2 h) before the cells below run; the reason is not
# evident from the notebook — confirm it is still needed, since it stalls every Restart & Run All.
time.sleep(7200)

In [None]:
# Download the SRR8352706 BAM (20180917 sample) into giordani_dir with aria2c, up to 16 connections
!cd {giordani_dir} && aria2c -x 16 https://sra-pub-src-1.s3.amazonaws.com/SRR8352706/20180917_uninjured_wt_possorted_genome_bam.bam.1

In [None]:
# Download the SRR8352705 BAM (20171018 sample) into giordani_dir with aria2c, up to 16 connections
!cd {giordani_dir} && aria2c -x 16 https://sra-pub-src-1.s3.amazonaws.com/SRR8352705/20171018_uninjured_wt_possorted_genome_bam.bam.1

In [None]:
# NOTE(review): absolute, machine-specific path; the bamtofastq cells below only run where this directory exists
program_dir = '/media/seth/SETH_DATA/SETH_Alex/Programs/'

In [None]:
# Convert the 20171018 BAM to FASTQ with bamtofastq 1.3.2, run from program_dir; output goes to giordani_dir
!cd {program_dir} && bamtofastq-1.3.2 {giordani_dir}/20171018_uninjured_wt_possorted_genome_bam.bam.1 {giordani_dir}

In [None]:
# Convert the 20180917 BAM to FASTQ with bamtofastq 1.3.2, run from program_dir; output goes to giordani_dir
# NOTE(review): the input here ends in '.bam.1.1' while the download cell fetches a URL ending in '.bam.1' —
# confirm the file name actually present on disk.
!cd {program_dir} && bamtofastq-1.3.2 {giordani_dir}/20180917_uninjured_wt_possorted_genome_bam.bam.1.1 {giordani_dir}

In [None]:
def _paired_fastq_args(fastq_root):
    """Return 'R1 R2 ' path pairs (space separated, trailing space kept) for every '_R1_' file under fastq_root.

    The mate path is derived by replacing '_R1_' with '_R2_' in the file name; its existence is not checked.
    Folders and files are visited in sorted order so the resulting string is the same on every run.
    """
    pairs = []
    for root, dirs, files in os.walk(fastq_root):
        dirs.sort()  # in-place sort makes os.walk descend in a deterministic order
        for r1_file in sorted(files):
            if "_R1_" in r1_file:
                r2_file = r1_file.replace('_R1_', '_R2_')
                pairs.append(f"{root}/{r1_file} {root}/{r2_file} ")
    return ''.join(pairs)


# Argument strings interpolated into the `loompy fromfq` shell commands, one per sample
bamdata_str_20171018 = _paired_fastq_args(giordani_dir + '/bam_20171018')
bamdata_str_20180917 = _paired_fastq_args(giordani_dir + '/bam_20180917')


In [None]:
# Build giordani_20171018.loom with `loompy fromfq` from metadata.tab and the FASTQ pairs in bamdata_str_20171018
# NOTE(review): the index folder is an absolute, machine-specific path.
!loompy fromfq {giordani_dir}/giordani_20171018.loom bamtofastq /media/seth/SETH_DATA/SETH_Alex/Programs/mouse_GRCm38_gencode.v31 {giordani_dir}/metadata.tab {bamdata_str_20171018}

In [None]:
# Build giordani_20180917.loom with `loompy fromfq` from metadata.tab and the FASTQ pairs in bamdata_str_20180917
# NOTE(review): the index folder is an absolute, machine-specific path.
!loompy fromfq {giordani_dir}/giordani_20180917.loom bamtofastq /media/seth/SETH_DATA/SETH_Alex/Programs/mouse_GRCm38_gencode.v31 {giordani_dir}/metadata.tab {bamdata_str_20180917}

In [None]:
# Read both loom files, then de-duplicate the gene names of each object
adata_giordani_20180917 = sc.read_loom(data_dir + '/giordani/giordani_20180917.loom')
adata_giordani_20171018 = sc.read_loom(data_dir + '/giordani/giordani_20171018.loom')

adata_giordani_20180917.var_names_make_unique()
adata_giordani_20171018.var_names_make_unique()

In [None]:
# Merge the two samples into one object (20180917 first, 20171018 second)
adata_giordani = adata_giordani_20180917.concatenate(adata_giordani_20171018)

In [None]:
# Basic QC filtering
# Flag genes whose name starts with 'mt-' as mitochondrial ('mt') and compute the QC metrics, including that group
adata_giordani.var['mt'] = adata_giordani.var_names.str.startswith('mt-')
sc.pp.calculate_qc_metrics(
    adata_giordani,
    qc_vars=['mt'],
    inplace=True,
    percent_top=None,
)

In [None]:
# Number of per-cell (row) totals, i.e. one value per cell
len(np.asarray(np.sum(adata_giordani.X.todense(), axis=1)).ravel())

In [None]:
# Histogram (100 bins) of the summed counts of each cell (row sums of X)
h = plt.hist(np.asarray(np.sum(adata_giordani.X.todense(), axis=1)).ravel().tolist(), bins=100)

In [None]:
# Histogram (10 bins) of the summed counts of each gene (column sums of X)
h = plt.hist(np.asarray(np.sum(adata_giordani.X.todense(), axis=0)).ravel().tolist(), bins=10)

In [None]:
# QC overview: violin plots of the three metrics, then two scatter plots against total counts
sc.pl.violin(
    adata_giordani,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

sc.pl.scatter(adata_giordani, y='pct_counts_mt', x='total_counts')
sc.pl.scatter(adata_giordani, y='n_genes_by_counts', x='total_counts', color='batch')

In [None]:
# Keep cells with at least 450 detected genes and genes detected in at least one cell,
# then keep only cells with less than 15 % mitochondrial counts.
sc.pp.filter_cells(adata_giordani, min_genes=450)
sc.pp.filter_genes(adata_giordani, min_cells=1)
# .copy() makes the subset an independent AnnData rather than a view of the unfiltered object:
# the following cells modify it in place (filter_genes, log1p, ...), as done for the other subsets in this notebook.
adata_giordani = adata_giordani[adata_giordani.obs['pct_counts_mt'] < 15].copy()

In [None]:
# Drop genes left without any cell after the mitochondrial filter, then transform the counts.
# NOTE(review): log1p is applied BEFORE the per-cell normalisation here; the usual scanpy workflow
# normalises first and log-transforms afterwards — confirm that this order is intended.
sc.pp.filter_genes(adata_giordani, min_cells=1)
sc.pp.log1p(adata_giordani)
sc.pp.normalize_per_cell(adata_giordani)

In [None]:
# triku, then a 30-component PCA and a cosine kNN graph.
tk.tl.triku(adata_giordani, n_procs=1, random_state=seed)
sc.pp.pca(adata_giordani, random_state=seed, n_comps=30)
# Neighbourhood size is half the square root of the number of cells. It has to be passed as
# `n_neighbors` (an int): `knn` is a boolean switch in sc.pp.neighbors, so passing the number there
# only acted as "True" and the default neighbourhood size was used instead.
sc.pp.neighbors(adata_giordani, random_state=seed, n_neighbors=int(len(adata_giordani) ** 0.5 // 2), metric='cosine')

In [None]:
# Embed, cluster (Leiden, resolution 1.3) and show clusters, batch and detected genes on the UMAP
sc.tl.umap(adata_giordani, random_state=seed, min_dist=0.3)
sc.tl.leiden(adata_giordani, random_state=seed, resolution=1.3)
sc.pl.umap(
    adata_giordani,
    legend_loc='on data',
    color=['leiden', 'batch', 'n_genes_by_counts'],
)

In [None]:
# Clusters next to the five genes of interest
sc.pl.umap(
    adata_giordani,
    cmap=magma,
    legend_loc='on data',
    color=['leiden', 'Col9a2', 'Shisa3', 'Smim41', 'S100b', 'Sox10'],
)

In [None]:
# Candidate type-B markers; only those present in this dataset's gene names are plotted
B_markers = [
    'Crabp2', 'Dleu7', 'Efnb3', 'Gjb5', 'Grin2b', 'Itgb4', 'Kcnj13',
    'Kcnj2', 'Lgals7', 'Lypd2', 'Mansc4', 'Moxd1', 'Mpzl2', 'Perp', 'Prodh', 'Ptch1',
    'Slc6a13', 'Stra6', 'Tec', 'Tenm2', 'Wnt10a', 'Wnt6', 'Mpz',
]

sc.pl.umap(
    adata_giordani,
    cmap=magma,
    legend_loc='on data',
    color=[gene for gene in B_markers if gene in adata_giordani.var_names],
)

# Determining CD34 / Sox10 / S100 / Col9a2 / Shisa3 populations, and their transmembrane markers

When analysing populations we have observed two/three different patterns of cells based on these 5 markers:
* (A) CD34$^+$/S100$^+$/**Sox10**$^-$/Shisa3$^+$/**Col9a2**$^+$ populations. ***These populations are FAP-like cells.***
* (B) CD34$^+$/S100$^+$/**Sox10**$^+$/Shisa3$^+$?/**Col9a2**$^-$ populations. ***These are Schwann-like cells.***
* (C) CD34$^+$/**S100**$^-$/**Sox10**$^-$/Shisa3$^+$/**Col9a2**$^+$ populations. We will ignore these cells so far.

In this section we are going to isolate these populations and characterize them. In Scott et al. only the Sox10$^-$ / Col9a2$^+$ is available.

**How will we work in this section?**
We are going to run DEGs on each of the possible populations (A, B, C) and get the first 700 DEGs from the 
analysis, for each group. DEGs will be manually filtered on a first screening from the tracksplot. 
After that, we will filter out some of these genes based on the UMAPs: if a gene is too widely expressed or is not really specific for the cluster of interest (it can be for more than 1 cluster from A, B, or C), it *must* be excluded. From there, we will create a more refined version of the list of markers.

Then, we will run each marker list against the rest of datasets, in case some gene has gone unnoticed and is coexpressed in other datasets. Marker lists will be updated accordingly. For example, if marker X was not detected (or was skipped) for De Micheli dataset but it was detected in Oprescu, and it is expressed in both according to the criteria, marker X will be added to a general marker list. With this method we will get a list of markers for each cluster in each dataset.

## Oprescu

In this UMAP we would be interested in clusters 23 and 29, which have Col9a2 and Shisa3 expression. From cluster 29, only a few cells are interesting, and we will keep them.

In [None]:
# Clusters and the genes used to tell the populations apart, two panels per row
sc.pl.umap(
    adata_oprescu_d0,
    cmap=magma,
    ncols=2,
    legend_loc='on data',
    color=['leiden', 'Cd34', 'Sox10', 'S100b', 'Col9a2', 'Shisa3', 'Mpz', 'Ptn'],
)

In [None]:
# Independent copy holding only the cells of Leiden clusters 23 and 29
adata_oprescu_d0_sub = adata_oprescu_d0[adata_oprescu_d0.obs['leiden'].isin(['23', '29']), :].copy()

In [None]:
# Re-process the subset on its own: drop genes with no cells, run triku, 30-component PCA, cosine neighbour graph
sc.pp.filter_genes(adata_oprescu_d0_sub, min_cells=1)
tk.tl.triku(adata_oprescu_d0_sub, n_procs=1, random_state=seed)
sc.pp.pca(adata_oprescu_d0_sub, random_state=seed, n_comps=30)
# NOTE(review): in scanpy's documented signature `knn` is a boolean flag and the neighbourhood size is
# `n_neighbors`; the number passed here looks like it was meant for `n_neighbors` — confirm. Left as is
# because changing the graph would change the 'leiden_sub' labels that are hard-coded further down.
sc.pp.neighbors(adata_oprescu_d0_sub, random_state=seed, knn=len(adata_oprescu_d0_sub) ** 0.5 // 2, metric='cosine')

After cluster isolation, we reject cluster 2 because it is negative for Col9a2, Shisa3, Sox10 and S100b. From that set, we will discern the clusters 0+1 as the type A kranocytes (Sox10$^-$/Col9a2$^+$/S100$^+$), cluster 3 (partially) (Sox10$^+$/Col9a2$^-$/S100$^+$) as the type B kranocytes and cluster 4 (Sox10$^-$/Col9a2$^+$/S100$^-$) as the type C.

In [None]:
# Embed and re-cluster the subset; the new labels go to 'leiden_sub' so the original 'leiden' is kept
sc.tl.umap(adata_oprescu_d0_sub, random_state=seed, min_dist=0.2)
sc.tl.leiden(adata_oprescu_d0_sub, key_added='leiden_sub', random_state=seed, resolution=1)
sc.pl.umap(
    adata_oprescu_d0_sub,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['leiden', 'leiden_sub', 'Cd34', 'Sox10', 'S100b', 'Col9a2', 'Shisa3', 'Mpz'],
)

In [None]:
# Label every cell of the full D0 object: 'Other' by default, A/B/C for the cells of the chosen sub-clusters.
adata_oprescu_d0.obs['Krano_type'] = 'Other'
for _krano_type, _sub_clusters in {'A': ['0', '1'], 'B': ['3'], 'C': ['4']}.items():
    _cells = adata_oprescu_d0_sub.obs_names[adata_oprescu_d0_sub.obs['leiden_sub'].isin(_sub_clusters).values]
    # Single .loc[rows, column] write: the chained obs['Krano_type'].loc[rows] = ... may assign into a
    # temporary copy and leave obs untouched.
    adata_oprescu_d0.obs.loc[_cells, 'Krano_type'] = _krano_type
# One colour per label. NOTE(review): presumably matched to the labels in sorted order (A, B, C, Other) — confirm.
adata_oprescu_d0.uns['Krano_type_colors'] = ["#007ab7", "#b7007a", "#e3b10f", "#bcbcbc",]

In [None]:
# Where the labelled cells fall on the full D0 UMAP
sc.pl.umap(
    adata_oprescu_d0,
    ncols=2,
    cmap=magma,
    color=['Krano_type'],
)

In [None]:
# Rank genes for groups A, B and C, each against the rest of the cells, and show the top 700 per group
sc.tl.rank_genes_groups(
    adata_oprescu_d0,
    groupby='Krano_type',
    reference='rest',
    groups=['A', 'B', 'C'],
)
sc.pl.rank_genes_groups_tracksplot(adata_oprescu_d0, n_genes=700, dendrogram=False)

In [None]:
# Hand-picked marker genes for each population (A, B, C) in the Oprescu dataset.
# A gene may appear in more than one list (e.g. 'Tenm2', 'Col9a2', 'Shisa3').
markers_A_oprescu = ['6030408B16Rik', 'Agt', 'Arhgdig', 
                     'Cd300lg', 'Cd38', 'Cdh19', 'Cdkn2b', 'Ch25h', 'Col26a1', 'Col9a2', 
                     'Dlk1', 'Fetub', 'Gfra2', 'Gli1',  'Gm11681', 'Greb1', 'Gria1', 'Grin2b', 
                     'Kank4', 'Kcnb2', 'Mpzl2', 'Ngfr', 'Nipal1', 'Plxdc1', 'Rasgrp2', 'Reln', 
                     'Saa1', 'Sdc3', 'Shisa3', 'Sipa1l1', 'Sox9', 'Tenm2', 'Trpm6', ]

markers_B_oprescu = ['9530059O14Rik', 'Aatk', 'Cldn19', 'Cmtm5', 'Ddn', 'Dusp15', 'Elovl7', 
                     'Ephb6', 'Fa2h', 'Fxyd3', 'Gjb1', 'Gjc3', 'Gpr37l1', 'Hepacam', 
                     'Kcna1', 'Kcnk1', 'Kif1a', 'Mag', 'Mal', 'Mansc4', 'Moxd1', 'Mpz', 'Mt3', 'Nes', 
                     'Pllp', 'Plp1', 'Pou3f1', 'Prx', 'Rimklb', 'S100b', 'Sbspon', 
                     'Sfrp5', 'Slc36a2', 'Slco4a1', 'Smco3', 'Snca', 'Sox10',  
                     'Tenm2', 'Tspan15', 'Ugt8a', 'Vat1l', 'Wnt10a', 'Wnt6', ]

markers_C_oprescu = ['Ano1', 'Cdkn2a', 'Cdkn2b', 'Clic6', 'Col9a2', 'Gjb5', 'Nipal1', 
                     'Rasgrf2', 'Shisa3', 'Spata18', 'Tenm2', ]

In [None]:
# Visual check of the type-B list: labels first, then one panel per marker
sc.pl.umap(
    adata_oprescu_d0,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['Krano_type'] + markers_B_oprescu,
)

## Scott

In this dataset type A kranos (Sox10$^-$/Col9a2$^+$/S100$^+$) appear merged at the bottom of cluster 6, and top of 13. We will simply isolate them. Also, type B kranos appear at the bottom of cluster 17.

In [None]:
# Clusters and the genes used to tell the populations apart, two panels per row
sc.pl.umap(
    adata_scott_d0,
    cmap=magma,
    ncols=2,
    legend_loc='on data',
    color=['leiden', 'Cd34', 'Sox10', 'S100b', 'Col9a2', 'Shisa3', 'Ptn'],
)

In [None]:
# Independent copy holding only the cells of Leiden clusters 6 and 17
adata_scott_d0_sub = adata_scott_d0[adata_scott_d0.obs['leiden'].isin(['6', '17']), :].copy()

In [None]:
# Re-process the subset on its own: drop genes with no cells, run triku, 30-component PCA, cosine neighbour graph
sc.pp.filter_genes(adata_scott_d0_sub, min_cells=1)
tk.tl.triku(adata_scott_d0_sub, n_procs=1, random_state=seed)
sc.pp.pca(adata_scott_d0_sub, random_state=seed, n_comps=30)
# NOTE(review): in scanpy's documented signature `knn` is a boolean flag and the neighbourhood size is
# `n_neighbors`; the number passed here looks like it was meant for `n_neighbors` — confirm. Left as is
# because changing the graph would change the 'leiden_sub' labels that are hard-coded further down.
sc.pp.neighbors(adata_scott_d0_sub, random_state=seed, knn=len(adata_scott_d0_sub) ** 0.5 // 2, metric='cosine')

When selecting clusters 6 and 17 from the dataset, the new clustering shows that most of the cells of interest are located in cluster 6 (B) and clusters 5 and 7 (A). This assignment is not exact (part of cluster 6 should belong to A), but I could not subdivide these clusters any further.

In [None]:
# Embed and re-cluster the subset; the new labels go to 'leiden_sub' so the original 'leiden' is kept
sc.tl.umap(adata_scott_d0_sub, random_state=seed, min_dist=0.05)
sc.tl.leiden(adata_scott_d0_sub, key_added='leiden_sub', random_state=seed, resolution=1.5)
sc.pl.umap(
    adata_scott_d0_sub,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['leiden', 'leiden_sub', 'Cd34', 'S100b', 'Col9a2', 'Shisa3', 'Lypd2', 'Itgb4'],
)

In [None]:
# Label every cell of the full D0 object: 'Other' by default, A/B for the cells of the chosen sub-clusters.
adata_scott_d0.obs['Krano_type'] = 'Other'
for _krano_type, _sub_clusters in {'A': ['5', '7'], 'B': ['6']}.items():
    _cells = adata_scott_d0_sub.obs_names[adata_scott_d0_sub.obs['leiden_sub'].isin(_sub_clusters).values]
    # Single .loc[rows, column] write: the chained obs['Krano_type'].loc[rows] = ... may assign into a
    # temporary copy and leave obs untouched.
    adata_scott_d0.obs.loc[_cells, 'Krano_type'] = _krano_type
# One colour per label. NOTE(review): presumably matched to the labels in sorted order (A, B, Other) — confirm.
adata_scott_d0.uns['Krano_type_colors'] = ["#007ab7", "#b7007a", "#bcbcbc"]

In [None]:
# Where the labelled cells fall on the full D0 UMAP
sc.pl.umap(
    adata_scott_d0,
    ncols=2,
    cmap=magma,
    color=['Krano_type'],
)

In [None]:
# Rank genes for groups A and B, each against the rest of the cells, and show the top 700 per group
sc.tl.rank_genes_groups(
    adata_scott_d0,
    groupby='Krano_type',
    reference='rest',
    groups=['A', 'B'],
)
sc.pl.rank_genes_groups_tracksplot(adata_scott_d0, n_genes=700, dendrogram=False)

In [None]:
# Hand-picked marker genes for populations A and B in the Scott dataset
markers_A_scott = ['6030408B16Rik', 'Adamtsl2', 'Aspa', 'Col9a2',  'Dlk1', 'Fam213a', 
                   'Gm3336', 'Gprasp2', 'Grin2b', 'Hmgcs2', 'Kcnk2', 'Pla2g7', 'Plxnc1', 
                   'Rgs17', 'Saa1', 'Sbspon', 'Shisa3', 'Sipa1l1', 'Slc27a1', 'Stra6', 
                   'Thrsp', 'Trpm6',  ]

markers_B_scott = ['Col23a1', 'Itga6', 'Itgb4', 'Lypd2', 'Moxd1', 'Mpzl2', 'Perp', 'Prodh', 
                   'Ptch1', 'Slc2a1', 'Sostdc1', 'Tenm2',]

In [None]:
# Visual check of the type-A list: labels first, then one panel per marker
sc.pl.umap(
    adata_scott_d0,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['Krano_type'] + markers_A_scott,
)

## De Micheli

In this dataset we seem to find the type A kranocytes (Sox10$^-$/Col9a2$^+$/S100$^+$) within clusters 2 and 6 (we assume they are S100$^+$ because there is a general low expression within the clusters), and a set of cells near cluster 15 (Sox10$^+$/Col9a2$^-$/S100$^+$) as the type B kranocytes. Clusters 15 and 17 are, respectively, Schwann and Neural/Glial cells. However, the cells near the Schwann cluster are Cd34$^+$, Shisa3$^+$ and have a lower expression of Sox10 and Mpz, which might indicate a distinct cell type related to Schwann cells. These findings are more or less consistent with the B type from Oprescu and Scott.

In [None]:
# Clusters and the genes used to tell the populations apart, two panels per row
sc.pl.umap(
    adata_de_micheli_mouse_d0,
    cmap=magma,
    ncols=2,
    legend_loc='on data',
    color=['leiden', 'Cd34', 'Sox10', 'S100b', 'Col9a2', 'Shisa3', 'Mpz', 'Ptn'],
)

In [None]:
# Independent copy holding only the cells of Leiden clusters 2, 6, 15 and 17
adata_de_micheli_mouse_d0_sub = adata_de_micheli_mouse_d0[
    adata_de_micheli_mouse_d0.obs['leiden'].isin(['2', '6', '15', '17']), :
].copy()

In [None]:
# Re-process the subset on its own: drop genes with no cells, run triku, 30-component PCA, cosine neighbour graph
sc.pp.filter_genes(adata_de_micheli_mouse_d0_sub, min_cells=1)
tk.tl.triku(adata_de_micheli_mouse_d0_sub, n_procs=1, random_state=seed)
sc.pp.pca(adata_de_micheli_mouse_d0_sub, random_state=seed, n_comps=30)
# NOTE(review): in scanpy's documented signature `knn` is a boolean flag and the neighbourhood size is
# `n_neighbors`; the number passed here looks like it was meant for `n_neighbors` — confirm. Left as is
# because changing the graph would change the 'leiden_sub' labels that are hard-coded further down.
sc.pp.neighbors(adata_de_micheli_mouse_d0_sub, random_state=seed, knn=len(adata_de_micheli_mouse_d0_sub) ** 0.5 // 2, metric='cosine')

After reclustering, we see that subcluster 5 are the type A kranocytes, while subcluster 8 are the type B kranocytes.

In [None]:
# Embed and re-cluster the subset; the new labels go to 'leiden_sub' so the original 'leiden' is kept
sc.tl.umap(adata_de_micheli_mouse_d0_sub, random_state=seed, min_dist=0.2)
sc.tl.leiden(adata_de_micheli_mouse_d0_sub, key_added='leiden_sub', random_state=seed, resolution=1.3)
sc.pl.umap(
    adata_de_micheli_mouse_d0_sub,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['leiden', 'leiden_sub', 'Cd34', 'Sox10', 'S100b', 'Col9a2', 'Shisa3'],
)

In [None]:
# Label every cell of the full D0 object: 'Other' by default, A/B for the cells of the chosen sub-clusters.
adata_de_micheli_mouse_d0.obs['Krano_type'] = 'Other'
for _krano_type, _sub_clusters in {'A': ['5'], 'B': ['8']}.items():
    _cells = adata_de_micheli_mouse_d0_sub.obs_names[
        adata_de_micheli_mouse_d0_sub.obs['leiden_sub'].isin(_sub_clusters).values
    ]
    # Single .loc[rows, column] write: the chained obs['Krano_type'].loc[rows] = ... may assign into a
    # temporary copy and leave obs untouched.
    adata_de_micheli_mouse_d0.obs.loc[_cells, 'Krano_type'] = _krano_type
# One colour per label. NOTE(review): presumably matched to the labels in sorted order (A, B, Other) — confirm.
adata_de_micheli_mouse_d0.uns['Krano_type_colors'] = ["#007ab7", "#b7007a", "#bcbcbc"]

In [None]:
# Where the labelled cells fall on the full D0 UMAP
sc.pl.umap(
    adata_de_micheli_mouse_d0,
    ncols=2,
    cmap=magma,
    color=['Krano_type'],
)

In [None]:
# Rank genes for groups A and B, each against the rest of the cells, and show the top 700 per group
sc.tl.rank_genes_groups(
    adata_de_micheli_mouse_d0,
    groupby='Krano_type',
    reference='rest',
    groups=['A', 'B'],
)
sc.pl.rank_genes_groups_tracksplot(adata_de_micheli_mouse_d0, n_genes=700, dendrogram=False)

In [None]:
# Hand-picked marker genes for populations A and B in the De Micheli (mouse) dataset
markers_A_de_micheli = ['6030408B16Rik', 'Adamtsl2', 'Bmp7', 'Capn6', 
                        'Col18a1', 'Col9a2', 'Dlk1', 'Fetub', 'Gfra2', 'Gli1', 'Gm11681', 
                        'Gpld1', 'Inhba', 'Mdfi', 'Mest', 'Morc4', 'Nipal1', 'Plppr4', 
                        'Rgs17', 'Saa1', 'Saa2', 'Shisa3', 'Sorcs2', 'Sox9', 'Sphkap', 
                        'Syndig1', 'Trpm6']

markers_B_de_micheli = ['Cldn1', 'Crabp2', 'Dleu7', 'Efnb3', 'Gfra3', 'Gjb5', 'Grin2b', 
                        'Kcnj13', 'Kcnj2', 'Lgals7', 'Lypd2', 'Mansc4', 'Moxd1', 
                        'Perp', 'RP23-291B1.2', 'Shisa3', 'Slc6a13', 'Spink1', 'Srcin1', 'Tec', 'Tenm2', 
                        'Trim46', 'Wnt10a', 'Wnt6']

In [None]:
# Visual check of the type-B list: labels first, then one panel per marker
sc.pl.umap(
    adata_de_micheli_mouse_d0,
    cmap=magma,
    ncols=3,
    legend_loc='on data',
    color=['Krano_type'] + markers_B_de_micheli,
)

## Filtering common markers

Now that all markers are filtered, we are going to plot all A/B markers in all datasets. If we see that the pattern is correct in 2-3 datasets, then we add it to the list. 

We will try to create a conservative list, that is, a list where markers are as specific to the designed regions as possible. This does not mean that a marker expressed in other cell types is not valid, but we will probably exclude it from this list, so as to have specific markers of these putative cell types, and not others.

In [None]:
# All A markers combined. Some of these genes will be excluded because they are only expressed in one dataset,
# or are not as specific as they should.

for i in sorted(set(markers_A_de_micheli + markers_A_oprescu + markers_A_scott)):
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False)
        except (KeyError, ValueError) as _err:
            # The list merges markers from three datasets, so a gene can be absent from one of them:
            # report it and blank that panel instead of aborting the whole cell.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

In [None]:
# All B markers combined. Some of these genes will be excluded because they are only expressed in one dataset,
# or are not as specific as they should.

for i in sorted(set(markers_B_de_micheli + markers_B_oprescu + markers_B_scott)):
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False)
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

## Plotting A and B filtered markers

These markers should be either specific to the cluster of interest or highly expressed in it.

In [None]:
# Filtered marker lists kept after comparing the per-dataset candidates across the three datasets
A_markers = ['6030408B16Rik', 'Adamtsl2', 'Cdh19', 'Cdkn2b', 'Col18a1', 'Col26a1', 
             'Col9a2', 'Dlk1', 'Fetub', 'Gfra2', 'Gm11681', 'Gpld1', 'Greb1', 'Gria1', 
             'Kcnb2', 'Kcnk2', 'Mpzl2', 'Ngfr', 'Plppr4', 
             'Ptgfr', 'Rgs17', 'Saa1', 'Saa2', 'Shisa3', 'Sipa1l1', 'Sorcs2', 'Sox9', 
             'Sphkap', 'Syndig1', 'Trpm6']
# NOTE: this rebinds B_markers, which is also assigned earlier in the notebook (Giordani section)
B_markers = ['Cldn1', 'Crabp2', 'Dleu7', 'Efnb3', 'Gjb5', 'Grin2b', 'Itgb4', 'Kcnj13', 
             'Kcnj2', 'Lgals7', 'Lypd2', 'Mansc4', 'Moxd1', 'Mpzl2', 'Perp', 'Prodh', 'Ptch1', 
             'Slc6a13', 'Stra6', 'Tec', 'Tenm2', 'Wnt10a', 'Wnt6']

In [None]:
# One row of three panels (De Micheli, Oprescu, Scott) for the population labels and each filtered A marker
for i in ['Krano_type'] + A_markers:
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a key missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

In [None]:
# One row of three panels (De Micheli, Oprescu, Scott) for the population labels and each filtered B marker
for i in ['Krano_type'] + B_markers:
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False)
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a key missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

## Detecting membrane markers

The next step is to mark which genes are expressed in the membrane, so that they can be selected via FACS (or whatever extra analysis that could be done). 
To do that, we will download the Swiss-Prot proteome table, which includes the cellular location of each protein. 

The table can be downloaded from here:
https://www.uniprot.org/uniprot/?query=*&fil=organism%3A%22Mus+musculus+%28Mouse%29+%5B10090%5D%22+AND+reviewed%3Ayes

The columns to be selected are "Gene names" and "Subcellular location".

In [None]:
# Load the table
uniprot_raw = pd.read_csv(data_dir+'/Uniprot_table.tab', sep='\t')

# Process table to have a gene per row:
list_genes = []
list_locs = []

for gene_str, loc_str in zip(uniprot_raw['Gene names'], uniprot_raw['Subcellular location [CC]']):
    # Entries without gene names are read as NaN (a float), which cannot be split: skip them
    if not isinstance(gene_str, str):
        continue
    # Missing locations are NaN as well; store them as an empty string
    loc_str = loc_str.replace('SUBCELLULAR LOCATION:', '') if isinstance(loc_str, str) else ''

    # 'Gene names' holds several space-separated names per entry; each one gets the entry's location
    for gene in gene_str.split(' '):
        list_genes.append(gene)
        list_locs.append(loc_str)

uniprot_df = pd.DataFrame({'Gene': list_genes, 'Location': list_locs}).set_index('Gene', drop=True)
# One row per gene, concatenating the locations of every entry that lists it. Aggregating per gene is
# required here: transform('sum') followed by drop_duplicates() de-duplicates on the Location text alone,
# which discards every gene whose location string equals that of an earlier gene.
uniprot_df = uniprot_df.groupby(level=0).agg(''.join)

In [None]:
# Subcellular location of every filtered marker; '' when the gene is not in the UniProt table.
# The column is built in one pass: writing through df['Location'].iloc[i] = ... is chained indexing
# and may assign into a temporary copy, leaving the table empty.
df_A_markers = pd.DataFrame({
    'Gene': A_markers,
    'Location': [uniprot_df.loc[A, 'Location'] if A in uniprot_df.index else '' for A in A_markers],
})

df_B_markers = pd.DataFrame({
    'Gene': B_markers,
    'Location': [uniprot_df.loc[B, 'Location'] if B in uniprot_df.index else '' for B in B_markers],
})

In [None]:
# Show the full text of the Location column. None means "no limit"; the former -1 sentinel is
# deprecated and rejected by recent pandas versions.
pd.set_option('display.max_colwidth', None)

In [None]:
df_A_markers 

In [None]:
df_B_markers

In [None]:
# Glial markers that change
for i in ['Krano_type'] + ['Ngfr', 'Gpc1', 'Tubb2b', 'Tubb5', 'Cryab', 'Tuba1a', 'Tnc', 'Plk2', 
                          'Tgfbi', 'Lgals1', 'Lgals3', 'Syt4', 'Ucn2', 'Gas1', 'Mmp19', 'Vim', 'Arbp1', 
                          'Col18a1', 'Cpe', 'Uchl1', 'Gadd45a', 'Igfbp5', 'Atf3', 'Tmem158', 
                          'Apod', 'Psap', 'Stmn1', 'Epha5', 'Entpd2', 'Nav2', 'Oaf', 'Fgf5']:
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

In [None]:
# FAP markers that change
for i in ['Krano_type'] + ['Timp1', 'Sh3bgrl3', 'Lgals1', 'Spp1', 'Inhba', 'Ctgrc1', 'Ccl9', 'Ank', 'Tgfbi', 
                           'Fn1', 'Tnc', 'Ier3', 'Il11', 'Trf', 'Aldh1a3', 'Bgn', 'Mgp', 'Igfbp4', 
                           'Lgals3', 'Crif1', 'Serpine2', 'Scd1', 'Igfbp7', 'Thy1', 'Pdgfa', 'Postn', 
                           'Ptgs2', '1810011O10Rik', 'Rdh10', 'Neat1', 'Igf1', 'Sdc1', 'Cxcl14', 'Cxcl5']:
    print(i)
    fig, axs = plt.subplots(1, 3, figsize=(18, 4))
    for _ax, _adata in zip(axs, (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=i, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'  not plotted: {_err}')
            _ax.set_axis_off()
    plt.show()

# Running datasets against markers

## Kumar et al 2017 PC1 and PC2

In [None]:
# Grid with one row per key and one column per dataset (De Micheli, Oprescu, Scott)
list_genes = ['Krano_type'] + ['Rgs5', 'Acta2', 'Cxcl1', 'Cxcl2', 'Cxcl5', 'Il6', 'Il1b']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for _ax, _adata in zip(axs[idx], (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=gene, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'{gene} not plotted: {_err}')
            _ax.set_axis_off()

## Kumar et al 2017 Capillary proinflammatory/capillary and contractile/arteriolar PCs

In [None]:
# Grid with one row per key and one column per dataset (De Micheli, Oprescu, Scott)
list_genes = ['Krano_type'] + ['Cd274', 'Dlk1', 'Nt5e'] # Cd73 = Nt5e

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for _ax, _adata in zip(axs[idx], (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=gene, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'{gene} not plotted: {_err}')
            _ax.set_axis_off()

## Birbrair PCs

In [None]:
# Grid with one row per key and one column per dataset (De Micheli, Oprescu, Scott)
list_genes = ['Krano_type'] + ['Pdgfrb', 'Mcam', 'Cspg4', 'Nes'] # Cd146 = Mcam, Ng2 = Cspg4

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for _ax, _adata in zip(axs[idx], (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=gene, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'{gene} not plotted: {_err}')
            _ax.set_axis_off()

## Camps ISC

* ISC1: Ly6c1, Cd55
* ISC2: Gdf10, Meox2, F3/Cd142
* ISC3: Thbs4, Fbln7, Sdc1

In [None]:
# Grid with one row per key and one column per dataset (De Micheli, Oprescu, Scott)
list_genes = ['Krano_type'] + ['Cd55', 'F3', 'Sdc1']

fig, axs = plt.subplots(len(list_genes), 3, figsize=(18, 4 * len(list_genes)))

for idx, gene in enumerate(list_genes):
    for _ax, _adata in zip(axs[idx], (adata_de_micheli_mouse_d0, adata_oprescu_d0, adata_scott_d0)):
        try:
            sc.pl.umap(_adata, color=gene, cmap=magma, ax=_ax, show=False, legend_loc='on data')
        except (KeyError, ValueError) as _err:
            # Handled per dataset: a gene missing from one dataset no longer drops the remaining panels
            # of the row, and the failure is reported instead of being silently swallowed.
            print(f'{gene} not plotted: {_err}')
            _ax.set_axis_off()

# Beautiful figs

In [None]:
# Folder for the exported figures; exist_ok avoids the separate existence check (same idiom as for giordani_dir)
os.makedirs(fig_dir + 'clusters/', exist_ok=True)

In [None]:
# MPL config
# 'normal' is not a font family, so matplotlib warned on every text draw and fell back to its default;
# 'sans-serif' requests that default family explicitly.
font = {'family' : 'sans-serif',
        'weight' : 'light',
        'size'   : 15}

mpl.rc('font', **font)

In [None]:
def makefig(list_genes, name_order=None, adata_list=[adata_oprescu_d0, adata_scott_d0, adata_de_micheli_mouse_d0], 
           list_datasets = ['Oprescu', 'Scott', 'De Micheli']):
    """Draw a grid of UMAPs (one row per key, one column per dataset) and save it as PNG and PDF.

    Parameters
    ----------
    list_genes : list of str
        Keys passed to ``sc.pl.umap(color=...)``; one row each. Also used, joined by '-', as the file name.
    name_order : list of str, optional
        Order of the entries in the unified legend; names not present in a row are skipped.
        When None, entries keep the order in which they were collected.
    adata_list : list
        Objects to plot, one column each. The default list is bound when the function is defined,
        so re-run this cell after re-creating any of those objects.
    list_datasets : list of str
        Titles written above the first axes of the grid, in order.

    Each row gets a single legend, placed to the right of its last panel, that merges the legend
    entries of all panels of that row. Files are written to ``fig_dir + 'clusters/'``.
    """
    n_cols = len(adata_list)
    # squeeze=False keeps axs two-dimensional even with a single key or a single dataset
    fig, axs = plt.subplots(len(list_genes), n_cols, figsize=(6 * n_cols, 4 * len(list_genes)), squeeze=False)

    for idx, gene in enumerate(list_genes):
        for adata_idx, adata in enumerate(adata_list):
            sc.pl.umap(adata, color=gene, cmap=magma, ax=axs[idx][adata_idx], show=False, legend_fontsize=11)

    for ax_idx, ax in enumerate(axs.ravel()):
        is_first_col = ax_idx % n_cols == 0
        is_last_col = ax_idx % n_cols == n_cols - 1

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        if is_first_col:
            ax.spines['left'].set_visible(False)

        # Only the first panel of a row carries the key name; only the first axes carry a dataset title
        ax.set_xlabel('')
        ax.set_ylabel(list_genes[ax_idx // n_cols] if is_first_col else '')
        ax.set_title(list_datasets[ax_idx] if ax_idx < len(list_datasets) else '')

        # legend unification: entries are accumulated along the row and drawn once, on its last panel
        if is_first_col:
            dict_legends = {}

        legend = ax.get_legend()
        if legend is None:
            # panels without a legend (e.g. continuous colouring) have nothing to merge
            continue

        # `legend_handles` replaced `legendHandles` in newer matplotlib releases; support both
        handles = getattr(legend, 'legend_handles', None)
        if handles is None:
            handles = legend.legendHandles
        names = [text.get_text() for text in legend.get_texts()]
        dict_legends.update(zip(names, handles))

        if not is_last_col:
            legend.remove()
        elif name_order is None:
            ax.legend(list(dict_legends.values()), list(dict_legends.keys()),
                      bbox_to_anchor=(1.05, 1), frameon=False, prop={'size': 11})
        else:
            ordered_names = [name for name in name_order if name in dict_legends]
            ax.legend([dict_legends[name] for name in ordered_names], ordered_names,
                      bbox_to_anchor=(1.05, 1), frameon=False, prop={'size': 11})

    out_dir = fig_dir + 'clusters/'
    os.makedirs(out_dir, exist_ok=True)  # do not depend on another cell having created the folder
    out_stem = out_dir + '-'.join(list_genes)

    plt.tight_layout()
    plt.savefig(out_stem + '.png', dpi=500)
    plt.savefig(out_stem + '.pdf')
    


In [None]:
# Order of the entries in the unified figure legends: cell types first, then the kranocyte labels
name_order = [
    'Endothelial', 'Pericyte', 'Fibroblast', 'FAP', 'Tenocyte', 'Neural cell',
    'Myonuclei', 'MuSC', 'Immune', 'APC / Proliferative ICs',
    'Monocyte', 'Neutrophil', 'Myeloid',
    'A', 'B', 'C', 'Other',
]

In [None]:
# Cell-type annotation next to the kranocyte labels, for the three default datasets
makefig(
    ['cell_type', 'Krano_type'],
    name_order=name_order,
)

In [None]:
# One figure per gene pair, each preceded by the kranocyte labels
for _gene_pair in (['Cd34', 'S100b'], ['Pdgfrb', 'Sox10'], ['Ngfr', 'Cspg4'], ['Col9a2', 'Shisa3']):
    makefig(['Krano_type'] + _gene_pair, name_order=name_order)

In [None]:
# One figure per marker pair, each preceded by the kranocyte labels
for _gene_pair in (
    ['6030408B16Rik', 'Col18a1'],
    ['Col9a2', 'Cldn1'],
    ['Dlk1', 'Fetub'],
    ['Gpld1', 'Grin2b'],
    ['Kcnb2', 'Lypd2'],
    ['Mansc4', 'Nipal1'],
    ['Saa1', 'Shisa3'],
    ['Tenm2', 'Trpm6'],
):
    makefig(['Krano_type'] + _gene_pair, name_order=name_order)

In [None]:
# Four genes across the Oprescu objects, one column per object, titled with the labels below
makefig(
    ['Tnc', 'Tnmd', 'Nipal1', 'Dlk1'],
    name_order=name_order,
    list_datasets=['D0', 'D2', 'D3.5', 'D5', 'D10', 'D21'],
    adata_list=[
        adata_oprescu_d0, adata_oprescu_d2, adata_oprescu_d35,
        adata_oprescu_d5, adata_oprescu_d10, adata_oprescu_d21,
    ],
)