In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import shutil
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
import subprocess
from scipy.sparse import csr_matrix
from IPython.display import display, HTML
import mygene as mg

from tqdm import tqdm
# from tqdm.notebook import tqdm

from bokeh.io import show, output_notebook, reset_output

from scipy.sparse import csr_matrix, csc_matrix

# Bokeh setup (both functions are imported from bokeh.io above): reset_output()
# is called first, then output_notebook() — presumably to clear any earlier
# output configuration before directing bokeh plots to the notebook; confirm
# against the bokeh documentation.
reset_output()
output_notebook()

In [None]:
from cellassign import assign_cats

In [None]:
# Custom continuous colormap used for the expression plots: sample 80 colours
# from matplotlib's 'magma', replace the lowest one with light grey, and build
# the final colormap from the first 65 samples only.
magma_base = plt.get_cmap('magma')
magma_colors = [magma_base(position) for position in np.linspace(0, 1, 80)]
magma_colors[0] = (0.88, 0.88, 0.88, 1)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_colors[:65])

# Seed handed to the `random_state` arguments of the scanpy calls in this notebook.
seed = 0

In [None]:
# Render every matplotlib figure at 100 dpi.
mpl.rcParams.update({'figure.dpi': 100})

## Oprescu adata load

In [None]:
# Root folder for input data (kept with its trailing slash, as before).
data_dir = 'data/'
# Sub-folder holding the Oprescu dataset. os.path.join is used instead of string
# concatenation because data_dir already ends with '/', so appending '/oprescu'
# yielded a doubled separator ('data//oprescu').
oprescu_dir = os.path.join(data_dir, 'oprescu')

In [None]:
# Read the Oprescu dataset from its loom file inside oprescu_dir.
oprescu_loom_path = oprescu_dir + '/adata_oprescu.loom'
adata_oprescu = sc.read_loom(oprescu_loom_path)

In [None]:
# Batch label = the part of each cell name that precedes the first underscore.
adata_oprescu.obs['batch'] = [
    cell_name.partition('_')[0] for cell_name in adata_oprescu.obs_names
]

In [None]:
# QC metrics: flag mitochondrial genes (names starting with 'mt-') in var['mt'],
# then have scanpy compute the QC metrics in place, using 'mt' as a QC variable.
is_mito_gene = adata_oprescu.var_names.str.startswith('mt-')
adata_oprescu.var['mt'] = is_mito_gene
sc.pp.calculate_qc_metrics(
    adata_oprescu,
    qc_vars=['mt'],
    percent_top=None,
    inplace=True,
)

In [None]:
# Visual QC: distribution of the three per-cell metrics, then total counts
# against mitochondrial percentage and against number of detected genes
# (the latter coloured by batch).
qc_metric_keys = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt']
sc.pl.violin(adata_oprescu, qc_metric_keys, jitter=0.4, multi_panel=True)

sc.pl.scatter(adata_oprescu, y='pct_counts_mt', x='total_counts')
sc.pl.scatter(adata_oprescu, y='n_genes_by_counts', x='total_counts', color='batch')

In [None]:
# Keep only cells that pass scanpy's min_genes filter with a threshold of 150;
# the result is not captured, so adata_oprescu itself is filtered.
min_genes_per_cell = 150
sc.pp.filter_cells(adata_oprescu, min_genes=min_genes_per_cell)

In [None]:
# One independent AnnData copy per batch label, in the order listed below.
batch_labels = ('Noninjured', 'X0.5.DPI', 'X2.DPI', 'X3.5.DPI', 'X5.DPI', 'X10.DPI', 'X21.DPI')
(
    adata_oprescu_d0,
    adata_oprescu_d05,
    adata_oprescu_d2,
    adata_oprescu_d35,
    adata_oprescu_d5,
    adata_oprescu_d10,
    adata_oprescu_d21,
) = (
    adata_oprescu[adata_oprescu.obs['batch'] == label].copy()
    for label in batch_labels
)

In [None]:
# Per-batch preprocessing, feature selection, embedding and clustering.
# A dedicated loop variable is used so that `adata_oprescu` (the full dataset)
# is not rebound to the last subset once the loop finishes, which would make
# the per-batch split cell above fail when re-run.
for adata_batch in [adata_oprescu_d0, adata_oprescu_d05, adata_oprescu_d2, adata_oprescu_d35,
                    adata_oprescu_d5, adata_oprescu_d10, adata_oprescu_d21]:
    # Show the subset that is about to be processed.
    print(adata_batch)
    sc.pp.filter_genes(adata_batch, min_counts=1)
    sc.pp.normalize_per_cell(adata_batch)
    sc.pp.log1p(adata_batch)

    sc.pp.pca(adata_batch, random_state=seed, n_comps=30)
    # Neighbourhood size scales with the subset: floor(sqrt(number of cells) / 2).
    sc.pp.neighbors(adata_batch, random_state=seed, n_neighbors=int(len(adata_batch) ** 0.5 // 2), metric='cosine')
    tk.tl.triku(adata_batch)

    sc.tl.umap(adata_batch, min_dist=0.1, random_state=seed)
    sc.tl.leiden(adata_batch, resolution=1, random_state=seed)
    sc.pl.umap(adata_batch, color=['leiden', 'batch', 'n_counts'], legend_loc='on data')

In [None]:
# Marker genes per cell category (category name -> list of gene symbols).
# Some genes appear under more than one category.
dict_cats_general = {
    'Lum+ FAP': ['Apod', 'Lum', 'Ly6a', 'Pdgfra', 'Mfap5', 'Dcn'],
    'Prg4+ FAP': ['Prg4', 'Fbn1', 'Ly6a', 'Pdgfra', 'Mfap5', 'Dcn'],
    'Endothelial': ['Pecam1', 'Kdr', 'Fabp4', 'Cav1', 'Cdh5', 'Tek'],
    'Pericyte': ['Rgs5', 'Notch3', 'Myl9', 'Ndufa4l2', 'Itga7', 'Myh11', 'Pln', 'Abcc9'],
    'Satellite cell': ['Pax7', 'Myod1', 'Chodl', 'Vcam1', 'Sdc4', 'Myf5'],
    'Myonuclei': ['Tnnc2', 'Myh4', 'Acta1', 'Ckm', 'Tpm2', 'Eno3', 'Slc25a4'],
    'Tenocyte': ['Scx', 'Tnmd', 'Mkx', 'Col12a1', 'Col1a1', 'Tnc', 'Fmod', 'Comp'],
    'Neural cell': ['Mpz', 'Ptn', 'S100b'],
    'Glial cell': ['Plp1', 'Kcna1', 'S100b', 'Mbp', 'Mpz'],
    'Guide cell': ['Ncam2'],
    'Immune': ['H2-Aa', 'Cd74'],
    'APC': ['H2-Eb1', 'H2-Ab1'],
    'APC / Proliferative ICs': ['Mki67', 'Top2a'],
    'B cell': ['Cd19', 'Cd22', 'Ms4a1', 'Ptprc'],
    'T cell': ['Cd3d', 'Cd3e', 'Cd3g', 'Cd8a', 'Cd4', 'Ptprc', 'Cd28'],
    'Monocyte': ['Csf1r', 'Adgre1'],
    'Macrophage': ['Itgam', 'Csf1r', 'Adgre1', 'Itgb1', 'Cd68'],
    'Myeloid': ['Clec12a', 'Acp5'],
    'Neutrophil': ['S100a8', 'S100a9', 'Itgam', 'Cd14'],
    'Epcam+': ['Epcam'],
}

In [None]:
# Marker gene lists for the populations labelled "A" and "B" in this notebook.
A_markers = [
    '6030408B16Rik',
    'Col9a2',
    'Dlk1',
    'Shisa3',
    'Saa1',
    'Nipal1',
]

# Additional "A" candidates kept apart from A_markers, each with the author's note.
A_markers_extra = [
    'Kcnk2',     # Not specific enough
    'Adamtsl2',  # Not specific enough
    'Cst6',      # Teno marker
    'Sorcs2',    # Not specific enough
    'Susd5',     # Not specific enough
    'Rgs17',     # Not specific enough
    'Gfra2',     # Marks immune population
]

B_markers = [
    'Lypd2', 'Wnt6', 'Cldn1', 'Moxd1', 'Mansc4', 'Dleu7', 'Efnb3',
    'Stra6', 'Sbspon', 'Ace2', 'Hcn4', 'Cldn22', 'Wnt10a', 'Ocln',
]

### A_markers

In [None]:
# Creates a 4-row, 1-column grid of axes in a 1x4 inch figure.
# NOTE(review): fig/axs are not used by the cells visible here — possibly a leftover; confirm.
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(1, 4))


In [None]:
sc.pl.umap(adata_oprescu_d0, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d0.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d05, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d05.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d2, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d2.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d35, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d35.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d5, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d5.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d10, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d10.var_names], legend_loc='on data', ncols=3, cmap=magma)

In [None]:
sc.pl.umap(adata_oprescu_d21, color=['leiden', 'Tnc', 'Tnmd', 'Pdgfra', 'Lum', 'Prg4', 'Pdpn'] + [i for i in A_markers if i in adata_oprescu_d21.var_names], legend_loc='on data', ncols=3, cmap=magma)