In [None]:
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import os
import triku as tk
import matplotlib.pyplot as plt
import matplotlib as mpl
from tqdm.notebook import tqdm
import ray
import subprocess

from IPython.display import display, HTML

from tqdm.notebook import tqdm

from bokeh.io import show, output_notebook, reset_output

# Reset bokeh's output state, then direct bokeh output to the notebook.
reset_output()
output_notebook()

In [None]:
# Random seed passed as `random_state` to the PCA, triku, UMAP and Leiden calls below.
seed = 10

In [None]:
# Base directories, relative to the notebook's working directory.
# Both end with a trailing slash.
data_dir = 'data/'
fig_dir = 'figures/'

In [None]:
# Colormap for UMAP gene-expression plots.
# Sample 80 evenly spaced colours from matplotlib's 'magma', keep the first 65
# (dropping the upper end of the map), and replace the very first colour with
# light grey so the lowest values are drawn in grey.
base_cmap = plt.get_cmap('magma')
magma_colors = [base_cmap(fraction) for fraction in np.linspace(0, 1, 80)][:65]
magma_colors[0] = (0.88, 0.88, 0.88, 1)

magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_colors)

In [None]:
from cellassign import assign_cats

In [None]:
# Load the processed De Micheli mouse and human datasets.
# Paths are built from `data_dir` so that changing the configured data
# directory actually takes effect (previously 'data/' was hard-coded here).
processed_dir = os.path.join(data_dir, 'processed')
de_micheli_mouse = sc.read(os.path.join(processed_dir, 'de_micheli_mouse_d0.h5'))
de_micheli_human = sc.read(os.path.join(processed_dir, 'de_micheli_human.h5'))

In [None]:
# Mouse -> human gene-name table (comma-separated despite the .txt extension).
# The path is built from `data_dir` rather than a hard-coded 'data/' prefix.
# Rows with any missing value are dropped, and only pairs whose mouse name is
# present in the mouse dataset AND whose human name is present in the human
# dataset are kept.
# NOTE(review): a gene that appears in several rows is kept several times;
# confirm whether one-to-many pairs should be deduplicated.
df_mouse2human = pd.read_csv(os.path.join(data_dir, 'mouse_2_human.txt'), sep=',').dropna(how='any')
in_mouse = df_mouse2human['Gene name'].isin(de_micheli_mouse.var_names)
in_human = df_mouse2human['Human gene name'].isin(de_micheli_human.var_names)
df_mouse2human = df_mouse2human[in_mouse & in_human]

In [None]:
# Restrict both datasets to the mapped genes, in the same row order as the
# mapping table, so that column i of the mouse data pairs with column i of the
# human data.
# `.copy()` turns the sliced views into independent AnnData objects: the
# following cells modify `var_names`, which on a view would trigger an
# implicit copy (with a warning).
mouse_genes = df_mouse2human['Gene name'].values
human_genes = df_mouse2human['Human gene name'].values
de_micheli_mouse = de_micheli_mouse[:, mouse_genes].copy()
de_micheli_human = de_micheli_human[:, human_genes].copy()

In [None]:
# Rename the human genes to the mouse names. This relies on both objects having
# been subset with columns in the same (mapping-table) order in the previous cell.
de_micheli_human.var_names = de_micheli_mouse.var_names

In [None]:
# Make gene names unique in both datasets. At this point both carry the same
# var_names in the same order, so the same call is applied to each.
for per_organism in (de_micheli_mouse, de_micheli_human):
    per_organism.var_names_make_unique()

In [None]:
# Stack mouse and human cells into one AnnData; the origin of each cell is
# recorded in obs['organism'] as 'mouse' or 'human'.
de_micheli = de_micheli_mouse.concatenate(
    de_micheli_human,
    join='outer',
    batch_key='organism',
    batch_categories=['mouse', 'human'],
)

In [None]:
# 30-component PCA, then a BBKNN neighbour graph batched by organism, then
# triku run on the neighbour graph already stored in the AnnData.
# NOTE(review): nothing after triku in this notebook recomputes PCA or the
# graph, so its result may not influence the UMAP/Leiden below — confirm.
sc.pp.pca(
    de_micheli,
    n_comps=30,
    random_state=seed,
)
sce.pp.bbknn(de_micheli, batch_key='organism')
tk.tl.triku(
    de_micheli,
    use_adata_knn=True,
    random_state=seed,
    n_procs=1,
)

In [None]:
# Embed and cluster the combined dataset, then show an overview.
sc.tl.umap(de_micheli, random_state=seed, min_dist=0.3)
sc.tl.leiden(de_micheli, random_state=seed, resolution=2)

# 'leiden' and 'organism' are created above.
# NOTE(review): 'batch' and 'total_counts' are assumed to be present in .obs
# of the loaded files — confirm.
sc.pl.umap(
    de_micheli,
    color=['leiden', 'batch', 'organism', 'total_counts'],
    ncols=2,
)

In [None]:
# Marker gene sets "A" and "B" plotted on the UMAP below.
# NOTE(review): the symbols look like mouse gene names — confirm their source.
# NOTE(review): 'Mpzl2' is listed in both sets — confirm this is intended.
A_markers = [
    '6030408B16Rik', 'Adamtsl2', 'Cdh19', 'Cdkn2b', 'Col18a1',
    'Col26a1', 'Col9a2', 'Dlk1', 'Fetub', 'Gfra2',
    'Gm11681', 'Gpld1', 'Greb1', 'Gria1', 'Kcnb2',
    'Kcnk2', 'Mpzl2', 'Ngfr', 'Plppr4', 'Ptgfr',
    'Rgs17', 'Saa1', 'Saa2', 'Shisa3', 'Sipa1l1',
    'Sorcs2', 'Sox9', 'Sphkap', 'Syndig1', 'Trpm6',
]

B_markers = [
    'Cldn1', 'Crabp2', 'Dleu7', 'Efnb3', 'Gjb5',
    'Grin2b', 'Itgb4', 'Kcnj13', 'Kcnj2', 'Lgals7',
    'Lypd2', 'Mansc4', 'Moxd1', 'Mpzl2', 'Perp',
    'Prodh', 'Ptch1', 'Slc6a13', 'Stra6', 'Tec',
    'Tenm2', 'Wnt10a', 'Wnt6',
]

In [None]:
# Display the gene names of the combined dataset, to check which markers are available.
de_micheli.var_names

In [None]:
# Reference panels (clusters and organism) to compare against the marker plots.
# NOTE(review): both keys look categorical, so `cmap` may have no visible
# effect here — confirm.
sc.pl.umap(
    de_micheli,
    color=['leiden', 'organism'],
    ncols=1,
    cmap=magma,
)

In [None]:
# UMAP expression panels for the set-A markers; genes absent from the combined
# dataset are skipped.
sc.pl.umap(
    de_micheli,
    color=[gene for gene in A_markers if gene in de_micheli.var_names],
    ncols=3,
    cmap=magma,
)

In [None]:
# UMAP expression panels for the set-B markers; genes absent from the combined
# dataset are skipped.
sc.pl.umap(
    de_micheli,
    color=[gene for gene in B_markers if gene in de_micheli.var_names],
    ncols=3,
    cmap=magma,
)

**No mapping is available, and the markers in human do not correlate with those in mouse.**