In [None]:
import os
import sys
%matplotlib inline
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import datetime
import rpy2
import seaborn as sns

# Logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
# Use 300 dpi both for figures shown in the notebook and for figures saved to disk.
sc.set_figure_params(dpi=300, dpi_save=300)

In [None]:
# Load the processed liver CITE-seq protein object (path is relative to the notebook's working directory).
protein_liver = sc.read_h5ad('Liver_Cite_seq_protein_processed_20220406.h5ad')

In [None]:
# Show the notebook repr of the loaded liver object as a quick sanity check.
protein_liver

In [None]:
# Load the processed YS CITE-seq protein object.
# NOTE(review): this is an absolute, user-specific path; the notebook only runs
# where that location is mounted — consider a configurable data directory.
protein_YS = sc.read_h5ad(
    '/nfs/team298/ar32/YS_citeseq_submission_1_anndata_objects/cite_seq_protein_simple_processed_for_submission_20220407.h5ad'
)

In [None]:
# Count how many rows of the YS `.obs` table carry each 'individual_anno' label.
protein_YS.obs['individual_anno'].value_counts()

In [None]:
# Count how many rows of the liver `.obs` table carry each 'individual_anno' label.
protein_liver.obs['individual_anno'].value_counts()

In [None]:
# YS labels from 'individual_anno' that are plotted under their individual name.
# Deliberately not listed (these cells keep their broad label):
# Monocyte_0 (noted "merge"), Monocyte_1, pDC precursor, lymphoid progenitor,
# Lymphoid, lymphoid- B lin, lymphoid- NK/ILC.
YS_anno = [
    'early erythroid', 'Erythroid',
    'Endothelium', 'Fibroblast',
    'MEMP', 'MK', 'CMP',
    'Mast_cell',
    'HSPC1',
]

# Liver labels from 'individual_anno' that are plotted under their individual name.
# Deliberately not listed (these cells keep their broad label):
# Pre_B, Pre_pro_B_progenitor, Immature_B_cell, NK, ILC_precursor.
liver_anno = [
    'Early_Erythroid', 'Erythroid',
    'Endothelium', 'Fibroblast',
    'MEMP', 'MK', 'CMP',
    'Mast_cell',
    'Monocyte', 'pDC_precursor',
    'HSC',
]

In [None]:
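# Plan for building the plotting annotation ('To_plot_anno'):
# 1. start from the broad annotation ('broad_anno');
# 2. for the selected cell types, pull over the individual annotation ('individual_anno');
# 3. remove the cell types that do not intersect between the two datasets.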

In [None]:
# Every row starts out with its broad label ...
protein_YS.obs['To_plot_anno'] = protein_YS.obs['broad_anno'].astype(str)

# ... and rows whose individual label is listed in `YS_anno` are switched to that
# individual label. One boolean mask replaces the per-label loop; `.to_numpy()`
# makes the assignment positional rather than index-aligned.
in_ys_anno = protein_YS.obs['individual_anno'].isin(YS_anno)
protein_YS.obs.loc[in_ys_anno, 'To_plot_anno'] = (
    protein_YS.obs.loc[in_ys_anno, 'individual_anno'].astype(str).to_numpy()
)

In [None]:
# Inspect the YS label counts after the individual annotations were pulled over.
protein_YS.obs['To_plot_anno'].value_counts()

In [None]:
# 'To_plot_anno' labels that are dropped from the YS object before plotting.
celltypes_to_remove = [
    'Progenitors',
    'Endoderm',
    'Smooth_Muscle',
    'Microglia',
    'Mesothelium',
]

# Subsetting an AnnData returns a view of the parent object. Take an explicit
# copy so that the later `.obs[...] = ...` assignments operate on a real object
# instead of triggering an implicit copy (and an ImplicitModificationWarning).
keep_rows = ~protein_YS.obs['To_plot_anno'].isin(celltypes_to_remove)
protein_YS = protein_YS[keep_rows].copy()

# Confirm which labels remain after filtering.
protein_YS.obs['To_plot_anno'].value_counts()

In [None]:
# Every row starts out with its broad label ...
protein_liver.obs['To_plot_anno'] = protein_liver.obs['broad_anno'].astype(str)

# ... and rows whose individual label is listed in `liver_anno` are switched to
# that individual label. One boolean mask replaces the per-label loop;
# `.to_numpy()` makes the assignment positional rather than index-aligned.
in_liver_anno = protein_liver.obs['individual_anno'].isin(liver_anno)
protein_liver.obs.loc[in_liver_anno, 'To_plot_anno'] = (
    protein_liver.obs.loc[in_liver_anno, 'individual_anno'].astype(str).to_numpy()
)

In [None]:
# Inspect the liver label counts after the individual annotations were pulled over.
protein_liver.obs['To_plot_anno'].value_counts()

In [None]:
# 'To_plot_anno' labels that are dropped from the liver object before plotting.
celltypes_to_remove = [
    'Progenitors',
    'B_lymphoid',
    'DC',
    'Granulocyte_precursors',
    'Hepatocyte',
]

# Subsetting an AnnData returns a view of the parent object. Take an explicit
# copy so that the later `.obs[...] = ...` assignments operate on a real object
# instead of triggering an implicit copy (and an ImplicitModificationWarning).
keep_rows = ~protein_liver.obs['To_plot_anno'].isin(celltypes_to_remove)
protein_liver = protein_liver[keep_rows].copy()

# Confirm which labels remain after filtering.
protein_liver.obs['To_plot_anno'].value_counts()

In [None]:
# Protein markers shown as columns of the dot plots, in plotting order.
# 'CD43' was listed twice in the original list; the second occurrence is removed
# so that every marker appears exactly once.
protein_markers = [
    'CD61',
    'CD49b',
    'CD117',
    'CD146',
    'CD38',
    'CD35',
    'CD9',
    'CD28',
    'CD45RA',
    'TCR_Vg9',
    'CD235ab',
    'CD49d',
    'CD141',
    'CD71',
    'CD123',
    'CD47',
    'CD43',
    'CD34',
    'CD48',
    'CD49a',
    'CD62L',
    'CD201',
    'MERTK',
    'CD29',
]

In [None]:
# One dot plot per dataset (liver first, then YS), grouped by the plotting annotation.
for dataset_adata, dataset_title in ((protein_liver, 'Liver'), (protein_YS, 'YS')):
    sc.pl.dotplot(
        dataset_adata,
        var_names=protein_markers,
        groupby='To_plot_anno',
        cmap='Blues',
        title=dataset_title,
    )

In [None]:
def _tag_with_dataset(labels, dataset):
    """Return `labels` as strings with `_<dataset>` appended exactly once.

    Labels that already end in the suffix are returned unchanged, so running
    this cell a second time does not produce names such as `CMP_YS_YS`.
    """
    suffix = '_' + dataset
    labels = labels.astype(str)
    return labels.where(labels.str.endswith(suffix), labels + suffix)


# Tag each annotation with its dataset of origin so that labels from the two
# objects stay distinguishable once they are combined.
for _adata, _dataset in ((protein_YS, 'YS'), (protein_liver, 'liver')):
    # These two columns are derived from untouched source columns, so plain
    # concatenation is already safe to re-run.
    _adata.obs['Dotplot_broad_anno_with_dataset'] = _adata.obs['broad_anno'].astype(str) + '_' + _dataset
    _adata.obs['Dotplot_indiv_anno_with_dataset'] = _adata.obs['individual_anno'].astype(str) + '_' + _dataset
    # 'To_plot_anno' is overwritten from itself, so guard against double-tagging.
    _adata.obs['To_plot_anno'] = _tag_with_dataset(_adata.obs['To_plot_anno'], _dataset)

In [None]:
# Combine the YS and liver objects, keeping only the variables present in both
# (`join='inner'`). `index_unique=None` leaves the observation names unchanged.
# NOTE(review): newer anndata releases deprecate `AnnData.concatenate` in favour
# of `anndata.concat` — confirm against the installed version before migrating.
adata_list = [protein_YS, protein_liver]
first_adata, *other_adatas = adata_list
protein_both = first_adata.concatenate(
    *other_adatas,
    join='inner',
    batch_categories=None,
    index_unique=None,
)

In [None]:
# Dot plot of the combined object, grouped by the dataset-tagged plotting
# annotation (category order not yet customised at this point).
sc.pl.dotplot(
    protein_both,
    var_names=protein_markers,
    groupby='To_plot_anno',
    cmap='Blues',
)

In [None]:
# Row order for the combined dot plot: each YS label is immediately followed by
# the liver label it is paired with. Pairs are (YS label, liver label).
celltype_pairs = [
    ('HSPC1', 'HSC'),
    ('CMP', 'CMP'),
    ('MEMP', 'MEMP'),
    ('Lymphoid', 'Lymphoid'),
    ('pDC precursor', 'pDC_precursor'),
    ('Monocyte', 'Monocyte'),
    ('Macrophage', 'Kupffer_cell'),
    ('Mast_cell', 'Mast_cell'),
    ('MK', 'MK'),
    ('early erythroid', 'Early_Erythroid'),
    ('Erythroid', 'Erythroid'),
    ('Endothelium', 'Endothelium'),
    ('Fibroblast', 'Fibroblast'),
]

# Flatten the pairs into the interleaved, dataset-suffixed label list.
celltypes = [
    tagged_label
    for ys_label, liver_label in celltype_pairs
    for tagged_label in (ys_label + '_YS', liver_label + '_liver')
]



# Apply the paired ordering from `celltypes` to the plotting labels.
# The `.cat` accessor only exists on categorical columns, so cast explicitly
# rather than depend on the column already having been converted elsewhere
# (presumably by the earlier plotting call — verify).
to_plot_anno = protein_both.obs['To_plot_anno'].astype('category')

# `reorder_categories` needs exactly the existing set of categories. Report any
# difference by name instead of surfacing pandas' generic error message.
present_labels = set(to_plot_anno.cat.categories)
missing_from_data = [label for label in celltypes if label not in present_labels]
missing_from_order = sorted(present_labels.difference(celltypes))
if missing_from_data or missing_from_order:
    raise ValueError(
        "'To_plot_anno' categories do not match `celltypes`: "
        f"listed but absent from the data: {missing_from_data}; "
        f"present in the data but not listed: {missing_from_order}"
    )

protein_both.obs['To_plot_anno'] = to_plot_anno.cat.reorder_categories(celltypes)

In [None]:
# Final dot plot with the reordered categories; `save` also writes the figure to disk.
sc.pl.dotplot(
    protein_both,
    var_names=protein_markers,
    groupby='To_plot_anno',
    cmap='Blues',
    save='Decision_tree_protein_markers_on_EL_cite_seq_20220407.pdf',
)