# Figure 3h

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

# Scanpy log level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3

# Figure defaults used by the plotting call further down: 80 dpi, viridis.
sc.settings.set_figure_params(
    dpi=80,
    color_map='viridis',
)

In [None]:
# Expression objects (.h5ad) for the three tissues being compared.
YS = sc.read(
    '/lustre/scratch117/cellgen/team298/ar32/Microglia_comparison_datasets/A4_V7_YS_integrated_data_singlets_with_raw_counts_for_MS_plotting_20211111_with_obsp.h5ad'
)
skin = sc.read(
    '/lustre/scratch117/cellgen/team298/ar32/Microglia_comparison_datasets/fetal_skin_all_cellxgene_20211215.h5ad'
)
brain = sc.read(
    '/nfs/team298/ar32/YS/brain_objects/kriegstein/Cellbender_from_Jimmy/processed_object_20220329/raw_basic_batch_brain_cellbender_20220329.h5ad'
)

# Metadata tables that later replace .obs of the skin and brain objects.
# The first CSV column is used as the row index.
skin_meta = pd.read_csv(
    '/nfs/team298/ar32/YS/YS_to_skin_microglia/skin_data_with_clean_microglia_20220405.csv',
    index_col=0,
)
brain_meta = pd.read_csv(
    '/nfs/team298/ar32/YS/brain_objects/kriegstein/Cellbender_from_Jimmy/cellbender_from_jimmy/training_on_all_brain_cellbender_with_model_already_made/LR_indiv/brain_microglia_clean_metadata_20220405.csv',
    index_col=0,
)

In [None]:
# Bring the three datasets to log-normalised expression. Each one arrives in a
# different state, so each gets a different treatment; the calls below modify
# the objects in place (or rebind the name) and must run exactly once.

# YS: library-size normalise, then log1p.
# NOTE(review): assumes YS.X holds raw counts at this point -- TODO confirm.
sc.pp.normalize_total(YS)
sc.pp.log1p(YS)

# skin: replace the object with the contents of its .raw slot.
# Per the original author, .raw is already normalised and logged, so no
# further transform is applied here -- TODO confirm against the source object.
skin = skin.raw.to_adata() # normalised and logged

# brain: only log1p is applied.
# Per the original author, brain.X is already normalised -- TODO confirm.
sc.pp.log1p(brain) # already normalised

In [None]:
# Restrict all three datasets to the genes they share so that they can be
# concatenated and plotted on a common gene axis.
YS_genes = list(YS.var.index)
skin_genes = list(skin.var.index)
brain_genes = list(brain.var.index)

# The membership test uses a set, but the kept genes are emitted in YS order.
# Turning the set itself into a list would give an order that can change
# between Python sessions (string hash randomisation), which would make the
# column order of the subsetted objects irreproducible.
# dict.fromkeys drops any repeated gene name while preserving first occurrence.
shared_genes = set(YS_genes) & set(skin_genes) & set(brain_genes)
keep_SC_genes = [gene for gene in dict.fromkeys(YS_genes) if gene in shared_genes]
print("keep gene list = " , len(keep_SC_genes), "YS gene length = ", len(YS_genes) , "skin gene length = ", len(skin_genes), "brain gene length = ", len(brain_genes) )

# Fail here with a clear message rather than later with an empty matrix.
if not keep_SC_genes:
    raise ValueError(
        "YS, skin and brain share no gene names; check that all three "
        "objects use the same gene identifier scheme."
    )

# Remove non-intersecting genes (this step will remove cite-seq data if
# training data is pure RNA seq).
# .copy() turns each subset into a standalone AnnData instead of a view of the
# full object, so the later in-place steps operate on real data and the full
# objects can be garbage-collected as soon as the names are rebound.
YS = YS[:, keep_SC_genes].copy()
skin = skin[:, keep_SC_genes].copy()
brain = brain[:, keep_SC_genes].copy()

In [None]:
# Scale every dataset independently and in place, capping scaled values at 10.
for _dataset in (YS, skin, brain):
    sc.pp.scale(_dataset, max_value=10)
# Drop the loop variable so it does not keep an extra reference to `brain`.
del _dataset

In [None]:
def _align_obs_metadata(adata, meta, label):
    """Return ``meta`` with its rows ordered to match ``adata.obs_names``.

    Assigning a DataFrame to ``adata.obs`` replaces the annotation as-is: rows
    are taken in the order given and are NOT matched to cells by index. If the
    CSV was written in a different cell order, every label would silently end
    up on the wrong cell. This helper makes the match explicit.

    Parameters:
        adata: AnnData object the metadata will be attached to.
        meta: DataFrame indexed by cell barcode.
        label: Short dataset name used in error messages.

    Raises:
        ValueError: if ``meta`` has duplicated barcodes, or lacks a row for
            any cell in ``adata``.
    """
    if meta.index.equals(adata.obs_names):
        # Already in the right order: hand back the table untouched.
        return meta
    if not meta.index.is_unique:
        raise ValueError(
            f"{label} metadata has duplicated cell barcodes; cannot align it "
            f"to the expression object unambiguously."
        )
    missing = adata.obs_names.difference(meta.index)
    if len(missing) > 0:
        raise ValueError(
            f"{label} metadata has no row for {len(missing)} of "
            f"{adata.n_obs} cells (e.g. {missing[0]!r}); refusing to attach "
            f"labels by position."
        )
    return meta.loc[adata.obs_names]


# --- Yolk sac: keep the listed populations and tag them with a '_YS' suffix.
# .copy() makes the subset a real AnnData, so the new .obs columns are written
# to an actual object rather than to a view (which triggers an implicit copy
# and a warning).
ys_labels_to_keep = ['MOP', 'Promonocyte', 'Monocyte', 'Pre_Macrophage', 'Macrophage', 'Microglia']
YS = YS[YS.obs['cell.labels'].isin(ys_labels_to_keep)].copy()
YS.obs['Celltypes_to_plot'] = (YS.obs['cell.labels'].astype(str) + '_YS').astype('category')
YS.obs['Dataset'] = 'YS'

# Labels retained from the skin and brain metadata tables.
predicted_labels_to_keep = ['Macrophage_clus_prediction', 'Microglia_clus_prediction']

# --- Skin: attach the external metadata, keep the two predicted populations,
# and rename '<type>_clus_prediction' to '<type>_skin'.
skin.obs = _align_obs_metadata(skin, skin_meta, 'skin')
skin = skin[skin.obs['Microglia_skin_clean'].isin(predicted_labels_to_keep)].copy()
# regex=False: the pattern is a plain substring, not a regular expression.
skin.obs['Microglia_skin_clean'] = skin.obs['Microglia_skin_clean'].str.replace('_clus_prediction', '_skin', regex=False)
skin.obs['Celltypes_to_plot'] = skin.obs['Microglia_skin_clean'].astype('category')
skin.obs['Dataset'] = 'skin'

# --- Brain: same procedure, using the 'clus_prediction' column and a
# '_brain' suffix.
brain.obs = _align_obs_metadata(brain, brain_meta, 'brain')
brain = brain[brain.obs['clus_prediction'].isin(predicted_labels_to_keep)].copy()
brain.obs['clus_prediction'] = brain.obs['clus_prediction'].str.replace('_clus_prediction', '_brain', regex=False)
brain.obs['Celltypes_to_plot'] = brain.obs['clus_prediction'].astype('category')
brain.obs['Dataset'] = 'brain'

In [None]:
# Stack the three subsets into one object. join='inner' keeps only the genes
# present in all inputs; index_unique=None leaves cell names unsuffixed.
adata_list = [YS, skin, brain]
adata = adata_list[0].concatenate(
    *adata_list[1:],
    join='inner',
    batch_categories=None,
    index_unique=None,
)

In [None]:
# Row order for the dot plot: YS monocyte/macrophage lineage first, then
# macrophages per tissue, then microglia per tissue.
# reorder_categories requires this list to contain exactly the categories
# present in the column, and raises otherwise.
celltype_order = [
    'MOP_YS',
    'Promonocyte_YS',
    'Monocyte_YS',
    'Pre_Macrophage_YS',
    'Macrophage_YS',
    'Macrophage_skin',
    'Macrophage_brain',
    'Microglia_YS',
    'Microglia_skin',
    'Microglia_brain',
]
adata.obs['Celltypes_to_plot'] = (
    adata.obs['Celltypes_to_plot']
    .astype('category')
    .cat.reorder_categories(celltype_order)
)

# Marker genes shown as columns of the dot plot.
genes_to_plot = ['CD4', 'CD14', 'C1QA', 'TREM2', 'CX3CR1', 'P2RY12']

# NOTE(review): use_raw=False plots adata.X, which was passed through
# sc.pp.scale earlier in this notebook. Dot size is then presumably the
# fraction of cells above the plot's expression cutoff on the scaled values,
# not the fraction with non-zero counts -- confirm this is intended.
sc.pl.dotplot(
    adata,
    var_names=genes_to_plot,
    groupby='Celltypes_to_plot',
    use_raw=False,
    vmin=0,
    save='Microglia_dotplot_comparison_vmin_0_20220405.pdf',
)