In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(color_codes=True)
from pathlib import Path

# Scanpy logging level: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Figure defaults: 80 dpi on screen, 300 dpi for saved files, viridis colour map.
sc.settings.set_figure_params(
    color_map='viridis',
    dpi=80,
    dpi_save=300,
)

In [None]:
# Load the combined YS / fetal-liver object (file name suggests raw counts — confirm).
ys_el_h5ad = '/home/jovyan/YS_project/YS/Data_objects/final_objects/Combined_YS_Fliver_raw_15112021.h5ad'
YS_EL = sc.read(ys_el_h5ad)

In [None]:
# Load the gastrulation reference object.
gast_h5ad = '/home/jovyan/wgeted_files/gastrulation.h5ad'
gast = sc.read(gast_h5ad)

In [None]:
# Load the kidney object (file name suggests raw counts from the pan-fetal data — confirm).
kidney_h5ad = '/home/jovyan/mount_farm/lustre/team298/SharedFolders/ar32/raw_kidney_from_pan_fetal_20220308.h5ad'
kidney = sc.read(kidney_h5ad)
# De-duplicate gene names in place; genes are selected by name further down.
kidney.var_names_make_unique()

In [None]:
# Load the full fetal liver object, then drop the samples that are already
# part of the combined YS_EL object so those cells are not counted twice.
samples_in_YS_EL = ['F61', 'F35', 'F32']
liver_rest = sc.read('/home/jovyan/mount_farm/nfs/team298/ar32/YS/All_fetal_liver_data/A5_fetal_liver_alladata_updated_annots_210721.h5ad')
not_in_YS_EL = ~liver_rest.obs['orig.ident'].isin(samples_in_YS_EL)
# .copy() turns the boolean subset (an AnnData view) into an independent object:
# later cells write new .obs columns to it, and the unfiltered parent can be freed.
liver_rest = liver_rest[not_in_YS_EL].copy()

In [None]:
# Load the adult liver object and keep only cells flagged 'Normal' in 'NormalvsTumor'.
liver_adult = sc.read('/home/jovyan/mount_farm/nfs/team298/ar32/YS/adult_liver/data_set1_sharma_not_raw/Liver_raw_20220303.h5ad')
is_normal = liver_adult.obs['NormalvsTumor'].isin(['Normal'])
# .copy() turns the boolean subset (an AnnData view) into an independent object:
# later cells write new .obs columns to it, and the unfiltered parent can be freed.
liver_adult = liver_adult[is_normal].copy()

In [None]:
# Number of retained adult liver cells per value of 'patientno'.
liver_adult.obs['patientno'].value_counts()

In [None]:
# Number of retained adult liver cells per 'HCCF1F2_anno' annotation label.
liver_adult.obs['HCCF1F2_anno'].value_counts()

In [None]:
# Some annotations match (or look like) fetal cell types, but these are definitely adult cells

In [None]:
# Build a common 'Celltype_for_plot' column in every dataset.
# YS_EL labels are taken unchanged from 'broad_cell_labels_organ'.
YS_EL.obs['Celltype_for_plot'] = YS_EL.obs['broad_cell_labels_organ']

# The other datasets get their own annotation column, cast to str, plus a
# dataset-specific suffix so that labels stay distinguishable after merging.
_suffixed_label_sources = (
    (gast, 'corr_concat', '_gast'),
    (kidney, 'anno_to_use', '_kidney'),
    (liver_rest, 'cell.labels', '_liver_not_matched'),
    (liver_adult, 'HCCF1F2_anno', '_adult_liver'),
)
for _dataset, _label_column, _suffix in _suffixed_label_sources:
    _dataset.obs['Celltype_for_plot'] = _dataset.obs[_label_column].astype(str) + _suffix

# Tag every cell with the dataset it came from.
_dataset_tags = (
    (YS_EL, 'YS_EL'),
    (gast, 'gast'),
    (kidney, 'kidney'),
    (liver_rest, 'liver_rest'),
    (liver_adult, 'liver_adult'),
)
for _dataset, _tag in _dataset_tags:
    _dataset.obs['Dataset'] = _tag

In [None]:
# Genes shared by all five datasets.
YS_EL_genes = list(YS_EL.var.index)
gast_genes = list(gast.var.index)
kidney_genes = list(kidney.var.index)
liver_rest_genes = list(liver_rest.var.index)
liver_adult_genes = list(liver_adult.var.index)

_shared_genes = (
    set(YS_EL_genes)
    & set(gast_genes)
    & set(kidney_genes)
    & set(liver_rest_genes)
    & set(liver_adult_genes)
)
# Keep the genes in YS_EL order instead of list(set(...)): iterating a set of
# strings gives an order that can change between kernel sessions, which made
# the gene order of every downstream object non-reproducible.
# dict.fromkeys de-duplicates while preserving first-seen order.
keep_SC_genes = [gene for gene in dict.fromkeys(YS_EL_genes) if gene in _shared_genes]

In [None]:
# Restrict every dataset to the shared genes, in the same gene order.
# Slicing an AnnData returns a view; .copy() makes each subset an independent
# object so that the later in-place edits (del .uns/.obsm, normalisation,
# scaling) act on real data rather than implicitly converting a view, and so
# the full-size parents can be garbage-collected once the names are rebound.
# The *_intersect* names are kept as aliases of the subset objects.
YS_EL_intersect1 = YS_EL[:, keep_SC_genes].copy()
YS_EL = YS_EL_intersect1
gast_intersect = gast[:, keep_SC_genes].copy()
gast = gast_intersect
kidney_intersect = kidney[:, keep_SC_genes].copy()
kidney = kidney_intersect
liver_rest_intersect2 = liver_rest[:, keep_SC_genes].copy()
liver_rest = liver_rest_intersect2
liver_adult_intersect3 = liver_adult[:, keep_SC_genes].copy()
liver_adult = liver_adult_intersect3

In [None]:
# Strip annotations that are not needed after merging: YS_EL loses its .uns,
# and all three objects lose their .obsm.
del YS_EL.uns
for _dataset in (YS_EL, gast, liver_rest):
    del _dataset.obsm

# Merge YS_EL, gastrulation and the remaining fetal liver cells into one object.
adata_list = [YS_EL, gast, liver_rest]
_first, *_others = adata_list
adata = _first.concatenate(
    *_others,
    join='inner',
    batch_categories=None,
    index_unique=None,
)

In [None]:
# Per-cell total-count normalisation of the merged YS_EL + gast + liver_rest object.
# target_sum is left at its default (None).
sc.pp.normalize_total(adata, target_sum=None)

In [None]:
# log(1 + x) transform of the normalised values, applied in place.
sc.pp.log1p(adata, copy=False)

In [None]:
# Scale each gene (zero-centred) and clip values at 10.
sc.pp.scale(adata, zero_center=True, max_value=10)
# Optional step, currently disabled: set the negative scaled values to zero.
# adata.X = np.where(adata.X < 0, 0, adata.X)

In [None]:
# Per-cell total-count normalisation of the kidney object, processed on its own.
# target_sum is left at its default (None).
sc.pp.normalize_total(kidney, target_sum=None)

In [None]:
# log(1 + x) transform of the normalised kidney values, applied in place.
sc.pp.log1p(kidney, copy=False)

In [None]:
# Scale each gene of the kidney object (zero-centred) and clip values at 10.
sc.pp.scale(kidney, zero_center=True, max_value=10)
# Optional step, currently disabled: set the negative scaled values to zero.
# kidney.X = np.where(kidney.X < 0, 0, kidney.X)

In [None]:
# Per-cell total-count normalisation of the adult liver object, processed on its own.
# target_sum is left at its default (None).
sc.pp.normalize_total(liver_adult, target_sum=None)

In [None]:
# log(1 + x) transform of the normalised adult liver values, applied in place.
sc.pp.log1p(liver_adult, copy=False)

In [None]:
# Scale each gene of the adult liver object (zero-centred) and clip values at 10.
sc.pp.scale(liver_adult, zero_center=True, max_value=10)
# Optional step, currently disabled: set the negative scaled values to zero.
# liver_adult.X = np.where(liver_adult.X < 0, 0, liver_adult.X)

In [None]:
# Labels (values of 'Celltype_for_plot') retained for the figure, grouped by
# the dataset they come from.
_ys_celltypes = [
    'Progenitors_ys', 'Lymphoid_ys', 'DC_ys', 'Monocyte_ys', 'Macrophage_ys',
    'Microglia_ys', 'Granulocyte_precursors_ys', 'Mast_cell_ys', 'MK_ys',
    'Erythroid_ys', 'Endothelium_ys', 'Fibroblast_ys', 'Smooth_Muscle_ys',
    'Mesothelium_ys', 'Endoderm_ys',
]
_matched_liver_celltypes = [
    'Progenitor_fliv', 'Lymphoid_fliv', 'B_lymphoid_fliv', 'DC_fliv',
    'Monocyte_fliv', 'Kupffer_cell_fliv', 'Granulocyte_precursor_fliv',
    'Mast_cell_fliv', 'MK_fliv', 'Erythroid_fliv', 'Endothelium_fliv',
    'Fibroblast_fliv', 'Hepatocyte_fliv',
]
_gast_celltypes = ['YS Endoderm_Yolk Sac_gastrulation_gast']
# Rest of liver - currently based on cell.labels
_liver_rest_celltypes = ['Hepatocyte_liver_not_matched']
_kidney_celltypes = ['FIBROBLAST_XI_kidney']
_adult_liver_celltypes = ['Hepatocytes_adult_liver']

celltypes_to_keep = [
    *_ys_celltypes,
    *_matched_liver_celltypes,
    *_gast_celltypes,
    *_liver_rest_celltypes,
    *_kidney_celltypes,
    *_adult_liver_celltypes,
]

In [None]:
# Drop the adult liver annotations that are not needed after merging.
for _slot in ('obsm', 'uns', 'layers'):
    delattr(liver_adult, _slot)

# Append the separately processed kidney and adult liver objects to the merged object.
# Note: this cell reads `adata` and rebinds it, so running it a second time
# would append kidney and liver_adult again.
adata_list = [adata, kidney, liver_adult]
_first, *_others = adata_list
adata = _first.concatenate(
    *_others,
    join='inner',
    batch_categories=None,
    index_unique=None,
)

# Plot

In [None]:
# Genes displayed in the dot plot, in plotting order.
genes_plot = [
    'F10',
    'EPO',
    'EGFR',
    'THPO',
    'TNFSF12',
    'EPHA1',
]

In [None]:
# How many distinct 'Celltype_for_plot' labels the merged object holds before filtering.
distinct_celltype_labels = adata.obs["Celltype_for_plot"].unique()
len(distinct_celltype_labels)

In [None]:
# Keep only the cells whose label is in celltypes_to_keep.
# .copy() materialises the subset: indexing an AnnData returns a view, and the
# next cell assigns to adata.obs, which should act on an independent object
# instead of implicitly converting a view.
is_kept_celltype = adata.obs['Celltype_for_plot'].isin(celltypes_to_keep)
adata = adata[is_kept_celltype].copy()

In [None]:
# Order in which the cell types appear along the dot plot axis.
celltype_plot_order = [
    # YS
    'Progenitors_ys',
    'Lymphoid_ys',
    'DC_ys',
    'Monocyte_ys',
    'Macrophage_ys',
    'Microglia_ys',
    'Granulocyte_precursors_ys',
    'Mast_cell_ys',
    'MK_ys',
    'Erythroid_ys',
    'Endothelium_ys',
    'Fibroblast_ys',
    'Smooth_Muscle_ys',
    'Mesothelium_ys',
    'Endoderm_ys',
    # gast
    'YS Endoderm_Yolk Sac_gastrulation_gast',
    # Matched liver
    'Progenitor_fliv',
    'Lymphoid_fliv',
    'B_lymphoid_fliv',
    'DC_fliv',
    'Monocyte_fliv',
    'Kupffer_cell_fliv',
    'Granulocyte_precursor_fliv',
    'Mast_cell_fliv',
    'MK_fliv',
    'Erythroid_fliv',
    'Endothelium_fliv',
    'Fibroblast_fliv',
    'Hepatocyte_fliv',
    # Rest of liver - currently cell.labels
    'Hepatocyte_liver_not_matched',
    # adult liver
    'Hepatocytes_adult_liver',
    # kidney
    'FIBROBLAST_XI_kidney',
]

# Make the column categorical and drop categories that no remaining cell uses,
# so the comparison below only involves labels that are really present.
celltype_labels = (
    adata.obs["Celltype_for_plot"]
    .astype('category')
    .cat.remove_unused_categories()
)

# reorder_categories needs exactly the same set of labels as the data contains;
# check this up front so a mismatch reports which labels are the problem.
present_labels = set(celltype_labels.cat.categories)
missing_labels = [label for label in celltype_plot_order if label not in present_labels]
unlisted_labels = sorted(present_labels.difference(celltype_plot_order))
if missing_labels or unlisted_labels:
    raise ValueError(
        "Celltype_for_plot labels do not match the plotting order: "
        f"listed but absent from adata: {missing_labels}; "
        f"present in adata but not listed: {unlisted_labels}"
    )

adata.obs["Celltype_for_plot"] = celltype_labels.cat.reorder_categories(celltype_plot_order)

In [None]:
# Dot plot of the selected genes across the ordered cell types
# (genes on one axis, cell types on the other because of swap_axes=True);
# plots adata.X rather than adata.raw and saves the figure as a PDF.
# NOTE(review): if adata.X holds the zero-centred scaled values produced earlier
# in this notebook, the dot size (fraction of cells above dotplot's expression
# cutoff) would reflect cells above the per-gene mean rather than cells with
# non-zero counts — confirm this is intended.
sc.pl.dotplot(
    adata,
    var_names=genes_plot,
    groupby='Celltype_for_plot',
    use_raw=False,
    swap_axes=True,
    save='fig6e_EPo_plot_1_20220310.pdf',
)  # alternative save name noted in the original: 'fig3c'