# Data load and preparation to construct scVI embeddings of lineages

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata

In [26]:
# Input AnnData file, read relative to the working directory
# (file name suggests normalised data with maternal cells removed -- TODO confirm).
INPUT_H5AD = 'fetal_skin.norm.maternal_removed.20220202.h5ad'
adata = anndata.read_h5ad(INPUT_H5AD)

In [43]:
# Display the AnnData summary (n_obs x n_vars and the available .obs columns).
adata

AnnData object with n_obs × n_vars = 186533 × 28885
    obs: 'sanger_id', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'cell_caller', 'scrublet_score', 'cluster_scrublet_score', 'doublet_pval', 'bh_doublet_pval', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'independent_annotation_refined', 'independent_annotation_broad1', 'independent_annotation_broad2', 'independent_annotation_broad3', 'independent_annotation_broad4', 'independent_annotation_broad5', 'joint_annotation', 'is_maternal', 'fig1b_annotation', 'independent_annotation_refined_20220201a', 'independent_annotation_refined_20220201b', 'independent_annotation_broad1_20220201', 'independent_annotation_broad2_20220201', 'independent_annotation_broad3_20220201', 'joint_annotation_20220201a', 'joint_annotation_20220201b', 'fig1b_annotation_20220201', 'fig1b_annotation_20220202', 'joint_annotation_20220202', 'fig1b_annotation_v2', 'independent_annotation_refined_20220202'
  

In [None]:
# Get raw count data (disabled: every line of this cell is commented out; it reads from a shared lustre path)
#adata_raw = anndata.read_h5ad('/lustre/scratch126/cellgen/team298/SharedFolders/fetal_skin/data_for_Issac/fetal_skin_raw.h5ad')
#print(np.unique([c in adata_raw.obs_names for c in adata.obs_names]))
#adata_raw = adata_raw[adata.obs_names, adata.var_names]
#adata.layers['counts'] = adata_raw.X
#adata = adata_raw

In [41]:
# Fine-grained annotation label -> broad lineage, one mapping per lineage.
# subset_adata selects cells by the keys of these mappings.
fib_ctypes = dict.fromkeys(
    (
        'FRZB+ early fibroblast',
        'HOXC5+ early fibroblast',
        'WNT2+ fibroblast',
        'PEAR1+ fibroblast',
        'Pre-dermal condensate',
        'Dermal condensate',
        'Dermal papilla',
        'Myofibroblasts',
    ),
    'Fibroblast',
)

kera_ctypes = dict.fromkeys(
    (
        'Periderm',
        'Immature basal',
        'Immature suprabasal',
        'POSTN+ basal',
        'DPYSL2+ basal',
        'Suprabasal IFE',
        'Outer root sheath',
        'Companion layer',
        'Placode/matrix',
        'Inner root sheath',
        'Cuticle/cortex',
    ),
    'Keratinocyte',
)

# Endothelial labels split into lymphatic and vascular groups (lymphatic first,
# preserving the original key order).
endo_ctypes = {
    **dict.fromkeys(
        ('Early LE', 'LE'),
        'Lymphatic endothelium',
    ),
    **dict.fromkeys(
        (
            'Early endothelial cells',
            'Capillary arterioles',
            'Arterioles',
            'Capillaries',
            'Postcapillary venules',
            'Venules',
        ),
        'Vascular endothelium',
    ),
}

# Name of the .obs column holding the cell-type annotation used for subsetting.
ANNO_COLNAME = 'joint_annotation_20220202'

def subset_adata(adata, ctypes, anno_colname=None):
    """Return the cells of ``adata`` whose annotation label is listed in ``ctypes``.

    Parameters
    ----------
    adata
        Object exposing an ``.obs`` DataFrame and supporting boolean-mask
        indexing along observations (e.g. an AnnData).
    ctypes
        Labels to keep. When a dict is given, its keys are used.
    anno_colname
        Name of the ``.obs`` column holding the labels. Defaults to the
        module-level ``ANNO_COLNAME`` when omitted.

    Returns
    -------
    The result of indexing ``adata`` with the boolean mask (no explicit copy
    is made here).

    Side effects
    ------------
    Prints the unique labels present in the subset as a quick sanity check.
    """
    col = ANNO_COLNAME if anno_colname is None else anno_colname
    # Vectorized membership test; avoids rebuilding the key list once per cell.
    keep = adata.obs[col].isin(list(ctypes)).to_numpy()
    adata_sub = adata[keep]
    print(np.unique(adata_sub.obs[col]))
    return adata_sub
    
def _announce_and_subset(header, ctypes):
    """Print ``header``, then return ``subset_adata(adata, ctypes)``."""
    print(header)
    return subset_adata(adata, ctypes)


# One subset per lineage; each header is printed before that subset's label summary.
adata_Fibro = _announce_and_subset('Fibroblasts ------------ ', fib_ctypes)
adata_Keratino = _announce_and_subset('Keratinocytes ------------ ', kera_ctypes)
# NOTE: endo_ctypes also lists the lymphatic labels ('Early LE', 'LE'), so this
# subset is not purely vascular despite the printed header.
adata_Endo = _announce_and_subset('Vascular Endothelium ------------ ', endo_ctypes)

Fibroblasts ------------ 
['Dermal condensate' 'Dermal papilla' 'FRZB+ early fibroblast'
 'HOXC5+ early fibroblast' 'Myofibroblasts' 'PEAR1+ fibroblast'
 'Pre-dermal condensate' 'WNT2+ fibroblast']
Keratinocytes ------------ 
['Companion layer' 'Cuticle/cortex' 'DPYSL2+ basal' 'Immature basal'
 'Immature suprabasal' 'Inner root sheath' 'Outer root sheath'
 'POSTN+ basal' 'Periderm' 'Placode/matrix' 'Suprabasal IFE']
Vascular Endothelium ------------ 
['Arterioles' 'Capillaries' 'Capillary arterioles' 'Early LE'
 'Early endothelial cells' 'LE' 'Postcapillary venules' 'Venules']


In [42]:
# Report (n_cells, n_genes) for each subset, in the order: fibroblast, endothelial, keratinocyte.
for _lineage_subset in (adata_Fibro, adata_Endo, adata_Keratino):
    print(_lineage_subset.shape)

(95576, 28885)
(2173, 28885)
(1469, 28885)


In [170]:
# Write each lineage subset to its own .h5ad file in the working directory.
for _out_name, _lineage_subset in (
    ('adata_Fibro.h5ad', adata_Fibro),
    ('adata_Keratino.h5ad', adata_Keratino),
    ('adata_Endo.h5ad', adata_Endo),
):
    _lineage_subset.write_h5ad(_out_name)