In [1]:
import pandas as pd
import scanpy as sc
import numpy as np

In [2]:
# Marker genes:
# not a basic cell type --> 'other'
path_mg = "/home/wout/Documents/Thesis_lokaal/Mouse_Liver_Resolve_Data/markerGeneListMartinNoLow.csv"
marker_genes = pd.read_csv(path_mg, sep=',', index_col=0)
# print column names
print(marker_genes.columns)

Index(['portal_vein_EC45', 'LSEC45', 'LSEC Portal', 'LSEC Central',
       'central_vein_EC45', 'stellateAll', 'stellate PeriPortal',
       'stellate PeriCentral', 'FibroblastAll', 'fibroblastPortal',
       'FibroblastsCentral', 'VSMC', 'capsular_fibroblasts45',
       'Mesothelial cells', 'Hepatocytes', 'HepatocytesPortal',
       'HepatocytesCentral', 'Cholangiocytes', 'cDC2s', 'Mig. cDCs',
       'Neutrophils', 'Basophils', 'NK cells', 'ILC1s', 'T cells', 'pDCs',
       'B cells', 'cDC1s', 'Kupffer cells', 'Capsule and Central Vein Mac',
       'Portal LAM macrophages', 'Monocytes', 'LECs'],
      dtype='object')


In [3]:
# make a new dataframe
marker_genes_basic = pd.DataFrame()
marker_genes_basic['Hepa'] = marker_genes['Hepatocytes'] + marker_genes['HepatocytesPortal'] + marker_genes['HepatocytesCentral']
marker_genes_basic['LSEC'] = marker_genes['LSEC45'] + marker_genes['LSEC Portal'] + marker_genes['LSEC Central']
marker_genes_basic['Endo'] = marker_genes['portal_vein_EC45'] + marker_genes['central_vein_EC45'] + marker_genes['LECs']
marker_genes_basic['Chol'] = marker_genes['Cholangiocytes']  
marker_genes_basic['Kupf'] = marker_genes['Kupffer cells']
marker_genes_basic['Stel'] = marker_genes['stellateAll'] + marker_genes['stellate PeriPortal'] + marker_genes['stellate PeriCentral'] 
marker_genes_basic['Fibr'] = marker_genes['FibroblastAll'] + marker_genes['fibroblastPortal'] + marker_genes['FibroblastsCentral'] + marker_genes['capsular_fibroblasts45']
marker_genes_basic['B cell'] = marker_genes['B cells']
marker_genes_basic['Oth'] = marker_genes['VSMC'] + marker_genes['Mesothelial cells'] + marker_genes['cDC1s'] + marker_genes['cDC2s'] + marker_genes['Mig. cDCs'] + marker_genes['Neutrophils'] + marker_genes['Basophils'] + marker_genes['NK cells'] + marker_genes['ILC1s'] + marker_genes['T cells'] + marker_genes['pDCs'] + marker_genes['Capsule and Central Vein Mac'] + marker_genes['Portal LAM macrophages'] + marker_genes['Monocytes']  
marker_genes_basic
# if value in marker_genes_basic is > 1, set to 1
marker_genes_basic[marker_genes_basic > 1] = 1
print(marker_genes_basic.columns)
# save to csv
marker_genes_basic.to_csv("/home/wout/Documents/Thesis_lokaal/Mouse_Liver_Resolve_Data/markerGeneListBasic.csv")

Index(['Hepa', 'LSEC', 'Endo', 'Chol', 'Kupf', 'Stel', 'Fibr', 'B cell',
       'Oth'],
      dtype='object')


In [4]:
ad_sc = sc.read('/home/wout/Documents/Thesis_lokaal/Data_to_create_liver_atlas/adataAtlasRESOLVE.h5ad')
ad_sc=ad_sc[ad_sc.obs.typeSample=='nucSeq']


In [5]:
ad_sc

View of AnnData object with n_obs × n_vars = 18666 × 31053
    obs: 'annot', 'typeSample', 'cluster_orig', 'annotation_CD45Neg', 'annotation_Fibro', 'annotation'
    uns: 'annot_colors'
    obsm: 'X_umap'

In [6]:
np.unique(ad_sc.obs['annotation'])

array(['B cells', 'Cholangiocytes', 'Endothelial cells', 'Hepatocytes',
       'HsPCs', 'Kupffer cells', 'LECs', 'LSECs', 'Mesothelial cells',
       'Other_ImmuneCells', 'VSMC', 'central_vein_EC45', 'fibroblast',
       'portal_vein_EC45', 'stellate'], dtype=object)

In [7]:
# substitute values in ad_sc.obs['annot'] 
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['stellate'], 'Stel')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['LECs','central_vein_EC45','portal_vein_EC45','Endothelial cells'], 'Endo')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['fibroblast'], 'Fibr')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['HsPCs','Mesothelial cells','Other_ImmuneCells','VSMC'], 'Oth')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['Cholangiocytes'], 'Chol')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['Hepatocytes'], 'Hepa')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['Kupffer cells'], 'Kupf')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['B cells'], 'B cell')
ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['LSECs'], 'LSEC')

np.unique(ad_sc.obs['annotation'])

  ad_sc.obs['annotation'] = ad_sc.obs['annotation'].replace(['stellate'], 'Stel')


array(['B cell', 'Chol', 'Endo', 'Fibr', 'Hepa', 'Kupf', 'LSEC', 'Oth',
       'Stel'], dtype=object)

In [8]:
# calculate proportions of each value in column 'annotation' in ad_sc
ad_sc.obs['annotation'].value_counts(normalize=True)

Hepa      0.579985
Stel      0.137683
LSEC      0.092414
Oth       0.061609
Endo      0.032894
Kupf      0.032037
Fibr      0.030430
Chol      0.026733
B cell    0.006215
Name: annotation, dtype: float64

In [9]:
# to dataframe
df_atlas = pd.DataFrame(ad_sc.obs['annotation'].value_counts(normalize=True))
df_atlas.sort_index(inplace=True)


In [10]:
# change name of column 
df_atlas.columns = ['atlas']
# write to csv
df_atlas.to_csv("/home/wout/Documents/Thesis_lokaal/Mouse_Liver_Resolve_Data/basic_annotation_percentage_atlas.csv")