In [1]:
import numpy as np
import pandas as pd
import pathlib
import json
import sys; sys.path.append('../../scripts')
import download_cell_type_markers
import readwrite
# Project configuration returned by readwrite.config(); the cells below index it
# with the keys 'markers_dir' and 'data_dir'.
cfg = readwrite.config()
# Directory holding the marker files. File names are appended to it with `+`
# further down, so it presumably is a string ending in a path separator -- TODO confirm.
save_dir = cfg['markers_dir']

## Load table of cell types to query

In [2]:
# Cell-type table read from the xenium metadata folder under cfg['data_dir'].
# NOTE(review): df_cell_types is reassigned by the two cells that follow, so on a
# top-to-bottom run this value is replaced before the marker query uses it.
df_cell_types = pd.read_csv(f"{cfg['data_dir']}xenium/metadata/col_palette_cell_types.csv")

In [39]:
# Two-column table: cell type name (column 'index') and its 'CL:' identifier
# (column 0). The identifiers look like Cell Ontology ids -- TODO confirm.
# NOTE(review): the next cell reassigns df_cell_types, so on a top-to-bottom run
# this table is overwritten before it is used.
df_cell_types = pd.DataFrame(
    [
        ('glial cell', 'CL:0000125'),
        ('B cell', 'CL:0000236'),
        ('dendritic cell', 'CL:0000451'),
        ('myeloid cell', 'CL:0000763'),
        ('endothelial cell', 'CL:0000115'),
        ('stromal cell', 'CL:0000499'),
        ('fibroblast', 'CL:0000057'),
        ('granulocyte', 'CL:0000094'),
        ('epithelial cell', 'CL:0000066'),
        ('macrophage', 'CL:0000235'),
        ('monocyte', 'CL:0000576'),
        ('T cell', 'CL:0000084'),
        ('natural killer cell', 'CL:0000623'),
        ('pericyte', 'CL:0000669'),
        ('plasma cell', 'CL:0000786'),
        ('smooth muscle cell', 'CL:0000192'),
        ('malignant cell', 'CL:0001064'),
    ],
    columns=['index', 0],
)

In [7]:
# Table of (column 0, column 1) name pairs handed to the marker query below.
# Some column-0 names repeat with a different column-1 name (e.g. 'myeloid cell'
# with 'macrophage' and 'monocyte'); presumably column 0 is the label to report
# and column 1 the name to look up -- TODO confirm against get_cellmarker_markers.
df_cell_types = pd.DataFrame([
    ('glial cell',          'glial cell'),
    ('B cell',              'B cell'),
    ('dendritic cell',      'dendritic cell'),
    ('myeloid cell',        'myeloid cell'),
    ('myeloid cell',        'macrophage'),
    ('myeloid cell',        'monocyte'),
    ('endothelial cell',    'endothelial cell'),
    ('fibroblast',          'fibroblast'),
    ('epithelial cell',     'epithelial cell'),
    ('smooth muscle cell',  'smooth muscle cell'),
    ('granulocyte',         'granulocyte'),
    ('granulocyte',         'neutrophil'),
    ('T cell',              'T cell'),
    ('natural killer cell', 'natural killer cell'),
    ('pericyte',            'pericyte'),
    ('plasma cell',         'plasma cell'),
    ('malignant cell',      'cancer cell'),
])


## CellMarker markers

In [8]:
# Load the CellMarker tables: reuse the copies in save_dir when
# 'cellmarker_canonical.csv' is already there, otherwise call the download helper.
if pathlib.Path(save_dir+'cellmarker_canonical.csv').exists():
    df_markers_canonical, df_markers_computational = download_cell_type_markers.load_cellmarker_markers(save_dir=save_dir)
else:
    df_markers_canonical, df_markers_computational = download_cell_type_markers.download_cellmarker_markers(
        canonical_url = 'http://www.bio-bigdata.center/CellMarker_download_files/file/Cell_marker_All.xlsx',
        computational_url = 'http://www.bio-bigdata.center/CellMarker_download_files/file/Cell_marker_Seq.xlsx',
        species_list=["Human"],
        save_dir=save_dir,
    )

# Query settings: marker-table column to match on, plus an optional custom
# name mapping for missing cell types (currently empty).
column = 'cell_name'
cellmarker_custom_map = {}
# cellmarker_custom_map = {  
#     'CL:0000082':'CL:0000066',
#     'CL:0005012':'CL:0000066',
#     'CL:0002553':'CL:0000057',
#     'CL:2000093':'CL:0000057',
#     'CL:0002144':'CL:0000115',
#     'CL:1001568':'CL:0000115',
# }

# Each entry is either the string 'all' (no filtering) or a list-like of
# 'tissue_class' values to restrict the marker tables to.
for tissues in ['all']:
    print('\n\n',tissues)

    # restrict both marker tables to the requested tissues
    if tissues == 'all':
        df_markers_canonical_tissues = df_markers_canonical
        df_markers_computational_tissues = df_markers_computational
    else:
        df_markers_canonical_tissues = df_markers_canonical[
            df_markers_canonical['tissue_class'].isin(tissues)
        ]
        df_markers_computational_tissues = df_markers_computational[
            df_markers_computational['tissue_class'].isin(tissues)
        ]

    # query the canonical table
    cellmarker_ct_descr_canonical, cellmarker_ct_markers_canonical, cellmarker_ct_markers_canonical_df = (
        download_cell_type_markers.get_cellmarker_markers(
            df_cell_types,
            df_markers_canonical_tissues,
            custom_map=cellmarker_custom_map,
            column=column,
        )
    )

    # query the computational table with the same settings
    cellmarker_ct_descr_computational, cellmarker_ct_markers_computational, cellmarker_ct_markers_computational_df = (
        download_cell_type_markers.get_cellmarker_markers(
            df_cell_types,
            df_markers_computational_tissues,
            custom_map=cellmarker_custom_map,
            column=column,
        )
    )

    # add markers for missing cell types
    # cDC2 = 'Conventional dendritic cell 2(cDC2)'
    # cellmarker_ct_markers_canonical['CD1c-positive myeloid dendritic cell'] = list(df_markers_canonical_tissues.query(f"cell_name == '{cDC2}'")['Symbol'].dropna().unique())
    # cellmarker_ct_markers_computational['CD1c-positive myeloid dendritic cell'] = list(df_markers_computational_tissues.query(f"cell_name == '{cDC2}'")['Symbol'].dropna().unique())

    # bundle both results under the keys the downstream cells read
    cellmarker_ct_markers = {
        'canonical':cellmarker_ct_markers_canonical,
        'computational':cellmarker_ct_markers_computational
    }

    # output file name: tissue-specific runs get the joined tissue names as suffix
    if tissues == 'all':
        save_name = 'cellmarker_cell_types_markers.json'
    else:
        t = '_'.join(tissues)
        save_name = f'cellmarker_cell_types_markers_{t}.json'

    # save markers
    path = save_dir+save_name
    with open(path, 'w') as json_file:
        json.dump(cellmarker_ct_markers, json_file)



 all


## Create lists filtered to xenium chuvio panel genes

In [26]:
# Gene names of the xenium chuvio panel: column '0' of the panel CSV.
xenium_chuvio_panel_genes = pd.read_csv(cfg['data_dir']+'xenium_chuvio_panel_genes.csv',index_col=0)['0'].values
# Set copy of the panel genes for constant-time membership tests in the filter below.
panel_gene_set = set(xenium_chuvio_panel_genes)

# Marker-set files to filter (base names, '.json' is appended when reading).
# NOTE(review): these files must already exist in save_dir; the CellMarker cell in
# this notebook, as written, only produces 'cellmarker_cell_types_markers.json'.
files = [
'hubmap_cell_types_markers',
'cellmarker_cell_types_markers',
'cellmarker_cell_types_markers_lung',
]
for file in files:
    print(file)

    # read
    path = save_dir+file+'.json'
    with open(path, 'r') as json_file:
        ct_markers = json.load(json_file)

    # filter to chuvio genes
    # Each marker set is filtered from its OWN lists (ct_markers[k]); reading
    # ct_markers['canonical'] for both keys would store the canonical markers
    # under 'computational' as well.
    for k in ['canonical','computational']:
        ct_markers[k] = {
            k0: [marker for marker in v0 if marker in panel_gene_set]
            for k0, v0 in ct_markers[k].items()
        }

    # write
    path = save_dir+file+'_xenium_chuvio_panel_genes.json'
    with open(path, 'w') as json_file:
        json.dump(ct_markers,json_file)

hubmap_cell_types_markers
cellmarker_cell_types_markers
cellmarker_cell_types_markers_lung


## Check number of markers per cell type

In [6]:
# Marker-set files to summarise (base names, '.json' is appended when reading).
files = [
    'hubmap_cell_types_markers',
    'cellmarker_cell_types_markers',
    'cellmarker_cell_types_markers_lung',
]

# Maps '<file>_<marker set>[_chuvio]' -> {cell type: number of markers}.
marker_counts = {}
for file in files:
    print(file)

    # full marker lists
    path = save_dir+file+'.json'
    with open(path, 'r') as json_file:
        ct_markers = json.load(json_file)

    # marker lists restricted to the xenium chuvio panel
    path = save_dir+file+'_xenium_chuvio_panel_genes.json'
    with open(path, 'r') as json_file:
        ct_markers_chuvio = json.load(json_file)

    # count markers per cell type, full list first and panel-restricted list second
    for k in ['canonical','computational']:
        marker_counts[f'{file}_{k}'] = {
            cell_type: len(genes) for cell_type, genes in ct_markers[k].items()
        }
        marker_counts[f'{file}_{k}_chuvio'] = {
            cell_type: len(genes) for cell_type, genes in ct_markers_chuvio[k].items()
        }

# One row per marker set, one column per cell type; a cell type absent from a
# marker set counts as 0.
marker_counts = pd.DataFrame(marker_counts).T
marker_counts = marker_counts.replace(np.nan,0)
marker_counts = marker_counts.loc[:, sorted(marker_counts.columns)]
# shorten row labels by dropping the common 'cell_types_markers_' infix
marker_counts.index = marker_counts.index.str.replace('cell_types_markers_','')
marker_counts.to_csv(save_dir+'marker_counts.csv')

hubmap_cell_types_markers
cellmarker_cell_types_markers
cellmarker_cell_types_markers_lung
