# Build feature table

In [None]:
import os
import pandas as pd
import numpy as np
import skimage.io as io

In [None]:
# Input locations; all paths are relative to the notebook's working directory.
base_dir = "../data"
seg_dir = os.path.join(base_dir, "segmentation_masks")
tables_dir = os.path.join(base_dir, "tables")

# Per-cell table (its rows are counted as cells further down) and the metadata
# table (presumably one row per FOV -- TODO confirm).
cell_tab_path = os.path.join(tables_dir, "cell_table_size_normalized.csv")
metadata_path = os.path.join(tables_dir, "metadata.csv")
cell_tab = pd.read_csv(cell_tab_path)
metadata = pd.read_csv(metadata_path)

# The feature table starts as a copy of the metadata; every section below
# left-merges additional feature columns onto it by 'fov'.
feature_tab = metadata.copy()

## Cell density

In [None]:
# Per-FOV tissue area (presumably the fraction of the FOV covered by tissue -- TODO confirm).
tissue_area_path = os.path.join(base_dir, "tables", "tissue_area.csv")
tissue_area = pd.read_csv(tissue_area_path)

# Number of cells of each phenotype in each FOV, with the FOV's tissue area attached.
counts = (
    cell_tab.groupby(['fov', 'cell_meta_cluster'])
    .size()
    .reset_index(name='count')
    .merge(tissue_area[['fov', 'tissue_area']], on='fov', how='left')
)

# 0.64 because each FOV is 800x800um.
counts['density'] = counts['count'] / counts['tissue_area'] / 0.64

# Column names of the wide table built in the next cell: "<phenotype>_density".
counts['feature_name'] = counts['cell_meta_cluster'] + "_density"

In [None]:
# Reshape to one row per FOV and one column per "<phenotype>_density";
# a phenotype that is absent from a FOV gets density 0.
counts_wide = counts.pivot(index='fov', columns='feature_name', values='density').fillna(0)
feature_tab = feature_tab.merge(counts_wide, on='fov', how='left')

feature_tab

## Cell ratios

In [None]:
# Get cell totals: raw counts per FOV (rows) and phenotype (columns), 0 where absent.
counts = cell_tab.groupby(['fov', 'cell_meta_cluster']).size().reset_index(name='count')
counts_wide = counts.pivot(index='fov', columns='cell_meta_cluster', values='count').fillna(0)

# Pooled populations used as numerators/denominators of the ratios.
pooled_populations = {
    'total_cd4t': ['CD4T', 'Foxp3', 'Tfh'],
    'total_t': ['CD4T', 'CD8T', 'Foxp3', 'Tfh', 'CD3_other'],
    'total_mono_mac_dc': ['APC', 'CD11c', 'CD11c_CD14', 'CD11c_CD68', 'CD14', 'CD14_CD68_CD163', 'CD68'],
}
for pooled_name, members in pooled_populations.items():
    counts_wide[pooled_name] = counts_wide[members].sum(axis=1)

In [None]:
# Specify ratios we want here: feature name -> [numerator column, denominator column]
ratios = {
    'CD4T_CD8T_ratio': ['total_cd4t', 'CD8T'],
    'Treg_CD4T_ratio': ['Foxp3', 'total_cd4t'],
    'T_myeloid_ratio': ['total_t', 'total_mono_mac_dc'],
}

# NOTE(review): a zero denominator yields inf (or NaN for 0/0); nothing in this
# cell replaces those values.
for name, celltypes in ratios.items():
    numerator, denominator = celltypes
    counts_wide[name] = counts_wide[numerator] / counts_wide[denominator]

ratio_columns = list(ratios)
feature_tab = feature_tab.merge(counts_wide[ratio_columns], on='fov', how='left')
feature_tab

In [None]:
# Same thing but only in the follicle region: keep rows flagged in_follicle_mask.
is_in_follicle = cell_tab['in_follicle_mask'].eq(True)
cell_tab_follicle = cell_tab.loc[is_in_follicle]

# Get cell totals: raw counts per FOV (rows) and phenotype (columns), 0 where absent.
counts = cell_tab_follicle.groupby(['fov', 'cell_meta_cluster']).size().reset_index(name='count')
counts_wide = counts.pivot(index='fov', columns='cell_meta_cluster', values='count').fillna(0)

# Pooled populations used as numerators/denominators of the ratios.
pooled_populations = {
    'total_cd4t': ['CD4T', 'Foxp3', 'Tfh'],
    'total_t': ['CD4T', 'CD8T', 'Foxp3', 'Tfh', 'CD3_other'],
    'total_mono_mac_dc': ['APC', 'CD11c', 'CD11c_CD14', 'CD11c_CD68', 'CD14', 'CD14_CD68_CD163', 'CD68'],
}
for pooled_name, members in pooled_populations.items():
    counts_wide[pooled_name] = counts_wide[members].sum(axis=1)

In [None]:
# Specify ratios we want here: feature name -> [numerator column, denominator column].
# Every name carries the "_follicle" suffix so it cannot collide with a column that
# is already in feature_tab; on a name collision pandas.merge silently renames both
# columns to "<name>_x" / "<name>_y".
ratios = {'CD4T_CD8T_ratio_follicle':['total_cd4t','CD8T'],
          'Treg_CD4T_ratio_follicle':['Foxp3','total_cd4t'],
          'T_myeloid_ratio_follicle':['total_t','total_mono_mac_dc']}

# NOTE(review): a zero denominator yields inf (or NaN for 0/0). The fillna(0)
# below only replaces NaN, so inf values stay in the table -- confirm this is intended.
for name, (numerator, denominator) in ratios.items():
    counts_wide[name] = counts_wide[numerator] / counts_wide[denominator]
feature_tab = pd.merge(feature_tab, counts_wide[list(ratios)], on='fov', how='left')

# Applies to every column of feature_tab, not just the ratios added here.
feature_tab = feature_tab.fillna(0)
feature_tab

## Functional marker positivity (thresholded) cell proportion

In [None]:
# Binarized (positive/negative) functional marker calls per cell.
func_tab_path = os.path.join(base_dir, "tables", "nimbus_binarized.csv")
func_tab = pd.read_csv(func_tab_path)

# Every column that is not an identifier is treated as a marker.
id_columns = ('fov', 'cell_meta_cluster', 'label')
func_markers = [col for col in func_tab.columns.values if col not in id_columns]

group_keys = ['fov', 'cell_meta_cluster']

# Positive-cell count per FOV/phenotype/marker; min_count=1 keeps a group whose
# values are all missing as NaN instead of turning it into 0.
func_counts = func_tab.groupby(group_keys)[func_markers].sum(min_count=1).reset_index()
func_long = func_counts.melt(id_vars=group_keys, var_name='marker', value_name='value')

# Total number of cells per FOV/phenotype, used as the denominator.
cell_counts = func_tab.groupby(group_keys).size().reset_index(name='total_count')
func_long = func_long.merge(cell_counts, on=group_keys, how='left')
func_long['prop'] = func_long['value'] / func_long['total_count']

# na means that we didn't care about the marker/cell combination
func_long = func_long.dropna()
func_long['feature_name'] = func_long['marker'] + 'pos_' + func_long['cell_meta_cluster'] + "_prop"

In [None]:
# One row per FOV, one column per "<marker>pos_<phenotype>_prop"; missing combinations become 0.
func_wide = func_long.pivot(index='fov', columns='feature_name', values='prop').fillna(0)
feature_tab = feature_tab.merge(func_wide, on='fov', how='left')

feature_tab

## Functional marker positivity (thresholded) cell density

In [None]:
# Binarized (positive/negative) functional marker calls per cell.
func_tab_path = os.path.join(base_dir, "tables", "nimbus_binarized.csv")
func_tab = pd.read_csv(func_tab_path)

# Every column that is not an identifier is treated as a marker.
id_columns = ('fov', 'cell_meta_cluster', 'label')
func_markers = [col for col in func_tab.columns.values if col not in id_columns]

group_keys = ['fov', 'cell_meta_cluster']

# Positive-cell count per FOV/phenotype/marker; min_count=1 keeps a group whose
# values are all missing as NaN instead of turning it into 0.
func_counts = func_tab.groupby(group_keys)[func_markers].sum(min_count=1).reset_index()
func_long = func_counts.melt(id_vars=group_keys, var_name='marker', value_name='value')

# Normalize by tissue area, with the same 0.64 factor as the cell densities.
func_long = func_long.merge(tissue_area[['fov', 'tissue_area']], on='fov', how='left')
func_long['density'] = func_long['value'] / func_long['tissue_area'] / 0.64

# na means that we didn't care about the marker/cell combination
func_long = func_long.dropna()
func_long['feature_name'] = func_long['marker'] + 'pos_' + func_long['cell_meta_cluster'] + "_density"

In [None]:
# One row per FOV, one column per "<marker>pos_<phenotype>_density"; missing combinations become 0.
func_wide = func_long.pivot(index='fov', columns='feature_name', values='density').fillna(0)
feature_tab = feature_tab.merge(func_wide, on='fov', how='left')

feature_tab

## Functional marker average expression

In [None]:
# Every cell population that appears in the marker lists below, in the order
# in which each list enumerates them.
all_cell_types = ['APC', 'B', 'CD11c', 'CD11c_CD14', 'CD11c_CD68', 'CD14', 'CD14_CD68_CD163',
                  'CD4T', 'CD68', 'CD8T', 'Endothelial', 'FDC', 'Foxp3', 'Mast', 'NK',
                  'Neutrophils', 'SMA', 'Tfh']


def _all_except(*excluded):
    """Return a fresh list of all cell populations minus `excluded`, order preserved."""
    return [cell for cell in all_cell_types if cell not in excluded]


# Functional marker -> cell populations in which that marker is analyzed.
cell_func_marker_dict = {
    'Biotin': _all_except(),
    'Caspase1': _all_except(),
    'CD45RO': _all_except('Endothelial', 'FDC', 'Mast', 'NK', 'SMA'),
    'CD69': _all_except('Endothelial', 'FDC', 'Mast', 'Neutrophils', 'SMA'),
    'CD86': _all_except('Mast', 'NK'),
    'Galectin9': _all_except(),
    'Glut1': _all_except(),
    'GranzymeB': ['CD8T', 'NK'],
    'ICOS': ['CD4T', 'CD8T', 'Foxp3', 'Tfh'],
    'IDO': _all_except('B', 'CD4T', 'CD8T', 'Foxp3', 'Tfh'),
    'IFNg': _all_except('Endothelial', 'SMA'),
    'Ki67': _all_except(),
    'Lag3': _all_except('Endothelial', 'Mast', 'Neutrophils', 'SMA'),
    'NLRP3': _all_except(),
    'PD1': _all_except('Endothelial', 'FDC', 'Mast', 'SMA', 'Tfh'),
    'TCF1TCF7': ['CD4T', 'CD8T', 'Foxp3', 'Tfh'],
    'TIGIT': _all_except('Neutrophils'),
    'TIM3': _all_except(),
    'Vimentin': _all_except(),
}
func_markers = list(cell_func_marker_dict)

In [None]:
# Mean expression of every functional marker per FOV and phenotype.
group_keys = ['fov', 'cell_meta_cluster']
func_cell_tab = cell_tab[group_keys + func_markers].groupby(group_keys).mean().reset_index()

# Long format: one row per FOV/phenotype/marker, named "<marker>_exp_<phenotype>".
func_cell_tab_long = func_cell_tab.melt(id_vars=group_keys, var_name='marker', value_name='average_expression')
func_cell_tab_long['feature_name'] = (
    func_cell_tab_long['marker'] + "_exp_" + func_cell_tab_long['cell_meta_cluster']
)

# Remove marker/cell combinations we don't care about
keep_features = []
for marker, cells in cell_func_marker_dict.items():
    keep_features.extend(f"{marker}_exp_{cell}" for cell in cells)
is_wanted = func_cell_tab_long['feature_name'].isin(keep_features)
func_cell_tab_long = func_cell_tab_long[is_wanted]

In [None]:
# One row per FOV, one column per "<marker>_exp_<phenotype>"; missing combinations become 0.
func_cell_tab_wide = func_cell_tab_long.pivot(
    index='fov', columns='feature_name', values='average_expression'
).fillna(0)
feature_tab = feature_tab.merge(func_cell_tab_wide, on='fov', how='left')

feature_tab

## Cell density in follicle (Ki67+CD21+CD14+) or extrafollicular region

In [None]:
def _count_cells_by_region(seg_mask, cell_mask, follicle_mask, region_ids):
    """Count distinct cells of each population inside and outside the follicle.

    A cell label from `seg_mask` belongs to a population when it occurs on pixels
    where `cell_mask` equals that population's id. It is follicular when at least
    one of those pixels lies on a non-zero `follicle_mask` pixel; all remaining
    cells of the population are extrafollicular. Label 0 is ignored.

    Returns two dicts keyed by region id: (follicle_counts, extrafollicular_counts).
    """
    in_follicle = follicle_mask != 0
    follicle_counts = {}
    extrafol_counts = {}
    for one_id in region_ids:
        type_pixels = cell_mask == one_id

        # All cells of this population
        all_cells = np.unique(seg_mask[type_pixels])
        all_cells = all_cells[all_cells != 0]

        # Cells of this population that touch the follicle
        follicle_cells = np.unique(seg_mask[type_pixels & in_follicle])
        follicle_cells = follicle_cells[follicle_cells != 0]

        follicle_counts[one_id] = len(follicle_cells)
        extrafol_counts[one_id] = len(np.setdiff1d(all_cells, follicle_cells))
    return follicle_counts, extrafol_counts


cells_to_analyze = ['APC','B','CD11c','CD11c_CD14','CD11c_CD68','CD14','CD14_CD68_CD163','CD68','CD4T','CD8T','Foxp3','Tfh']

cell_overlay_dir = "../data/mantis"
follicle_mask_dir = "../data/region_masks/combined_regions_individual"
gold_mask_dir = "../data/region_masks/gold/thresh350"
all_fovs = feature_tab['fov'].values

# Population id <-> name mapping, read from a single FOV.
# NOTE(review): assumes the mapping is the same for every FOV -- confirm.
one_mapping = pd.read_csv(os.path.join(cell_overlay_dir, "sample1_fov1", "population_cell_mask.csv"))
one_mapping_keep = one_mapping[one_mapping['region_name'].isin(cells_to_analyze)]
ids_to_analyze = one_mapping_keep['region_id']
cell_to_id_dict = dict(zip(one_mapping_keep['region_id'], one_mapping_keep['region_name']))

all_dfs = []
for fov in all_fovs:
    print(fov)
    follicle_mask = io.imread(os.path.join(follicle_mask_dir, fov, "Ki67_CD21_CD14.tiff"))

    # FOVs with an empty follicle mask contribute no rows (their features end up NaN
    # after the left merge onto feature_tab).
    if np.sum(follicle_mask) == 0:
        continue

    seg_mask = io.imread(os.path.join(seg_dir, fov+"_whole_cell.tiff"))
    cell_mask = io.imread(os.path.join(cell_overlay_dir, fov, "population_cell_mask.tiff"))
    gold_mask = io.imread(os.path.join(gold_mask_dir, fov+".tiff"))

    follicle_dict, extrafol_dict = _count_cells_by_region(seg_mask, cell_mask, follicle_mask, ids_to_analyze)

    # Area in pixels. The follicle is "non-zero mask pixels", the same definition
    # used to assign cells to it (identical to counting ==1 for a 0/1 mask).
    follicle_area = np.sum(follicle_mask != 0)

    fov_df = pd.DataFrame({
        'region_id': list(follicle_dict),
        'follicle_count': list(follicle_dict.values()),
        'extrafollicular_count': list(extrafol_dict.values()),
    })
    fov_df['fov'] = fov
    fov_df['follicle_region_area'] = follicle_area
    # Pixels where the gold mask is 0, minus the follicle pixels.
    fov_df['extrafolliclular_region_area'] = np.sum(gold_mask==0) - follicle_area

    all_dfs.append(fov_df)

final_df = pd.concat(all_dfs, ignore_index=True)
final_df = pd.merge(final_df, one_mapping_keep, on='region_id')

# Each FOV is 800x800um, 2048x2048px: dividing by the um^2 area of a pixel and
# scaling by 1e6 turns cells-per-pixel into cells per mm^2.
um2_per_px = (800/2048)**2
final_df['density_follicle'] = final_df['follicle_count'] / final_df['follicle_region_area'] / um2_per_px * 1e6
final_df['density_extrafollicular'] = final_df['extrafollicular_count'] / final_df['extrafolliclular_region_area'] / um2_per_px * 1e6

final_df_melt = final_df.melt(
    id_vars=['fov','region_name'],
    value_vars=['density_follicle', 'density_extrafollicular'],
    var_name='density_type',
    value_name='density_value'
)
final_df_melt['feature_name'] = final_df_melt['region_name']+"_"+final_df_melt['density_type']

In [None]:
# One row per FOV, one column per "<phenotype>_density_follicle" / "<phenotype>_density_extrafollicular".
# No fillna here: FOVs missing from final_df_melt keep NaN after the left merge.
follicle_tab_wide = final_df_melt.pivot(
    index='fov',
    columns='feature_name',
    values='density_value',
)
feature_tab = feature_tab.merge(follicle_tab_wide, on='fov', how='left')

feature_tab

## Density of cells in each spatial-LDA microenvironment

In [None]:
# Per-cell table carrying the spatial-LDA microenvironment assignment ('lda_me').
lda_tab_path = os.path.join(base_dir, "tables", "cell_table_spatial_lda.csv")
lda_tab = pd.read_csv(lda_tab_path)

# Cells per FOV and microenvironment, normalized by tissue area with the same
# 0.64 factor as the cell densities.
lda_count = lda_tab.groupby(['fov', 'lda_me']).size().reset_index(name="count")
lda_count = lda_count.merge(tissue_area[['fov', 'tissue_area']], on='fov', how='left')
lda_count['density'] = lda_count['count'] / lda_count['tissue_area'] / 0.64

# Feature names: "lda_me_<microenvironment>" with dashes turned into underscores.
lda_count['feature_name'] = ('lda_me_' + lda_count['lda_me']).str.replace('-', '_')

In [None]:
# One row per FOV, one column per microenvironment; absent microenvironments get density 0.
lda_count_wide = lda_count.pivot(index='fov', columns='feature_name', values='density').fillna(0)
feature_tab = feature_tab.merge(lda_count_wide, on='fov', how='left')

feature_tab

## Cell-cell enrichment scores

In [None]:
enrichment_score_table_dir = os.path.join(base_dir,"spatial_analysis","cell_cell_enrichment_20um","tables")

# os.listdir returns entries in arbitrary order, so sort them to make the column
# order of the feature table reproducible. Hidden entries and sub-directories
# (e.g. .DS_Store, .ipynb_checkpoints) are skipped because they are not score tables.
all_files = sorted(
    entry for entry in os.listdir(enrichment_score_table_dir)
    if not entry.startswith('.')
    and os.path.isfile(os.path.join(enrichment_score_table_dir, entry))
)

# Start from the list of FOVs and left-merge one "es_<file name>" column per table.
# NOTE(review): assumes each table only holds 'fov', 'z', 'pheno1' and 'pheno2';
# any other shared column would be suffixed _x/_y by the merge -- confirm.
all_df = feature_tab[['fov']].copy()
for file_path in all_files:
    filename = os.path.splitext(file_path)[0]
    one_df = pd.read_csv(os.path.join(enrichment_score_table_dir, file_path))
    one_df = one_df.rename(columns={'z': "es_"+filename})
    one_df = one_df.drop(['pheno1', 'pheno2'], axis=1)
    all_df = pd.merge(all_df, one_df, how='left', on='fov')

In [None]:
# Missing and infinite enrichment scores are both set to 0 before merging.
all_df = all_df.fillna(0).replace([np.inf, -np.inf], 0)
feature_tab = feature_tab.merge(all_df, on='fov', how='left')

feature_tab

## Neighborhood diversity

In [None]:
# Per-cell neighborhood diversity table; the per-cell 'label' column is dropped
# so it does not enter the averages.
diversity_tab_path = os.path.join(
    base_dir,
    "spatial_analysis",
    "cell_neighbor_analysis",
    "neighborhood_diversity_cell_meta_cluster_radius50.csv",
)
diversity_tab = pd.read_csv(diversity_tab_path).drop(columns='label')

# Mean of the remaining columns per FOV and phenotype, named "<phenotype>_diversity".
diversity_tab_mean = diversity_tab.groupby(['fov', 'cell_meta_cluster']).mean().reset_index()
diversity_tab_mean['feature_name'] = diversity_tab_mean['cell_meta_cluster'] + "_diversity"

In [None]:
# One row per FOV, one column per "<phenotype>_diversity"; absent phenotypes get 0.
diversity_tab_mean_wide = diversity_tab_mean.pivot(
    index='fov',
    columns='feature_name',
    values='diversity_cell_meta_cluster',
).fillna(0)
feature_tab = feature_tab.merge(diversity_tab_mean_wide, on='fov', how='left')

feature_tab

## Save final table

In [None]:
# Remove features that are all 0: keep a column only if at least one row differs from 0.
has_nonzero_value = feature_tab.ne(0).any(axis=0)
feature_tab = feature_tab.loc[:, has_nonzero_value]
feature_tab.to_csv("../data/tables/feature_tab.csv", index=False)