# p24+ cell enrichment

In [None]:
import os
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
import random
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

## Get enrichment scores

In [None]:
# Inputs: the full cell table and the table listing p24+ cells
cell_tab_path = "../data/tables/cell_table_size_normalized.csv"
p24_cell_tab_path = "../data/p24pos_cells.csv"

output_dir = "../data/spatial_analysis/p24_cell_enrichment_20um"
# exist_ok makes this a no-op when the directory is already there
os.makedirs(output_dir, exist_ok=True)

cell_table = pd.read_csv(cell_tab_path)
p24_cell_table = pd.read_csv(p24_cell_tab_path)

# Flag every cell whose (fov, label) pair is listed in the p24+ table.
# The keys are de-duplicated first so the left merge returns exactly one row per
# cell_table row, in cell_table order; a repeated key would otherwise add rows
# and shift the flags onto the wrong cells.
p24_keys = p24_cell_table[['fov', 'label']].drop_duplicates()
merged = cell_table.merge(p24_keys, on=['fov', 'label'], how='left', indicator=True)
# Assign positionally so the result does not depend on index alignment
cell_table['p24_status'] = (merged['_merge'] == 'both').to_numpy()

# FOVs that contain at least one p24+ cell
p24_fovs = np.unique(cell_table.loc[cell_table['p24_status'], 'fov'])

# Fixed seed so the random sampling that follows is reproducible
random.seed(329)

In [None]:
# Cell phenotypes (values of 'cell_meta_cluster') tested for enrichment around p24+ cells
all_phenos = ['APC', 'B', 'CD11c', 'CD11c_CD14', 'CD11c_CD68', 'CD14',
              'CD14_CD68_CD163', 'CD4T', 'CD68', 'CD8T', 'Endothelial',
              'FDC', 'Foxp3', 'Mast', 'NK', 'Neutrophils', 'SMA', 'Tfh']

# Neighborhood radius in centroid coordinate units. Presumably 20 um converted at
# 2048 px per 800 um (the output directory is labelled "20um") -- TODO confirm.
dist_thresh = 20*(2048/800)
# Number of random relabelings used to build each null distribution
bootstrap_n = 100

data_list = []
for fov in p24_fovs:
    # Calculate distance between all cells in the FOV
    fov_cell_table = cell_table.loc[cell_table['fov'] == fov].reset_index(drop=True)
    all_centroids = fov_cell_table[['centroid-0', 'centroid-1']].to_numpy()
    dist_mat = cdist(all_centroids, all_centroids, 'euclidean')

    # pheno1 is p24+ cells. After reset_index(drop=True) the index labels are the
    # row positions, so they can index the distance matrix directly.
    is_p24 = fov_cell_table['p24_status'].eq(True).to_numpy()
    pheno1_idx = fov_cell_table.index[is_p24].to_list()

    # Rows: pheno1 cells; columns: every cell in the FOV
    pheno1_dist_mat = dist_mat[pheno1_idx, :]
    # 1 where a cell lies within the threshold. Distance 0 is excluded, which drops
    # each cell's pairing with itself (and any exactly coincident centroid).
    bin_mask = (pheno1_dist_mat < dist_thresh) & (pheno1_dist_mat > 0)
    pheno1_dist_mat_bin = bin_mask.astype(int)

    # Randomization pool: every cell except pheno1, which is held constant.
    # It depends only on the FOV, so it is built once per FOV; the set makes each
    # membership test O(1) while the list keeps the original ordering.
    pheno1_set = set(pheno1_idx)
    all_idx = [x for x in fov_cell_table.index.to_list() if x not in pheno1_set]

    for pheno2 in all_phenos:
        is_pheno2 = (fov_cell_table['cell_meta_cluster'] == pheno2).to_numpy()
        pheno2_idx = fov_cell_table.index[is_pheno2].to_list()
        num_pheno2 = len(pheno2_idx)

        # Cell type absent from this FOV: no score. NaN is written to the CSV as an
        # empty field and keeps the z column numeric.
        if num_pheno2 == 0:
            data_list.append([fov, pheno2, np.nan])
            continue

        # random.sample raises ValueError when asked for more items than the pool
        # holds; record a missing score instead of aborting the whole run.
        if num_pheno2 > len(all_idx):
            print(f"Skipping {pheno2} in {fov}: {num_pheno2} cells but only "
                  f"{len(all_idx)} cells in the randomization pool")
            data_list.append([fov, pheno2, np.nan])
            continue

        # Observed statistic: for each pheno1 cell, count "close" pheno2 cells,
        # then average across all pheno1 cells.
        # NOTE(review): pheno2_idx may include cells that are themselves p24+,
        # whereas the pool excludes every p24+ cell -- confirm this is intended.
        true_close_contacts = np.sum(pheno1_dist_mat_bin[:, pheno2_idx], axis=1)
        true_close_contacts_mean = np.mean(true_close_contacts)

        # Null distribution: relabel num_pheno2 random pool cells as pheno2
        # (drawn without replacement) and recompute the same statistic.
        all_bootstrap = []
        for _ in range(bootstrap_n):
            random_pheno2_idx = random.sample(all_idx, num_pheno2)
            close_contacts = np.sum(pheno1_dist_mat_bin[:, random_pheno2_idx], axis=1)
            all_bootstrap.append(np.mean(close_contacts))

        # Fit a normal distribution to the null draws
        muhat, sigmahat = stats.norm.fit(all_bootstrap)
        # z score of the observed value against the null.
        # NOTE(review): if every draw gives the same mean, sigmahat is 0 and this
        # division yields inf or NaN (with a NumPy RuntimeWarning), not an error.
        z = (true_close_contacts_mean - muhat) / sigmahat

        data_list.append([fov, pheno2, z])

# One row per (fov, phenotype)
save_df = pd.DataFrame(data_list, columns=['fov', 'pheno', 'z'])
save_df.to_csv(os.path.join(output_dir, "p24pos_enrichment_scores.csv"), index=False)

## Context dependent - follicle area only

In [None]:
# Inputs: the full cell table and the table listing p24+ cells
cell_tab_path = "../data/tables/cell_table_size_normalized.csv"
p24_cell_tab_path = "../data/p24pos_cells.csv"

output_dir = "../data/spatial_analysis/p24_cell_enrichment_20um_context_dependent_follicle"
# exist_ok makes this a no-op when the directory is already there
os.makedirs(output_dir, exist_ok=True)

cell_table = pd.read_csv(cell_tab_path)
p24_cell_table = pd.read_csv(p24_cell_tab_path)

# Flag every cell whose (fov, label) pair is listed in the p24+ table.
# The keys are de-duplicated first so the left merge returns exactly one row per
# cell_table row, in cell_table order; a repeated key would otherwise add rows
# and shift the flags onto the wrong cells.
p24_keys = p24_cell_table[['fov', 'label']].drop_duplicates()
merged = cell_table.merge(p24_keys, on=['fov', 'label'], how='left', indicator=True)
# Assign positionally so the result does not depend on index alignment
cell_table['p24_status'] = (merged['_merge'] == 'both').to_numpy()

# FOVs that contain at least one p24+ cell
p24_fovs = np.unique(cell_table.loc[cell_table['p24_status'], 'fov'])

# Fixed seed so the random sampling that follows is reproducible
random.seed(329)

In [None]:
# Only FDCs are tested in the follicle-restricted analysis
all_phenos = ['FDC']

# Neighborhood radius in centroid coordinate units. Presumably 20 um converted at
# 2048 px per 800 um (the output directory is labelled "20um") -- TODO confirm.
dist_thresh = 20*(2048/800)
# Number of random relabelings used to build each null distribution
bootstrap_n = 100

data_list = []
for fov in p24_fovs:
    # Calculate distance between all cells in the FOV
    fov_cell_table = cell_table.loc[cell_table['fov'] == fov].reset_index(drop=True)
    all_centroids = fov_cell_table[['centroid-0', 'centroid-1']].to_numpy()
    dist_mat = cdist(all_centroids, all_centroids, 'euclidean')

    # pheno1 is p24+ cells. After reset_index(drop=True) the index labels are the
    # row positions, so they can index the distance matrix directly.
    is_p24 = fov_cell_table['p24_status'].eq(True).to_numpy()
    pheno1_idx = fov_cell_table.index[is_p24].to_list()

    # Rows: pheno1 cells; columns: every cell in the FOV
    pheno1_dist_mat = dist_mat[pheno1_idx, :]
    # 1 where a cell lies within the threshold. Distance 0 is excluded, which drops
    # each cell's pairing with itself (and any exactly coincident centroid).
    bin_mask = (pheno1_dist_mat < dist_thresh) & (pheno1_dist_mat > 0)
    pheno1_dist_mat_bin = bin_mask.astype(int)

    # Randomization pool - CONTEXT DEPENDENT HERE, ONLY FOLLICLE: cells flagged
    # in_follicle_mask, minus pheno1, which is held constant. It depends only on
    # the FOV, so it is built once per FOV; the set makes each membership test
    # O(1) while the list keeps the original ordering.
    in_follicle = fov_cell_table['in_follicle_mask'].eq(True).to_numpy()
    pheno1_set = set(pheno1_idx)
    all_idx = [x for x in fov_cell_table.index[in_follicle].to_list() if x not in pheno1_set]

    for pheno2 in all_phenos:
        # pheno2 cells are taken from the whole FOV, not only the follicle
        is_pheno2 = (fov_cell_table['cell_meta_cluster'] == pheno2).to_numpy()
        pheno2_idx = fov_cell_table.index[is_pheno2].to_list()
        num_pheno2 = len(pheno2_idx)

        # Cell type absent from this FOV: no score. NaN is written to the CSV as an
        # empty field and keeps the z column numeric.
        if num_pheno2 == 0:
            data_list.append([fov, pheno2, np.nan])
            continue

        # random.sample raises ValueError when asked for more items than the pool
        # holds (possible here because the pool is follicle-only while num_pheno2
        # counts the whole FOV); record a missing score instead of aborting the run.
        if num_pheno2 > len(all_idx):
            print(f"Skipping {pheno2} in {fov}: {num_pheno2} cells but only "
                  f"{len(all_idx)} cells in the randomization pool")
            data_list.append([fov, pheno2, np.nan])
            continue

        # Observed statistic: for each pheno1 cell, count "close" pheno2 cells,
        # then average across all pheno1 cells.
        # NOTE(review): pheno2_idx may include cells that are themselves p24+,
        # whereas the pool excludes every p24+ cell -- confirm this is intended.
        true_close_contacts = np.sum(pheno1_dist_mat_bin[:, pheno2_idx], axis=1)
        true_close_contacts_mean = np.mean(true_close_contacts)

        # Null distribution: relabel num_pheno2 random pool cells as pheno2
        # (drawn without replacement) and recompute the same statistic.
        all_bootstrap = []
        for _ in range(bootstrap_n):
            random_pheno2_idx = random.sample(all_idx, num_pheno2)
            close_contacts = np.sum(pheno1_dist_mat_bin[:, random_pheno2_idx], axis=1)
            all_bootstrap.append(np.mean(close_contacts))

        # Fit a normal distribution to the null draws
        muhat, sigmahat = stats.norm.fit(all_bootstrap)
        # z score of the observed value against the null.
        # NOTE(review): if every draw gives the same mean, sigmahat is 0 and this
        # division yields inf or NaN (with a NumPy RuntimeWarning), not an error.
        z = (true_close_contacts_mean - muhat) / sigmahat

        data_list.append([fov, pheno2, z])

# One row per (fov, phenotype)
save_df = pd.DataFrame(data_list, columns=['fov', 'pheno', 'z'])
save_df.to_csv(os.path.join(output_dir, "p24pos_enrichment_scores.csv"), index=False)