# Mixing Score
The purpose of this notebook is to calculate the mixing score between any two cell populations in a sample.

In [None]:
import os
import seaborn
import pandas as pd

import ark.settings as settings
from ark.analysis.spatial_analysis import compute_cell_neighbors, compute_mixing_score, create_neighborhood_matrix
from ark.utils import spatial_analysis_utils

If you would like to use data stored on an external drive, see [External Hard Drives and Google File Stream](https://github.com/angelolab/ark-analysis#external-hard-drives-and-google-file-stream). Otherwise, all data, images, files, etc. must be placed in the 'data' directory and referenced via `'../data/path_to_your_data'`.

### Define paths
* `base_dir`: the path to all of your imaging data. Should contain a directory for your images, segmentations, and cell table (generated from `1_Segment_Image_Data.ipynb`)
* `cell_table_path`: the path to the cell table that contains columns for fov, cell label, and cell phenotype (generated from `3_Cluster_Cells.ipynb`)
* `segmentation_dir`: the path to the directory containing your segmentations (generated from `1_Segment_Image_Data.ipynb`)
* `dist_mat_dir`: the path to the directory containing the distance matrices for each FOV. If this directory was not previously generated by either `example_neighborhood_analysis_script.ipynb` or `example_pairwise_spatial_enrichment.ipynb`, it will be created (and the distance matrices computed) below

In [None]:
# root of the dataset; every other path in this cell is derived from it
base_dir = "../data/example_dataset"

# inputs: segmentation output and the cell table with cell phenotype labels
segmentation_dir = os.path.join(base_dir, "segmentation/deepcell_output")
cell_table_path = os.path.join(
    base_dir,
    "segmentation/cell_table/cell_table_size_normalized_cell_labels.csv",
)

# per-FOV distance matrices (created further down if the directory is missing)
dist_mat_dir = os.path.join(base_dir, "spatial_analysis/dist_mats")

# outputs written by this notebook
mixing_score_dir = os.path.join(base_dir, "mixing_score")
cell_neighbors_dir = os.path.join(mixing_score_dir, "cell_neighbors")

In [None]:
# create the output directory; os.makedirs also creates any missing parent
# directories, and exist_ok=True makes re-running this cell a no-op
os.makedirs(cell_neighbors_dir, exist_ok=True)

# make sure the distance matrix directory exists, then generate the distance
# matrices unless it already contains files. Checking for an empty directory
# (rather than only a missing one) means a directory left behind by a failed
# or interrupted earlier run no longer causes this step to be skipped silently.
os.makedirs(dist_mat_dir, exist_ok=True)
if not os.listdir(dist_mat_dir):
    spatial_analysis_utils.calc_dist_matrix(segmentation_dir, dist_mat_dir)

### Read in data from the cell table

In [None]:
# load the full cell table from disk
all_data = pd.read_csv(cell_table_path)

# distinct values of the FOV column, in order of first appearance
all_fovs = pd.unique(all_data[settings.FOV_ID])

### Define cell populations of interest and compute neighbors matrices
The neighbors matrix for each FOV will be saved as an individual file in the `cell_neighbors_dir`.

In [None]:
# radius passed to compute_cell_neighbors as `neighbors_radius`
# (presumably in pixels, going by the variable name; confirm against the ark docs)
pixel_radius = 100

# reads from dist_mat_dir and writes results into cell_neighbors_dir
compute_cell_neighbors(
    all_data,
    dist_mat_dir,
    cell_neighbors_dir,
    neighbors_radius=pixel_radius,
)

## Compute mixing scores
This will compute the mixing score for each FOV and save the output in the `mixing_score_dir`. 
- `target_cell`: the infiltrating cell population
- `reference_cell`: the reference cell population
- `cold_threshold`: the minimum number of reference cells each FOV must contain to not be classified as *cold*; cold samples will not be assigned a mixing score
- `percent_mix`: setting to True will generate mixing scores as percentages instead of raw values

You can adjust these arguments and run the two code cells multiple times.

In [None]:
# infiltrating population(s)
target_cell_list = ["Cancer"]

# reference population(s)
reference_cell_list = [
    "T",
    "B",
    "Mono_Mac",
    "NK",
    "Granulocyte",
]

# passed to compute_mixing_score as `cold_thresh`
cold_threshold = 0

In [None]:
file_name = "Cancer_Immune-mixing_score.csv"

# collect one (fov, mixing score, population ratio) row per FOV
fov_rows = []
for fov in all_fovs:
    fov_score, fov_ratio = compute_mixing_score(
        cell_neighbors_dir,
        fov=fov,
        target_cells=target_cell_list,
        reference_cells=reference_cell_list,
        cold_thresh=cold_threshold,
    )
    fov_rows.append((fov, fov_score, fov_ratio))

# assemble the per-FOV rows into a table and write it to mixing_score_dir
result_columns = ['fov', 'mixing_score', 'population_ratio']
mixing_score_data = pd.DataFrame(fov_rows, columns=result_columns)
output_path = os.path.join(mixing_score_dir, file_name)
mixing_score_data.to_csv(output_path, index=False)

# distribution of mixing scores across FOVs
seaborn.histplot(mixing_score_data, x='mixing_score')