# Markov phenotyping 1 ROI

In [None]:
#use cellpose env
import numpy as np
import time
import os
import glob
import matplotlib.pyplot as plt
import csv
import pandas as pd
import torch
import torch.nn as nn

### Code to load the csv data into tensor format
This code dynamically loads the csv files in the same folder 
1. It creates a 3d tensor where each 2d matrix is a csv file
2. Saves the name of the file as its feature name in a list

In [3]:
path_to_experiment = r"\\store\department\gene\chien_data\Lab\Data_and_Analysis\Wilco_van_Nes\WvN014_phenotype_imaging_strategy5_20250217\_from_Li_Results_ROI9"

if not os.path.exists(path_to_experiment):
    print("path to experiment folder doesn't exist")

path_to_table_data = os.path.join(path_to_experiment, "template_feature_tables")

if not os.path.exists(path_to_experiment):
    print("path to table folder doesn't exist")

csv_files_paths = glob.glob(os.path.join(path_to_table_data, "*.csv"))

tensor_list = []
feature_name_list = []

for csv_file_path in csv_files_paths:

    feature_name = os.path.splitext(os.path.basename(csv_file_path))[0]
    feature_name_list.append(feature_name)
    # print(feature_name_list)

    with open(csv_file_path, newline = '') as csvfile:
        reader = csv.reader(csvfile)
        data = [list(map(float, row)) for row in reader]
        data_tensor = torch.tensor(data, dtype=torch.float32)

        if data_tensor.shape[1] == 1:
            data_tensor = data_tensor.repeat(1, 101)

        tensor_list.append(data_tensor)
        # print(data_tensor.shape)
        # print(len(tensor_list))

all_cells_tensor = torch.stack(tensor_list, dim=0)
print(all_cells_tensor.shape)
print(feature_name_list)
print(f"There are suppose to be {len(feature_name_list)} feature dimensions in the torch tensor")

torch.Size([6, 547, 101])
['tb_brightness', 'tb_coordinates_x', 'tb_circularity', 'tb_coordinates_y', 'tb_cell_category', 'tb_area']
There are suppose to be 6 feature dimensions in the torch tensor


### Markov transition matrix estimation

Here we create boxes inside the image were you calculate the state (counting the number of individual T cells and cancer cells centroids in the region)
We also figure out from this how many unique states there are for the transition matrix

In [17]:
# box pixel size
box_size = 100

# image pixel size
# image_width = 5120
# image_height = 5120
image_width = 2000
image_height = 1800

# Integer check to ensure image dimensions are divisible by box size
if image_width % box_size != 0 or image_height % box_size != 0:
    raise ValueError(f"Image dimensions ({image_width}x{image_height}) must be divisible by box size ({box_size})")
else:
    print(f"Image dimensions are valid: {image_width}x{image_height} is divisible by box size {box_size}")

# The tensor to store the states of each box
# 1st dimension: frame number
# 2nd dimension: x position of the box in the image
# 3rd dimension: y position of the box in the image
# 4th dimension: state values (e.g., T-cells and cancer cells) (τ, κ)

state_tensor = torch.zeros(all_cells_tensor.shape[2],
                           image_width // box_size, 
                           image_height // box_size, 
                           2)

unique_states = set()

# Function to calculate the box index for each cell
def get_box_index(x, y, box_size, image_width, image_height):

    # Calculate box index along the x-axis and y-axis
    box_x_index = int(min(x // box_size, (image_width // box_size) - 1))
    box_y_index = int(min(y // box_size, (image_height // box_size) - 1))

    return box_x_index, box_y_index

# for loop to calculate the state
for frame in range(all_cells_tensor.shape[2]):
    #first get the position of the feature in the all_cells_tensor
    x_tensor_pos = feature_name_list.index("tb_coordinates_x")
    y_tensor_pos = feature_name_list.index("tb_coordinates_y")
    cell_type_tensor_pos = feature_name_list.index("tb_cell_category")

    x_coord_frame = all_cells_tensor[x_tensor_pos, :, frame]
    y_coord_frame = all_cells_tensor[y_tensor_pos, :, frame]
    cell_type_frame = all_cells_tensor[cell_type_tensor_pos, :, frame]

    # Find the position of the cell in which box and adjust the state of the box
    for cell_id, (x, y, cell_type) in enumerate(zip(x_coord_frame, y_coord_frame, cell_type_frame)):
        # print(f"x is:{x}")
        # print(f"y is:{y}")

        box_x_index, box_y_index = get_box_index(x, y, box_size, image_width, image_height)

        # print(f"box x index is {box_x_index} and type {type(box_x_index)}, box y index is {box_y_index}")

        if cell_type == 2:
            state_tensor[frame, box_x_index, box_y_index, 0] += 1
        if cell_type == 0 or cell_type == 1:
            state_tensor[frame, box_x_index, box_y_index, 1] += 1

    # Cell check
    if torch.sum(state_tensor[frame]) != all_cells_tensor.shape[1]:
        print(f"Not all cells were put in boxes correctly placed {torch.sum(state_tensor[frame])} need to place {all_cells_tensor.shape[1]} cells")
    elif frame == 0:
        print(f"All cells were put in boxes correctly, placed {torch.sum(state_tensor[frame])} cells need to place {all_cells_tensor.shape[1]} cells for the first frame")
    
    # Iterate over the boxes to find the unique states
    for box_x in range(image_width // box_size):
        for box_y in range(image_height // box_size):
            state_t = state_tensor[frame, box_x, box_y]
            t_cells = int(state_t[0])
            cancer_cells = int(state_t[1])
            unique_states.add((t_cells, cancer_cells))

# Give each state a index for the transition matrix
state_to_index = {tuple(state): i for i, state in enumerate(unique_states)}
print(f"Number of unique states: {len(unique_states)}")
print(f"Unique states: {unique_states}")


Image dimensions are valid: 2000x1800 is divisible by box size 100
All cells were put in boxes correctly, placed 547.0 cells need to place 547 cells for the first frame
Number of unique states: 77
Unique states: {(4, 0), (5, 1), (8, 0), (0, 5), (2, 2), (6, 2), (7, 1), (4, 2), (3, 6), (5, 3), (8, 2), (0, 7), (2, 4), (1, 8), (6, 4), (3, 8), (5, 5), (8, 4), (11, 0), (0, 0), (0, 9), (1, 10), (6, 6), (3, 1), (0, 2), (1, 3), (3, 3), (5, 0), (1, 5), (6, 1), (7, 0), (3, 5), (5, 2), (4, 4), (9, 0), (1, 7), (2, 6), (3, 7), (5, 4), (4, 6), (9, 2), (1, 0), (0, 11), (6, 8), (3, 0), (4, 8), (1, 2), (0, 4), (2, 1), (0, 13), (2, 10), (3, 2), (4, 1), (8, 1), (1, 4), (0, 6), (2, 3), (0, 15), (6, 3), (3, 4), (4, 3), (10, 0), (1, 6), (0, 8), (2, 5), (6, 5), (4, 5), (8, 5), (9, 4), (0, 1), (11, 1), (0, 10), (1, 1), (0, 3), (2, 0), (2, 9), (6, 0)}


In the code below we create the markov transition matrix

In [None]:
# The markov transition tnsor stores the states of each box
# 1st dimension: frame number
# 2nd dimension: x position of the box in the image
# 3rd dimension: y position of the box in the image
# 4th dimension: state values (e.g., T-cells and cancer cells) (τ, κ) at t
# 5th dimension: state values (e.g., T-cells and cancer cells) (τ, κ) at t+1

markov_transition_tensor = torch.zeros(all_cells_tensor.shape[2]-1,
                                image_width // box_size,
                                image_height // box_size,
                                len(unique_states),
                                len(unique_states))

# For loop to fill in the transition matrix with all the states
for frame in range(all_cells_tensor.shape[2]-1):
    for box_x in range(image_width // box_size):
        for box_y in range(image_height // box_size):
            state_t = tuple(state_tensor[frame, box_x, box_y].tolist())
            state_t_plus_1 = tuple(state_tensor[frame + 1, box_x, box_y].tolist())

            state_t_index = state_to_index[state_t]
            state_t_plus_1_index = state_to_index[state_t_plus_1]

            if frame ==0:
                markov_transition_tensor[frame, box_x, box_y, state_t_index, state_t_plus_1_index] += 1
            else:
                markov_transition_tensor[frame, box_x, box_y] = markov_transition_tensor[frame-1, box_x, box_y].clone()
                markov_transition_tensor[frame, box_x, box_y, state_t_index, state_t_plus_1_index] += 1

# Create a seperate normalized matrix to seperate for counts
normalized_markov_transition_tensor = markov_transition_tensor.clone().float()

# For loop to normalize the transition matrix
for frame in range(all_cells_tensor.shape[2]-1):
    for box_x in range(image_width // box_size):
        for box_y in range(image_height // box_size):
            row_sums = normalized_markov_transition_tensor[frame, box_x, box_y].sum(dim=1)

            # Avoid division by 0, 0/1=0 so will still create proper matrix
            row_sums[row_sums == 0] = 1

            normalized_markov_transition_tensor[frame, box_x, box_y] = normalized_markov_transition_tensor[frame, box_x, box_y]/row_sums.unsqueeze(1)

total_possible_transitions = (image_width // box_size) * (image_height // box_size) * (all_cells_tensor.shape[2] - 1)



Detachment Probability (pd): 0.0064344825223088264


In [28]:
# For loop to calculate the detatchment probability
start_state_detachment = (1,1)
start_state_detachment_index = state_to_index[start_state_detachment]
detachment_probability = 0

for frame in range(all_cells_tensor.shape[2]-1):
    for box_x in range(image_width // box_size):
        for box_y in range(image_height // box_size):

            state_t_plus_1 = tuple(state_tensor[frame + 1, box_x, box_y].tolist())
            state_t_plus_1_index = state_to_index[state_t_plus_1]

            if state_t_plus_1[0] == 0:
                detachment_probability += markov_transition_tensor[frame, box_x, box_y, start_state_detachment_index, state_t_plus_1_index]

detachment_probability = detachment_probability/total_possible_transitions
print(f"Detachment Probability (pd): {detachment_probability}")

Detachment Probability (pd): 0.0064344825223088264


### Plotting code

In [None]:
# number of T cells over time

# number of cancer cells over time 

# mean cancer cell area

# mean T-cell area

# mean cancer cell roundness

# mean T-cell roundness

### deprecated/test code

#### Code to load the data as a panda's datatype

In [2]:
path_to_experiment = r"\\store\department\gene\chien_data\Lab\Data_and_Analysis\Wilco van Nes\WvN014_phenotype_imaging_strategy5_20250217\_from_Li_Results_ROI9"

if not os.path.exists(path_to_experiment):
    print("path to experiment folder doesn't exist")

path_to_table_data = os.path.join(path_to_experiment, "template_feature_tables")
if not os.path.exists(path_to_experiment):
    print("path to table folder doesn't exist")


csv_files_paths = glob.glob(os.path.join(path_to_table_data, "*.csv"))

dfs =[]
feature_name_list = []

for csv_file_path in csv_files_paths:
    df = pd.read_csv(csv_file_path, header=None)
    feature_name = os.path.splitext(os.path.basename(csv_file_path))[0]
    feature_name_list.append(feature_name)

    # print(feature_name)
    # print(df.shape)

    frames = [f'frame_{i}' for i in range(df.shape[1])]
    df.columns = frames
    dfs.append(df)

all_cells_df = pd.concat(dfs, axis=1, keys=feature_name_list)

all_cells_df.index = [f'cell_{i+1}' for i in range(all_cells_df.shape[0])]

print(all_cells_df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 547 entries, cell_1 to cell_547
Columns: 506 entries, ('tb_brightness', 'frame_0') to ('tb_area', 'frame_100')
dtypes: float64(204), int64(302)
memory usage: 2.1+ MB
None


In [3]:
cols_to_print = ['tb_cell_category'] + [f'{feature}' for feature in feature_name_list]
print(all_cells_df[cols_to_print].head())

       tb_cell_category tb_brightness                                      \
                frame_0       frame_0  frame_1  frame_2  frame_3  frame_4   
cell_1                0        98.601   98.059   99.898  102.370   97.427   
cell_2                0        98.601   98.059   99.898  102.370   97.427   
cell_3                2        89.867   92.742   97.872   90.343   87.914   
cell_4                2        97.040  103.230  104.270  103.820  101.600   
cell_5                0       101.120  102.340  103.640  102.860  100.240   

                                           ...  tb_area                    \
        frame_5  frame_6 frame_7  frame_8  ... frame_91 frame_92 frame_93   
cell_1  100.280   98.531  97.695   98.941  ...      548     7004     8241   
cell_2  100.280   98.531  97.695   98.941  ...      548     7004     8241   
cell_3   91.324   98.531  88.954   88.955  ...     1896     1989     1873   
cell_4  102.760  101.590  99.716   99.608  ...     1389     1347     1471  

In [8]:
all_cells_tensor = torch.tensor(all_cells_df.values, dtype=torch.float32)
print(all_cells_tensor.shape)

torch.Size([547, 506])


### Own values/definitions
101 time points, timepoint = 3 minutes interval
ROI9 crop FOV of (2000,1800) square

individual cell features
* centroid position
* morphology
    * area
    * circularity
* speed
* directionality

Dynamic 3d feature tensor



<!-- ```python
cell_data = {
    cell_id: {
        "frames": [t1, t2, t3, ...],      # List of frame timepoints when the cell appears
        "x": [x1, x2, x3, ...],           # X coordinates over time
        "y": [y1, y2, y3, ...],           # Y coordinates over time
        "area": [a1, a2, a3, ...],        # Cell area over time
        "brightness": [b1, b2, b3, ...],  # Brightness over time
        "circularity": [c1, c2, c3, ...], # Circularity over time
        "type": cell_type,                # 0 = non-MN tumor, 1 = MN tumor, 2 = immune cell
        "parent": parent_id,              # If cell was born from division, store parent ID
    }
}
``` -->





### Paper values/definitions
Verma, A., Yu, C., Bachl, S., Lopez, I., Schwartz, M., Moen, E., ... & Engelhardt, B. E. (2024). Cellular behavior analysis from live-cell imaging of TCR T cell–cancer cell interactions. bioRxiv.

#### Algorithm details
Neighborhood encoding model input:
* centroid position
* morphology
    * area
    * perimeter
    * eccentricity

Edges assigned to cells within 64 pixels(41.6 µm) of each other.  
Model is given neighborhood embeddings and centroid positions of cells in the previous seven frames [t<sub>n-7</sub>,t<sub>n</sub>] to compare with t<sub>n+1</sub>.  
The temporal context of the previous seven frames is modeled using long short-term memory (LSTM) layers

These feature vectors are ten fed into a tracking model that causally integrates temporal information and performs a pairwise comparison of each cell's feature vector  
across frames to produce an effective probability score indicating wheter two cells are the same cell, are different cells, or have a parent-child relationship.  
Lineage tracking and hungarian "shadow object" algorithm to assign birth or death of cells.

#### Imaging details
1:1 ratio of T-cells:cancer cells  
Brightfield images capture T-cells & cancer cells  
Red fluorescent protein channel images capture cancer nuclei  
Images made every 4 minutes for 24 hours

#### Methods 
##### Neighborhood encoder architecture in detail
constructed a graph attention network <sup>[1]</sup> with 3 inputs  
1. First head received images of each cell and converted these images to a vector embedding with a convolutional neural network.
    * Each image consisted of a 16x16 crop of the raw data centered on the centroid position of the cell. 
    * Pixels within the nuclear segmentation mask were normalized by subtracting the mean value and dividing by the standard deviation.
2. Second head received the centroid location of each cell
3. Third head received three morphology metrics for each cell.
    * area
    * perimeter
    * eccentricity

The latter two heads made use of fully connected neural networks to convert the inputs into vectors  
Adjacency matrix for the graph attention network based on the Euclidean distance between pairs of cells  
Cells were linked if they were closer than 64 pixels (41.6 µm).  

Normalized adjacency matrix and concatenated embeddings were fed into a graph attention layer to update the embeddings of each cell.
The appearance and morphology embeddings were concatenated to the output of the graph attention layer to generate the final neighborhood embedding

**References**  
[1]: Brody, S., Alon, U., & Yahav, E. (2021). How attentive are graph attention networks?. arXiv preprint arXiv:2105.14491.

The markov matrix looks at cell density to see how the populations move across the whole field of view, while the neighborhood encoder looks at the invididual cell inside the bin to see where it is.
For example there is a difference between 4 cancer cells and 1 T cell where the 4 cancer cells are clustered together and the T cell is close vs 4 cancer cells that surround the T cell but at equal distance from the 1 T cell
And the neighborhood encoding captures this subtle difference the markov matrix can't capture