# Neighborhood encoder

Pipeline for TYC70

In [1]:
# I use cellpose env to run this
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch
from torch import nn
from torch.utils.data import DataLoader
import numpy as np
import time
import glob
import matplotlib.pyplot as plt
import csv
import tifffile
from PIL import Image
import cv2
from natsort import natsorted
import pandas as pd
from skimage.measure import regionprops, label

In [6]:
class neighborhood_encoder(nn.Module):
    """Encoder for a cell neighborhood (work in progress).

    Only the image branch is implemented. The location and morphology
    encoders are empty ``nn.Sequential`` placeholders and ``forward`` is a
    stub that returns ``None``.

    Args:
        image_size: Side length of the square, single-channel image crop the
            image encoder accepts. The convolutions keep the spatial size
            (3x3 kernel, stride 1, padding 1), so the flattened feature size
            is ``64 * image_size * image_size``.
        embedding_size: Length of the embedding produced by the image encoder.
    """

    def __init__(self, image_size, embedding_size=128):
        super().__init__()

        # Assign the appropriate metrics
        self.image_size = image_size
        self.embedding_size = embedding_size

        # Creates the encoders (creation order is kept stable so that seeded
        # weight initialisation stays reproducible).
        self.image_encoder = self.create_image_encoder()
        self.location_encoder = self.create_location_encoder()
        self.morphology_encoder = self.create_morphology_encoder()

    def create_image_encoder(self):
        """Build the CNN mapping a (N, 1, image_size, image_size) batch to (N, embedding_size)."""
        flattened_features = 64 * self.image_size * self.image_size
        return nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Dropout2d(p=0.3),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flattened_features, self.embedding_size),
            nn.ReLU(),
        )

    def create_location_encoder(self):
        """Return the location encoder; currently an empty placeholder (identity)."""
        return nn.Sequential()

    def create_morphology_encoder(self):
        """Return the morphology encoder; currently an empty placeholder (identity)."""
        return nn.Sequential()

    def normalize_by_mask(self, raw_img, mask_img):
        """Z-score the pixels of ``raw_img`` that lie inside the mask.

        Pixels where ``mask_img > 0`` are replaced by ``(value - mean) / std``
        using the mean and standard deviation of those masked pixels. Pixels
        outside the mask keep their raw value (cast to float32).

        Args:
            raw_img: NumPy array with the raw intensities.
            mask_img: NumPy array indexable against ``raw_img``; values > 0
                mark the pixels to normalize.

        Returns:
            A new float32 array; ``raw_img`` is not modified. If the mask is
            empty, a warning is printed and the raw image is returned as
            float32.
        """
        mask = mask_img > 0

        if not mask.any():
            print("Warning: No nucleus pixels detected in the mask! Returning raw image.")
            return raw_img.astype(np.float32)

        nucleus_pixels = raw_img[mask]
        mean = nucleus_pixels.mean()
        std = nucleus_pixels.std()
        # Guard against division by zero for constant-intensity regions.
        if not std > 0:
            std = 1.0

        normalized_img = raw_img.astype(np.float32).copy()
        normalized_img[mask] = (nucleus_pixels - mean) / std

        return normalized_img

    def pad_feature_matrix(self, feature_lists, max_cells):
        """Stack ragged per-frame feature rows into a zero-padded matrix.

        Args:
            feature_lists: Sequence with one sequence of values per frame.
            max_cells: Number of rows of the output matrix.

        Returns:
            float32 array of shape ``(max_cells, len(feature_lists))`` where
            column ``j`` holds the values of frame ``j`` followed by zeros.

        Raises:
            ValueError: If a frame holds more than ``max_cells`` values.
        """
        num_frames = len(feature_lists)
        feature_matrix = np.zeros((max_cells, num_frames), dtype=np.float32)
        for frame_idx, row in enumerate(feature_lists):
            num_cells = len(row)
            if num_cells > max_cells:
                # Fail with an explicit message instead of NumPy's generic
                # broadcasting error.
                raise ValueError(
                    f"Frame {frame_idx} has {num_cells} values but max_cells is {max_cells}."
                )
            feature_matrix[:num_cells, frame_idx] = row
        return feature_matrix

    def forward(self, x):
        """Placeholder forward pass: not implemented yet, always returns ``None``."""
        # TODO: fill it in here when done
        return


# Side length (pixels) of the square crop the image encoder is built for.
image_size = 32
# Integer half of the crop size.
image_size_half = image_size // 2

# Build the encoder with the default embedding size.
neighborhood_encoder_model = neighborhood_encoder(image_size=image_size)

## Pre-processing of the raw data

The segmentation has already been done, so here I only reload the segmentation masks and extract the data in the format this model needs.
These steps need to be integrated into the segmentation for-loop if you want to do live tracking.


In [None]:
# Getting the paths to the raw images
path_to_experiment = r"\\store\department\gene\chien_data\Lab\Data_and_Analysis\Tsai-Ying_Chen\TYC070_EC546_TILs_72hr_20250328\72hr_recording_20250328at132435"

# NOTE(review): the previous comment here said "ROI 17 from frame 0 to 60", but the
# value below is 19 -- confirm which ROI is intended. Every frame that has a mask is
# preprocessed, not only a sub-range of frames.
ROI = 19

# Raw nucleus channel plus the segmentation masks for the nucleus and membrane channels.
nucleus_path = os.path.join(path_to_experiment, "637nm")
nucleus_mask_path = os.path.join(path_to_experiment, "637nm_ImgGS")
Membrane_mask_path = os.path.join(path_to_experiment, "460nm_ImgGS")

# All three folders are searched with the same per-ROI file pattern; natural sorting
# keeps numbered file names in numeric order.
roi_pattern = f"*_ROI{ROI}.tif"
nucleus_files = natsorted(glob.glob(os.path.join(nucleus_path, roi_pattern)))
nucleus_mask_files = natsorted(glob.glob(os.path.join(nucleus_mask_path, roi_pattern)))
Membrane_mask_files = natsorted(glob.glob(os.path.join(Membrane_mask_path, roi_pattern)))

print(f"Found {len(nucleus_files)} nucleus files.")
print(f"Found {len(nucleus_mask_files)} nucleus mask files.")
print(f"Found {len(Membrane_mask_files)} Membrane mask files.")

# The file lists are later paired by position, so differing lengths are reported here.
if len(nucleus_files) != len(nucleus_mask_files):
    print("Warning: nucleus files and nucleus mask files don't match in length!")
else:
    print(f"Both lists have the same length: {len(nucleus_files)}")

if len(Membrane_mask_files) != len(nucleus_mask_files):
    print("Warning: Membrane mask files and nucleus mask files don't match in length!")
else:
    print(f"Both Membrane mask files and nucleus mask files have the same length: {len(Membrane_mask_files)}")

output_table_data_path = os.path.join(path_to_experiment, "raw_feature_data_per_ROI")

# exist_ok=True avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_table_data_path, exist_ok=True)

# One entry per frame; each entry is a list with one value per membrane region.
x_list, y_list, area_list, circ_list, eccentricity_list, cell_type_list = [], [], [], [], [], []

max_cells = 0
total_frames = len(nucleus_mask_files)

# NOTE: zip() stops at the shortest list, so if the three file lists differ in length
# the extra files are silently skipped and files are paired purely by sorted position.
for frame_idx, (nucleus_file, nucleus_mask_file, Membrane_mask_file) in enumerate(zip(nucleus_files, nucleus_mask_files, Membrane_mask_files)):

    nucleus_img = cv2.imread(nucleus_file, -1)
    nucleus_mask_img = cv2.imread(nucleus_mask_file, -1)
    Membrane_mask_img = cv2.imread(Membrane_mask_file, -1)

    # cv2.imread returns None instead of raising when a file cannot be read;
    # fail here with the offending path rather than deep inside skimage.
    for image_path, image in (
        (nucleus_file, nucleus_img),
        (nucleus_mask_file, nucleus_mask_img),
        (Membrane_mask_file, Membrane_mask_img),
    ):
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

    labeled_mask_nucleus = label(nucleus_mask_img)
    labeled_mask_Membrane = label(Membrane_mask_img)

    props_nucleus = regionprops(labeled_mask_nucleus, intensity_image=nucleus_img)
    props_Membrane = regionprops(labeled_mask_Membrane, intensity_image=Membrane_mask_img)

    print(f"Frame {frame_idx}: {len(props_nucleus)} cells in nucleus mask, {len(props_Membrane)} cells in Membrane mask")

    x_row, y_row, area_row, circ_row, eccentricity_row, cell_type_row = [], [], [], [], [], []

    # For every membrane label, remember the first nucleus whose (truncated) centroid
    # lies inside it. A single pixel lookup per nucleus replaces building a full-image
    # boolean mask per membrane region and scanning all nuclei against it.
    nucleus_pos_by_Membrane_label = {}
    for prop_nucleus in props_nucleus:
        y_nuc, x_nuc = int(prop_nucleus.centroid[0]), int(prop_nucleus.centroid[1])
        Membrane_label = int(labeled_mask_Membrane[y_nuc, x_nuc])
        if Membrane_label != 0:  # 0 is background in the labeled mask
            nucleus_pos_by_Membrane_label.setdefault(Membrane_label, (y_nuc, x_nuc))

    for prop_Membrane in props_Membrane:
        nucleus_pos = nucleus_pos_by_Membrane_label.get(prop_Membrane.label)

        if nucleus_pos is not None:
            # Cell type 1: the membrane region contains a nucleus centroid;
            # the cell position is that nucleus centroid.
            y, x = nucleus_pos
            cell_type_row.append(1)
        else:
            # Cell type 2: no nucleus found; fall back to the membrane centroid.
            y, x = map(int, prop_Membrane.centroid)
            cell_type_row.append(2)

        area = prop_Membrane.area
        # A zero perimeter would divide by zero in the circularity below.
        perimeter = prop_Membrane.perimeter
        if not perimeter > 0:
            perimeter = 1
        # Circularity: 4*pi*area / perimeter^2.
        circ = 4 * np.pi * (area / (perimeter ** 2))
        eccentricity = prop_Membrane.eccentricity

        x_row.append(x)
        y_row.append(y)
        area_row.append(area)
        circ_row.append(circ)
        eccentricity_row.append(eccentricity)

    # Track the largest per-frame cell count seen so far.
    max_cells = max(max_cells, len(x_row))

    x_list.append(x_row)
    y_list.append(y_row)
    area_list.append(area_row)
    circ_list.append(circ_row)
    eccentricity_list.append(eccentricity_row)
    cell_type_list.append(cell_type_row)

Found 689 nucleus files.
Found 241 nucleus mask files.
Found 241 Membrane mask files.
Both Membrane mask files and nucleus mask files have the same length: 241
