Given a TIF file, this code processes it, segments the droplets, and saves them as PyTorch input/output files.

In [8]:
import numpy as np
import tifffile as tiff
import cv2
import os
import json
import torch

class TIFProcessor:
    """Segment droplets out of single frames of a TIF file and save them as ``.pt`` files.

    For each processed frame the pipeline is: load the page, crop it to a
    maximum height, convert it to grayscale, compute a Sobel gradient-magnitude
    mask, find high-gradient runs along x, then along y inside each x run, and
    finally save every (x run, y run) pair with ``torch.save``.
    """

    def __init__(self, filepath, output_dir):
        """Remember the TIF path and the directory that receives the ``.pt`` files."""
        self.filepath = filepath
        self.output_dir = output_dir
        self.array = None        # most recently loaded frame (after cropping)
        self.gray_arr = None     # grayscale version of self.array
        self.mask = None         # Sobel gradient magnitude of self.gray_arr
        self.frame_index = None  # index of the most recently processed frame

    def process_frame(self, frame_index, threshold=50, min_consecutive_frames=20,
                      min_segment_length=350, crop_x=False):
        """Segment one frame and save each droplet found in it.

        Args:
            frame_index: Zero-based page index passed to ``tiff.imread(key=...)``.
            threshold: Mean gradient magnitude a row/column must exceed to count
                as part of a droplet.
            min_consecutive_frames: Number of consecutive below-threshold
                samples that ends a segment.
            min_segment_length: Segments with ``end - start`` below this value
                are discarded (applied on both axes).
            crop_x: When False (default) the saved image spans the full frame
                width, as before. When True it is also cropped to the droplet's
                x segment.

        Each output file holds ``[image_tensor, frame_data]`` and is named
        ``<sequence>_<frame_index + 1>_<droplet_index>.pt``.
        """
        # Record the index here; apply_sobel_filter has no access to it.
        self.frame_index = frame_index

        # Load the requested page of the TIF as a numpy array
        self.array = tiff.imread(self.filepath, key=frame_index)
        # Crop BEFORE the grayscale conversion so gray_arr and mask are cropped too
        self.crop_frame()
        self.gray_arr = self.convert_to_grayscale()
        # Apply Sobel filter and generate mask
        self.apply_sobel_filter()

        droplets = self._find_droplets(threshold, min_consecutive_frames, min_segment_length)

        sequence_name = os.path.splitext(os.path.basename(self.filepath))[0]
        if droplets and self.output_dir:
            # torch.save does not create missing directories
            os.makedirs(self.output_dir, exist_ok=True)

        # Save each droplet as a PyTorch tensor together with its metadata
        for idx, ((start_x, end_x), (start_y, end_y)) in enumerate(droplets):
            region = self.gray_arr[start_y:end_y + 1, :]
            if crop_x:
                region = region[:, start_x:end_x + 1]
            droplet_image = torch.from_numpy(region).float()
            frame_data = {
                'sequence_name': sequence_name,
                'frame_id': frame_index + 1,
                'droplet_index': idx,
                'droplet_segments_x': [(start_x, end_x)],
                'droplet_segments_y': [(start_y, end_y)],
            }
            droplet_tensor = [droplet_image, frame_data]

            output_filename = f"{sequence_name}_{frame_index + 1}_{idx}.pt"
            output_filepath = os.path.join(self.output_dir, output_filename)
            torch.save(droplet_tensor, output_filepath)

    def _find_droplets(self, threshold, min_consecutive_frames, min_segment_length):
        """Return ``[(x_segment, y_segment), ...]`` for every droplet in ``self.mask``.

        Each y segment stays paired with the x segment it was found in, so the
        metadata of a droplet always refers to its own x range.
        """
        # Mean gradient per column (average over the y-axis)
        column_profile = np.mean(self.mask, axis=0)
        x_segments = [
            segment
            for segment in self.identify_droplet_segments(column_profile, threshold, min_consecutive_frames)
            if segment[1] - segment[0] >= min_segment_length
        ]

        droplets = []
        for x_segment in x_segments:
            for y_segment in self.segment_on_y_axis([x_segment], threshold, min_consecutive_frames):
                if y_segment[1] - y_segment[0] >= min_segment_length:
                    droplets.append((x_segment, y_segment))
        return droplets

    def crop_frame(self, max_height=1100):
        """Keep only the first ``max_height`` rows of ``self.array`` if it is taller."""
        if self.array.shape[0] > max_height:
            self.array = self.array[:max_height, :]

    def convert_to_grayscale(self):
        """Return ``self.array`` as a 2-D float grayscale image.

        A 2-D input is treated as already grayscale; otherwise the first three
        channels are combined with the weights 0.2989 / 0.5870 / 0.1140.
        """
        if self.array.ndim == 2:
            # Slicing [..., :3] on a 2-D frame would collapse it to one value per row
            return self.array.astype(np.float64)
        return np.dot(self.array[..., :3], [0.2989, 0.5870, 0.1140])

    def apply_sobel_filter(self):
        """Store the Sobel gradient magnitude of ``self.gray_arr`` in ``self.mask``."""
        sobel_x = cv2.Sobel(self.gray_arr, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(self.gray_arr, cv2.CV_64F, 0, 1, ksize=3)
        self.mask = np.sqrt(sobel_x**2 + sobel_y**2)

    def identify_droplet_segments(self, signal, threshold, min_consecutive_frames):
        """Find runs of ``signal`` above ``threshold``.

        A run starts at the first sample above ``threshold`` and ends once
        ``min_consecutive_frames`` consecutive samples are at or below it.

        Returns:
            A list of ``(start_index, end_index)`` tuples (both inclusive),
            where ``end_index`` is the last above-threshold sample of the run.
            A run still open at the end of ``signal`` is closed at the last index.
        """
        segments = []
        droplet_started = False
        consecutive_below_threshold = 0

        for i, value in enumerate(signal):
            if value > threshold:
                if not droplet_started:
                    droplet_started = True
                    start_index = i
                # Any above-threshold sample breaks the run of low samples;
                # without this reset, scattered dips would add up and cut the
                # segment at an arbitrary position.
                consecutive_below_threshold = 0
            elif value <= threshold and droplet_started:
                consecutive_below_threshold += 1
                if consecutive_below_threshold >= min_consecutive_frames:
                    droplet_started = False
                    end_index = i - min_consecutive_frames
                    segments.append((start_index, end_index))
                    consecutive_below_threshold = 0

        # If a droplet continues to the end of the image, consider it
        if droplet_started:
            end_index = len(signal) - 1
            segments.append((start_index, end_index))

        return segments

    def segment_on_y_axis(self, droplet_segments_x, threshold, min_consecutive_frames):
        """Segment each x range of ``self.mask`` along the y-axis.

        Returns:
            One flat list with the y segments of all given x segments, in
            order. The list does not record which x segment each entry came from.
        """
        droplet_segments_y = []

        for start_x, end_x in droplet_segments_x:
            # Region of the mask covered by this x segment (end inclusive)
            droplet_roi = self.mask[:, start_x:end_x + 1]

            # Mean gradient per row (average over the x-axis)
            roi_mean = np.mean(droplet_roi, axis=1)

            droplet_segments_y.extend(self.identify_droplet_segments(roi_mean, threshold, min_consecutive_frames))

        return droplet_segments_y


TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offs

In [20]:
import json
import os

class SequenceLoader:
    """Load one annotation JSON file and flatten it into per-box rows.

    The sequence identifier is taken from the file name, and the JSON content
    is read once when the loader is constructed.
    """

    def __init__(self, json_filepath):
        """Store the path, derive the sequence identifier and read the JSON file."""
        self.json_filepath = json_filepath
        self.sequence_number = self.extract_sequence_number()
        self.data = self.load_json_data()

    def extract_sequence_number(self):
        """Return the file name without directory and extension, e.g. "4_2" for ".../4_2.json"."""
        stem = os.path.splitext(os.path.basename(self.json_filepath))[0]
        return stem

    def load_json_data(self):
        """Parse and return the content of the JSON file."""
        with open(self.json_filepath, 'r') as handle:
            return json.load(handle)

    def process_frames(self):
        """Build one row per box of every frame.

        Returns:
            A list of ``[sequence_number, frame_number, box_id, x, y, cell_count]``
            rows. ``frame_number`` is the integer after the last underscore of
            the frame key, and ``cell_count`` is the number of entries in the
            frame's 'cells' list whose third element equals ``box_id``.
        """
        rows = []

        for key, info in self.data.items():
            frame_number = int(key.rsplit('_', 1)[-1])
            cells = info.get('cells', [])

            # Box width and height are unpacked but not part of the output row
            for x, y, _width, _height, box_id in info.get('boxes', []):
                owned_cells = [cell for cell in cells if cell[2] == box_id]
                rows.append([self.sequence_number, frame_number, box_id, x, y, len(owned_cells)])

        return rows

# Example usage: load the annotation file of sequence 4_2 and flatten it into rows.
# The path is relative to the notebook's working directory.
json_filepath = "../Data/all_json/4_2.json"
sequence_loader = SequenceLoader(json_filepath)

# Each row is [sequence_number, frame_number, box_id, x, y, cell_count]
frames_data = sequence_loader.process_frames()

# Print only the first ten rows to keep the cell output short
for entry in frames_data[:10]:
    print(entry)


['4_2', 1, 0, 1067, 345, 115]
['4_2', 1, 1, 444, 58, 114]
['4_2', 2, 0, 1370, 332, 117]
['4_2', 2, 1, 758, 42, 114]
['4_2', 3, 1, 1067, 32, 114]
['4_2', 3, 2, 186, 309, 101]
['4_2', 4, 1, 1359, 23, 113]
['4_2', 4, 2, 473, 300, 95]
['4_2', 5, 2, 764, 295, 102]
['4_2', 5, 3, 87, 171, 121]


In [None]:
# Setup files (paths are relative to the notebook's working directory)
seq42_filepath = "../Data/All_Sequences/4-2.tif"
output_folder = "../Data/input_images"
seq42_processor = TIFProcessor(seq42_filepath, output_folder)

# torch.save does not create missing directories
os.makedirs(output_folder, exist_ok=True)

# Count the pages from the TIF structure instead of decoding the whole stack
# into memory just to read its first dimension. process_frame selects frames
# by page index (key=frame_index), so the page count is the right loop bound,
# including for a single-page file.
with tiff.TiffFile(seq42_filepath) as seq42_tif:
    seq42_frame_count = len(seq42_tif.pages)

# Process all frames and save each droplet as a PyTorch tensor
for frame_index in range(seq42_frame_count):
    seq42_processor.process_frame(frame_index, threshold=50, min_consecutive_frames=20)


In [18]:
# Specify the path to the output file you want to load.
# File names follow "<sequence>_<frame_id>_<droplet_index>.pt" as written by
# TIFProcessor.process_frame.
output_file_path = "../Data/input_images/4-2_4_2.pt"

# Load the file; it holds the list [droplet_image, frame_data] that was saved
loaded_data = torch.load(output_file_path)

# Print the loaded data
print("Loaded Data:")
print(loaded_data)

# Access the image and frame_data separately.
# NOTE: despite its name, image_np_array is a torch tensor (it was saved as
# torch.from_numpy(...).float()), not a numpy array.
image_np_array, frame_data = loaded_data

# Print image and frame_data
print("Image:")
print(image_np_array)
print("Frame Data:")
print(frame_data)

Loaded Data:
[tensor([[212.9787, 218.9781, 212.9787,  ..., 110.9889, 115.9884, 104.9895],
        [223.9776, 227.9772, 203.9796,  ..., 129.9870, 121.9878, 112.9887],
        [228.9771, 224.9775, 202.9797,  ..., 137.9862, 141.9858, 136.9863],
        ...,
        [167.9832, 166.9833, 140.9859,  ...,  86.9913,  71.9928,  84.9915],
        [140.9859, 161.9838, 150.9849,  ...,  72.9927,  69.9930,  84.9915],
        [164.9835, 142.9857, 133.9866,  ...,  82.9917,  77.9922,  79.9920]]), {'sequence_name': '4-2', 'frame_id': 4, 'droplet_index': 2, 'droplet_segments_x': [(1362, 1907)], 'droplet_segments_y': [(2, 585)]}]
Image:
tensor([[212.9787, 218.9781, 212.9787,  ..., 110.9889, 115.9884, 104.9895],
        [223.9776, 227.9772, 203.9796,  ..., 129.9870, 121.9878, 112.9887],
        [228.9771, 224.9775, 202.9797,  ..., 137.9862, 141.9858, 136.9863],
        ...,
        [167.9832, 166.9833, 140.9859,  ...,  86.9913,  71.9928,  84.9915],
        [140.9859, 161.9838, 150.9849,  ...,  72.9927,  69

In [None]:
# This part is to read the student .json files and reformat them to transfer the data

In [None]:
# This part is to match the droplets found in the image file with the data on the student json files to get input output pairs to 