In [None]:
# This notebook reads a .tif file, runs an algorithm to segment the individual droplets into their own greyscale images, and then compares them against the annotations in the matching .json file

In [None]:
# Step 1: get the X descriptions from the .json data, ignoring low-quality data
import json
import os
import numpy as np
import cv2
from tifffile import imread
import torch
from sklearn.model_selection import train_test_split

class FrameSegmenter:
    """Cut annotated bounding boxes out of TIFF frames and save them as tensors.

    Each ``<sequence>.json`` file in ``json_folder`` is paired with the TIFF
    ``<sequence with '_' replaced by '-'>.tif`` in ``tif_folder``. The JSON is
    read as a mapping of frame keys to per-frame dicts, where:

    * the text after the last ``_`` of a frame key is a 1-based frame number;
    * ``'boxes'`` is a list of ``(x, y, w, h, box_id)`` entries;
    * ``'cells'`` is a list of entries whose third element is compared with
      ``box_id`` to count the cells belonging to a box.

    For every box a ``.pt`` file holding ``[grayscale_tensor, num_cells]`` is
    written.
    """

    def __init__(self, json_folder, tif_folder, output_folder, test_size=0.3, random_seed=42):
        """Store the folder locations and split settings.

        Args:
            json_folder: Directory containing the ``.json`` annotation files.
            tif_folder: Directory containing the matching ``.tif`` files.
            output_folder: Root directory for the generated ``.pt`` files.
            test_size: Stored on the instance; not read by any method here.
            random_seed: Stored on the instance; not read by any method here.
        """
        self.json_folder = json_folder
        self.tif_folder = tif_folder
        self.output_folder = output_folder
        # NOTE(review): test_size / random_seed are kept for interface
        # compatibility, but no train/test split is performed in this class.
        self.test_size = test_size
        self.random_seed = random_seed

    def extract_sequence_number(self, json_filepath):
        """Return the file name of ``json_filepath`` without directory or extension."""
        filename = os.path.basename(json_filepath)
        sequence_number, _ = os.path.splitext(filename)
        return sequence_number

    def load_json_data(self, json_filepath):
        """Parse and return the content of the JSON file at ``json_filepath``."""
        with open(json_filepath, 'r') as json_file:
            return json.load(json_file)

    def convert_to_grayscale(self, frame):
        """Return a float grayscale version of ``frame``.

        A 2-D input is treated as already grayscale and returned as float64;
        without this guard ``frame[..., :3]`` would slice the first three
        *columns* of such an image. Otherwise the first three channels of the
        last axis are combined with the weights 0.2989 / 0.5870 / 0.1140.
        """
        frame = np.asarray(frame)
        if frame.ndim == 2:
            return frame.astype(np.float64)
        return np.dot(frame[..., :3], [0.2989, 0.5870, 0.1140])

    def process_frames(self, json_filepath, tif_filepath, output_folder, num_frames=30):
        """Save one ``.pt`` file per annotated box of a sequence.

        Args:
            json_filepath: Annotation file of the sequence.
            tif_filepath: TIFF file holding the frames of the sequence.
            output_folder: Directory receiving the ``.pt`` files; it is
                created when missing.
            num_frames: Only the first ``num_frames`` entries of the JSON
                mapping are processed.

        Returns:
            None. When the TIFF file does not exist a message is printed and
            nothing is written.
        """
        json_data = self.load_json_data(json_filepath)

        # Check if the TIFF file exists
        if not os.path.exists(tif_filepath):
            print(f"TIFF file not found: {tif_filepath}. Skipping.")
            return

        # torch.save does not create missing directories.
        os.makedirs(output_folder, exist_ok=True)
        sequence_number = self.extract_sequence_number(json_filepath)

        # Loop through the specified number of frames (or all frames if fewer)
        for frame_key, frame_info in list(json_data.items())[:num_frames]:
            # Frame keys carry a 1-based number; imread expects a 0-based index.
            frame_number = int(frame_key.split('_')[-1]) - 1

            boxes = frame_info.get('boxes', [])
            if not boxes:
                # Nothing to cut out, so do not read the frame at all.
                continue
            cells = frame_info.get('cells', [])

            # Load the frame from the TIFF file
            frame = imread(tif_filepath, key=frame_number)

            for box_info in boxes:
                # Convert x, y, w, h, and box_id to integers
                x, y, w, h, box_id = map(int, box_info)

                # Clamp to the frame origin: a negative start index would be
                # interpreted by numpy as "counted from the end" and wrap.
                x_start, y_start = max(x, 0), max(y, 0)
                x_stop, y_stop = max(x + w, 0), max(y + h, 0)
                segmented_image = frame[y_start:y_stop, x_start:x_stop]
                if segmented_image.size == 0:
                    # Box lies outside the frame or has no area.
                    continue

                grayscale_image = self.convert_to_grayscale(segmented_image)

                output_filename = f"{sequence_number}_{frame_number}_{box_id}.pt"
                output_filepath = os.path.join(output_folder, output_filename)

                # Count the number of cells for the current box_id in the cells data
                num_cells = sum(1 for cell_info in cells if cell_info[2] == box_id)

                # Save the PyTorch tensor with image and number of cells
                torch.save([torch.from_numpy(grayscale_image).float(), num_cells], output_filepath)

    def process_sequences(self, num_frames=30):
        """Run ``process_frames`` for every ``.json`` file in ``json_folder``.

        All results are written to ``<output_folder>/train``.

        Args:
            num_frames: Forwarded to ``process_frames``.
        """
        # Sorted so that the processing order does not depend on the
        # arbitrary order returned by os.listdir.
        sequence_numbers = [
            self.extract_sequence_number(json_filename)
            for json_filename in sorted(os.listdir(self.json_folder))
            if json_filename.endswith(".json")
        ]

        # NOTE(review): every sequence goes to "train"; test_size and
        # random_seed are not applied here.
        train_folder = os.path.join(self.output_folder, "train")
        for sequence_number in sequence_numbers:
            json_filepath = os.path.join(self.json_folder, f"{sequence_number}.json")
            tif_filepath = os.path.join(self.tif_folder, f"{sequence_number.replace('_', '-')}.tif")
            self.process_frames(json_filepath, tif_filepath, train_folder, num_frames)


class TIFProcessor:
    """Locate droplets in a TIFF frame and save each one as a tensor.

    A frame is converted to grayscale, a Sobel gradient-magnitude mask is
    computed, and the mask's column / row means are thresholded to find the
    x- and y-extent of every droplet.
    """

    def __init__(self, filepath, output_dir):
        """Remember the TIFF path and the directory for the ``.pt`` files."""
        self.filepath = filepath
        self.output_dir = output_dir
        self.array = None        # frame as read from the TIFF (after cropping)
        self.gray_arr = None     # grayscale version of self.array
        self.mask = None         # Sobel gradient magnitude of self.gray_arr
        self.frame_index = None  # index of the frame processed last

    def process_frame(self, frame_index, threshold=50, min_consecutive_frames=20,
                      min_segment_length=350):
        """Segment the droplets of one frame and save them.

        Args:
            frame_index: 0-based index of the frame inside the TIFF file.
            threshold: Mean mask value a column / row must exceed to count as
                part of a droplet.
            min_consecutive_frames: Number of consecutive samples at or below
                ``threshold`` that ends a segment.
            min_segment_length: Segments whose ``end - start`` is smaller than
                this are discarded, on both axes.

        Returns:
            List of the paths of the written ``.pt`` files. Each file holds
            ``[droplet_image, frame_data]`` where ``droplet_image`` is the
            grayscale frame cropped to the droplet on both axes.
        """
        self.frame_index = frame_index
        # The file imports ``imread`` directly from tifffile.
        self.array = imread(self.filepath, key=frame_index)
        # Crop before deriving the grayscale image so that the crop actually
        # affects the grayscale image and the mask.
        self.crop_frame()
        self.gray_arr = self.convert_to_grayscale()
        self.apply_sobel_filter()

        droplets = self._locate_droplets(threshold, min_consecutive_frames, min_segment_length)

        # torch.save does not create missing directories.
        os.makedirs(self.output_dir, exist_ok=True)
        sequence_name = os.path.splitext(os.path.basename(self.filepath))[0]

        saved_paths = []
        for idx, ((start_x, end_x), (start_y, end_y)) in enumerate(droplets):
            crop = self.gray_arr[start_y:end_y + 1, start_x:end_x + 1]
            droplet_image = torch.from_numpy(np.ascontiguousarray(crop)).float()
            frame_data = {
                'sequence_name': sequence_name,
                'frame_id': frame_index + 1,
                'droplet_index': idx,
                'droplet_segments_x': [(start_x, end_x)],
                'droplet_segments_y': [(start_y, end_y)],
            }

            output_filename = f"{sequence_name}_{frame_index + 1}_{idx}.pt"
            output_filepath = os.path.join(self.output_dir, output_filename)
            torch.save([droplet_image, frame_data], output_filepath)
            saved_paths.append(output_filepath)

        return saved_paths

    def _locate_droplets(self, threshold, min_consecutive_frames, min_segment_length):
        """Return ``(x_segment, y_segment)`` pairs for every droplet in ``self.mask``.

        Each y-segment is searched inside its own x-segment, so the pairing
        stays correct when segments are filtered out or when one x-segment
        contains several y-segments.
        """
        column_profile = np.mean(self.mask, axis=0)
        droplets = []
        for x_segment in self.identify_droplet_segments(column_profile, threshold, min_consecutive_frames):
            if x_segment[1] - x_segment[0] < min_segment_length:
                continue
            for y_segment in self.segment_on_y_axis([x_segment], threshold, min_consecutive_frames):
                if y_segment[1] - y_segment[0] >= min_segment_length:
                    droplets.append((x_segment, y_segment))
        return droplets

    def crop_frame(self, max_height=1100):
        """Keep only the first ``max_height`` rows of ``self.array``."""
        if self.array.shape[0] > max_height:
            self.array = self.array[:max_height, :]

    def convert_to_grayscale(self):
        """Return a float grayscale version of ``self.array``.

        A 2-D array is treated as already grayscale and returned as float64;
        otherwise the first three channels of the last axis are combined with
        the weights 0.2989 / 0.5870 / 0.1140.
        """
        array = np.asarray(self.array)
        if array.ndim == 2:
            return array.astype(np.float64)
        return np.dot(array[..., :3], [0.2989, 0.5870, 0.1140])

    def apply_sobel_filter(self):
        """Store the Sobel gradient magnitude of ``self.gray_arr`` in ``self.mask``."""
        sobel_x = cv2.Sobel(self.gray_arr, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(self.gray_arr, cv2.CV_64F, 0, 1, ksize=3)
        self.mask = np.sqrt(sobel_x**2 + sobel_y**2)

    def identify_droplet_segments(self, signal, threshold, min_consecutive_frames):
        """Find index ranges in which ``signal`` stays above ``threshold``.

        A segment starts at the first sample above ``threshold`` and ends once
        ``min_consecutive_frames`` consecutive samples are at or below it; the
        reported end is the last sample before that run. A segment still open
        at the end of ``signal`` is closed at the last index.

        Returns:
            List of inclusive ``(start_index, end_index)`` tuples.
        """
        segments = []
        droplet_started = False
        start_index = 0
        consecutive_below_threshold = 0

        for i, value in enumerate(signal):
            if value > threshold:
                if not droplet_started:
                    droplet_started = True
                    start_index = i
                # Any sample above the threshold interrupts a run of low
                # samples, so the run length starts over.
                consecutive_below_threshold = 0
            elif droplet_started:
                consecutive_below_threshold += 1
                if consecutive_below_threshold >= min_consecutive_frames:
                    droplet_started = False
                    segments.append((start_index, i - min_consecutive_frames))
                    consecutive_below_threshold = 0

        # If a droplet continues to the end of the image, consider it
        if droplet_started:
            segments.append((start_index, len(signal) - 1))

        return segments

    def segment_on_y_axis(self, droplet_segments_x, threshold, min_consecutive_frames):
        """Find the y-segments inside each of the given x-segments.

        Args:
            droplet_segments_x: Iterable of inclusive ``(start_x, end_x)``.
            threshold, min_consecutive_frames: Forwarded to
                ``identify_droplet_segments``.

        Returns:
            Flat list of inclusive ``(start_y, end_y)`` tuples, in the order
            of the x-segments they were found in.
        """
        droplet_segments_y = []

        for start_x, end_x in droplet_segments_x:
            # Row-wise mean of the mask restricted to this x-range.
            roi_mean = np.mean(self.mask[:, start_x:end_x + 1], axis=1)
            droplet_segments_y.extend(
                self.identify_droplet_segments(roi_mean, threshold, min_consecutive_frames)
            )

        return droplet_segments_y


In [None]:
#Step 2, import image and segment droplets

In [None]:
# Step 3, compare droplets and 