## Step 1: Extract frames from video

In [2]:
import cv2
import os
import numpy as np

# NOTE(review): MAX_FRAMES is not referenced anywhere in the visible notebook —
# confirm whether it is still needed.
MAX_FRAMES = 78000
def extract_frames(video_path, output_folder, num_frames=1):
    """Save evenly spaced frames of a video as PNG files.

    Frames are written to ``output_folder`` as ``frame_000.png``,
    ``frame_001.png``, ... in the order they appear in the video.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the PNG frames (created if
            it does not exist).
        num_frames: How many evenly spaced frames to extract. Requests
            larger than the number of frames in the video are capped so the
            same frame is never written twice.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        # Get total number of frames
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Ensure output directory exists
        os.makedirs(output_folder, exist_ok=True)

        if total_frames <= 0:
            # Without a usable frame count the linspace below would produce
            # negative (invalid) frame positions.
            print("Error: Video reports no frames.")
            return

        # Select `num_frames` evenly spaced frame indices; capping at the
        # frame count keeps every index distinct.
        num_frames = min(num_frames, total_frames)
        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)

        count = 0
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Could not read frame at index {i}")
                continue

            frame_path = os.path.join(output_folder, f'frame_{count:03d}.png')
            # Only count frames that were actually written to disk
            if cv2.imwrite(frame_path, frame):
                count += 1
            else:
                print(f"Warning: Could not write {frame_path}")
    finally:
        # Release video capture on every exit path, including errors
        cap.release()

    print(f"Extracted {count} frames to {output_folder}")


# Source video and the folder that receives the extracted PNG frames
video_path = "20211021_HR13_1_test_converted.mp4"
output_folder = "data"

# num_frames is left at its default of 1, so a single frame is extracted
extract_frames(video_path, output_folder)



Extracted 1 frames to data


## Step 2: Apply Thresholding to clean the images

In [3]:
import os
from PIL import Image
import numpy as np

# Raw frames come from 'data'; binarised copies go to 'masked_data'
input_folder = 'data'
output_folder = 'masked_data'

# Make sure the destination exists before writing into it
os.makedirs(output_folder, exist_ok=True)

# Grey-level cutoff: values at or below it become black, the rest white
threshold = 90

# Binarise every image file found in the input folder
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue

    source_path = os.path.join(input_folder, filename)
    target_path = os.path.join(output_folder, filename)

    # Load as 8-bit grayscale
    gray = np.array(Image.open(source_path).convert('L'))

    # Pixels <= threshold → black (0), others → white (255)
    binary = ((gray > threshold) * 255).astype(np.uint8)

    # Write the binarised frame under the same file name
    Image.fromarray(binary).save(target_path)

print("Thresholding applied.")


Thresholding applied.


## Step 3: Color surface

In [4]:
import os
from PIL import Image, ImageDraw

# Paths
# The thresholded frames are edited in place: same folder for input and output
input_folder = 'masked_data'
output_folder = 'masked_data'

# Make sure the destination exists before writing into it
os.makedirs(output_folder, exist_ok=True)

# RGB colour used to paint the surface region
blue = (0, 0, 255)

# Paint the surface band on every image file in the folder
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue

    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)

    with Image.open(input_path) as source:
        rgb = source.convert('RGB')  # drawing a colour needs an RGB canvas
        width, height = rgb.size

        # The band starts at 67% of the height, i.e. it covers the bottom ~33%
        start_y = int(height * 0.67)

        # Fill everything from start_y down to the bottom edge with blue
        ImageDraw.Draw(rgb).rectangle([0, start_y, width, height], fill=blue)

        # Overwrite the file with the painted version
        rgb.save(output_path)

## Step 4: Color the droplet

In [5]:
import os
import numpy as np
import cv2
from PIL import Image


# This step edits the frames in 'masked_data' in place. The folders are set
# explicitly so the cell does not depend on whichever cell last assigned
# input_folder / output_folder.
input_folder = 'masked_data'
output_folder = 'masked_data'

os.makedirs(output_folder, exist_ok=True)

# Colors in BGR
MAGENTA = (255, 0, 255)

# Threshold for "dark" pixels (B, G, R all <= this)
dark_threshold = 50

for filename in os.listdir(input_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # Load image in full color (keep blue surface)
        img = cv2.imread(input_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash on the comparison below.
            print(f"Warning: Could not read image {input_path}")
            continue

        # Find dark pixels: where all channels are <= threshold
        dark_mask = np.all(img <= dark_threshold, axis=-1)

        # Color those pixels magenta
        img[dark_mask] = MAGENTA

        # Save result (PIL expects RGB, OpenCV holds BGR)
        Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).save(output_path)

print("Dark pixels colored magenta (blue preserved). Saved in 'masked_data'")


Dark pixels colored magenta (blue preserved). Saved in 'masked_data'


## Step 5: Fill droplet

In [6]:
import os
import cv2
import numpy as np
from PIL import Image

# This step edits the frames in 'masked_data' in place. The folders are set
# explicitly so the cell does not depend on whichever cell last assigned
# input_folder / output_folder.
input_folder = 'masked_data'
output_folder = 'masked_data'

os.makedirs(output_folder, exist_ok=True)

# Define colors in BGR
MAGENTA = np.array([255, 0, 255])
BLUE = np.array([255, 0, 0])
WHITE = np.array([255, 255, 255])

def create_mask(img, color, tolerance=30):
    """Build a mask of the pixels whose channels are all close to ``color``.

    Args:
        img: Image array passed to ``cv2.inRange``; the callers in this
            notebook pass BGR images loaded with ``cv2.imread``.
        color: Target colour as a sequence or array with one value per
            channel, in the same channel order as ``img``.
        tolerance: Maximum allowed distance from ``color`` on each channel.

    Returns:
        The mask produced by ``cv2.inRange`` for the bounds
        ``color - tolerance`` .. ``color + tolerance`` clipped to 0..255.
    """
    # Compute the bounds in a signed integer type: with a uint8 colour,
    # `color - tolerance` would wrap around instead of going negative, and
    # plain lists/tuples do not support the arithmetic at all.
    color = np.array(color, dtype=np.int32)
    lower = np.clip(color - tolerance, 0, 255).astype(np.uint8)
    upper = np.clip(color + tolerance, 0, 255).astype(np.uint8)
    return cv2.inRange(img, lower, upper)

# For every image: fill the droplet outline, tidy the droplet/surface border
# with two 3x3 neighbourhood rules, then keep only the largest magenta region.
for filename in os.listdir(input_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        img_path = os.path.join(input_folder, filename)
        save_path = os.path.join(output_folder, filename)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash inside create_mask.
            print(f"Warning: Could not read image {img_path}")
            continue

        # --- Step 1: Fill the droplet using contour ---
        # Only outer contours are requested, so filling them also covers any
        # non-magenta holes inside the droplet outline.
        magenta_mask = create_mask(img, MAGENTA, tolerance=30)
        contours, _ = cv2.findContours(magenta_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(img, contours, -1, MAGENTA.tolist(), thickness=cv2.FILLED)

        # --- Step 2: Neighborhood Rules ---

        # Recalculate masks; `// 255` turns them into 0/1 so that the filters
        # below count neighbours.
        magenta_mask = create_mask(img, MAGENTA, tolerance=30) // 255
        blue_mask = create_mask(img, BLUE, tolerance=30) // 255
        white_mask = create_mask(img, WHITE, tolerance=30) // 255

        # Rule 1: White pixel with magenta above AND blue below → becomes magenta
        magenta_top_kernel = np.array([
            [1, 1, 1],
            [0, 0, 0],
            [0, 0, 0]
        ], dtype=np.uint8)

        blue_bottom_kernel = np.array([
            [0, 0, 0],
            [0, 0, 0],
            [1, 1, 1]
        ], dtype=np.uint8)

        magenta_top_neighbors = cv2.filter2D(magenta_mask, -1, magenta_top_kernel)
        blue_bottom_neighbors = cv2.filter2D(blue_mask, -1, blue_bottom_kernel)

        # At least 2 of the 3 pixels in each row must match
        white_pixels = (white_mask == 1)
        condition_fill = (magenta_top_neighbors >= 2) & (blue_bottom_neighbors >= 2)
        img[np.logical_and(white_pixels, condition_fill)] = MAGENTA

        # Rule 2: Magenta pixel with many white + some blue → becomes white
        # Uses the masks computed before Rule 1, so pixels recoloured by
        # Rule 1 are not re-examined here.
        kernel = np.ones((3, 3), np.uint8)
        white_neighbors = cv2.filter2D(white_mask, -1, kernel)
        blue_neighbors = cv2.filter2D(blue_mask, -1, kernel)

        magenta_pixels = (magenta_mask == 1)
        condition_empty = np.logical_and(white_neighbors >= 4, blue_neighbors >= 1)
        img[np.logical_and(magenta_pixels, condition_empty)] = WHITE

        # --- Step 3: Keep only largest magenta region (remove little blobs) ---
        post_magenta_mask = create_mask(img, MAGENTA, 30)
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(post_magenta_mask)
        if num_labels > 1:
            # Label 0 is skipped (stats[1:]), hence the +1 to get the label id
            largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
            cleaned_mask = (labels == largest_label).astype(np.uint8) * 255

            # Reset all magenta to white
            img[post_magenta_mask > 0] = WHITE
            # Restore only the main droplet
            img[cleaned_mask == 255] = MAGENTA

        # --- Save result (PIL expects RGB, OpenCV holds BGR) ---
        Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).save(save_path)

print("Filled droplet, applied neighborhood rules, and removed small blobs.")

Filled droplet, applied neighborhood rules, and removed small blobs.


## Step 6: Dataset preparation: U-Net / DeepLabV3+ / YOLACT (Object Segmentation)

These models use:
- 📄 1 image = input image
- 🎨 1 label mask = multi-class

We will use 1 grayscale mask with 3 class labels:
- 0 = background
- 1 = droplet
- 2 = surface

In [7]:
import os
import cv2
import numpy as np
from PIL import Image

# Colour-coded frames are read from 'masked_data'; the class-index masks
# are written to 'segmentation_masks'.
input_folder = 'masked_data'
output_folder = 'segmentation_masks'

os.makedirs(output_folder, exist_ok=True)

# Reference colours, given in BGR channel order
MAGENTA = np.array((255, 0, 255))
BLUE = np.array((255, 0, 0))
WHITE = np.array((255, 255, 255))

def create_mask(img, color, tolerance=30):
    """Build a mask of the pixels whose channels are all close to ``color``.

    Args:
        img: Image array passed to ``cv2.inRange``; the callers in this
            notebook pass BGR images loaded with ``cv2.imread``.
        color: Target colour as a sequence or array with one value per
            channel, in the same channel order as ``img``.
        tolerance: Maximum allowed distance from ``color`` on each channel.

    Returns:
        The mask produced by ``cv2.inRange`` for the bounds
        ``color - tolerance`` .. ``color + tolerance`` clipped to 0..255.
    """
    # Compute the bounds in a signed integer type: with a uint8 colour,
    # `color - tolerance` would wrap around instead of going negative, and
    # plain lists/tuples do not support the arithmetic at all.
    color = np.array(color, dtype=np.int32)
    lower = np.clip(color - tolerance, 0, 255).astype(np.uint8)
    upper = np.clip(color + tolerance, 0, 255).astype(np.uint8)
    return cv2.inRange(img, lower, upper)

# Convert each colour-coded frame into a single-channel mask whose pixel
# values are class indices: 0 = background, 1 = droplet, 2 = surface.
for filename in os.listdir(input_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash on img.shape below.
            print(f"Warning: Could not read image {img_path}")
            continue

        # Create grayscale mask (all background to start with)
        mask = np.zeros(img.shape[:2], dtype=np.uint8)

        droplet_mask = create_mask(img, MAGENTA) > 0
        surface_mask = create_mask(img, BLUE) > 0
        # print(f"{filename} → droplet: {np.sum(droplet_mask)}, surface: {np.sum(surface_mask)}")

        mask[droplet_mask] = 1  # class 1: droplet
        mask[surface_mask] = 2  # class 2: surface (assigned last, so it wins on overlap)
        # background stays 0

        # Masks are always written as PNG, named <original stem>_mask.png
        out_path = os.path.join(output_folder, os.path.splitext(filename)[0] + "_mask.png")
        Image.fromarray(mask).save(out_path)

print("Grayscale segmentation masks saved in 'segmentation_masks'")


Grayscale segmentation masks saved in 'segmentation_masks'


#### _Prepare binary masks for droplet and surface_

In [8]:
import os
import numpy as np
from PIL import Image

# Class-index masks are read from here ...
input_folder = 'segmentation_masks'
# ... and split into one black/white mask per class
droplet_output = 'droplet_masks'
surface_output = 'surface_masks'

for folder in (droplet_output, surface_output):
    os.makedirs(folder, exist_ok=True)

for filename in os.listdir(input_folder):
    if not filename.endswith('_mask.png'):
        continue

    class_mask = np.array(Image.open(os.path.join(input_folder, filename)))

    # Droplet: 255 wherever the class index is 1, 0 elsewhere
    droplet_mask = (class_mask == 1).astype(np.uint8) * 255
    Image.fromarray(droplet_mask).save(os.path.join(droplet_output, filename))

    # Surface: 255 wherever the class index is 2, 0 elsewhere
    surface_mask = (class_mask == 2).astype(np.uint8) * 255
    Image.fromarray(surface_mask).save(os.path.join(surface_output, filename))

print("Binary droplet/surface masks saved with white foreground")


Binary droplet/surface masks saved with white foreground


## Step 7: Dataset preparation: YOLO / R-CNN / Faster R-CNN (Object Detection)

These models need:
- 📄 Bounding boxes around each object
- 🏷️ Separate label files: .txt (YOLO), .json (COCO), or .xml (VOC)

### Prepare YOLO
**Class IDs are converted**: 1 → 0 for droplet, 2 → 1 for surface (YOLO expects 0-based IDs)

In [None]:
import os
import numpy as np
from PIL import Image

label_mask_folder = 'segmentation_masks'   # class-index masks (0/1/2)
image_folder = 'data'                      # original frames, used only for their size
output_label_folder = 'yolo_labels'

os.makedirs(output_label_folder, exist_ok=True)

# Write one YOLO .txt label file per mask: one line per class present,
# "<class> <x_center> <y_center> <width> <height>", all normalised to 0..1.
for filename in os.listdir(label_mask_folder):
    if filename.endswith('_mask.png'):
        mask_path = os.path.join(label_mask_folder, filename)
        img_name = filename.replace('_mask.png', '.png')
        img_path = os.path.join(image_folder, img_name)

        # Load grayscale mask and original image to get size
        mask = np.array(Image.open(mask_path))
        with Image.open(img_path) as img:
            # Image size is needed for normalization
            img_w, img_h = img.size

        annotations = []

        for class_id in [1, 2]:  # droplet and surface
            # Find where this class is in the mask: rows are (y, x) pairs
            coords = np.column_stack(np.where(mask == class_id))
            if coords.size == 0:
                continue  # no object of this class

            y_min, x_min = coords.min(axis=0)
            y_max, x_max = coords.max(axis=0)

            # min/max are inclusive pixel indices, so the box covers
            # [min, max + 1) in pixel coordinates; without the +1 a
            # full-width object would get a width of (W - 1) / W.
            x_center = ((x_min + x_max + 1) / 2) / img_w
            y_center = ((y_min + y_max + 1) / 2) / img_h
            box_width = (x_max - x_min + 1) / img_w
            box_height = (y_max - y_min + 1) / img_h

            # YOLO class IDs are 0-based: 1 → 0 (droplet), 2 → 1 (surface)
            annotations.append(f"{class_id - 1} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}")

        # Write .txt label (same stem as the image)
        label_path = os.path.join(output_label_folder, os.path.splitext(img_name)[0] + '.txt')
        with open(label_path, 'w') as f:
            f.write("\n".join(annotations))

print("YOLO labels generated in 'yolo_labels'")


YOLO labels generated in 'yolo_labels'


### Convert masks into COCO format for R-CNN / Faster R-CNN

In [None]:
import os
import numpy as np
import json
from PIL import Image
from tqdm import tqdm

label_mask_folder = 'segmentation_masks'   # grayscale masks with 0/1/2
image_folder = 'data'               # original RGB images
output_json = 'coco_annotations.json'

# COCO structure
coco = {
    "images": [],
    "annotations": [],
    "categories": [
        {"id": 1, "name": "droplet"},
        {"id": 2, "name": "surface"}
    ]
}

# Both ID counters are 1-based and unique across the whole file
annotation_id = 1
image_id = 1

for filename in tqdm(os.listdir(label_mask_folder)):
    if not filename.endswith('_mask.png'):
        continue

    mask_path = os.path.join(label_mask_folder, filename)
    img_name = filename.replace('_mask.png', '.png')
    img_path = os.path.join(image_folder, img_name)

    # Load the mask; the original image is opened only to read its size
    mask = np.array(Image.open(mask_path))
    with Image.open(img_path) as img:
        width, height = img.size

    # Add image info
    coco['images'].append({
        "file_name": img_name,
        "height": height,
        "width": width,
        "id": image_id
    })

    for class_id in [1, 2]:  # droplet, surface
        # Rows of `positions` are (y, x) pixel coordinates of this class
        positions = np.column_stack(np.where(mask == class_id))
        if positions.size == 0:
            continue

        y_min, x_min = positions.min(axis=0)
        y_max, x_max = positions.max(axis=0)
        # min/max are inclusive pixel indices, so the extent is max - min + 1
        # (a single-pixel object is 1x1, not 0x0).
        box_width = x_max - x_min + 1
        box_height = y_max - y_min + 1

        # COCO expects [x, y, width, height]
        coco['annotations'].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": class_id,
            "bbox": [int(x_min), int(y_min), int(box_width), int(box_height)],
            "area": int(box_width * box_height),
            "iscrowd": 0
        })
        annotation_id += 1

    image_id += 1

# Save
with open(output_json, 'w') as f:
    json.dump(coco, f, indent=4)

print(f"COCO JSON annotations saved to {output_json}")

100%|██████████| 120/120 [00:02<00:00, 54.86it/s]

COCO JSON annotations saved to coco_annotations.json





### _Save all the data_

In [None]:
# Recursively archive the current working directory into processed_data.zip,
# excluding any path that matches */sample_data/*.
!zip -r processed_data.zip . -x "*/sample_data/*"