In [25]:
import cv2
from preprocess import crop, padder, crop_to_coordinates
from patchify import patchify
import os
import numpy as np
import matplotlib.pyplot as plt

patch_size = 256

def _write_patches(patches, out_dir, stem, ext):
    """Write each patch in ``patches`` to ``out_dir`` as ``<stem>_<index><ext>``.

    Raises:
        OSError: if OpenCV reports that a patch could not be written.
    """
    for i in range(patches.shape[0]):
        path = os.path.join(out_dir, f"{stem}_{i:02}{ext}")
        print(path)
        # cv2.imwrite reports failure through its return value, not an exception,
        # so an unchecked call can silently drop patches.
        if not cv2.imwrite(path, patches[i]):
            raise OSError(f"Could not write patch: {path}")


def patch_dataset(dataset_path, patched_dataset_path):
    """Cut every image/mask pair of the dataset into square patches.

    For each subdirectory in ``subdirs`` (currently only ``'train'``), every
    ``.png`` under ``<dataset_path>/<subdir>/images/images`` is paired with the
    same-named ``.tif`` under ``<dataset_path>/<subdir>/masks/masks``.  The image
    is passed through ``crop`` and ``padder``; the mask is passed through
    ``crop_to_coordinates`` (using the ``stats``/``centroids`` that ``crop``
    returned for the image) and ``padder``.  Both are then split into
    non-overlapping ``patch_size`` x ``patch_size`` patches and written to the
    mirrored folders under ``patched_dataset_path``.

    Pairs whose image or mask cannot be read are reported and skipped.

    Args:
        dataset_path: root folder of the unprocessed dataset.
        patched_dataset_path: root folder that receives the patches; the
            required subfolders are created if they do not exist.

    Raises:
        OSError: if a patch file cannot be written.
    """
    subdirs = ['train']

    for subdir in subdirs:
        # Input folders of the original dataset
        image_dir = os.path.join(dataset_path, subdir, 'images', 'images')
        mask_dir = os.path.join(dataset_path, subdir, 'masks', 'masks')

        # Output folders; create them up front because cv2.imwrite does not
        # create missing directories.
        out_image_dir = os.path.join(patched_dataset_path, subdir, 'images', 'images')
        out_mask_dir = os.path.join(patched_dataset_path, subdir, 'masks', 'masks')
        os.makedirs(out_image_dir, exist_ok=True)
        os.makedirs(out_mask_dir, exist_ok=True)

        # Sorted so that the processing order is the same on every run
        for filename in sorted(os.listdir(image_dir)):
            if not filename.endswith('.png'):  # Check for image files
                continue

            stem = os.path.splitext(filename)[0]
            filename_mask = stem + '.tif'

            # Read the image; cv2.imread returns None instead of raising
            image = cv2.imread(os.path.join(image_dir, filename), cv2.IMREAD_GRAYSCALE)
            if image is None:
                print(f"Image could not be read: {filename}. Skipping...")
                continue

            mask = cv2.imread(os.path.join(mask_dir, filename_mask), cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"Mask not found: {filename_mask}. Skipping...")
                continue

            # Preprocess the image
            image, stats, centroids = crop(image, 5)
            image = padder(image, patch_size)

            # Apply the image's crop to the mask, then pad it the same way
            mask = crop_to_coordinates(mask, stats, centroids)
            mask = padder(mask, patch_size)

            # Patch image and mask (step == patch size, so patches do not overlap)
            patches = patchify(image, (patch_size, patch_size), step=patch_size)
            patches = patches.reshape(-1, patch_size, patch_size, 1)
            mask_patches = patchify(mask, (patch_size, patch_size), step=patch_size)
            mask_patches = mask_patches.reshape(-1, patch_size, patch_size, 1)

            # Write only after both arrays were produced, so a failure while
            # processing the mask cannot leave image patches without masks.
            _write_patches(patches, out_image_dir, stem, '.png')
            _write_patches(mask_patches, out_mask_dir, stem, '.tif')

        print("Done with a directory")


patch_dataset('data_v3_unprocessed', 'data_v3_processed')

data_v3_processed/train/images/images/28_10_07_00.png
data_v3_processed/train/images/images/28_10_07_01.png
data_v3_processed/train/images/images/28_10_07_02.png
data_v3_processed/train/images/images/28_10_07_03.png
data_v3_processed/train/images/images/28_10_07_04.png
data_v3_processed/train/images/images/28_10_07_05.png
data_v3_processed/train/images/images/28_10_07_06.png
data_v3_processed/train/images/images/28_10_07_07.png
data_v3_processed/train/images/images/28_10_07_08.png
data_v3_processed/train/images/images/28_10_07_09.png
data_v3_processed/train/images/images/28_10_07_10.png
data_v3_processed/train/images/images/28_10_07_11.png
data_v3_processed/train/images/images/28_10_07_12.png
data_v3_processed/train/images/images/28_10_07_13.png
data_v3_processed/train/images/images/28_10_07_14.png
data_v3_processed/train/images/images/28_10_07_15.png
data_v3_processed/train/images/images/28_10_07_16.png
data_v3_processed/train/images/images/28_10_07_17.png
data_v3_processed/train/imag

In [6]:
### METHOD IF ANNOTATING DATA WITH MODEL PREDICTIONS ###

import cv2
from preprocess import crop, padder, crop_to_coordinates
from patchify import patchify
import os
import numpy as np
import matplotlib.pyplot as plt

patch_size = 256

def patch_dataset(dataset_path, patched_dataset_path):
    """Patch every ``.png`` in ``dataset_path`` together with its ``.tif`` mask.

    Each image is passed through ``crop`` and ``padder`` and split into
    non-overlapping ``patch_size`` x ``patch_size`` patches.  The mask with the
    same base name in the same folder is patched exactly as loaded (no crop or
    padding is applied to it here).
    NOTE(review): this presumably relies on the masks already having the
    cropped/padded size of the image - confirm against the prediction step.

    The shapes of both patch arrays are printed.  The calls that would write
    the patches are commented out, so ``patched_dataset_path`` is currently
    unused and nothing is saved.

    Args:
        dataset_path: folder containing the images and their masks.
        patched_dataset_path: intended output folder for the patches.
    """
    # Iterate over all files in the original subdirectory
    for filename in os.listdir(dataset_path):
        if not filename.endswith('.png'):  # Check for image files
            continue

        # Read the image; cv2.imread returns None instead of raising
        image_path = os.path.join(dataset_path, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Image could not be read: {filename}. Skipping...")
            continue

        # Swap only the extension, so a '.png' elsewhere in the path is left alone
        mask_path = os.path.splitext(image_path)[0] + '.tif'
        filename_mask = os.path.splitext(filename)[0] + '.tif'
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if mask is None:
            print(f"Mask not found: {filename_mask}. Skipping...")
            continue

        # Preprocess the image (the crop statistics are not needed in this variant)
        image, _, _ = crop(image, 5)
        image = padder(image, patch_size)

        # Patch the image
        patches = patchify(image, (patch_size, patch_size), step=patch_size)
        patches = patches.reshape(-1, patch_size, patch_size, 1)
        print(patches.shape)

        # Save the patches (writing is currently disabled)
        for i in range(patches.shape[0]):
            patch = patches[i, :, :]
            patch_filename = f"{os.path.splitext(filename)[0]}_{i:02}.png"
            #cv2.imwrite(os.path.join(patched_dataset_path, patch_filename), patch)

        # Getting the mask patches
        mask_patches = patchify(mask, (patch_size, patch_size), step=patch_size)
        mask_patches = mask_patches.reshape(-1, patch_size, patch_size, 1)
        print(mask_patches.shape)

        # Saving patches (writing is currently disabled)
        for i in range(mask_patches.shape[0]):
            mask_patch = mask_patches[i, :, :]
            patch_filename = f"{os.path.splitext(filename_mask)[0]}_{i:02}.tif"
            #cv2.imwrite(os.path.join(patched_dataset_path, patch_filename), mask_patch)

    print("Done with a directory")


patch_dataset('images', 'data_v2_processed')

(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 256, 1)
(49, 256, 

In [27]:
# Move the test PNGs from the v2 dataset folder into the v3 dataset folder
import shutil

base_dir = 'data_v2_processed/test/images/images'
target_dir = 'data_v3_processed/test/images/images'

# The destination must exist before files can be moved into it
os.makedirs(target_dir, exist_ok=True)

# Select the PNG files first (extension compared case-insensitively), then move them
png_files = [entry for entry in os.listdir(base_dir) if entry.lower().endswith('.png')]
for filename in png_files:
    shutil.move(os.path.join(base_dir, filename), os.path.join(target_dir, filename))

print("All PNG files moved successfully.")

All PNG files moved successfully.


In [37]:
import shutil
import os

source_dir = "data_v3_processed"   # dataset tree to copy
backup_dir = "data_v3_backup"      # folder that receives the copy

# dirs_exist_ok=True (Python 3.8+) allows copying into a backup_dir that already exists
shutil.copytree(src=source_dir, dst=backup_dir, dirs_exist_ok=True)

print(f"Backup completed: {backup_dir}")


Backup completed: data_v3_backup


In [38]:
import os
import glob
import shutil
import numpy as np
import cv2
import matplotlib.pyplot as plt

# ==============================
# CONFIG
# ==============================
# Folders that are scanned for patch files: images use IMAGE_EXT, masks use MASK_EXT.
IMAGE_DIR = 'data_v3_processed/train/images/images'
MASK_DIR  = 'data_v3_processed/train/masks/masks'

# File names are expected to look like EXP_PLATE_DAY_PATCH.ext (see parse_filename).
# DAY_TO_CHECK is compared against the DAY field as a string.
DAY_TO_CHECK   = '15'     # day used to decide keep vs drop
IMAGE_EXT      = 'png'    # extension for images
MASK_EXT       = 'tif'    # extension for masks

# WARNING: with CONFIRM_DELETE = True and BACKUP_DIR = None, backup_or_delete
# removes the selected files with os.remove, i.e. permanently.
SHOW_IMAGES    = False     # preview one image+mask for each removed patch
CONFIRM_DELETE = True    # set True to actually move/delete files
BACKUP_DIR     = None     # e.g. 'quarantine_empty_patches' to move instead of delete


# ==============================
# HELPERS
# ==============================
def parse_filename(fname):
    """
    Split a patch file name of the form EXP_PLATE_DAY_PATCH.ext
    (e.g., 28_01_15_03.tif) into its four underscore-separated fields.

    Returns the tuple (experiment, plate, day, patch), each as a string.
    Raises ValueError when the name, without its extension, does not consist
    of exactly four fields.
    """
    stem = os.path.splitext(fname)[0]
    fields = stem.split('_')
    if len(fields) == 4:
        experiment, plate, day, patch = fields
        return experiment, plate, day, patch
    raise ValueError(f"Unexpected filename pattern: {fname}")


def collect_files_for_patch(experiment, plate, patch):
    """List the files of one patch position for every day.

    The day field of the file name is replaced by a ``*`` wildcard.  The
    result holds the sorted matches from IMAGE_DIR followed by the sorted
    matches from MASK_DIR.
    """
    name_glob = f"{experiment}_{plate}_*_{patch}"
    collected = []
    for directory, ext in ((IMAGE_DIR, IMAGE_EXT), (MASK_DIR, MASK_EXT)):
        matches = glob.glob(os.path.join(directory, f"{name_glob}.{ext}"))
        matches.sort()
        collected.extend(matches)
    return collected


def show_example(files):
    """Preview one image and one mask taken from ``files``.

    For each of IMAGE_EXT and MASK_EXT, the file whose DAY field equals
    DAY_TO_CHECK is preferred; if there is none, the first file with that
    extension is used.  The mask panel title names the day of the mask that
    is actually displayed.  (Previously the first mask in the list was shown
    under a title that always claimed DAY_TO_CHECK.)

    Nothing is drawn when ``files`` contains neither an image nor a mask.

    Args:
        files: iterable of file paths, e.g. the result of
            collect_files_for_patch.
    """
    files = list(files)

    def _day_of(path):
        # DAY is the third field of EXP_PLATE_DAY_PATCH.ext; None if the name differs
        fields = os.path.splitext(os.path.basename(path))[0].split('_')
        return fields[2] if len(fields) == 4 else None

    def _pick(ext):
        # Prefer the decision-day file, otherwise fall back to the first match
        matching = [f for f in files if f.endswith(f".{ext}")]
        for candidate in matching:
            if _day_of(candidate) == DAY_TO_CHECK:
                return candidate
        return matching[0] if matching else None

    img_file = _pick(IMAGE_EXT)
    mask_file = _pick(MASK_EXT)
    if img_file is None and mask_file is None:
        return

    plt.figure(figsize=(8, 4))
    if img_file is not None:
        plt.subplot(1, 2, 1)
        plt.imshow(plt.imread(img_file), cmap='gray')
        plt.title("Image Example")
        plt.axis('off')
    if mask_file is not None:
        shown_day = _day_of(mask_file)
        plt.subplot(1, 2, 2)
        plt.imshow(cv2.imread(mask_file), cmap='gray')
        # Label with the day that is really shown, not unconditionally DAY_TO_CHECK
        plt.title(f"Mask Example (Day {shown_day})" if shown_day is not None else "Mask Example")
        plt.axis('off')
    plt.show()


def ensure_backup_subdirs():
    """Create BACKUP_DIR and its "images" and "masks" subfolders.

    Does nothing when BACKUP_DIR is None; folders that already exist are
    left as they are.
    """
    if BACKUP_DIR is not None:
        os.makedirs(BACKUP_DIR, exist_ok=True)
        for leaf in ("images", "masks"):
            os.makedirs(os.path.join(BACKUP_DIR, leaf), exist_ok=True)


def backup_or_delete(filepath):
    """Remove ``filepath``: delete it, or move it under BACKUP_DIR when one is set.

    With a BACKUP_DIR, the file goes to its "images" or "masks" subfolder
    depending on whether it belongs to IMAGE_DIR or MASK_DIR.  A file that
    matches neither is placed directly in BACKUP_DIR.
    """
    if BACKUP_DIR is None:
        os.remove(filepath)
        return

    ensure_backup_subdirs()
    base = os.path.basename(filepath)
    parent = os.path.basename(os.path.dirname(filepath))

    # A file belongs to a source folder if its path starts with that folder's
    # absolute path or its parent folder carries the same name.
    for source_dir, bucket in ((IMAGE_DIR, "images"), (MASK_DIR, "masks")):
        if filepath.startswith(os.path.abspath(source_dir)) or parent == os.path.basename(source_dir):
            rel = os.path.join(bucket, base)
            break
    else:
        # fallback: just dump in root
        rel = base

    shutil.move(filepath, os.path.join(BACKUP_DIR, rel))


# ==============================
# STEP 1: Find all masks for decision day
# ==============================
# Matches every mask in MASK_DIR whose file name carries DAY_TO_CHECK in the
# DAY position (EXP_PLATE_DAY_PATCH.ext).
pattern_day = os.path.join(MASK_DIR, f"*_*_{DAY_TO_CHECK}_*.{MASK_EXT}")
day_masks = sorted(glob.glob(pattern_day))

# (experiment, plate, patch) keys of the patches whose decision-day mask is empty
empty_keys = set()

# ==============================
# STEP 2: Identify empty patches (no positive pixel) at decision day
# ==============================
for mask_path in day_masks:
    fname = os.path.basename(mask_path)
    # parse_filename raises ValueError for names that do not have four fields
    experiment, plate, day, patch = parse_filename(fname)
    mask = cv2.imread(mask_path)
    # NOTE(review): cv2.imread returns None for a file it cannot decode; the
    # comparison below is then False, so such a mask counts as non-empty.
    if np.all(mask == 0):
        empty_keys.add((experiment, plate, patch))

print(f"Found {len(empty_keys)} empty patches (no positive pixels on day {DAY_TO_CHECK}).")

# ==============================
# STEP 3: Preview files to be removed
# ==============================
# Maps each empty patch key to all of its image and mask files (every day,
# not only the decision day).
patch_to_files = {}
total_file_count = 0

for (experiment, plate, patch) in sorted(empty_keys):
    files = collect_files_for_patch(experiment, plate, patch)
    patch_to_files[(experiment, plate, patch)] = files
    total_file_count += len(files)

    print(f"\n--- PATCH {experiment}_{plate}_xx_{patch} ---")
    if files:
        print("Files that would be removed:")
        for f in files:
            print(f"  {f}")
    else:
        print("  (No files found — nothing to remove?)")

    if SHOW_IMAGES:
        show_example(files)

print(f"\nTotal files that would be removed: {total_file_count}")

# ==============================
# STEP 4: Optional deletion / backup
# ==============================
# Files are only touched when CONFIRM_DELETE is True; backup_or_delete moves
# them to BACKUP_DIR when that is set and deletes them otherwise.
if CONFIRM_DELETE:
    print("\nCONFIRM_DELETE=True -> removing files...")
    removed = 0
    for files in patch_to_files.values():
        for f in files:
            # Skip paths that no longer exist, so only real removals are counted
            if os.path.exists(f):
                backup_or_delete(f)
                removed += 1
    print(f"Done. Removed {removed} files{' (moved to backup)' if BACKUP_DIR else ''}.")
else:
    print("\nDry run only. No files were removed. Set CONFIRM_DELETE = True to proceed.")


Found 271 empty patches (no positive pixels on day 15).

--- PATCH 28_01_xx_00 ---
Files that would be removed:
  data_v3_processed/train/images/images/28_01_01_00.png
  data_v3_processed/train/images/images/28_01_02_00.png
  data_v3_processed/train/images/images/28_01_03_00.png
  data_v3_processed/train/images/images/28_01_04_00.png
  data_v3_processed/train/images/images/28_01_05_00.png
  data_v3_processed/train/images/images/28_01_06_00.png
  data_v3_processed/train/images/images/28_01_07_00.png
  data_v3_processed/train/images/images/28_01_08_00.png
  data_v3_processed/train/images/images/28_01_09_00.png
  data_v3_processed/train/images/images/28_01_10_00.png
  data_v3_processed/train/images/images/28_01_11_00.png
  data_v3_processed/train/images/images/28_01_12_00.png
  data_v3_processed/train/images/images/28_01_13_00.png
  data_v3_processed/train/images/images/28_01_14_00.png
  data_v3_processed/train/images/images/28_01_15_00.png
  data_v3_processed/train/masks/masks/28_01_01_0