### Load Data

Run this notebook in the `preprocess_data` conda environment (`conda activate preprocess_data`).

In [35]:
import glob
import sys
import os
import re
import numpy as np
import tifffile as tiff
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import random

# Directory that holds the project-local `processing_functions` module.
folder_path = r"C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\MUSE_BIT_HE_Preprocessing_Code\20251119_MUSE-BIT_preprocessing_training_data"

# Register the directory only once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if folder_path not in sys.path:
    sys.path.append(folder_path)

# NOTE(review): the wildcard import hides which helpers come from this module.
# It presumably supplies index_files_by_xyz, extract_xyz, crop_image and
# flatfield_correct used below -- confirm and switch to explicit imports.
from processing_functions import *


## Break up BIT and MUSE images into training patches. 

### Load in-focus MUSE-BIT data

In [48]:
# Locate the raw TIFFs for each channel inside the in-focus acquisition folder
root = r'C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus'

# One glob per channel; the pattern order matches the unpacking targets.
BIT_tif_files, MUSE_Blue_tif_files, MUSE_Green_tif_files = (
    glob.glob(os.path.join(root, pattern))
    for pattern in ('*BIT*.tif', '*reg_MUSE_blue*.tif', '*MUSE_green*.tif')
)

# Cropped outputs are written to a 'crop' sub-folder next to the inputs.
data_root = os.path.join(root, 'crop')
os.makedirs(data_root, exist_ok=True)

print(f"Number of BIT files found: {len(BIT_tif_files)}")
print(f"Number of MUSE Blue files found: {len(MUSE_Blue_tif_files)}")
print(f"Number of MUSE Green files found: {len(MUSE_Green_tif_files)}")

Number of BIT files found: 38
Number of MUSE Blue files found: 38
Number of MUSE Green files found: 38


In [49]:
# Build one lookup per channel with the project helper index_files_by_xyz
# (presumably keyed by the (X, Y, Z) values in each filename -- confirm in the helper).
bit_dict, blue_dict, green_dict = map(
    index_files_by_xyz,
    (BIT_tif_files, MUSE_Blue_tif_files, MUSE_Green_tif_files),
)

# Keep only the keys present in all three channels, in sorted order so the
# pairing is reproducible from run to run.
common_keys = sorted(set(bit_dict).intersection(blue_dict, green_dict))

# One (BIT, blue, green) entry per shared key
matched_trios = [
    (bit_dict[key], blue_dict[key], green_dict[key])
    for key in common_keys
]
print(f"{len(matched_trios)} matched trios found")

38 matched trios found


# Crop Images and Save

In [50]:
# Full-frame size and the size of the centred crop window, in pixels.
img_width = 2560
img_height = 2160

crop_width = 2000
crop_height = 1900

# Offsets that centre the crop window inside the full frame.
x0 = (img_width - crop_width) // 2    # 280
y0 = (img_height - crop_height) // 2  # 130

top_left = (x0, y0)                                 # (280, 130)
bottom_right = (x0 + crop_width, y0 + crop_height)  # (2280, 2030)


clahe_clipLimit = 2.0
clahe_tileGridSize = 8
clahe = cv2.createCLAHE(clipLimit=clahe_clipLimit, tileGridSize=(clahe_tileGridSize, clahe_tileGridSize))

# Median-filter aperture; loop-invariant, so it is defined once up front.
medfilter_kernel_size = 3


def _preprocess_channel(path, top_left, bottom_right, clahe, ksize):
    """Read one TIFF and run the crop / flat-field / denoise / CLAHE pipeline.

    Parameters
    ----------
    path : str
        Path of the TIFF file to read with ``tiff.imread``.
    top_left, bottom_right : tuple of int
        Crop corners, forwarded unchanged to the project helper ``crop_image``.
    clahe : cv2.CLAHE
        Pre-built CLAHE object applied as the last step.
    ksize : int
        Aperture size passed to ``cv2.medianBlur``.

    Returns
    -------
    numpy.ndarray
        The processed image, as returned by ``clahe.apply`` on uint16 input.
    """
    img = crop_image(tiff.imread(path), top_left, bottom_right)

    # Flat field correct
    img = flatfield_correct(img, sigma=50)

    # Stretch to the full 16-bit range and recast to uint16
    img = cv2.normalize(img, None, 0, 65535, cv2.NORM_MINMAX).astype(np.uint16)

    # Median filter to reduce granularity
    img = cv2.medianBlur(img, ksize)

    # CLAHE contrast enhancement
    return clahe.apply(img)


if len(BIT_tif_files) == len(MUSE_Blue_tif_files) == len(MUSE_Green_tif_files):

    for trio in matched_trios:
        xyz0, xyz1, xyz2 = (extract_xyz(path) for path in trio)
        trio_is_consistent = xyz0 == xyz1 == xyz2

        if trio_is_consistent:
            print("✅ All images in the trio have the same X, Y, Z:", xyz0)
        else:
            print("❌ Trio mismatch:")

        print("BIT: ", trio[0])
        print("Blue:", trio[1])
        print("Green:", trio[2])

        if not trio_is_consistent:
            # A mismatched trio would pair images from different stage
            # positions, so do not write it into the training data.
            print("Skipping mismatched trio; nothing saved for it.")
            continue

        # Process every channel before saving any, so a failure in one channel
        # leaves no partial trio on disk.
        BIT_img, MUSE_blue_img, MUSE_green_img = [
            _preprocess_channel(path, top_left, bottom_right, clahe, medfilter_kernel_size)
            for path in trio
        ]

        # Patch saturated pixels (disabled)
        #BIT_img = patch_saturated_pixels(BIT_img, saturation_value=90, neighborhood_size=5)

        # Normalize from 0 to 255 and convert to uint8 for saving as tif (disabled)
        #BIT_img = cv2.normalize(BIT_img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        #MUSE_blue_img = cv2.normalize(MUSE_blue_img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        #MUSE_green_img = cv2.normalize(MUSE_green_img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

        BIT_base = os.path.basename(trio[0])
        MUSE_Blue_base = os.path.basename(trio[1])
        MUSE_Green_base = os.path.basename(trio[2])

        # Outputs keep the original file name with a 'crop_flat_clahe_' prefix.
        Image.fromarray(BIT_img).save(os.path.join(data_root, f'crop_flat_clahe_{BIT_base}'))
        Image.fromarray(MUSE_blue_img).save(os.path.join(data_root, f'crop_flat_clahe_{MUSE_Blue_base}'))
        Image.fromarray(MUSE_green_img).save(os.path.join(data_root, f'crop_flat_clahe_{MUSE_Green_base}'))

else:
    # Previously this case skipped all processing without any message.
    print(
        "❌ File counts differ (BIT / Blue / Green):",
        len(BIT_tif_files), len(MUSE_Blue_tif_files), len(MUSE_Green_tif_files),
        "- no images were processed.",
    )


# Record the crop window alongside the outputs so it can be reproduced later.
crop_params = {
    "top_left": top_left,
    "bottom_right": bottom_right
}

with open(os.path.join(data_root, "crop_params.txt"), "w") as f:
    for key, value in crop_params.items():
        f.write(f"{key}: {value}\n")


        

✅ All images in the trio have the same X, Y, Z: (0, 0, 2)
BIT:  C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus\BIT_duo_5_X=0_Y=0_Z=2_expTime=7ms.tif
Blue: C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus\reg_MUSE_blue_duo_5_X=0_Y=0_Z=2_expTime=2000ms.tif
Green: C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus\MUSE_green_duo_5_X=0_Y=0_Z=2_expTime=2000ms.tif
✅ All images in the trio have the same X, Y, Z: (0, 1, 3)
BIT:  C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus\BIT_duo_5_X=0_Y=1_Z=3_expTime=6ms.tif
Blue: C:\Users\durrlab-asong\OneDrive - Johns Hopkins\MUSE-BIT\20250721_duodenum_JH09118690\MUSE-BIT\duo_5_submucosa\in_focus\reg_MUSE_blue_duo_5_X=0_Y=1_Z=3_expTime=2000ms.tif
Green: C:\Users\durrlab-aso