2D_Train_Validation

In [None]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Regular expressions for extracting parts from file paths
# "case<1-3 digits>_day<1-3 digits>", e.g. "case18_day0"
GET_CASE_AND_DATE = re.compile(r"case[0-9]{1,3}_day[0-9]{1,3}")
# "slice_<1-4 digits>", e.g. "slice_0001"
GET_SLICE_NUM = re.compile(r"slice_[0-9]{1,4}")
# Two underscore-delimited numbers of 1-3 digits each, e.g. "_266_266_".
# The writer function below parses them and reverses their order to get the mask shape.
IMG_SHAPE = re.compile(r"_[0-9]{1,3}_[0-9]{1,3}_")

# Define classes for image segmentation
# The position of a class in this list is the mask channel it is drawn into.
CLASSES = ["large_bowel", "small_bowel", "stomach"]

# Mapping from mask colour (channel values as stored in the mask array) to class ID.
# With channel i holding CLASSES[i]: (255, 0, 0) is large_bowel -> 3,
# (0, 255, 0) is small_bowel -> 2, (0, 0, 255) is stomach -> 1, all-zero -> 0.
color2id = {(0, 0, 0): 0, (0, 0, 255): 1, (0, 255, 0): 2, (255, 0, 0): 3}
# Inverse mapping: class ID -> colour.
id2color = {v: k for k, v in color2id.items()}

def get_folder_files(folder_path, only_ids):
    """Collect the files under ``folder_path`` whose image id is in ``only_ids``.

    The id of a file is ``"<case_day>_<slice>"``, built from the first match of
    ``GET_CASE_AND_DATE`` and of ``GET_SLICE_NUM`` in the full file path
    (e.g. ``case1_day0_slice_0001``).  Files whose path lacks either match are
    skipped.

    Args:
        folder_path: Directory that is walked recursively.
        only_ids: Iterable of ids to keep (list, set, NumPy array, ...).

    Returns:
        ``(relevant_imgs, img_ids)``: two parallel lists holding the matching
        file paths and their ids, in sorted traversal order.
    """
    print(f"Searching in folder: {folder_path}")  # Debugging statement
    # Hash the ids once: ``x in ndarray`` rescans the whole array for every file.
    if isinstance(only_ids, (set, frozenset)):
        wanted_ids = only_ids
    else:
        wanted_ids = set(only_ids)

    relevant_imgs = []
    img_ids = []
    for dirpath, dirnames, files in os.walk(folder_path):
        dirnames.sort()  # os.walk order is arbitrary; sort for reproducible output
        for filename in sorted(files):
            src_file_path = os.path.join(dirpath, filename)
            case_day = GET_CASE_AND_DATE.search(src_file_path)
            slice_id = GET_SLICE_NUM.search(src_file_path)
            if case_day is None or slice_id is None:
                continue  # Skip files that do not match the pattern
            image_id = f"{case_day.group()}_{slice_id.group()}"
            if image_id in wanted_ids:
                relevant_imgs.append(src_file_path)
                img_ids.append(image_id)
    print(f"Found {len(relevant_imgs)} relevant images.")  # Debugging statement
    return relevant_imgs, img_ids

def rgb_to_onehot_to_gray(rgb_arr, color_map=None):
    """Collapse a colour-coded mask into a single-channel class-id mask.

    Args:
        rgb_arr: Array of shape ``(H, W, 3)`` whose pixels are compared with
            the colours in ``color_map``.
        color_map: Mapping ``class id -> colour triple``.  Defaults to the
            module-level ``id2color`` (looked up when the function is called).

    Returns:
        Integer array of shape ``(H, W)`` holding, for every pixel, the id of
        the colour it equals exactly; pixels matching no colour get 0.
    """
    if color_map is None:
        color_map = id2color

    labels = np.zeros(rgb_arr.shape[:2], dtype=np.intp)
    # Use the dictionary key itself as the label (the old loop indexed the map
    # with the enumerate counter and broke for ids other than 0..n-1).
    # Highest id first, so that if two ids share a colour the lowest id wins,
    # which is what argmax over a one-hot encoding would return.
    for class_id in sorted(color_map, reverse=True):
        labels[np.all(rgb_arr == color_map[class_id], axis=-1)] = class_id
    return labels

def rle_decode(mask_rle, shape):
    """Turn a run-length-encoded string into a binary mask.

    ``mask_rle`` is a space-separated sequence of ``start length`` pairs; the
    starts are 1-based positions in the flattened mask, which is reshaped to
    ``shape`` in row-major order.

    Args:
        mask_rle: The encoded runs, e.g. ``"1 3 10 2"``.
        shape: ``(rows, cols)`` of the mask to build.

    Returns:
        ``uint8`` array of ``shape`` with 1 inside the runs and 0 elsewhere.
    """
    tokens = np.fromstring(mask_rle, dtype=int, sep=' ')
    run_starts = tokens[0::2] - 1  # 1-based -> 0-based
    run_stops = run_starts + tokens[1::2]
    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for run in range(min(len(run_starts), len(run_stops))):
        flat_mask[run_starts[run]:run_stops[run]] = 1
    return flat_mask.reshape(shape)

def load_img(img_path):
    """Load an image, min-max scale it to 0-255 and replicate it to 3 channels.

    Args:
        img_path: Path of the image file, read with ``cv2.IMREAD_UNCHANGED``.

    Returns:
        ``uint8`` array equal to the scaled image with a new trailing axis of
        size 3 (the same values repeated in every channel).

    Raises:
        OSError: If OpenCV cannot read ``img_path``.
    """
    raw = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {img_path}")

    img = raw.astype(np.float32)
    lo, hi = img.min(), img.max()
    if hi > lo:
        img = (img - lo) / (hi - lo) * 255.0
    else:
        # Constant image: the stretch would divide by zero; emit black instead.
        img = np.zeros_like(img)
    img = img.astype(np.uint8)
    return np.tile(img[..., None], 3)  # Convert grayscale to RGB

def create_and_write_img_msk(file_paths, file_ids, save_img_dir, save_msk_dir, main_df, mask_rgb):
    """Write the normalised image and its class-id mask for every listed file.

    For each ``(file_path, file_id)`` pair the image is loaded with
    ``load_img``, the RLE annotations of ``file_id`` are decoded into a
    3-channel mask (channel ``i`` holds ``CLASSES[i]`` as 0/255) and that mask
    is collapsed to class ids with ``rgb_to_onehot_to_gray``.  Image and mask
    are written under the same file name, ``"<case_day>_<original basename>"``.

    Args:
        file_paths: Paths of the source images.
        file_ids: Ids matching ``file_paths`` element-wise; looked up in the
            ``id`` column of ``main_df``.
        save_img_dir: Directory receiving the images (expected to exist).
        save_msk_dir: Directory receiving the class-id masks (expected to exist).
        main_df: DataFrame with ``id``, ``class`` and ``segmentation`` columns.
        mask_rgb: When true, the 3-channel mask is additionally written to
            ``f"{save_msk_dir}_rgb"``, which is created if missing.
    """
    color_mask_dir = f"{save_msk_dir}_rgb"
    if mask_rgb:
        # Created once up front instead of once per image.
        os.makedirs(color_mask_dir, exist_ok=True)

    # Group the annotation rows of the requested ids once; filtering the whole
    # frame for every image costs a full scan per file.
    wanted_rows = main_df[main_df["id"].isin(set(file_ids))]
    rows_by_id = {img_id: rows for img_id, rows in wanted_rows.groupby("id", sort=False)}
    no_rows = main_df.iloc[0:0]

    for file_path, file_id in tqdm(zip(file_paths, file_ids), total=len(file_ids)):
        image = load_img(file_path)
        img_df = rows_by_id.get(file_id, no_rows)
        # The two numbers in the file name are reversed to form the array shape
        # (presumably width_height -> (height, width); confirm against the data).
        img_shape = tuple(map(int, IMG_SHAPE.search(file_path).group()[1:-1].split('_')))[::-1]
        mask_image = np.zeros(img_shape + (3,), dtype=np.uint8)

        for i, class_name in enumerate(CLASSES):
            class_rows = img_df[img_df["class"] == class_name]
            if not class_rows.empty:
                rle = class_rows.iloc[0]['segmentation']
                mask_image[..., i] = rle_decode(rle, img_shape) * 255

        # Class ids are tiny; store them as 8-bit explicitly instead of relying
        # on OpenCV converting a 64-bit integer array while writing.
        gray_mask = rgb_to_onehot_to_gray(mask_image).astype(np.uint8)
        filename = GET_CASE_AND_DATE.search(file_path).group() + "_" + os.path.basename(file_path)

        cv2.imwrite(os.path.join(save_img_dir, filename), image)
        cv2.imwrite(os.path.join(save_msk_dir, filename), gray_mask)

        if mask_rgb:
            cv2.imwrite(os.path.join(color_mask_dir, filename), mask_image)

def main(csv_path, root_dir, mask_rgb=False):
    """Write an 80/20 train/valid split of every case folder's annotated slices.

    Reads the annotation CSV (rows containing missing values are dropped),
    then, for every case folder below ``<root_dir>/Backup/train``, splits that
    folder's annotated images 80/20 and writes images and masks to
    ``<root_dir>/train`` and ``<root_dir>/valid``.

    NOTE(review): the split is made per image inside each case, so slices of
    the same case land in both sets; the later cells split by case instead.

    Args:
        csv_path: Path of the annotation CSV (``id``, ``class``, ``segmentation``).
        root_dir: Dataset root containing ``Backup/train``; output folders are
            created below it.
        mask_rgb: Forwarded to ``create_and_write_img_msk``.
    """
    np.random.seed(42)
    df = pd.read_csv(csv_path).dropna()
    ids = df['id'].unique()

    orig_img_dir = os.path.join(root_dir, "Backup", "train")
    # os.listdir returns entries in arbitrary order and also lists stray files
    # (e.g. ".DS_Store"); keep directories only, in a stable order.
    case_folders = sorted(
        entry for entry in os.listdir(orig_img_dir)
        if os.path.isdir(os.path.join(orig_img_dir, entry))
    )

    train_img_dir = os.path.join(root_dir, "train", "images")
    train_msk_dir = os.path.join(root_dir, "train", "masks")
    valid_img_dir = os.path.join(root_dir, "valid", "images")
    valid_msk_dir = os.path.join(root_dir, "valid", "masks")

    for path in [train_img_dir, train_msk_dir, valid_img_dir, valid_msk_dir]:
        os.makedirs(path, exist_ok=True)

    for folder in case_folders:
        folder_path = os.path.join(orig_img_dir, folder)
        imgs, img_ids = get_folder_files(folder_path, ids)
        if not imgs:
            print(f"No images to process in {folder_path}")  # Debugging statement
            continue

        # Sort the pairs so the seeded split does not depend on the order in
        # which the file system happened to list the files.
        ordered = sorted(zip(imgs, img_ids))
        imgs = [path for path, _ in ordered]
        img_ids = [img_id for _, img_id in ordered]

        if len(imgs) < 2:
            # train_test_split cannot split a single sample; keep it for training.
            train_imgs, train_ids, valid_imgs, valid_ids = imgs, img_ids, [], []
        else:
            train_imgs, valid_imgs, train_ids, valid_ids = train_test_split(
                imgs, img_ids, train_size=0.8, random_state=42
            )

        create_and_write_img_msk(train_imgs, train_ids, train_img_dir, train_msk_dir, df, mask_rgb)
        if valid_imgs:
            create_and_write_img_msk(valid_imgs, valid_ids, valid_img_dir, valid_msk_dir, df, mask_rgb)

# Script entry point: runs the preparation against the author's local dataset
# location. The absolute paths are machine-specific; adjust them before running.
if __name__ == "__main__":
    main(csv_path='/Users/probio/Downloads/Medical_seg/Backup/train.csv', root_dir='/Users/probio/Downloads/Medical_seg/', mask_rgb=True)


2D_70train_10Valid_5Test

In [None]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Regular expressions for extracting parts from file paths
# "case<1-3 digits>_day<1-3 digits>", e.g. "case18_day0"
GET_CASE_AND_DATE = re.compile(r"case[0-9]{1,3}_day[0-9]{1,3}")
# "slice_<1-4 digits>", e.g. "slice_0001"
GET_SLICE_NUM = re.compile(r"slice_[0-9]{1,4}")
# Two underscore-delimited numbers of 1-3 digits each, e.g. "_266_266_".
# The writer function below parses them and reverses their order to get the mask shape.
IMG_SHAPE = re.compile(r"_[0-9]{1,3}_[0-9]{1,3}_")

# Define classes for image segmentation
# The position of a class in this list is the mask channel it is drawn into.
CLASSES = ["large_bowel", "small_bowel", "stomach"]

# Mapping from mask colour (channel values as stored in the mask array) to class ID.
# With channel i holding CLASSES[i]: (255, 0, 0) is large_bowel -> 3,
# (0, 255, 0) is small_bowel -> 2, (0, 0, 255) is stomach -> 1, all-zero -> 0.
color2id = {(0, 0, 0): 0, (0, 0, 255): 1, (0, 255, 0): 2, (255, 0, 0): 3}
# Inverse mapping: class ID -> colour.
id2color = {v: k for k, v in color2id.items()}

def get_folder_files(folder_path, only_ids):
    """Collect the files under ``folder_path`` whose image id is in ``only_ids``.

    The id of a file is ``"<case_day>_<slice>"``, built from the first match of
    ``GET_CASE_AND_DATE`` and of ``GET_SLICE_NUM`` in the full file path
    (e.g. ``case1_day0_slice_0001``).  Files whose path lacks either match are
    skipped.

    Args:
        folder_path: Directory that is walked recursively.
        only_ids: Iterable of ids to keep (list, set, NumPy array, ...).

    Returns:
        ``(relevant_imgs, img_ids)``: two parallel lists holding the matching
        file paths and their ids, in sorted traversal order.
    """
    print(f"Searching in folder: {folder_path}")  # Debugging statement
    # Hash the ids once: ``x in ndarray`` rescans the whole array for every file.
    if isinstance(only_ids, (set, frozenset)):
        wanted_ids = only_ids
    else:
        wanted_ids = set(only_ids)

    relevant_imgs = []
    img_ids = []
    for dirpath, dirnames, files in os.walk(folder_path):
        dirnames.sort()  # os.walk order is arbitrary; sort for reproducible output
        for filename in sorted(files):
            src_file_path = os.path.join(dirpath, filename)
            case_day = GET_CASE_AND_DATE.search(src_file_path)
            slice_id = GET_SLICE_NUM.search(src_file_path)
            if case_day is None or slice_id is None:
                continue  # Skip files that do not match the pattern
            image_id = f"{case_day.group()}_{slice_id.group()}"
            if image_id in wanted_ids:
                relevant_imgs.append(src_file_path)
                img_ids.append(image_id)
    print(f"Found {len(relevant_imgs)} relevant images.")  # Debugging statement
    return relevant_imgs, img_ids


def rle_decode(mask_rle, shape):
    """Turn a run-length-encoded string into a binary mask.

    ``mask_rle`` is a space-separated sequence of ``start length`` pairs; the
    starts are 1-based positions in the flattened mask, which is reshaped to
    ``shape`` in row-major order.

    Args:
        mask_rle: The encoded runs, e.g. ``"1 3 10 2"``.
        shape: ``(rows, cols)`` of the mask to build.

    Returns:
        ``uint8`` array of ``shape`` with 1 inside the runs and 0 elsewhere.
    """
    tokens = np.fromstring(mask_rle, dtype=int, sep=' ')
    run_starts = tokens[0::2] - 1  # 1-based -> 0-based
    run_stops = run_starts + tokens[1::2]
    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for run in range(min(len(run_starts), len(run_stops))):
        flat_mask[run_starts[run]:run_stops[run]] = 1
    return flat_mask.reshape(shape)

def rgb_to_onehot_to_gray(rgb_arr, color_map=None):
    """Collapse a colour-coded mask into a single-channel class-id mask.

    Args:
        rgb_arr: Array of shape ``(H, W, 3)`` whose pixels are compared with
            the colours in ``color_map``.
        color_map: Mapping ``class id -> colour triple``.  Defaults to the
            module-level ``id2color`` (looked up when the function is called).

    Returns:
        Integer array of shape ``(H, W)`` holding, for every pixel, the id of
        the colour it equals exactly; pixels matching no colour get 0.
    """
    if color_map is None:
        color_map = id2color

    labels = np.zeros(rgb_arr.shape[:2], dtype=np.intp)
    # Use the dictionary key itself as the label (the old loop indexed the map
    # with the enumerate counter and broke for ids other than 0..n-1).
    # Highest id first, so that if two ids share a colour the lowest id wins,
    # which is what argmax over a one-hot encoding would return.
    for class_id in sorted(color_map, reverse=True):
        labels[np.all(rgb_arr == color_map[class_id], axis=-1)] = class_id
    return labels

def load_img(img_path):
    """Load an image, min-max scale it to 0-255 and replicate it to 3 channels.

    Args:
        img_path: Path of the image file, read with ``cv2.IMREAD_UNCHANGED``.

    Returns:
        ``uint8`` array equal to the scaled image with a new trailing axis of
        size 3 (the same values repeated in every channel).

    Raises:
        OSError: If OpenCV cannot read ``img_path``.
    """
    raw = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {img_path}")

    img = raw.astype(np.float32)
    lo, hi = img.min(), img.max()
    if hi > lo:
        img = (img - lo) / (hi - lo) * 255.0
    else:
        # Constant image: the stretch would divide by zero; emit black instead.
        img = np.zeros_like(img)
    img = img.astype(np.uint8)
    return np.tile(img[..., None], 3)  # Convert grayscale to RGB

def create_and_write_img_msk(file_paths, file_ids, save_img_dir, save_msk_dir, main_df, mask_rgb):
    """Write the normalised image and its class-id mask for every listed file.

    For each ``(file_path, file_id)`` pair the image is loaded with
    ``load_img``, the RLE annotations of ``file_id`` are decoded into a
    3-channel mask (channel ``i`` holds ``CLASSES[i]`` as 0/255) and that mask
    is collapsed to class ids with ``rgb_to_onehot_to_gray``.  Image and mask
    are written under the same file name, ``"<case_day>_<original basename>"``.

    Args:
        file_paths: Paths of the source images.
        file_ids: Ids matching ``file_paths`` element-wise; looked up in the
            ``id`` column of ``main_df``.
        save_img_dir: Directory receiving the images (expected to exist).
        save_msk_dir: Directory receiving the class-id masks (expected to exist).
        main_df: DataFrame with ``id``, ``class`` and ``segmentation`` columns.
        mask_rgb: When true, the 3-channel mask is additionally written to
            ``f"{save_msk_dir}_rgb"``, which is created if missing.
    """
    color_mask_dir = f"{save_msk_dir}_rgb"
    if mask_rgb:
        # Created once up front instead of once per image.
        os.makedirs(color_mask_dir, exist_ok=True)

    # Group the annotation rows of the requested ids once; filtering the whole
    # frame for every image costs a full scan per file.
    wanted_rows = main_df[main_df["id"].isin(set(file_ids))]
    rows_by_id = {img_id: rows for img_id, rows in wanted_rows.groupby("id", sort=False)}
    no_rows = main_df.iloc[0:0]

    for file_path, file_id in tqdm(zip(file_paths, file_ids), total=len(file_ids)):
        image = load_img(file_path)
        img_df = rows_by_id.get(file_id, no_rows)
        # The two numbers in the file name are reversed to form the array shape
        # (presumably width_height -> (height, width); confirm against the data).
        img_shape = tuple(map(int, IMG_SHAPE.search(file_path).group()[1:-1].split('_')))[::-1]
        mask_image = np.zeros(img_shape + (3,), dtype=np.uint8)

        for i, class_name in enumerate(CLASSES):
            class_rows = img_df[img_df["class"] == class_name]
            if not class_rows.empty:
                rle = class_rows.iloc[0]['segmentation']
                mask_image[..., i] = rle_decode(rle, img_shape) * 255

        # Class ids are tiny; store them as 8-bit explicitly instead of relying
        # on OpenCV converting a 64-bit integer array while writing.
        gray_mask = rgb_to_onehot_to_gray(mask_image).astype(np.uint8)
        filename = GET_CASE_AND_DATE.search(file_path).group() + "_" + os.path.basename(file_path)

        cv2.imwrite(os.path.join(save_img_dir, filename), image)
        cv2.imwrite(os.path.join(save_msk_dir, filename), gray_mask)

        if mask_rgb:
            cv2.imwrite(os.path.join(color_mask_dir, filename), mask_image)

def main(csv_path, root_dir, mask_rgb=False):
    """Split the case folders into train/valid/test and write images and masks.

    70/85 of the case folders below ``<root_dir>/Backup/train`` go to train;
    10/15 of the remaining folders go to valid and the rest to test.  All
    annotated images of a folder are written to that folder's split.

    Args:
        csv_path: Path of the annotation CSV (``id``, ``class``,
            ``segmentation``); rows containing missing values are dropped.
        root_dir: Dataset root containing ``Backup/train``; the ``train``,
            ``valid`` and ``test`` output folders are created below it.
        mask_rgb: Forwarded to ``create_and_write_img_msk``.
    """
    np.random.seed(42)
    df = pd.read_csv(csv_path).dropna()
    ids = df['id'].unique()

    orig_img_dir = os.path.join(root_dir, "Backup", "train")
    # os.listdir returns entries in arbitrary order and also lists stray files
    # (e.g. ".DS_Store").  Keep directories only, sorted, so that the seeded
    # split below assigns the same cases on every machine.
    case_folders = sorted(
        entry for entry in os.listdir(orig_img_dir)
        if os.path.isdir(os.path.join(orig_img_dir, entry))
    )

    train_img_dir = os.path.join(root_dir, "train", "images")
    train_msk_dir = os.path.join(root_dir, "train", "masks")
    valid_img_dir = os.path.join(root_dir, "valid", "images")
    valid_msk_dir = os.path.join(root_dir, "valid", "masks")
    test_img_dir = os.path.join(root_dir, "test", "images")
    test_msk_dir = os.path.join(root_dir, "test", "masks")

    for path in [train_img_dir, train_msk_dir, valid_img_dir, valid_msk_dir, test_img_dir, test_msk_dir]:
        os.makedirs(path, exist_ok=True)

    # Assign train, validation, and test cases
    train_folders, temp_folders = train_test_split(case_folders, train_size=70/85, random_state=42)
    valid_folders, test_folders = train_test_split(temp_folders, train_size=10/15, random_state=42)

    # Process each set
    for folder_set, img_dir, msk_dir in [(train_folders, train_img_dir, train_msk_dir),
                                         (valid_folders, valid_img_dir, valid_msk_dir),
                                         (test_folders, test_img_dir, test_msk_dir)]:
        for folder in folder_set:
            folder_path = os.path.join(orig_img_dir, folder)
            imgs, img_ids = get_folder_files(folder_path, ids)
            if imgs:  # Check if there are images to process
                create_and_write_img_msk(imgs, img_ids, img_dir, msk_dir, df, mask_rgb)
            else:
                print(f"No images to process in {folder_path}")  # Debugging statement

# Script entry point: runs the preparation against the author's local dataset
# location. The absolute paths are machine-specific; adjust them before running.
if __name__ == "__main__":
    main(csv_path='/Users/probio/Downloads/Medical_seg/Backup/train.csv', root_dir='/Users/probio/Downloads/Medical_seg/', mask_rgb=True)


2D.5_70train_10Valid_5Test

### Spacing

Spacing and depth: `spacing` sets how many slice indices lie between neighbouring slices of a stack (each neighbour is the middle slice ± k × `spacing`), while `depth` sets how many slices each stack contains. Tune both parameters to the requirements of your application.

#### With an arbitrary number of stacked slices (`depth`)

In [None]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Regular expressions for extracting parts from file paths
# "case<1-3 digits>_day<1-3 digits>", e.g. "case18_day0"
GET_CASE_AND_DATE = re.compile(r"case[0-9]{1,3}_day[0-9]{1,3}")
# "slice_<1-4 digits>", e.g. "slice_0001"
GET_SLICE_NUM = re.compile(r"slice_[0-9]{1,4}")
# Two underscore-delimited numbers of 1-3 digits each, e.g. "_266_266_".
# The functions below parse them and reverse their order to get the array shape.
IMG_SHAPE = re.compile(r"_[0-9]{1,3}_[0-9]{1,3}_")

# Define classes for image segmentation
# The position of a class in this list is the mask channel it is drawn into.
CLASSES = ["large_bowel", "small_bowel", "stomach"]

# Mapping from mask colour (channel values as stored in the mask array) to class ID.
# With channel i holding CLASSES[i]: (255, 0, 0) is large_bowel -> 3,
# (0, 255, 0) is small_bowel -> 2, (0, 0, 255) is stomach -> 1, all-zero -> 0.
color2id = {(0, 0, 0): 0, (0, 0, 255): 1, (0, 255, 0): 2, (255, 0, 0): 3}
# Inverse mapping: class ID -> colour.
id2color = {v: k for k, v in color2id.items()}

def get_folder_files(folder_path, only_ids):
    """Collect the files under ``folder_path`` whose image id is in ``only_ids``.

    The id of a file is ``"<case_day>_<slice>"``, built from the first match of
    ``GET_CASE_AND_DATE`` and of ``GET_SLICE_NUM`` in the full file path
    (e.g. ``case1_day0_slice_0001``).  Files whose path lacks either match are
    skipped.

    Args:
        folder_path: Directory that is walked recursively.
        only_ids: Iterable of ids to keep (list, set, NumPy array, ...).

    Returns:
        ``(relevant_imgs, img_ids)``: two parallel lists holding the matching
        file paths and their ids, in sorted traversal order.
    """
    print(f"Searching in folder: {folder_path}")  # Debugging statement
    # Hash the ids once: ``x in ndarray`` rescans the whole array for every file.
    if isinstance(only_ids, (set, frozenset)):
        wanted_ids = only_ids
    else:
        wanted_ids = set(only_ids)

    relevant_imgs = []
    img_ids = []
    for dirpath, dirnames, files in os.walk(folder_path):
        dirnames.sort()  # os.walk order is arbitrary; sort for reproducible output
        for filename in sorted(files):
            src_file_path = os.path.join(dirpath, filename)
            case_day = GET_CASE_AND_DATE.search(src_file_path)
            slice_id = GET_SLICE_NUM.search(src_file_path)
            if case_day is None or slice_id is None:
                continue  # Skip files that do not match the pattern
            image_id = f"{case_day.group()}_{slice_id.group()}"
            if image_id in wanted_ids:
                relevant_imgs.append(src_file_path)
                img_ids.append(image_id)
    print(f"Found {len(relevant_imgs)} relevant images.")  # Debugging statement
    return relevant_imgs, img_ids


def rle_decode(mask_rle, shape):
    """Turn a run-length-encoded string into a binary mask.

    ``mask_rle`` is a space-separated sequence of ``start length`` pairs; the
    starts are 1-based positions in the flattened mask, which is reshaped to
    ``shape`` in row-major order.

    Args:
        mask_rle: The encoded runs, e.g. ``"1 3 10 2"``.
        shape: ``(rows, cols)`` of the mask to build.

    Returns:
        ``uint8`` array of ``shape`` with 1 inside the runs and 0 elsewhere.
    """
    tokens = np.fromstring(mask_rle, dtype=int, sep=' ')
    run_starts = tokens[0::2] - 1  # 1-based -> 0-based
    run_stops = run_starts + tokens[1::2]
    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for run in range(min(len(run_starts), len(run_stops))):
        flat_mask[run_starts[run]:run_stops[run]] = 1
    return flat_mask.reshape(shape)

def get_slice_paths(middle_path, depth, spacing):
    """Return the ``depth`` slice paths of the 2.5D stack around ``middle_path``.

    The slice number is taken from the first ``GET_SLICE_NUM`` match in
    ``middle_path``.  Stack position ``k`` (``0 <= k < depth``) refers to slice
    ``middle + (k - depth // 2) * spacing``, so the middle slice itself is part
    of the result.  Neighbour names keep the zero padding of the middle slice
    (``slice_0003`` -> ``slice_0001``); building them from the bare integer
    would never match a zero-padded file name.

    Args:
        middle_path: Path of the centre slice.
        depth: Number of entries to return.
        spacing: Slice-number distance between consecutive entries.

    Returns:
        List of ``depth`` items: the path of each slice that exists on disk,
        or ``None`` where the slice is missing (or its number is negative).
    """
    slice_match = GET_SLICE_NUM.search(middle_path)
    slice_digits = slice_match.group().split('_')[-1]
    middle_slice_index = int(slice_digits)
    pad_width = len(slice_digits)
    # Rebuild the path around the matched token so only that occurrence changes.
    head = middle_path[:slice_match.start()]
    tail = middle_path[slice_match.end():]

    slice_paths = []
    first_offset = -(depth // 2)
    for offset in range(first_offset, first_offset + depth):
        slice_num = middle_slice_index + offset * spacing
        if slice_num < 0:
            slice_paths.append(None)
            continue
        new_path = f"{head}slice_{slice_num:0{pad_width}d}{tail}"
        if os.path.exists(new_path):
            slice_paths.append(new_path)
        else:
            slice_paths.append(None)  # Handle missing slices if needed
    return slice_paths

def load_img_2p5d(middle_path, depth=3, spacing=2):
    """Load the slices around ``middle_path`` into one multi-channel array.

    Every path returned by ``get_slice_paths`` is read with
    ``cv2.IMREAD_UNCHANGED``, min-max scaled to 0-255 on its own and stored in
    the channel matching its position in that list.  Channels without a slice
    stay zero.

    Args:
        middle_path: Path of the centre slice; its file name must contain the
            ``IMG_SHAPE`` size token.
        depth: Number of channels of the returned stack.
        spacing: Slice-number distance, forwarded to ``get_slice_paths``.

    Returns:
        ``uint8`` array of shape ``img_shape + (depth,)`` (same dtype as the
        RGB-stacking loader in the later cell).

    Raises:
        OSError: If an existing slice file cannot be read by OpenCV.
    """
    slice_paths = get_slice_paths(middle_path, depth, spacing)
    img_shape = tuple(map(int, IMG_SHAPE.search(middle_path).group()[1:-1].split('_')))[::-1]
    # Zero-initialised, so channels of missing slices simply stay black.
    stack = np.zeros(img_shape + (depth,), dtype=np.uint8)
    for i, path in enumerate(slice_paths):
        if not path:
            continue  # get_slice_paths already reports missing slices as None
        raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {path}")
        img = raw.astype(np.float32)
        lo, hi = img.min(), img.max()
        if hi > lo:
            stack[..., i] = ((img - lo) / (hi - lo) * 255.0).astype(np.uint8)
        # A constant slice would divide by zero; its channel stays zero.
    return stack

def rgb_to_onehot_to_gray(rgb_arr, color_map=None):
    """Collapse a colour-coded mask into a single-channel class-id mask.

    Defined here because this cell calls it but did not define it; without it
    the cell only runs when an earlier cell has left the function in the kernel.

    Args:
        rgb_arr: Array of shape ``(H, W, 3)`` whose pixels are compared with
            the colours in ``color_map``.
        color_map: Mapping ``class id -> colour triple``.  Defaults to the
            module-level ``id2color`` (looked up when the function is called).

    Returns:
        Integer array of shape ``(H, W)`` holding, for every pixel, the id of
        the colour it equals exactly; pixels matching no colour get 0.
    """
    if color_map is None:
        color_map = id2color

    labels = np.zeros(rgb_arr.shape[:2], dtype=np.intp)
    # Highest id first, so that if two ids share a colour the lowest id wins.
    for class_id in sorted(color_map, reverse=True):
        labels[np.all(rgb_arr == color_map[class_id], axis=-1)] = class_id
    return labels


def create_and_write_img_msk_2p5d(file_paths, file_ids, save_img_dir, save_msk_dir, main_df, mask_rgb, depth, spacing):
    """Write the 2.5D image stack and the class-id mask for every listed file.

    For each ``(file_path, file_id)`` pair the slice stack is built with
    ``load_img_2p5d``, the RLE annotations of ``file_id`` are decoded into a
    3-channel mask (channel ``i`` holds ``CLASSES[i]`` as 0/255) and that mask
    is collapsed to class ids with ``rgb_to_onehot_to_gray``.  Stack and mask
    are written under the same file name, ``"<case_day>_<original basename>"``.

    NOTE(review): the stack is written with ``cv2.imwrite``, which presumably
    only encodes 1-, 3- or 4-channel images; other ``depth`` values likely
    fail at the write step. Confirm before using them.

    Args:
        file_paths: Paths of the centre slices.
        file_ids: Ids matching ``file_paths`` element-wise; looked up in the
            ``id`` column of ``main_df``.
        save_img_dir: Directory receiving the stacks (expected to exist).
        save_msk_dir: Directory receiving the class-id masks (expected to exist).
        main_df: DataFrame with ``id``, ``class`` and ``segmentation`` columns.
        mask_rgb: When true, the 3-channel mask is additionally written to
            ``f"{save_msk_dir}_rgb"``, which is created if missing.
        depth: Number of slices per stack, forwarded to ``load_img_2p5d``.
        spacing: Slice-number distance, forwarded to ``load_img_2p5d``.
    """
    color_mask_dir = f"{save_msk_dir}_rgb"
    if mask_rgb:
        # Created once up front instead of once per image.
        os.makedirs(color_mask_dir, exist_ok=True)

    # Group the annotation rows of the requested ids once; filtering the whole
    # frame for every image costs a full scan per file.
    wanted_rows = main_df[main_df["id"].isin(set(file_ids))]
    rows_by_id = {img_id: rows for img_id, rows in wanted_rows.groupby("id", sort=False)}
    no_rows = main_df.iloc[0:0]

    for file_path, file_id in tqdm(zip(file_paths, file_ids), total=len(file_ids)):
        # Values are already in 0-255; make the 8-bit depth explicit for the encoder.
        image_stack = np.asarray(load_img_2p5d(file_path, depth, spacing)).astype(np.uint8, copy=False)
        img_df = rows_by_id.get(file_id, no_rows)
        img_shape = tuple(map(int, IMG_SHAPE.search(file_path).group()[1:-1].split('_')))[::-1]
        mask_image = np.zeros(img_shape + (3,), dtype=np.uint8)

        for i, class_name in enumerate(CLASSES):
            class_rows = img_df[img_df["class"] == class_name]
            if not class_rows.empty:
                rle = class_rows.iloc[0]['segmentation']
                mask_image[..., i] = rle_decode(rle, img_shape) * 255

        gray_mask = rgb_to_onehot_to_gray(mask_image).astype(np.uint8)
        filename = GET_CASE_AND_DATE.search(file_path).group() + "_" + os.path.basename(file_path)

        cv2.imwrite(os.path.join(save_img_dir, filename), image_stack)
        cv2.imwrite(os.path.join(save_msk_dir, filename), gray_mask)

        if mask_rgb:
            cv2.imwrite(os.path.join(color_mask_dir, filename), mask_image)

def main(csv_path, root_dir, mask_rgb=False, depth=3, spacing=2):
    """Split the case folders into train/valid/test and write 2.5D stacks and masks.

    70/85 of the case folders below ``<root_dir>/Backup/train`` go to train;
    10/15 of the remaining folders go to valid and the rest to test.  All
    annotated images of a folder are written to that folder's split.

    Args:
        csv_path: Path of the annotation CSV (``id``, ``class``,
            ``segmentation``); rows containing missing values are dropped.
        root_dir: Dataset root containing ``Backup/train``; the ``train``,
            ``valid`` and ``test`` output folders are created below it.
        mask_rgb: Forwarded to ``create_and_write_img_msk_2p5d``.
        depth: Number of slices per stack (forwarded).
        spacing: Slice-number distance between stacked slices (forwarded).
    """
    np.random.seed(42)
    df = pd.read_csv(csv_path).dropna()
    ids = df['id'].unique()

    orig_img_dir = os.path.join(root_dir, "Backup", "train")
    # os.listdir returns entries in arbitrary order and also lists stray files
    # (e.g. ".DS_Store").  Keep directories only, sorted, so that the seeded
    # split below assigns the same cases on every machine.
    case_folders = sorted(
        entry for entry in os.listdir(orig_img_dir)
        if os.path.isdir(os.path.join(orig_img_dir, entry))
    )

    train_img_dir = os.path.join(root_dir, "train", "images")
    train_msk_dir = os.path.join(root_dir, "train", "masks")
    valid_img_dir = os.path.join(root_dir, "valid", "images")
    valid_msk_dir = os.path.join(root_dir, "valid", "masks")
    test_img_dir = os.path.join(root_dir, "test", "images")
    test_msk_dir = os.path.join(root_dir, "test", "masks")

    for path in [train_img_dir, train_msk_dir, valid_img_dir, valid_msk_dir, test_img_dir, test_msk_dir]:
        os.makedirs(path, exist_ok=True)

    # Assign train, validation, and test cases
    train_folders, temp_folders = train_test_split(case_folders, train_size=70/85, random_state=42)
    valid_folders, test_folders = train_test_split(temp_folders, train_size=10/15, random_state=42)

    # Process each set
    for folder_set, img_dir, msk_dir in [(train_folders, train_img_dir, train_msk_dir),
                                         (valid_folders, valid_img_dir, valid_msk_dir),
                                         (test_folders, test_img_dir, test_msk_dir)]:
        for folder in folder_set:
            folder_path = os.path.join(orig_img_dir, folder)
            imgs, img_ids = get_folder_files(folder_path, ids)
            if imgs:  # Check if there are images to process
                create_and_write_img_msk_2p5d(imgs, img_ids, img_dir, msk_dir, df, mask_rgb, depth, spacing)
            else:
                print(f"No images to process in {folder_path}")  # Debugging statement

# Script entry point: runs the preparation against the author's local dataset
# location with the default depth and spacing. The absolute paths are
# machine-specific; adjust them before running.
if __name__ == "__main__":
    main(csv_path='/Users/probio/Downloads/Medical_seg/Backup/train.csv', root_dir='/Users/probio/Downloads/Medical_seg/', mask_rgb=True)


# RGB stacking

In [6]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Regular expressions for extracting parts from file paths
# "case<1-3 digits>_day<1-3 digits>", e.g. "case18_day0"
GET_CASE_AND_DATE = re.compile(r"case[0-9]{1,3}_day[0-9]{1,3}")
# "slice_<1-4 digits>", e.g. "slice_0001"
GET_SLICE_NUM = re.compile(r"slice_[0-9]{1,4}")
# Two underscore-delimited numbers of 1-3 digits each, e.g. "_266_266_".
# The functions below parse them and reverse their order to get the array shape.
IMG_SHAPE = re.compile(r"_[0-9]{1,3}_[0-9]{1,3}_")

# Define classes for image segmentation
# The position of a class in this list is the mask channel it is drawn into.
CLASSES = ["large_bowel", "small_bowel", "stomach"]

# Mapping from mask colour (channel values as stored in the mask array) to class ID.
# With channel i holding CLASSES[i]: (255, 0, 0) is large_bowel -> 3,
# (0, 255, 0) is small_bowel -> 2, (0, 0, 255) is stomach -> 1, all-zero -> 0.
color2id = {(0, 0, 0): 0, (0, 0, 255): 1, (0, 255, 0): 2, (255, 0, 0): 3}
# Inverse mapping: class ID -> colour.
id2color = {v: k for k, v in color2id.items()}

def get_folder_files(folder_path, only_ids):
    """Collect the files under ``folder_path`` whose image id is in ``only_ids``.

    The id of a file is ``"<case_day>_<slice>"``, built from the first match of
    ``GET_CASE_AND_DATE`` and of ``GET_SLICE_NUM`` in the full file path
    (e.g. ``case1_day0_slice_0001``).  Files whose path lacks either match are
    skipped.

    Args:
        folder_path: Directory that is walked recursively.
        only_ids: Iterable of ids to keep (list, set, NumPy array, ...).

    Returns:
        ``(relevant_imgs, img_ids)``: two parallel lists holding the matching
        file paths and their ids, in sorted traversal order.
    """
    print(f"Searching in folder: {folder_path}")  # Debugging statement
    # Hash the ids once: ``x in ndarray`` rescans the whole array for every file.
    if isinstance(only_ids, (set, frozenset)):
        wanted_ids = only_ids
    else:
        wanted_ids = set(only_ids)

    relevant_imgs = []
    img_ids = []
    for dirpath, dirnames, files in os.walk(folder_path):
        dirnames.sort()  # os.walk order is arbitrary; sort for reproducible output
        for filename in sorted(files):
            src_file_path = os.path.join(dirpath, filename)
            case_day = GET_CASE_AND_DATE.search(src_file_path)
            slice_id = GET_SLICE_NUM.search(src_file_path)
            if case_day is None or slice_id is None:
                continue  # Skip files that do not match the pattern
            image_id = f"{case_day.group()}_{slice_id.group()}"
            if image_id in wanted_ids:
                relevant_imgs.append(src_file_path)
                img_ids.append(image_id)
    print(f"Found {len(relevant_imgs)} relevant images.")  # Debugging statement
    return relevant_imgs, img_ids

def rle_decode(mask_rle, shape):
    """Turn a run-length-encoded string into a binary mask.

    ``mask_rle`` is a space-separated sequence of ``start length`` pairs; the
    starts are 1-based positions in the flattened mask, which is reshaped to
    ``shape`` in row-major order.

    Args:
        mask_rle: The encoded runs, e.g. ``"1 3 10 2"``.
        shape: ``(rows, cols)`` of the mask to build.

    Returns:
        ``uint8`` array of ``shape`` with 1 inside the runs and 0 elsewhere.
    """
    tokens = np.fromstring(mask_rle, dtype=int, sep=' ')
    run_starts = tokens[0::2] - 1  # 1-based -> 0-based
    run_stops = run_starts + tokens[1::2]
    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for run in range(min(len(run_starts), len(run_stops))):
        flat_mask[run_starts[run]:run_stops[run]] = 1
    return flat_mask.reshape(shape)

def rgb_to_onehot_to_gray(rgb_arr, color_map=None):
    """Collapse a colour-coded mask into a single-channel class-id mask.

    Args:
        rgb_arr: Array of shape ``(H, W, 3)`` whose pixels are compared with
            the colours in ``color_map``.
        color_map: Mapping ``class id -> colour triple``.  Defaults to the
            module-level ``id2color`` (looked up when the function is called).

    Returns:
        Integer array of shape ``(H, W)`` holding, for every pixel, the id of
        the colour it equals exactly; pixels matching no colour get 0.
    """
    if color_map is None:
        color_map = id2color

    labels = np.zeros(rgb_arr.shape[:2], dtype=np.intp)
    # Use the dictionary key itself as the label (the old loop indexed the map
    # with the enumerate counter and broke for ids other than 0..n-1).
    # Highest id first, so that if two ids share a colour the lowest id wins,
    # which is what argmax over a one-hot encoding would return.
    for class_id in sorted(color_map, reverse=True):
        labels[np.all(rgb_arr == color_map[class_id], axis=-1)] = class_id
    return labels

def get_slice_paths(middle_path, spacing):
    """Return the paths of the previous, middle and next slice of a 2.5D stack.

    The slice number is taken from the first ``GET_SLICE_NUM`` match in
    ``middle_path``; the three entries refer to slices ``middle - spacing``,
    ``middle`` and ``middle + spacing``.  Neighbour names keep the zero padding
    of the middle slice (``slice_0003`` -> ``slice_0001``); building them from
    the bare integer would never match a zero-padded file name, so all three
    entries would point at the middle slice.

    Args:
        middle_path: Path of the centre slice.
        spacing: Slice-number distance between consecutive entries.

    Returns:
        List of three items: the path of each slice that exists on disk, or
        ``None`` where the slice is missing (or its number is negative).
    """
    slice_match = GET_SLICE_NUM.search(middle_path)
    slice_digits = slice_match.group().split('_')[-1]
    middle_slice_index = int(slice_digits)
    pad_width = len(slice_digits)
    # Rebuild the path around the matched token so only that occurrence changes.
    head = middle_path[:slice_match.start()]
    tail = middle_path[slice_match.end():]

    slice_paths = []
    for offset in range(-1, 2):  # -1 to 1 for 3 slices
        slice_num = middle_slice_index + offset * spacing
        if slice_num < 0:
            slice_paths.append(None)
            continue
        new_path = f"{head}slice_{slice_num:0{pad_width}d}{tail}"
        if os.path.exists(new_path):
            slice_paths.append(new_path)
        else:
            slice_paths.append(None)  # Handle missing slices if needed
    return slice_paths

def load_img_2p5d(middle_path, spacing=2):
    """Load three neighbouring slices into the channels of one 3-channel image.

    Every path returned by ``get_slice_paths`` is read with
    ``cv2.IMREAD_GRAYSCALE``, min-max scaled to 0-255 on its own and stored in
    the channel matching its position in that list.  Channels without a slice
    stay zero.

    NOTE(review): ``IMREAD_GRAYSCALE`` (without ``IMREAD_ANYDEPTH``) presumably
    reduces deeper images to 8 bits before the stretch, unlike the
    ``IMREAD_UNCHANGED`` used by the other loaders in this notebook. Confirm
    this is intended.

    Args:
        middle_path: Path of the centre slice; its file name must contain the
            ``IMG_SHAPE`` size token.
        spacing: Slice-number distance, forwarded to ``get_slice_paths``.

    Returns:
        ``uint8`` array of shape ``img_shape + (3,)``.

    Raises:
        OSError: If an existing slice file cannot be read by OpenCV.
    """
    slice_paths = get_slice_paths(middle_path, spacing)
    img_shape = tuple(map(int, IMG_SHAPE.search(middle_path).group()[1:-1].split('_')))[::-1]
    # Zero-initialised, so channels of missing slices simply stay black.
    stack = np.zeros(img_shape + (3,), dtype=np.uint8)  # 3 channels for RGB
    for i, path in enumerate(slice_paths):
        if not path:
            continue  # get_slice_paths already reports missing slices as None
        raw = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {path}")
        img = raw.astype(np.float32)
        lo, hi = img.min(), img.max()
        if hi > lo:
            stack[..., i] = ((img - lo) / (hi - lo) * 255.0).astype(np.uint8)
        # A constant slice would divide by zero; its channel stays zero.
    return stack



# def load_img_2p5d(middle_path, spacing=2):
#     slice_paths = get_slice_paths(middle_path, spacing)
#     img_shape = tuple(map(int, IMG_SHAPE.search(middle_path).group()[1:-1].split('_')))[::-1]
#     stack = np.zeros(img_shape + (3,), dtype=np.uint8)  # 3 channels for RGB

#     print(f"Loading slices for: {middle_path}")
#     for i, path in enumerate(slice_paths):
#         if path and os.path.exists(path):
#             img = cv2.imread(path, cv2.IMREAD_GRAYSCALE).astype(np.float32)
#             img = (img - img.min()) / (img.max() - img.min()) * 255.0
#             stack[..., i] = img.astype(np.uint8)
#             print(f"Slice {i} stats - Min: {img.min()}, Max: {img.max()}")  # Debug print
#         else:
#             stack[..., i] = np.zeros(img_shape, dtype=np.uint8)  # Filling missing slices with zeros
#             print(f"Slice {i} missing, filled with zeros.")

#     # Debugging: Show unique values in each channel
#     for i in range(3):
#         unique_values = np.unique(stack[..., i])
#         print(f"Unique values in channel {i}: {unique_values[:10]}... (total {len(unique_values)} unique values)")
    
#     return stack


def create_and_write_img_msk_2p5d(file_paths, file_ids, save_img_dir, save_msk_dir, main_df, mask_rgb, depth, spacing):
    """Write the 3-slice image stack and the class-id mask for every listed file.

    For each ``(file_path, file_id)`` pair the slice stack is built with
    ``load_img_2p5d``, the RLE annotations of ``file_id`` are decoded into a
    3-channel mask (channel ``i`` holds ``CLASSES[i]`` as 0/255) and that mask
    is collapsed to class ids with ``rgb_to_onehot_to_gray``.  Stack and mask
    are written under the same file name, ``"<case_day>_<original basename>"``.

    Args:
        file_paths: Paths of the centre slices.
        file_ids: Ids matching ``file_paths`` element-wise; looked up in the
            ``id`` column of ``main_df``.
        save_img_dir: Directory receiving the stacks (expected to exist).
        save_msk_dir: Directory receiving the class-id masks (expected to exist).
        main_df: DataFrame with ``id``, ``class`` and ``segmentation`` columns.
        mask_rgb: When true, the 3-channel mask is additionally written to
            ``f"{save_msk_dir}_rgb"``, which is created if missing.
        depth: Accepted for signature compatibility but not used here; only
            ``spacing`` is passed on to ``load_img_2p5d``.
        spacing: Slice-number distance, forwarded to ``load_img_2p5d``.
    """
    color_mask_dir = f"{save_msk_dir}_rgb"
    if mask_rgb:
        # Created once up front instead of once per image.
        os.makedirs(color_mask_dir, exist_ok=True)

    # Group the annotation rows of the requested ids once; filtering the whole
    # frame for every image costs a full scan per file.
    wanted_rows = main_df[main_df["id"].isin(set(file_ids))]
    rows_by_id = {img_id: rows for img_id, rows in wanted_rows.groupby("id", sort=False)}
    no_rows = main_df.iloc[0:0]

    for file_path, file_id in tqdm(zip(file_paths, file_ids), total=len(file_ids)):
        image_stack = load_img_2p5d(file_path, spacing)
        img_df = rows_by_id.get(file_id, no_rows)
        img_shape = tuple(map(int, IMG_SHAPE.search(file_path).group()[1:-1].split('_')))[::-1]
        mask_image = np.zeros(img_shape + (3,), dtype=np.uint8)

        for i, class_name in enumerate(CLASSES):
            class_rows = img_df[img_df["class"] == class_name]
            if not class_rows.empty:
                rle = class_rows.iloc[0]['segmentation']
                mask_image[..., i] = rle_decode(rle, img_shape) * 255

        # Class ids are tiny; store them as 8-bit explicitly instead of relying
        # on OpenCV converting a 64-bit integer array while writing.
        gray_mask = rgb_to_onehot_to_gray(mask_image).astype(np.uint8)
        filename = GET_CASE_AND_DATE.search(file_path).group() + "_" + os.path.basename(file_path)

        cv2.imwrite(os.path.join(save_img_dir, filename), image_stack)
        cv2.imwrite(os.path.join(save_msk_dir, filename), gray_mask)

        if mask_rgb:
            cv2.imwrite(os.path.join(color_mask_dir, filename), mask_image)

def main(csv_path, root_dir, mask_rgb=False, depth=3, spacing=2):
    """Split the case folders into train/valid/test and write RGB-stacked slices and masks.

    70/85 of the case folders below ``<root_dir>/Backup/train`` go to train;
    10/15 of the remaining folders go to valid and the rest to test.  All
    annotated images of a folder are written to that folder's split.

    Args:
        csv_path: Path of the annotation CSV (``id``, ``class``,
            ``segmentation``); rows containing missing values are dropped.
        root_dir: Dataset root containing ``Backup/train``; the ``train``,
            ``valid`` and ``test`` output folders are created below it.
        mask_rgb: Forwarded to ``create_and_write_img_msk_2p5d``.
        depth: Forwarded to ``create_and_write_img_msk_2p5d``, which does not
            use it in this cell (the stack always has three channels).
        spacing: Slice-number distance between stacked slices (forwarded).
    """
    np.random.seed(42)
    df = pd.read_csv(csv_path).dropna()
    ids = df['id'].unique()

    orig_img_dir = os.path.join(root_dir, "Backup", "train")
    # os.listdir returns entries in arbitrary order and also lists stray files
    # (e.g. ".DS_Store").  Keep directories only, sorted, so that the seeded
    # split below assigns the same cases on every machine.
    case_folders = sorted(
        entry for entry in os.listdir(orig_img_dir)
        if os.path.isdir(os.path.join(orig_img_dir, entry))
    )

    train_img_dir = os.path.join(root_dir, "train", "images")
    train_msk_dir = os.path.join(root_dir, "train", "masks")
    valid_img_dir = os.path.join(root_dir, "valid", "images")
    valid_msk_dir = os.path.join(root_dir, "valid", "masks")
    test_img_dir = os.path.join(root_dir, "test", "images")
    test_msk_dir = os.path.join(root_dir, "test", "masks")

    for path in [train_img_dir, train_msk_dir, valid_img_dir, valid_msk_dir, test_img_dir, test_msk_dir]:
        os.makedirs(path, exist_ok=True)

    # Assign train, validation, and test cases
    train_folders, temp_folders = train_test_split(case_folders, train_size=70/85, random_state=42)
    valid_folders, test_folders = train_test_split(temp_folders, train_size=10/15, random_state=42)

    # Process each set
    for folder_set, img_dir, msk_dir in [(train_folders, train_img_dir, train_msk_dir),
                                         (valid_folders, valid_img_dir, valid_msk_dir),
                                         (test_folders, test_img_dir, test_msk_dir)]:
        for folder in folder_set:
            folder_path = os.path.join(orig_img_dir, folder)
            imgs, img_ids = get_folder_files(folder_path, ids)
            if imgs:  # Check if there are images to process
                create_and_write_img_msk_2p5d(imgs, img_ids, img_dir, msk_dir, df, mask_rgb, depth, spacing)
            else:
                print(f"No images to process in {folder_path}")  # Debugging statement


# Script entry point: runs the preparation against the author's local dataset
# location with the default spacing. The absolute paths are machine-specific;
# adjust them before running.
if __name__ == "__main__":
    main(csv_path='/Users/probio/Downloads/Medical_seg/Backup/train.csv', root_dir='/Users/probio/Downloads/Medical_seg/', mask_rgb=True)

Searching in folder: /Users/probio/Downloads/Medical_seg/Backup/train/case18
Found 287 relevant images.


100%|██████████| 287/287 [00:04<00:00, 65.32it/s]


Searching in folder: /Users/probio/Downloads/Medical_seg/Backup/train/case55
Found 159 relevant images.


100%|██████████| 159/159 [00:01<00:00, 86.71it/s]


Searching in folder: /Users/probio/Downloads/Medical_seg/Backup/train/case34
Found 132 relevant images.


100%|██████████| 132/132 [00:01<00:00, 82.33it/s]


Searching in folder: /Users/probio/Downloads/Medical_seg/Backup/train/case7
Found 149 relevant images.


100%|██████████| 149/149 [00:01<00:00, 89.45it/s]


Searching in folder: /Users/probio/Downloads/Medical_seg/Backup/train/case19
Found 384 relevant images.


 52%|█████▏    | 199/384 [00:02<00:02, 66.85it/s]


KeyboardInterrupt: 