Imports

In [1]:
import SimpleITK as sitk
import pandas as pd
from typing import Tuple, Dict, Any
from tqdm import tqdm
import numpy as np
import cv2
import os

Basic parameters

In [2]:
dataset_path = "datasets/Train"
label_file = "labels.csv"
dataset_name = "abus23_25"

output_folder = f"datasets/{dataset_name}_png"
slice_min_lesion_px = 25

use_classes = False  #If false, only one class is used (0)
val_frac = 0.2 # Validation fraction of the data

Load dataset info

In [3]:

def LoadAbus23(dataset_path, label_file):
    dataset = pd.read_csv(os.path.join(dataset_path, label_file))
    print("Dataset columns:", dataset.columns)
    return dataset

In [4]:
# Load dataset
dataset = LoadAbus23(dataset_path, label_file)


# Get class index
if 'label' in dataset.columns:
    classes = dataset['label'].unique().tolist()
    print("Dataset classes:", classes)
else:
    print("No classes found!")
    classes = []

Dataset columns: Index(['case_id', 'label', 'data_path', 'mask_path'], dtype='object')
Dataset classes: ['M', 'B']


Define function for reading NRRD files

In [5]:
def ReadDCM(filename: str) -> Tuple[sitk.Image, Dict[str, Any]]:
    reader = sitk.ImageFileReader()
    reader.SetFileName(filename)
    reader.LoadPrivateTagsOn()
    reader.ReadImageInformation()

    image = reader.Execute()
    metadata = {}
    for key in reader.GetMetaDataKeys():
        if reader.HasMetaDataKey(key):
            metadata[key] = reader.GetMetaData(key)     
            
    return image, metadata

Define function for 8-bits normalization

In [6]:
def normalize_8bits(image: np.ndarray):
    return (255.0 *(image - image.min()) / (image.max() - image.min())).astype(np.uint8)

### Create 8-bit slices and masks

In [7]:
images_folder = os.path.join(output_folder, "images")
masks_folder = os.path.join(output_folder, "masks")

In [8]:
os.makedirs(masks_folder, exist_ok=True)
os.makedirs(images_folder, exist_ok=True)


# Create slice from data
for _, row in tqdm(dataset.iterrows(), total=len(dataset)):
    
    # Load data and GT
    id = row.case_id
    data, metadata = ReadDCM(os.path.join(dataset_path, row.data_path.replace('\\','/')))
    mask, _ = ReadDCM(os.path.join(dataset_path, row.mask_path.replace('\\','/')))
    
    # Check image and mask size and get numpy arrays
    assert data.GetSize() == mask.GetSize()
    data_array = sitk.GetArrayFromImage(data)
    mask_array = sitk.GetArrayFromImage(mask)
    image_size = data_array.shape[2], data_array.shape[1]
    

    # Chech the labels
    if len(classes) > 0:
        label = row.label
        assert label in classes
    else:
        label = 0
    
    # For each slice
    for idx in range(len(data_array)): #first dimension is z in numpy (z,y,x)
        data_slice = data_array[idx, ...]
        mask_slice = mask_array[idx, ...]
         
        # Use only slices with mask data
        if np.sum(mask_slice) == 0:
            continue
        
        # Binarize mask_Slice
        mask_slice = (mask_slice > 0)
        if np.sum(mask_slice) < slice_min_lesion_px:
            continue
        
        mask_slice = mask_slice.astype(np.uint8) * 255
        
        # Normalize to 8-bits
        data_slice = normalize_8bits(data_slice)
        
        
        contours, _ = cv2.findContours(mask_slice, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours_list = [contour.squeeze().tolist() for contour in contours]
        
        image_name = f"{id:0>3}_{idx+1:0>3}"
        
        # Save image
        image_out = os.path.join(images_folder, f"{image_name}_{label}.png")
        cv2.imwrite(image_out, data_slice)
    
        # Save mask
        image_out = os.path.join(masks_folder, f"{image_name}_{label}.png")
        cv2.imwrite(image_out,  mask_slice)


100%|██████████| 100/100 [05:23<00:00,  3.23s/it]


### Create YOLO data

Format YOLOv8 segement:
- https://docs.ultralytics.com/datasets/segment/
- `<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>`
- Others: https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5

Format YOLOv8 detect:
- https://docs.ultralytics.com/datasets/detect/
- `<object-class> <x> <y> <width> <height>`

In [9]:
segmentation_data_path = os.path.join(output_folder, f"yolo_seg_data")
detection_data_path = os.path.join(output_folder, f"yolo_det_data")

In [10]:
import shutil

# Create folders
os.makedirs(segmentation_data_path, exist_ok=True)
os.makedirs(detection_data_path, exist_ok=True)
        
# List of image cases
list_cases = os.listdir(images_folder)

# Get classes
if not use_classes or any(len(item.split('_')) != 3 for item in list_cases):
    use_classes = False
    print("Classes are not used")
else:
    classes = sorted(set([ item[:-4].split('_')[-1] for item in list_cases]))
    print(classes)
    
# For each image
for image in tqdm(list_cases):
    
    # Check image name format
    assert len(image[:-4].split("_")) == 3
    
    # Get class label
    if use_classes:
        label = image[:-4].split("_")[-1]
        label_index = classes.index(label)
    else:
        label_index = 0
    
    # Read mask
    mask = cv2.imread(os.path.join(masks_folder, image), cv2.IMREAD_GRAYSCALE)
    mask_size = mask.shape[1], mask.shape[0]
    
    # Get contours
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours_list = [contour.squeeze().tolist() for contour in contours]
    
    if len(contours_list) > 1:
        print(f"More than 1 coutour found for image {image}")

    # Copy image files
    shutil.copyfile(os.path.join(images_folder, image), os.path.join(detection_data_path, image))
    shutil.copyfile(os.path.join(images_folder, image), os.path.join(segmentation_data_path, image))

    # Oepn label files
    det_fp = open(os.path.join(detection_data_path, f"{image[:-4]}.txt"), "w")
    seg_fp = open(os.path.join(segmentation_data_path, f"{image[:-4]}.txt"), "w")
        
    # For each countour
    for contour in contours_list:
        
        # Get lesion countour data
        x, y = [point[0] for point in contour], [point[1] for point in contour] #TODO: use numpy
        x1, x2, y1, y2 = min(x), max(x), min(y), max(y)
        center_x, center_y = (x1+x2)/(2*mask_size[0]), (y1+y2)/(2*mask_size[1])
        width, height = (x2-x1)/mask_size[0], (y2-y1)/mask_size[1]
        
        # Check lesion size
        if width < 0.001 or height < 0.001:
            print(f"Small lesion found in image ({image})")
            continue
        
        # Write labels
        det_str = f"{center_x:0.6f} {center_y:0.6f} {width:0.6f} {height:0.6f}"
        det_fp.write(f"{label_index} {det_str}\n")   
        seg_str = " ".join([f"{point[0]/mask_size[0]:0.6f} {point[1]/mask_size[1]:0.6f}" for point in contour])
        seg_fp.write(f"{label_index} {seg_str}\n")
        
    # Close files
    det_fp.close()
    seg_fp.close()


Classes are not used


  0%|          | 11/3170 [00:00<00:30, 105.18it/s]

More than 1 coutour found for image 094_248_M.png
More than 1 coutour found for image 093_193_M.png


  1%|          | 30/3170 [00:00<00:20, 150.34it/s]

More than 1 coutour found for image 007_274_B.png
More than 1 coutour found for image 084_215_B.png


  4%|▎         | 117/3170 [00:00<00:15, 202.71it/s]

More than 1 coutour found for image 020_337_M.png
More than 1 coutour found for image 036_265_M.png
More than 1 coutour found for image 093_196_M.png
More than 1 coutour found for image 026_177_B.png
More than 1 coutour found for image 084_211_B.png
More than 1 coutour found for image 087_226_M.png
More than 1 coutour found for image 068_199_M.png
More than 1 coutour found for image 087_225_M.png
More than 1 coutour found for image 024_145_M.png


  6%|▌         | 181/3170 [00:00<00:14, 206.42it/s]

More than 1 coutour found for image 019_241_M.png
More than 1 coutour found for image 066_234_M.png
More than 1 coutour found for image 019_200_M.png
More than 1 coutour found for image 012_193_M.png
More than 1 coutour found for image 022_275_M.png


  7%|▋         | 223/3170 [00:01<00:14, 205.71it/s]

More than 1 coutour found for image 084_212_B.png
More than 1 coutour found for image 022_301_M.png
More than 1 coutour found for image 022_265_M.png
More than 1 coutour found for image 084_221_B.png
More than 1 coutour found for image 022_300_M.png


  8%|▊         | 264/3170 [00:01<00:14, 195.53it/s]

More than 1 coutour found for image 012_192_M.png
More than 1 coutour found for image 077_237_M.png
More than 1 coutour found for image 046_158_B.png


 10%|▉         | 305/3170 [00:01<00:14, 199.90it/s]

More than 1 coutour found for image 074_158_B.png
More than 1 coutour found for image 047_160_M.png
More than 1 coutour found for image 059_233_M.png


 11%|█         | 348/3170 [00:01<00:14, 200.29it/s]

More than 1 coutour found for image 046_146_B.png
More than 1 coutour found for image 026_178_B.png


 13%|█▎        | 412/3170 [00:02<00:13, 203.07it/s]

More than 1 coutour found for image 068_200_M.png
More than 1 coutour found for image 046_155_B.png


 14%|█▍        | 453/3170 [00:02<00:14, 187.54it/s]

More than 1 coutour found for image 012_190_M.png
More than 1 coutour found for image 007_270_B.png


 16%|█▌        | 492/3170 [00:02<00:15, 178.44it/s]

More than 1 coutour found for image 046_175_B.png
More than 1 coutour found for image 024_164_M.png
More than 1 coutour found for image 068_178_M.png
More than 1 coutour found for image 057_199_B.png


 18%|█▊        | 565/3170 [00:02<00:15, 171.68it/s]

More than 1 coutour found for image 070_155_M.png
More than 1 coutour found for image 079_138_B.png
More than 1 coutour found for image 029_160_M.png
More than 1 coutour found for image 016_190_M.png
More than 1 coutour found for image 019_199_M.png


 20%|█▉        | 622/3170 [00:03<00:13, 182.35it/s]

More than 1 coutour found for image 066_209_M.png
More than 1 coutour found for image 034_250_M.png
More than 1 coutour found for image 055_252_B.png
More than 1 coutour found for image 059_234_M.png
More than 1 coutour found for image 084_209_B.png
More than 1 coutour found for image 012_251_M.png


 21%|██        | 664/3170 [00:03<00:13, 192.34it/s]

More than 1 coutour found for image 012_189_M.png
More than 1 coutour found for image 026_161_B.png
More than 1 coutour found for image 039_196_B.png


 22%|██▏       | 684/3170 [00:03<00:13, 189.86it/s]

More than 1 coutour found for image 012_201_M.png
More than 1 coutour found for image 022_310_M.png


 23%|██▎       | 740/3170 [00:03<00:14, 167.31it/s]

More than 1 coutour found for image 094_223_M.png
More than 1 coutour found for image 079_136_B.png
More than 1 coutour found for image 084_216_B.png
More than 1 coutour found for image 056_220_M.png
More than 1 coutour found for image 077_235_M.png
More than 1 coutour found for image 093_200_M.png


 25%|██▍       | 779/3170 [00:04<00:13, 177.59it/s]

More than 1 coutour found for image 074_166_B.png
More than 1 coutour found for image 055_255_B.png
More than 1 coutour found for image 070_156_M.png


 27%|██▋       | 842/3170 [00:04<00:12, 193.72it/s]

More than 1 coutour found for image 084_210_B.png
More than 1 coutour found for image 046_152_B.png
More than 1 coutour found for image 068_174_M.png


 28%|██▊       | 883/3170 [00:04<00:11, 197.08it/s]

More than 1 coutour found for image 039_197_B.png
More than 1 coutour found for image 066_235_M.png
More than 1 coutour found for image 093_199_M.png
More than 1 coutour found for image 059_214_M.png
More than 1 coutour found for image 006_175_M.png


 29%|██▉       | 924/3170 [00:04<00:11, 198.61it/s]

More than 1 coutour found for image 074_157_B.png


 31%|███       | 982/3170 [00:05<00:12, 170.89it/s]

More than 1 coutour found for image 068_198_M.png
More than 1 coutour found for image 021_261_M.png
More than 1 coutour found for image 007_271_B.png
More than 1 coutour found for image 021_262_M.png
More than 1 coutour found for image 012_191_M.png
More than 1 coutour found for image 066_236_M.png


 32%|███▏      | 1018/3170 [00:05<00:12, 166.85it/s]

More than 1 coutour found for image 094_246_M.png
More than 1 coutour found for image 026_176_B.png
More than 1 coutour found for image 046_177_B.png


 34%|███▍      | 1071/3170 [00:05<00:12, 165.16it/s]

More than 1 coutour found for image 055_254_B.png
More than 1 coutour found for image 076_221_M.png
More than 1 coutour found for image 022_269_M.png
More than 1 coutour found for image 037_245_M.png


 35%|███▍      | 1106/3170 [00:06<00:12, 165.89it/s]

More than 1 coutour found for image 084_213_B.png
More than 1 coutour found for image 079_140_B.png


 37%|███▋      | 1158/3170 [00:06<00:11, 168.13it/s]

More than 1 coutour found for image 099_294_M.png
More than 1 coutour found for image 036_266_M.png


 38%|███▊      | 1193/3170 [00:06<00:12, 152.47it/s]

More than 1 coutour found for image 093_198_M.png


 39%|███▉      | 1249/3170 [00:06<00:11, 172.00it/s]

More than 1 coutour found for image 000_262_M.png
More than 1 coutour found for image 007_273_B.png
More than 1 coutour found for image 066_240_M.png
More than 1 coutour found for image 087_229_M.png
More than 1 coutour found for image 072_237_M.png
More than 1 coutour found for image 093_194_M.png


 42%|████▏     | 1326/3170 [00:07<00:10, 179.34it/s]

More than 1 coutour found for image 022_274_M.png
More than 1 coutour found for image 006_172_M.png
More than 1 coutour found for image 034_254_M.png


 43%|████▎     | 1368/3170 [00:07<00:09, 192.35it/s]

More than 1 coutour found for image 097_233_B.png
More than 1 coutour found for image 066_244_M.png
More than 1 coutour found for image 012_188_M.png
More than 1 coutour found for image 012_199_M.png


 44%|████▍     | 1409/3170 [00:07<00:09, 193.94it/s]

More than 1 coutour found for image 056_222_M.png
More than 1 coutour found for image 059_333_M.png
More than 1 coutour found for image 026_179_B.png
More than 1 coutour found for image 036_268_M.png
More than 1 coutour found for image 012_204_M.png
More than 1 coutour found for image 046_173_B.png
More than 1 coutour found for image 066_210_M.png


 46%|████▋     | 1470/3170 [00:08<00:08, 199.46it/s]

More than 1 coutour found for image 084_214_B.png
More than 1 coutour found for image 088_179_M.png
More than 1 coutour found for image 093_195_M.png
More than 1 coutour found for image 012_200_M.png


 48%|████▊     | 1512/3170 [00:08<00:08, 203.76it/s]

More than 1 coutour found for image 046_157_B.png
More than 1 coutour found for image 072_236_M.png
More than 1 coutour found for image 059_217_M.png
More than 1 coutour found for image 059_337_M.png


 49%|████▉     | 1553/3170 [00:08<00:08, 191.54it/s]

More than 1 coutour found for image 093_231_M.png
More than 1 coutour found for image 008_202_M.png
More than 1 coutour found for image 059_336_M.png
More than 1 coutour found for image 084_223_B.png
More than 1 coutour found for image 076_222_M.png
More than 1 coutour found for image 022_267_M.png
More than 1 coutour found for image 034_249_M.png


 51%|█████     | 1614/3170 [00:08<00:07, 195.21it/s]

More than 1 coutour found for image 024_163_M.png
More than 1 coutour found for image 094_249_M.png


 53%|█████▎    | 1675/3170 [00:09<00:07, 190.77it/s]

More than 1 coutour found for image 046_156_B.png
More than 1 coutour found for image 068_179_M.png
More than 1 coutour found for image 061_177_B.png
More than 1 coutour found for image 059_232_M.png
More than 1 coutour found for image 076_223_M.png


 55%|█████▍    | 1738/3170 [00:09<00:07, 199.79it/s]

More than 1 coutour found for image 024_158_M.png
More than 1 coutour found for image 072_235_M.png
More than 1 coutour found for image 019_201_M.png
More than 1 coutour found for image 066_242_M.png
More than 1 coutour found for image 034_252_M.png


 56%|█████▌    | 1780/3170 [00:09<00:07, 198.55it/s]

More than 1 coutour found for image 070_154_M.png
More than 1 coutour found for image 068_177_M.png
More than 1 coutour found for image 022_268_M.png
More than 1 coutour found for image 059_203_M.png
More than 1 coutour found for image 093_197_M.png


 57%|█████▋    | 1821/3170 [00:09<00:06, 197.12it/s]

More than 1 coutour found for image 012_246_M.png
More than 1 coutour found for image 017_213_B.png
More than 1 coutour found for image 007_272_B.png
More than 1 coutour found for image 022_264_M.png
More than 1 coutour found for image 012_203_M.png


 59%|█████▉    | 1881/3170 [00:10<00:06, 192.92it/s]

More than 1 coutour found for image 020_312_M.png
More than 1 coutour found for image 077_238_M.png
More than 1 coutour found for image 012_197_M.png


 61%|██████    | 1923/3170 [00:10<00:06, 199.72it/s]

More than 1 coutour found for image 088_177_M.png
More than 1 coutour found for image 036_263_M.png
More than 1 coutour found for image 029_156_M.png
More than 1 coutour found for image 066_243_M.png


 63%|██████▎   | 1986/3170 [00:10<00:05, 201.45it/s]

More than 1 coutour found for image 059_213_M.png
More than 1 coutour found for image 022_270_M.png
More than 1 coutour found for image 036_264_M.png
More than 1 coutour found for image 045_158_M.png


 64%|██████▍   | 2029/3170 [00:10<00:05, 205.01it/s]

More than 1 coutour found for image 029_157_M.png
More than 1 coutour found for image 061_159_B.png
More than 1 coutour found for image 056_221_M.png


 65%|██████▌   | 2071/3170 [00:11<00:05, 198.37it/s]

More than 1 coutour found for image 000_265_M.png
More than 1 coutour found for image 046_150_B.png
More than 1 coutour found for image 059_210_M.png
More than 1 coutour found for image 070_153_M.png
More than 1 coutour found for image 012_194_M.png


 67%|██████▋   | 2133/3170 [00:11<00:05, 200.34it/s]

More than 1 coutour found for image 045_159_M.png
More than 1 coutour found for image 029_158_M.png
More than 1 coutour found for image 094_247_M.png


 69%|██████▊   | 2174/3170 [00:11<00:05, 188.76it/s]

More than 1 coutour found for image 088_178_M.png
More than 1 coutour found for image 098_230_M.png
More than 1 coutour found for image 022_309_M.png
More than 1 coutour found for image 019_197_M.png


 70%|██████▉   | 2212/3170 [00:11<00:05, 177.83it/s]

More than 1 coutour found for image 093_201_M.png
More than 1 coutour found for image 020_311_M.png
More than 1 coutour found for image 066_239_M.png
More than 1 coutour found for image 059_211_M.png


 71%|███████   | 2248/3170 [00:12<00:05, 173.06it/s]

More than 1 coutour found for image 034_251_M.png


 73%|███████▎  | 2302/3170 [00:12<00:05, 163.04it/s]

More than 1 coutour found for image 046_174_B.png
More than 1 coutour found for image 012_196_M.png
More than 1 coutour found for image 087_243_M.png
More than 1 coutour found for image 035_226_M.png


 75%|███████▍  | 2364/3170 [00:12<00:04, 188.84it/s]

More than 1 coutour found for image 077_236_M.png
More than 1 coutour found for image 012_195_M.png
More than 1 coutour found for image 017_215_B.png


 77%|███████▋  | 2428/3170 [00:13<00:03, 201.99it/s]

More than 1 coutour found for image 017_214_B.png
More than 1 coutour found for image 097_232_B.png
More than 1 coutour found for image 003_299_M.png
More than 1 coutour found for image 085_229_M.png


 78%|███████▊  | 2469/3170 [00:13<00:03, 197.46it/s]

More than 1 coutour found for image 035_225_M.png
More than 1 coutour found for image 022_276_M.png
More than 1 coutour found for image 098_229_M.png
More than 1 coutour found for image 093_233_M.png


 80%|███████▉  | 2529/3170 [00:13<00:03, 196.74it/s]

More than 1 coutour found for image 055_253_B.png
More than 1 coutour found for image 032_162_M.png
More than 1 coutour found for image 026_167_B.png
More than 1 coutour found for image 022_271_M.png


 81%|████████  | 2571/3170 [00:13<00:02, 201.94it/s]

More than 1 coutour found for image 026_180_B.png
More than 1 coutour found for image 000_263_M.png
More than 1 coutour found for image 022_266_M.png
More than 1 coutour found for image 093_192_M.png


 82%|████████▏ | 2613/3170 [00:13<00:02, 200.57it/s]

More than 1 coutour found for image 022_314_M.png
More than 1 coutour found for image 059_335_M.png
More than 1 coutour found for image 022_263_M.png
More than 1 coutour found for image 070_157_M.png


 84%|████████▍ | 2673/3170 [00:14<00:02, 185.37it/s]

More than 1 coutour found for image 084_222_B.png
More than 1 coutour found for image 070_158_M.png
More than 1 coutour found for image 029_159_M.png


 86%|████████▋ | 2735/3170 [00:14<00:02, 195.46it/s]

More than 1 coutour found for image 066_241_M.png
More than 1 coutour found for image 019_196_M.png
More than 1 coutour found for image 046_149_B.png


 88%|████████▊ | 2777/3170 [00:14<00:01, 201.15it/s]

More than 1 coutour found for image 059_338_M.png
More than 1 coutour found for image 045_160_M.png
More than 1 coutour found for image 099_295_M.png
More than 1 coutour found for image 006_171_M.png
More than 1 coutour found for image 022_302_M.png
More than 1 coutour found for image 076_224_M.png


 89%|████████▉ | 2820/3170 [00:15<00:01, 198.91it/s]

More than 1 coutour found for image 046_153_B.png
More than 1 coutour found for image 059_212_M.png
More than 1 coutour found for image 059_206_M.png


 91%|█████████ | 2881/3170 [00:15<00:01, 197.85it/s]

More than 1 coutour found for image 036_267_M.png


 92%|█████████▏| 2922/3170 [00:15<00:01, 200.13it/s]

More than 1 coutour found for image 034_253_M.png
More than 1 coutour found for image 059_215_M.png
More than 1 coutour found for image 079_139_B.png


 95%|█████████▍| 3008/3170 [00:15<00:00, 204.19it/s]

More than 1 coutour found for image 072_218_M.png
More than 1 coutour found for image 000_264_M.png
More than 1 coutour found for image 012_198_M.png


 96%|█████████▌| 3050/3170 [00:16<00:00, 201.29it/s]

More than 1 coutour found for image 077_243_M.png
More than 1 coutour found for image 000_261_M.png
More than 1 coutour found for image 020_313_M.png


 98%|█████████▊| 3091/3170 [00:16<00:00, 189.89it/s]

More than 1 coutour found for image 046_176_B.png
More than 1 coutour found for image 022_260_M.png
More than 1 coutour found for image 079_137_B.png


 99%|█████████▊| 3130/3170 [00:16<00:00, 180.74it/s]

More than 1 coutour found for image 057_198_B.png
More than 1 coutour found for image 093_232_M.png
More than 1 coutour found for image 019_202_M.png
More than 1 coutour found for image 094_244_M.png
More than 1 coutour found for image 022_311_M.png
More than 1 coutour found for image 012_202_M.png


100%|██████████| 3170/3170 [00:16<00:00, 188.00it/s]

More than 1 coutour found for image 017_212_B.png
More than 1 coutour found for image 089_182_M.png





### Create YOLO Train/Val split data

In [11]:
# Get patients list
list_items = [item[:-4] for item in os.listdir(detection_data_path) if item.endswith(".png")]
patients = list(set([ item.split("_")[0] for item in list_items]))

# Get lesion type per patient
if use_classes:
    patients_type = {}     
    for item in list_items:
        patient = item.split("_")[0] 
        label = item.split("_")[-1]
        if label not in patients_type:
            patients_type[label] = [patient]
        elif patient not in patients_type[label]:
            patients_type[label].append(patient)
else:
    patients_type = {0: patients}
    
print({k: len(v) for k, v in patients_type.items()})
    
# Distribute patients in Train/Val using val_frac with balanced lesion types
train_p = []
val_p = []
for k in patients_type.keys():
    num_train = int(len(patients_type[k])*(1-val_frac))
    train_type_p, val_type_p = patients_type[k][:num_train], patients_type[k][num_train:]
    train_p += train_type_p
    val_p += val_type_p
    
print(train_p, len(train_p))
print(val_p, len(val_p))
    
# Get distributed patient images for train and val
train = [ f"{item}.png" for item in list_items if item.split("_")[0] in train_p]
val = [ f"{item}.png" for item in list_items if item.split("_")[0] in val_p]

import random
random.seed(0)
random.shuffle(train)
random.shuffle(val)

# Generate train.txt file
train_det = os.path.join(output_folder, f"train_det.txt" ) 
with open(train_det, "w") as fp:
    fp.writelines([os.path.join(detection_data_path, t) + '\n' for t in train])
    
train_seg = os.path.join(output_folder, f"train_seg.txt" ) 
with open(train_seg, "w") as fp:
    fp.writelines([os.path.join(segmentation_data_path, t) + '\n' for t in train])
    
# Generate val.txt file
val_det = os.path.join(output_folder, f"val_det.txt")
with open(val_det, "w") as fp:
    fp.writelines([os.path.join(detection_data_path, v) + '\n' for v in val])
    
val_seg = os.path.join(output_folder, f"val_seg.txt")
with open(val_seg, "w") as fp:
    fp.writelines([os.path.join(segmentation_data_path, v) + '\n' for v in val])
    

{0: 100}
['064', '040', '081', '077', '055', '093', '054', '033', '083', '018', '051', '068', '036', '084', '065', '030', '058', '000', '004', '037', '002', '060', '046', '078', '088', '057', '052', '034', '008', '003', '095', '011', '020', '023', '049', '059', '076', '098', '028', '086', '048', '006', '045', '026', '001', '073', '017', '056', '024', '032', '085', '019', '094', '005', '069', '071', '063', '035', '042', '043', '029', '047', '013', '079', '041', '027', '015', '012', '021', '070', '050', '097', '096', '092', '072', '016', '091', '082', '044', '038'] 80
['022', '061', '066', '080', '039', '014', '009', '075', '099', '074', '007', '031', '089', '053', '062', '025', '067', '090', '010', '087'] 20
