## Process ABUS UDG

Imports

In [1]:
import SimpleITK as sitk
import pandas as pd
from typing import Tuple, Dict, Any
from tqdm import tqdm
import numpy as np
import cv2
import os

Basic parameters

In [2]:
dataset_path = "datasets/ABUS_LesionSegmentation"
image_folder = "ImageData"
label_folder = "Anotation"

dataset_name = "abusudg_25"

output_folder = f"datasets/{dataset_name}_png"
slice_min_lesion_px = 25

use_classes = False  #If false, only one class is used (0)
val_frac = 0.2 # Validation fraction of the data

Load dataset info

In [4]:
def LoadAbusUDG(dataset_path, image_folder, label_folder):
    # Load dataset
    dataset_list = []
    annotations_items = os.listdir(os.path.join(dataset_path, label_folder))
    for item in os.listdir(os.path.join(dataset_path, image_folder)):
        case_id = item[:-4]
        view = item[-6:-4]
        data_path = os.path.join(image_folder, item)
        mask_filename = item + '_0.dcm' #Notice that there is more than 1 for some
        if mask_filename not in annotations_items: 
            #print(f"File {mask_filename} does not exists!")
            continue
        mask_path = os.path.join(label_folder, mask_filename)
        dataset_list.append({
            "case_id": case_id,
            "view": view,
            "data_path": data_path,
            "mask_path": mask_path
        })
    dataset = pd.DataFrame(dataset_list)
        
    print("Dataset columns:", dataset.columns)
    return dataset


In [6]:
# Load dataset
dataset = LoadAbusUDG(dataset_path, image_folder, label_folder)


# Get class index
if 'label' in dataset.columns:
    classes = dataset['label'].unique().tolist()
    print("Dataset classes:", classes)
else:
    print("No classes found!")
    classes = []

Dataset columns: Index(['case_id', 'view', 'data_path', 'mask_path'], dtype='object')
No classes found!


Define function for reading NRRD files

In [7]:
def ReadDCM(filename: str) -> Tuple[sitk.Image, Dict[str, Any]]:
    reader = sitk.ImageFileReader()
    reader.SetFileName(filename)
    reader.LoadPrivateTagsOn()
    reader.ReadImageInformation()

    image = reader.Execute()
    metadata = {}
    for key in reader.GetMetaDataKeys():
        if reader.HasMetaDataKey(key):
            metadata[key] = reader.GetMetaData(key)     
            
    return image, metadata

Define function for 8-bits normalization

In [8]:
def normalize_8bits(image: np.ndarray):
    return (255.0 *(image - image.min()) / (image.max() - image.min())).astype(np.uint8)

### Create 8-bit slices and masks

In [9]:
images_folder = os.path.join(output_folder, "images")
masks_folder = os.path.join(output_folder, "masks")

In [10]:
os.makedirs(masks_folder, exist_ok=True)
os.makedirs(images_folder, exist_ok=True)


# Create slice from data
for _, row in tqdm(dataset.iterrows(), total=len(dataset)):
    
    # Load data and GT
    id = row.case_id
    data, metadata = ReadDCM(os.path.join(dataset_path, row.data_path.replace('\\','/')))
    mask, _ = ReadDCM(os.path.join(dataset_path, row.mask_path.replace('\\','/')))
    
    # Check image and mask size and get numpy arrays
    assert data.GetSize() == mask.GetSize()
    data_array = sitk.GetArrayFromImage(data)
    mask_array = sitk.GetArrayFromImage(mask)
    image_size = data_array.shape[2], data_array.shape[1]
    

    # Chech the labels
    if len(classes) > 0:
        label = row.label
        assert label in classes
    else:
        label = 0
    
    # For each slice
    for idx in range(len(data_array)): #first dimension is z in numpy (z,y,x)
        data_slice = data_array[idx, ...]
        mask_slice = mask_array[idx, ...]
         
        # Use only slices with mask data
        if np.sum(mask_slice) == 0:
            continue
        
        # Binarize mask_Slice
        mask_slice = (mask_slice > 0)
        if np.sum(mask_slice) < slice_min_lesion_px:
            continue
        
        mask_slice = mask_slice.astype(np.uint8) * 255
        
        # Normalize to 8-bits
        data_slice = normalize_8bits(data_slice)
        
        
        contours, _ = cv2.findContours(mask_slice, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours_list = [contour.squeeze().tolist() for contour in contours]
        
        image_name = f"{id:0>3}_{idx+1:0>3}"
        
        # Save image
        image_out = os.path.join(images_folder, f"{image_name}_{label}.png")
        cv2.imwrite(image_out, data_slice)
    
        # Save mask
        image_out = os.path.join(masks_folder, f"{image_name}_{label}.png")
        cv2.imwrite(image_out,  mask_slice)


100%|██████████| 75/75 [06:43<00:00,  5.38s/it]


### Create YOLO data

Format YOLOv8 segement:
- https://docs.ultralytics.com/datasets/segment/
- `<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>`
- Others: https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/#13-prepare-dataset-for-yolov5

Format YOLOv8 detect:
- https://docs.ultralytics.com/datasets/detect/
- `<object-class> <x> <y> <width> <height>`

In [11]:
segmentation_data_path = os.path.join(output_folder, f"yolo_seg_data")
detection_data_path = os.path.join(output_folder, f"yolo_det_data")

In [12]:
import shutil

# Create folders
os.makedirs(segmentation_data_path, exist_ok=True)
os.makedirs(detection_data_path, exist_ok=True)
        
# List of image cases
list_cases = os.listdir(images_folder)

# Get classes
if not use_classes or any(len(item.split('_')) != 3 for item in list_cases):
    use_classes = False
    print("Classes are not used")
else:
    classes = sorted(set([ item[:-4].split('_')[-1] for item in list_cases]))
    print(classes)
    
# For each image
for image in tqdm(list_cases):
    
    # Check image name format
    assert len(image[:-4].split("_")) == 3
    
    # Get class label
    if use_classes:
        label = image[:-4].split("_")[-1]
        label_index = classes.index(label)
    else:
        label_index = 0
    
    # Read mask
    mask = cv2.imread(os.path.join(masks_folder, image), cv2.IMREAD_GRAYSCALE)
    mask_size = mask.shape[1], mask.shape[0]
    
    # Get contours
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours_list = [contour.squeeze().tolist() for contour in contours]
    
    if len(contours_list) > 1:
        print(f"More than 1 coutour found for image {image}")

    # Copy image files
    shutil.copyfile(os.path.join(images_folder, image), os.path.join(detection_data_path, image))
    shutil.copyfile(os.path.join(images_folder, image), os.path.join(segmentation_data_path, image))

    # Oepn label files
    det_fp = open(os.path.join(detection_data_path, f"{image[:-4]}.txt"), "w")
    seg_fp = open(os.path.join(segmentation_data_path, f"{image[:-4]}.txt"), "w")
        
    # For each countour
    for contour in contours_list:
        
        # Get lesion countour data
        x, y = [point[0] for point in contour], [point[1] for point in contour] #TODO: use numpy
        x1, x2, y1, y2 = min(x), max(x), min(y), max(y)
        center_x, center_y = (x1+x2)/(2*mask_size[0]), (y1+y2)/(2*mask_size[1])
        width, height = (x2-x1)/mask_size[0], (y2-y1)/mask_size[1]
        
        # Check lesion size
        if width < 0.001 or height < 0.001:
            print(f"Small lesion found in image ({image})")
            continue
        
        # Write labels
        det_str = f"{center_x:0.6f} {center_y:0.6f} {width:0.6f} {height:0.6f}"
        det_fp.write(f"{label_index} {det_str}\n")   
        seg_str = " ".join([f"{point[0]/mask_size[0]:0.6f} {point[1]/mask_size[1]:0.6f}" for point in contour])
        seg_fp.write(f"{label_index} {seg_str}\n")
        
    # Close files
    det_fp.close()
    seg_fp.close()


Classes are not used


  1%|          | 14/1766 [00:00<00:12, 135.05it/s]

More than 1 coutour found for image t0200181801al_169_0.png
More than 1 coutour found for image t0200181401ll_138_0.png
More than 1 coutour found for image t0200179801mr_271_0.png
More than 1 coutour found for image t0200192701lr_229_0.png
More than 1 coutour found for image t0680000501ll_178_0.png
More than 1 coutour found for image t0200067801ll_145_0.png
More than 1 coutour found for image t0200177701lr_175_0.png


  2%|▏         | 40/1766 [00:00<00:08, 207.10it/s]

More than 1 coutour found for image t0600010001al_111_0.png
More than 1 coutour found for image t0200067801ll_169_0.png


  6%|▌         | 103/1766 [00:00<00:06, 272.11it/s]

More than 1 coutour found for image t0200177401ll_204_0.png
More than 1 coutour found for image t0200075501ar_219_0.png
More than 1 coutour found for image t0200179801sr_181_0.png
More than 1 coutour found for image t0600002901ll_108_0.png
More than 1 coutour found for image t0200183001ll_201_0.png
More than 1 coutour found for image t0200177501ll_129_0.png
More than 1 coutour found for image t0200177701lr_176_0.png


 10%|▉         | 171/1766 [00:00<00:05, 308.05it/s]

More than 1 coutour found for image t0200181801al_187_0.png
More than 1 coutour found for image t0200072701lr_114_0.png
More than 1 coutour found for image t0200181401al_168_0.png
More than 1 coutour found for image t0600041801ml_116_0.png
More than 1 coutour found for image t0600009901ar_202_0.png
More than 1 coutour found for image t0200177401ll_197_0.png


 13%|█▎        | 234/1766 [00:00<00:04, 309.53it/s]

More than 1 coutour found for image t0200177501ll_128_0.png
More than 1 coutour found for image t0200183001ll_179_0.png
More than 1 coutour found for image t0680000301ll_058_0.png
More than 1 coutour found for image t0200119801lr_193_0.png
More than 1 coutour found for image t0200072701lr_058_0.png
More than 1 coutour found for image t0200068101ll_229_0.png


 17%|█▋        | 307/1766 [00:01<00:04, 334.93it/s]

More than 1 coutour found for image t0200181401al_151_0.png
More than 1 coutour found for image t0200071401ar_101_0.png
More than 1 coutour found for image t0200072701lr_091_0.png
More than 1 coutour found for image t0650011001sr_152_0.png
More than 1 coutour found for image t0200177501ml_116_0.png
More than 1 coutour found for image t0600008201al_166_0.png
More than 1 coutour found for image t0200128301lr_078_0.png
More than 1 coutour found for image t0200119801lr_178_0.png
More than 1 coutour found for image t0200177501ml_094_0.png
More than 1 coutour found for image t0200008102ar_123_0.png
More than 1 coutour found for image t0200073701ar_111_0.png
More than 1 coutour found for image t0200119801lr_192_0.png
More than 1 coutour found for image t0200125801lr_216_0.png
More than 1 coutour found for image t0200181401ll_147_0.png
More than 1 coutour found for image t0200192101lr_205_0.png
More than 1 coutour found for image t0200073701mr_128_0.png


 23%|██▎       | 414/1766 [00:01<00:03, 345.21it/s]

More than 1 coutour found for image t0650009701ml_224_0.png
More than 1 coutour found for image t0200192701lr_203_0.png
More than 1 coutour found for image t0200177501al_122_0.png
More than 1 coutour found for image t0200067801ll_159_0.png
More than 1 coutour found for image t0200071401ar_097_0.png
More than 1 coutour found for image t0200178701ll_190_0.png
More than 1 coutour found for image t0200192701lr_204_0.png
More than 1 coutour found for image t0200178701ll_213_0.png
More than 1 coutour found for image t0200181801al_171_0.png
More than 1 coutour found for image t0200125801lr_218_0.png
More than 1 coutour found for image t0200177501al_123_0.png


 28%|██▊       | 486/1766 [00:01<00:03, 350.10it/s]

More than 1 coutour found for image t0200126401ar_066_0.png
More than 1 coutour found for image t0200125801lr_219_0.png
More than 1 coutour found for image t0200177701lr_184_0.png
More than 1 coutour found for image t0200069701al_163_0.png
More than 1 coutour found for image t0200069701al_170_0.png
More than 1 coutour found for image t0600008701al_143_0.png
More than 1 coutour found for image t0200183001al_187_0.png
More than 1 coutour found for image t0600007001al_188_0.png
More than 1 coutour found for image t0600008301lr_088_0.png
More than 1 coutour found for image t0200126401ar_063_0.png
More than 1 coutour found for image t0200068801ll_122_0.png
More than 1 coutour found for image t0200179801mr_282_0.png
More than 1 coutour found for image t0650004401ar_276_0.png


 32%|███▏      | 564/1766 [00:01<00:03, 366.01it/s]

More than 1 coutour found for image t0200181401ll_157_0.png
More than 1 coutour found for image t0200181401ll_141_0.png
More than 1 coutour found for image t0200177501ll_149_0.png
More than 1 coutour found for image t0650009701sl_180_0.png
More than 1 coutour found for image t0200183901lr_042_0.png
More than 1 coutour found for image t0600008701al_103_0.png
More than 1 coutour found for image t0200183901lr_054_0.png
More than 1 coutour found for image t0200182201lr_156_0.png
More than 1 coutour found for image t0200192701lr_202_0.png


 36%|███▌      | 637/1766 [00:01<00:03, 342.70it/s]

More than 1 coutour found for image t0200071401ar_099_0.png
More than 1 coutour found for image t0200177401al_185_0.png
More than 1 coutour found for image t0200181801al_190_0.png
More than 1 coutour found for image t0200183901lr_051_0.png
More than 1 coutour found for image t0200178701ll_192_0.png
More than 1 coutour found for image t0650004401ar_275_0.png
More than 1 coutour found for image t0200192701lr_218_0.png
More than 1 coutour found for image t0200177401al_179_0.png
More than 1 coutour found for image t0200119801lr_194_0.png
More than 1 coutour found for image t0200177701lr_185_0.png
More than 1 coutour found for image t0200178701ll_214_0.png


 40%|████      | 709/1766 [00:02<00:03, 338.07it/s]

More than 1 coutour found for image t0200072701lr_115_0.png
More than 1 coutour found for image t0200183901lr_053_0.png
More than 1 coutour found for image t0200177701lr_167_0.png
More than 1 coutour found for image t0200069701al_172_0.png
More than 1 coutour found for image t0200181801al_179_0.png
More than 1 coutour found for image t0600008301lr_087_0.png
More than 1 coutour found for image t0600041701ar_082_0.png
Small lesion found in image (t0600041701ar_082_0.png)
More than 1 coutour found for image t0200067801ll_143_0.png


 44%|████▍     | 780/1766 [00:02<00:02, 343.62it/s]

More than 1 coutour found for image t0200074001ar_081_0.png
More than 1 coutour found for image t0680003301lr_249_0.png
More than 1 coutour found for image t0600008701al_101_0.png
More than 1 coutour found for image t0200183901lr_052_0.png
More than 1 coutour found for image t0200177701lr_187_0.png
More than 1 coutour found for image t0600008301lr_057_0.png
More than 1 coutour found for image t0200131001lr_177_0.png
More than 1 coutour found for image t0600041601ar_132_0.png
More than 1 coutour found for image t0200067801ll_172_0.png


 48%|████▊     | 851/1766 [00:02<00:02, 344.56it/s]

More than 1 coutour found for image t0600006601ar_092_0.png
More than 1 coutour found for image t0200177501ll_151_0.png
More than 1 coutour found for image t0200191001lr_117_0.png
More than 1 coutour found for image t0650000701ll_217_0.png
More than 1 coutour found for image t0200203101ar_205_0.png
More than 1 coutour found for image t0200069701al_151_0.png
More than 1 coutour found for image t0600041601mr_152_0.png
More than 1 coutour found for image t0200128301lr_095_0.png
More than 1 coutour found for image t0200181401ml_120_0.png
More than 1 coutour found for image t0680003301lr_264_0.png
More than 1 coutour found for image t0200177501al_140_0.png
More than 1 coutour found for image t0200194101lr_194_0.png
More than 1 coutour found for image t0200067801ll_171_0.png


 52%|█████▏    | 921/1766 [00:02<00:02, 337.73it/s]

More than 1 coutour found for image t0600041701ar_081_0.png
More than 1 coutour found for image t0200119801lr_190_0.png
More than 1 coutour found for image t0600000101ll_156_0.png
More than 1 coutour found for image t0200177401al_167_0.png
More than 1 coutour found for image t0200179801sr_169_0.png
More than 1 coutour found for image t0600010001al_124_0.png


 56%|█████▌    | 989/1766 [00:03<00:02, 332.91it/s]

More than 1 coutour found for image t0200181401al_166_0.png
More than 1 coutour found for image t0600010001al_084_0.png
More than 1 coutour found for image t0600041601ar_143_0.png
More than 1 coutour found for image t0200069701al_178_0.png
More than 1 coutour found for image t0200192701lr_207_0.png
More than 1 coutour found for image t0200194101lr_171_0.png
More than 1 coutour found for image t0200181401ml_102_0.png
More than 1 coutour found for image t0200183001al_172_0.png
More than 1 coutour found for image t0200181401ll_139_0.png
More than 1 coutour found for image t0200183001al_188_0.png


 60%|█████▉    | 1056/1766 [00:03<00:02, 327.68it/s]

More than 1 coutour found for image t0200177401al_166_0.png
More than 1 coutour found for image t0200125801lr_201_0.png
More than 1 coutour found for image t0600008701al_102_0.png
More than 1 coutour found for image t0200192701lr_228_0.png
More than 1 coutour found for image t0200068801ll_114_0.png
More than 1 coutour found for image t0200183001ll_181_0.png
More than 1 coutour found for image t0200170701al_081_0.png
More than 1 coutour found for image t0200128301lr_102_0.png


 65%|██████▌   | 1154/1766 [00:03<00:01, 317.74it/s]

More than 1 coutour found for image t0200073701ar_088_0.png
More than 1 coutour found for image t0200128301lr_082_0.png
More than 1 coutour found for image t0200119801lr_179_0.png
More than 1 coutour found for image t0200072701lr_110_0.png
More than 1 coutour found for image t0200203101ar_219_0.png
More than 1 coutour found for image t0680000301ll_056_0.png
More than 1 coutour found for image t0650011001sr_153_0.png
More than 1 coutour found for image t0600010001al_120_0.png
More than 1 coutour found for image t0650011001sr_151_0.png
More than 1 coutour found for image t0200177401ll_184_0.png
More than 1 coutour found for image t0200192701lr_205_0.png


 67%|██████▋   | 1186/1766 [00:03<00:01, 299.84it/s]

More than 1 coutour found for image t0200177501ll_131_0.png
More than 1 coutour found for image t0200183001al_171_0.png
More than 1 coutour found for image t0200181401al_167_0.png
More than 1 coutour found for image t0200008102ar_130_0.png
More than 1 coutour found for image t0200072701lr_125_0.png
More than 1 coutour found for image t0200181801al_168_0.png
More than 1 coutour found for image t0200192701lr_201_0.png
More than 1 coutour found for image t0650009701sl_171_0.png
More than 1 coutour found for image t0680000301ll_081_0.png


 71%|███████   | 1246/1766 [00:03<00:01, 283.70it/s]

More than 1 coutour found for image t0200194101lr_188_0.png
More than 1 coutour found for image t0200194101lr_192_0.png
More than 1 coutour found for image t0680000301ll_080_0.png
More than 1 coutour found for image t0200191001lr_102_0.png
More than 1 coutour found for image t0200170701al_082_0.png
More than 1 coutour found for image t0600008301lr_085_0.png
More than 1 coutour found for image t0200182501ll_199_0.png
More than 1 coutour found for image t0200177701lr_186_0.png
More than 1 coutour found for image t0600009701ar_159_0.png


 77%|███████▋  | 1352/1766 [00:04<00:01, 324.04it/s]

More than 1 coutour found for image t0200008102ar_109_0.png
More than 1 coutour found for image t0200178701ll_218_0.png
More than 1 coutour found for image t0200178701ll_221_0.png
More than 1 coutour found for image t0650009701sl_178_0.png
More than 1 coutour found for image t0200177701lr_166_0.png
More than 1 coutour found for image t0600008201al_176_0.png
More than 1 coutour found for image t0200177501ll_150_0.png
More than 1 coutour found for image t0200069701al_173_0.png


 80%|████████  | 1417/1766 [00:04<00:01, 305.23it/s]

More than 1 coutour found for image t0200211801al_174_0.png
More than 1 coutour found for image t0200181801al_170_0.png
More than 1 coutour found for image t0200183901lr_055_0.png
More than 1 coutour found for image t0200183001ll_187_0.png
More than 1 coutour found for image t0600009701ar_177_0.png
More than 1 coutour found for image t0200128301lr_112_0.png
More than 1 coutour found for image t0650011001sr_168_0.png
More than 1 coutour found for image t0200125801lr_203_0.png
More than 1 coutour found for image t0650011001sr_150_0.png
More than 1 coutour found for image t0200181401ll_156_0.png
More than 1 coutour found for image t0600041601mr_150_0.png


 84%|████████▍ | 1480/1766 [00:04<00:00, 300.85it/s]

More than 1 coutour found for image t0600003401ml_029_0.png
More than 1 coutour found for image t0680003301lr_263_0.png
More than 1 coutour found for image t0200181401ml_108_0.png
More than 1 coutour found for image t0200067801ll_146_0.png
More than 1 coutour found for image t0200128301lr_099_0.png
More than 1 coutour found for image t0200128501ll_164_0.png
More than 1 coutour found for image t0200181401al_152_0.png
More than 1 coutour found for image t0200069701al_148_0.png
More than 1 coutour found for image t0600010001al_087_0.png
More than 1 coutour found for image t0200128301lr_106_0.png


 87%|████████▋ | 1541/1766 [00:04<00:00, 294.29it/s]

More than 1 coutour found for image t0200177701lr_177_0.png
More than 1 coutour found for image t0200181401ml_122_0.png
More than 1 coutour found for image t0200067801ll_144_0.png
More than 1 coutour found for image t0200181401al_169_0.png
More than 1 coutour found for image t0600007501al_202_0.png


 91%|█████████ | 1604/1766 [00:05<00:00, 301.62it/s]

More than 1 coutour found for image t0200119801lr_180_0.png
More than 1 coutour found for image t0600008201al_177_0.png
More than 1 coutour found for image t0200069701al_166_0.png
More than 1 coutour found for image t0200203101lr_209_0.png
More than 1 coutour found for image t0200179801sr_179_0.png
More than 1 coutour found for image t0600008201al_178_0.png
More than 1 coutour found for image t0200125801lr_217_0.png
More than 1 coutour found for image t0200181801al_167_0.png
More than 1 coutour found for image t0650009701ml_223_0.png
More than 1 coutour found for image t0200181401ml_105_0.png


 95%|█████████▍| 1669/1766 [00:05<00:00, 300.63it/s]

More than 1 coutour found for image t0200203101lr_188_0.png
More than 1 coutour found for image t0200191001lr_114_0.png
More than 1 coutour found for image t0200177701lr_168_0.png
More than 1 coutour found for image t0200181401ll_158_0.png
More than 1 coutour found for image t0200128301lr_113_0.png
More than 1 coutour found for image t0200008102ar_108_0.png
More than 1 coutour found for image t0680003301lr_246_0.png
More than 1 coutour found for image t0200183001ll_188_0.png
More than 1 coutour found for image t0600000101ll_157_0.png
More than 1 coutour found for image t0600003401ml_030_0.png
More than 1 coutour found for image t0200191001lr_101_0.png
More than 1 coutour found for image t0200177501ml_115_0.png
More than 1 coutour found for image t0600007001al_196_0.png


 98%|█████████▊| 1739/1766 [00:05<00:00, 322.50it/s]

More than 1 coutour found for image t0200177501al_141_0.png
More than 1 coutour found for image t0200183001ll_202_0.png
More than 1 coutour found for image t0200131401ll_264_0.png
More than 1 coutour found for image t0650009701sl_172_0.png
More than 1 coutour found for image t0200192101lr_206_0.png
More than 1 coutour found for image t0200178701ll_193_0.png
More than 1 coutour found for image t0200179801mr_269_0.png
More than 1 coutour found for image t0600008301lr_089_0.png
More than 1 coutour found for image t0200125801lr_202_0.png
More than 1 coutour found for image t0600001101al_209_0.png
More than 1 coutour found for image t0600009901ar_187_0.png
More than 1 coutour found for image t0600008701al_125_0.png


100%|██████████| 1766/1766 [00:05<00:00, 318.63it/s]

More than 1 coutour found for image t0200128301lr_103_0.png
More than 1 coutour found for image t0200178701ll_216_0.png
More than 1 coutour found for image t0200177401al_174_0.png
More than 1 coutour found for image t0200177401al_168_0.png





### Create YOLO Train/Val split data

In [13]:
# Get patients list
list_items = [item[:-4] for item in os.listdir(detection_data_path) if item.endswith(".png")]
patients = list(set([ item.split("_")[0] for item in list_items]))

# Get lesion type per patient
if use_classes:
    patients_type = {}     
    for item in list_items:
        patient = item.split("_")[0] 
        label = item.split("_")[-1]
        if label not in patients_type:
            patients_type[label] = [patient]
        elif patient not in patients_type[label]:
            patients_type[label].append(patient)
else:
    patients_type = {0: patients}
    
print({k: len(v) for k, v in patients_type.items()})
    
# Distribute patients in Train/Val using val_frac with balanced lesion types
train_p = []
val_p = []
for k in patients_type.keys():
    num_train = int(len(patients_type[k])*(1-val_frac))
    train_type_p, val_type_p = patients_type[k][:num_train], patients_type[k][num_train:]
    train_p += train_type_p
    val_p += val_type_p
    
print(train_p, len(train_p))
print(val_p, len(val_p))
    
# Get distributed patient images for train and val
train = [ f"{item}.png" for item in list_items if item.split("_")[0] in train_p]
val = [ f"{item}.png" for item in list_items if item.split("_")[0] in val_p]

import random
random.seed(0)
random.shuffle(train)
random.shuffle(val)

# Generate train.txt file
train_det = os.path.join(output_folder, f"train_det.txt" ) 
with open(train_det, "w") as fp:
    fp.writelines([os.path.join(detection_data_path, t) + '\n' for t in train])
    
train_seg = os.path.join(output_folder, f"train_seg.txt" ) 
with open(train_seg, "w") as fp:
    fp.writelines([os.path.join(segmentation_data_path, t) + '\n' for t in train])
    
# Generate val.txt file
val_det = os.path.join(output_folder, f"val_det.txt")
with open(val_det, "w") as fp:
    fp.writelines([os.path.join(detection_data_path, v) + '\n' for v in val])
    
val_seg = os.path.join(output_folder, f"val_seg.txt")
with open(val_seg, "w") as fp:
    fp.writelines([os.path.join(segmentation_data_path, v) + '\n' for v in val])
    

{0: 75}
['t0200170701al', 't0200179801mr', 't0200073701ar', 't0200181401al', 't0200181401ml', 't0650001601mr', 't0600041601ar', 't0600007201sr', 't0200184502al', 't0200068101ll', 't0200177501ll', 't0600000101ll', 't0650009701ml', 't0650011001sr', 't0600007501al', 't0200192101lr', 't0200203101lr', 't0200183001ll', 't0600001101al', 't0600001501al', 't0600008301lr', 't0680003301lr', 't0600008201al', 't0200071401ar', 't0200177401al', 't0200125801lr', 't0600008701al', 't0200131401ll', 't0600009901ar', 't0200183001al', 't0680000501ll', 't0200194101lr', 't0200068801ll', 't0200182201lr', 't0200179801sr', 't0200177501ml', 't0650004401ar', 't0600003401ml', 't0600006601ar', 't0600041701ar', 't0200072701lr', 't0200211801al', 't0200075501ar', 't0600010001al', 't0200178701ll', 't0200181801al', 't0200074001ar', 't0600009701ar', 't0200183901lr', 't0650009701sl', 't0200128301lr', 't0200008102ar', 't0200067801ll', 't0200119801lr', 't0600002901ll', 't0600041601mr', 't0200128501ll', 't0600006001ll', 't020