# In [12]:
import cv2
import numpy as np
from pathlib import Path
import random
import os
from shutil import copy2

def resize_and_save_image(image_path, dest_path, scale_dims=(640, 640)):
    """
    Resize an image and save it to the destination path.

    Args:
        image_path: Path of the image to read.
        dest_path: Path the resized image is written to.
        scale_dims: Target size handed to ``cv2.resize`` (OpenCV interprets
            this tuple as ``(width, height)``).

    Raises:
        OSError: If the source image cannot be read or the resized image
            cannot be written.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise OSError(f"Could not read image: {image_path}")

    image_resized = cv2.resize(image, scale_dims)

    # cv2.imwrite returns False when the file could not be written.
    if not cv2.imwrite(str(dest_path), image_resized):
        raise OSError(f"Could not write image: {dest_path}")

def find_significant_points(contour, angle_threshold=30):
    """
    Simplify a contour to include points with significant changes in direction.

    The first and last points are always kept. An interior point is kept when
    the direction of travel changes by more than ``angle_threshold`` degrees
    at that point. The contour is treated as an open path: no angle is
    computed across the last -> first wrap-around.

    Args:
        contour: Sequence of points, each read as ``contour[i][0]`` (the
            nested per-point layout the caller obtains from
            ``cv2.findContours``).
        angle_threshold: Minimum turning angle in degrees for an interior
            point to be retained.

    Returns:
        ``np.ndarray`` holding the retained points in their original order
        and per-point layout. Contours with fewer than three points are
        returned unchanged (as an array), since they have no interior point.
    """
    def angle_between(p1, p2, p3):
        """
        Return the turning angle in degrees at p2 along the path p1 -> p2 -> p3.
        """
        # Work in float so integer/unsigned pixel coordinates cannot wrap.
        vector1 = np.asarray(p2, dtype=float) - np.asarray(p1, dtype=float)
        vector2 = np.asarray(p3, dtype=float) - np.asarray(p2, dtype=float)
        norm1 = np.linalg.norm(vector1)
        norm2 = np.linalg.norm(vector2)
        if norm1 == 0 or norm2 == 0:
            # A repeated point has no direction; treat it as "no turn"
            # instead of dividing by zero and producing NaN.
            return 0.0
        cos_angle = np.dot(vector1, vector2) / (norm1 * norm2)
        # Rounding can push the cosine marginally outside [-1, 1], which would
        # make arccos return NaN and silently drop straight reversals.
        return np.degrees(np.arccos(np.clip(cos_angle, -1.0, 1.0)))

    if len(contour) < 3:
        # No interior points to test; avoid duplicating a single point and
        # avoid indexing into an empty contour.
        return np.array(contour)

    significant_points = [contour[0]]
    for i in range(1, len(contour) - 1):
        angle = angle_between(contour[i - 1][0], contour[i][0], contour[i + 1][0])
        if angle > angle_threshold:
            significant_points.append(contour[i])
    significant_points.append(contour[-1])
    return np.array(significant_points)

def convert_and_save_dataset(src_dir, dest_dir, val_ratio=0.1, img_dims=(640, 640)):
    """
    Process datasets, resize images and masks, and convert masks to YOLO format.

    For each of the ``train`` and ``test`` subsets, every ``*.png`` under
    ``src_dir/<subset>/images`` is resized and written to
    ``dest_dir/<subset>/images``. Its mask (same file name with ``flair``
    replaced by ``consensus``, read from ``src_dir/<subset>/labels``) is
    resized, its external contours are simplified, and one label line per
    contour is written to ``dest_dir/<subset>/labels/<image stem>.txt``.
    Afterwards a random fraction of the converted training images, together
    with their label files, is moved into ``dest_dir/val``.

    Args:
        src_dir: Root of the source dataset (``str`` or ``Path``).
        dest_dir: Root of the converted dataset (``str`` or ``Path``).
        val_ratio: Fraction of training images moved to the validation split.
        img_dims: Target size passed to ``cv2.resize``; contour coordinates
            are divided by it to obtain relative coordinates.

    Raises:
        FileNotFoundError: If the mask belonging to an image cannot be read.
    """
    src_dir = Path(src_dir)
    dest_dir = Path(dest_dir)
    subsets = ['train', 'test']
    train_images = []
    scale = np.array(img_dims, dtype=float)

    for subset in subsets:
        images_dir = src_dir / subset / 'images'
        labels_dir = src_dir / subset / 'labels'
        dest_images_dir = dest_dir / subset / 'images'
        dest_labels_dir = dest_dir / subset / 'labels'

        dest_images_dir.mkdir(parents=True, exist_ok=True)
        dest_labels_dir.mkdir(parents=True, exist_ok=True)

        for image_path in images_dir.glob("*.png"):
            # Load the mask first so that a missing mask fails before any
            # output for this image has been written.
            mask_name = image_path.name.replace("flair", "consensus")
            mask_path = labels_dir / mask_name
            mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
            if mask is None:
                # cv2.imread returns None instead of raising on failure.
                raise FileNotFoundError(
                    f"Mask for {image_path} not found or unreadable: {mask_path}"
                )

            # Resize and save image
            dest_image_path = dest_images_dir / image_path.name
            resize_and_save_image(image_path, dest_image_path, img_dims)

            # Nearest-neighbour keeps the mask's original values; the default
            # bilinear interpolation would blend values along label edges.
            mask_resized = cv2.resize(mask, img_dims, interpolation=cv2.INTER_NEAREST)
            contours, _ = cv2.findContours(mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            label_lines = []
            for contour in contours:
                points = find_significant_points(contour).reshape(-1, 2)
                if len(points) < 3:
                    # Fewer than three vertices is not a polygon (and a
                    # two-point line would have the same column count as a
                    # bounding-box label), so skip it.
                    continue
                rel_points = (points / scale).flatten()
                label_lines.append("1 " + " ".join(map(str, rel_points)) + '\n')

            # The label file shares the image's stem so the two stay paired.
            # It is written even when empty (no contours found).
            yolo_annotation_file = dest_labels_dir / image_path.with_suffix('.txt').name
            with open(yolo_annotation_file, 'w') as file:
                file.writelines(label_lines)

            if subset == 'train':
                train_images.append(dest_image_path)

    # Create validation set
    val_images = random.sample(train_images, int(len(train_images) * val_ratio))
    val_images_dir = dest_dir / 'val' / 'images'
    val_labels_dir = dest_dir / 'val' / 'labels'
    for image_path in val_images:
        val_images_dir.mkdir(parents=True, exist_ok=True)
        val_labels_dir.mkdir(parents=True, exist_ok=True)

        label_name = image_path.with_suffix('.txt').name
        # Path.replace overwrites an existing target, so re-running the
        # conversion does not fail on files left by an earlier run.
        image_path.replace(val_images_dir / image_path.name)
        (dest_dir / 'train' / 'labels' / label_name).replace(val_labels_dir / label_name)

# Run the conversion only when this file is executed directly (a notebook
# cell also satisfies this), not when its functions are imported elsewhere.
if __name__ == "__main__":
    # Define source and destination directories
    src_dir = Path("C:/Users/zhuyi/Desktop/CREATIS/UNet_YOLO/MSLS-YOLOv8/UNet/data")
    dest_dir = Path("C:/Users/zhuyi/Desktop/CREATIS/UNet_YOLO/MSLS-YOLOv8/YOLOv8/datasets")

    # Convert and save datasets
    convert_and_save_dataset(src_dir, dest_dir)

# In [5]:
# import cv2
# import numpy as np
# from pathlib import Path
# import random
# import os
# from shutil import copy2, rmtree

# def resize_and_save_image(image_path, dest_path, scale_dims=(640, 640)):
#     image = cv2.imread(str(image_path))
#     image_resized = cv2.resize(image, scale_dims)
#     cv2.imwrite(str(dest_path), image_resized)

# def find_significant_points(contour, angle_threshold=30):
#     def angle_between(p1, p2, p3):
#         vector1 = p2 - p1
#         vector2 = p3 - p2
#         unit_vector1 = vector1 / np.linalg.norm(vector1)
#         unit_vector2 = vector2 / np.linalg.norm(vector2)
#         dot_product = np.dot(unit_vector1, unit_vector2)
#         angle = np.arccos(dot_product)
#         return np.degrees(angle)

#     significant_points = [contour[0]]
#     for i in range(1, len(contour) - 1):
#         angle = angle_between(contour[i - 1][0], contour[i][0], contour[i + 1][0])
#         if angle > angle_threshold:
#             significant_points.append(contour[i])
#     significant_points.append(contour[-1])
#     return np.array(significant_points)

# def convert_and_save_dataset(src_dir, dest_dir, val_ratio=0.1, img_dims=(640, 640)):
#     subsets = ['train', 'test']
#     all_images = []

#     for subset in subsets:
#         images_dir = src_dir / subset / 'images'
#         labels_dir = src_dir / subset / 'labels'
#         dest_images_dir = dest_dir / subset / 'images'
#         dest_labels_dir = dest_dir / subset / 'labels'
        
#         dest_images_dir.mkdir(parents=True, exist_ok=True)
#         dest_labels_dir.mkdir(parents=True, exist_ok=True)
        
#         for image_path in images_dir.glob("*.png"):
#             dest_image_path = dest_images_dir / image_path.name
#             resize_and_save_image(image_path, dest_image_path, img_dims)

#             mask_name = image_path.name.replace("flair", "consensus")
#             mask_path = labels_dir / mask_name
#             mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
#             mask_resized = cv2.resize(mask, img_dims)
#             contours, _ = cv2.findContours(mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
#             yolo_annotation_file = dest_labels_dir / mask_name.replace('.png', '.txt')
#             with open(yolo_annotation_file, 'w') as file:
#                 for contour in contours:
#                     significant_points = find_significant_points(contour)
#                     raveled_points = significant_points.reshape(-1, 2)
#                     rel_points = raveled_points / np.array(img_dims)
#                     flat_points = rel_points.flatten()
#                     yolo_format_str = "1 " + " ".join(map(str, flat_points))
#                     file.write(yolo_format_str + '\n')

#             if subset == 'train':
#                 all_images.append(dest_image_path)

#     val_images = random.sample(all_images, int(len(all_images) * val_ratio))
#     for image_path in val_images:
#         dest_val_image_path = dest_dir / 'val' / 'images' / image_path.name
#         dest_val_label_path = dest_dir / 'val' / 'labels' / image_path.name.replace('flair', 'consensus').replace('.png', '.txt')

#         dest_val_image_path.parent.mkdir(parents=True, exist_ok=True)
#         dest_val_label_path.parent.mkdir(parents=True, exist_ok=True)

#         image_path.rename(dest_val_image_path)
#         (dest_dir / 'train' / 'labels' / image_path.name.replace('flair', 'consensus').replace('.png', '.txt')).rename(dest_val_label_path)

#     # Remove images and annotations with no tumors (empty annotations)
#     for subset in ['train', 'val']:
#         labels_dir = dest_dir / subset / 'labels'
#         images_dir = dest_dir / subset / 'images'
#         for label_file in labels_dir.glob("*.txt"):
#             if os.path.getsize(label_file) == 0:  # Check if file is empty
#                 os.remove(label_file)  # Remove the empty label file
#                 corresponding_img_file = images_dir / label_file.name.replace('consensus', 'flair').replace('.txt', '.png')
#                 if corresponding_img_file.exists():
#                     os.remove(corresponding_img_file)  # Remove the corresponding image file

# # Define source and destination directories
# src_dir = Path("C:/Users/zhuyi/Desktop/CREATIS/UNet_YOLO/MSLS-YOLOv8/UNet/data")
# dest_dir = Path("C:/Users/zhuyi/Desktop/CREATIS/UNet_YOLO/MSLS-YOLOv8/YOLOv8/datasets")

# # Convert and save datasets
# convert_and_save_dataset(src_dir, dest_dir)
