In [1]:
import os
import shutil
import random  # 데이터 섞기를 위한 라이브러리
import cv2
import numpy as np

# Module-level accumulator: reformat_label_file appends one IoU value per
# accepted instance, and the script driver averages them at the end.
iou_scores = []
def create_new_data_directory(original_data_dir, new_data_dir, split_ratio=0.8):
    """Build a shuffled train/val copy of the dataset under ``new_data_dir``.

    Scans ``<original_data_dir>/train/<city>/img`` for images that have a
    same-named ``.txt`` file in the sibling ``new_txt`` directory, shuffles the
    resulting (image, label) pairs, splits them by ``split_ratio`` and passes
    each partition to ``copy_and_rename_files``.

    Args:
        original_data_dir: Root directory that contains the ``train`` folder.
        new_data_dir: Output root; ``images/{train,val}`` and
            ``labels/{train,val}`` are created beneath it.
        split_ratio: Fraction of the pairs assigned to the training split;
            the remainder goes to validation.
    """
    os.makedirs(new_data_dir, exist_ok=True)

    # Define paths for images and labels
    new_images_dir = os.path.join(new_data_dir, 'images')
    new_labels_dir = os.path.join(new_data_dir, 'labels')

    # Create train and val directories
    for subdir in ('train', 'val'):
        os.makedirs(os.path.join(new_images_dir, subdir), exist_ok=True)
        os.makedirs(os.path.join(new_labels_dir, subdir), exist_ok=True)

    def list_files(base_dir):
        """Return (image_path, label_path) pairs found under ``base_dir``."""
        pairs = []
        for cityname in os.listdir(base_dir):
            img_dir = os.path.join(base_dir, cityname, 'img')
            label_dir = os.path.join(base_dir, cityname, 'new_txt')
            if not (os.path.exists(img_dir) and os.path.exists(label_dir)):
                continue
            for img_file in os.listdir(img_dir):
                if not img_file.endswith(('.png', '.jpg', '.jpeg')):
                    continue
                label_file = os.path.join(
                    label_dir, os.path.splitext(img_file)[0] + '.txt'
                )
                # Record the image and its label together, and only when the
                # label exists. Collecting them in two separate lists and
                # zipping afterwards would shift every later pairing as soon
                # as a single image has no label file.
                if os.path.exists(label_file):
                    pairs.append((os.path.join(img_dir, img_file), label_file))
        return pairs

    # Get all files and shuffle them
    all_files = list_files(os.path.join(original_data_dir, 'train'))
    random.shuffle(all_files)

    # Split files into train and validation sets
    split_index = int(len(all_files) * split_ratio)
    train_files = all_files[:split_index]
    val_files = all_files[split_index:]

    # Copy files for both train and validation sets
    copy_and_rename_files(train_files, 'train', new_images_dir, new_labels_dir)
    copy_and_rename_files(val_files, 'val', new_images_dir, new_labels_dir)

def copy_and_rename_files(files, split_type, new_images_dir, new_labels_dir):
    """Copy images, write reformatted labels and an index file for one split.

    For every (image, label) pair the image is copied to
    ``<new_images_dir>/<split_type>/`` under a new name, the label is rewritten
    by ``reformat_label_file`` into ``<new_labels_dir>/<split_type>/``, and the
    copied image path is appended to ``<split_type>.txt``.

    Args:
        files: Iterable of ``(image_path, label_path)`` tuples.
        split_type: Name of the split sub-directory, e.g. ``'train'`` or ``'val'``.
        new_images_dir: Destination root for images.
        new_labels_dir: Destination root for labels.
    """
    # The index file goes in the parent of the images directory. Deriving the
    # location from the argument keeps this function independent of any
    # module-level variable.
    output_root = os.path.dirname(os.path.normpath(new_images_dir))
    txt_file_path = os.path.join(output_root, f'{split_type}.txt')

    with open(txt_file_path, 'w') as txt_file:
        for image_path, label_path in files:
            # Safely split the path
            path_parts = os.path.normpath(image_path).split(os.sep)

            # splitext strips only the final extension, so file names that
            # contain extra dots keep their full stem and cannot collide.
            stem, img_ext = os.path.splitext(path_parts[-1])

            # Prefix with the directory two levels above the file so that
            # equal file names from different source folders stay unique.
            if len(path_parts) >= 3:
                unique_id = f"{path_parts[-3]}_{stem}"
            else:
                unique_id = stem

            # Copy image file
            dest_image_path = os.path.join(
                new_images_dir, split_type, f'{unique_id}{img_ext}'
            )
            shutil.copy(image_path, dest_image_path)

            # Copy and reformat label file
            dest_label_path = os.path.join(
                new_labels_dir, split_type, f'{unique_id}.txt'
            )
            reformat_label_file(label_path, dest_label_path, image_path)

            # Write the new image path to the txt file
            txt_file.write(dest_image_path + '\n')

def reformat_label_file(src_label_path, dest_label_path, image_path):
    """Rewrite a box-style label file as polygon-style label lines.

    The instance mask is read from ``../instance/<image file name>`` relative
    to the image's directory. For each source line with at least ten fields,
    the pixels whose mask value equals the line's index are extracted, cleaned
    with ``remove_noise`` and traced into polygons with ``mask2polygon``. A
    line is written only when the IoU between the raw instance mask and the
    mask re-drawn from those polygons exceeds 0.01; that IoU is also appended
    to the module-level ``iou_scores`` list.

    Each output line is
    ``class loc brake incatlft incatrht hazlit x0 y0 x1 y1 ...`` with
    coordinates divided by the image width / height.

    Args:
        src_label_path: Path of the source label text file.
        dest_label_path: Path of the label file to write.
        image_path: Path of the image the labels belong to.

    Returns:
        None. Nothing is written when the image or its mask cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # skip the sample instead of crashing on ``image.shape``.
        print(f"Warning: Image could not be read: {image_path}. Skipping...")
        return
    height, width = image.shape[:2]

    mask_dir = os.path.join(os.path.dirname(image_path), '..', 'instance')
    mask_path = os.path.join(mask_dir, os.path.basename(image_path))
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    if mask is None:
        print(f"Warning: Mask not found for {image_path}. Skipping...")
        return

    with open(src_label_path, 'r') as src_file:
        lines = src_file.readlines()

    with open(dest_label_path, 'w') as dest_file:
        # Enumerate every line (including short ones that are skipped) so the
        # index used for the mask lookup stays tied to the line position.
        for line_num, line in enumerate(lines):
            parts = line.strip().split()
            if len(parts) < 10:
                continue

            # The four leading box coordinates are parsed (and thereby
            # validated as numbers) but not used in the output.
            (_x1, _y1, _x2, _y2, class_id, loc_id,
             brake, incatlft, incatrht, hazlit) = map(float, parts[:10])

            # NOTE(review): assumes the mask stores the label line index as
            # the pixel value of each instance - confirm against the dataset.
            instance_mask = np.zeros_like(mask)
            instance_mask[mask == line_num] = 255

            new_instance_mask = remove_noise(instance_mask)
            polygons = mask2polygon(new_instance_mask)
            recreated_mask = recreate_mask_from_polygons(polygons, instance_mask.shape)

            iou_score = calculate_iou(instance_mask, recreated_mask)
            if iou_score <= 0.01:
                continue
            iou_scores.append(iou_score)

            # Polygons are flat [x0, y0, x1, y1, ...] lists; all polygons of
            # one instance are concatenated into a single coordinate run.
            normalized_contours = []
            for polygon in polygons:
                for x, y in zip(polygon[0::2], polygon[1::2]):
                    normalized_contours.append(x / width)
                    normalized_contours.append(y / height)

            new_label_line = f'{int(class_id)} {int(loc_id)} {int(brake)} {int(incatlft)} {int(incatrht)} {int(hazlit)} ' + ' '.join(map(str, normalized_contours))
            dest_file.write(new_label_line + '\n')

def mask2polygon(image, mode=cv2.CHAIN_APPROX_TC89_KCOS):
    """Trace the contours of ``image`` and return them as flat point lists.

    Contours are retrieved with ``cv2.RETR_CCOMP`` using the approximation
    method ``mode``. Contours with fewer than three points are discarded.

    Returns:
        A list with one flattened coordinate list per kept contour.
    """
    found, _hierarchy = cv2.findContours(image, cv2.RETR_CCOMP, mode)

    polygons = []
    for contour in found:
        if len(contour) < 3:
            continue
        polygons.append(contour.flatten().tolist())
    return polygons

def remove_noise(mask):
    """Apply a morphological close followed by an open with a 5x5 kernel.

    Returns:
        The filtered mask produced by the two ``cv2.morphologyEx`` passes.
    """
    structuring_element = np.ones((5, 5), np.uint8)
    closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, structuring_element)
    return cv2.morphologyEx(closed, cv2.MORPH_OPEN, structuring_element)

def calculate_iou(mask1, mask2):
    """Return the intersection-over-union of two masks.

    Any non-zero element counts as foreground. When neither mask has any
    foreground (empty union) the result is ``0``.
    """
    union = np.logical_or(mask1, mask2).sum()
    if union == 0:
        return 0
    overlap = np.logical_and(mask1, mask2).sum()
    return overlap / union

def recreate_mask_from_polygons(polygons, size):
    """Draw filled polygons onto a blank uint8 mask of shape ``size``.

    Args:
        polygons: Iterable of flat ``[x0, y0, x1, y1, ...]`` coordinate lists.
        size: Shape of the mask to create.

    Returns:
        The mask, with each polygon filled with value 255 in turn.
    """
    canvas = np.zeros(size, dtype=np.uint8)
    for flat_points in polygons:
        # Turn the flat list back into an (N, 2) array of vertices.
        vertices = np.array(flat_points).reshape((-1, 2))
        cv2.fillPoly(canvas, [vertices], 255)
    return canvas

# Entry point: rebuild the dataset from the current directory into
# ./processed_data using an 80/20 train/val split.
original_data_dir = './'
new_data_dir = './processed_data'
create_new_data_directory(original_data_dir, new_data_dir, split_ratio=0.8)

# Report the mean of the IoU values collected while reformatting labels.
if not iou_scores:
    print("No IoU scores available.")
else:
    avg_iou = sum(iou_scores) / len(iou_scores)
    print(f"Average IoU between img vs polygon: {avg_iou}")

Average IoU between img vs polygon: 0.9660969036924436
