In [None]:
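# Build YOLO-format labels from the base data.
# Each image is divided into three boxes (fingers, forearm, back of the hand).
# Each box spans from the smallest x, y to the largest x, y of its points.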

import os
import json
import cv2
import shutil

# Define directories
# label_dir is scanned for one sub-folder per patient containing the JSON
# annotation files; input_dir is expected to hold the PNG images under the
# same per-patient folder names. The generated YOLO label files and the
# copied images are both written flat into output_dir.
label_dir = "/data/ephemeral/home/MCG/data/train/outputs_json"
input_dir = "/data/ephemeral/home/MCG/data/train/DCM"
output_dir = "/data/ephemeral/home/MCG/data/yolo_dataFormat/"

# Ensure the output directory exists
# exist_ok=True makes re-running this cell safe when the folder is already there.
os.makedirs(output_dir, exist_ok=True)

# Axis-aligned bounding box of a point set
def calculate_bounding_box(points):
    """Return the smallest axis-aligned box that encloses ``points``.

    Args:
        points: Sequence of points. Index 0 of each point is read as x and
            index 1 as y; any further entries are ignored.

    Returns:
        ``(x_min, y_min, x_max, y_max)``, or ``None`` when ``points`` is
        empty (or otherwise falsy).
    """
    if not points:
        return None

    # Axis 0 is x, axis 1 is y: take the extreme value along each axis.
    lows = tuple(min(pt[axis] for pt in points) for axis in (0, 1))
    highs = tuple(max(pt[axis] for pt in points) for axis in (0, 1))
    return lows + highs

# Convert a pixel-space corner box into normalized YOLO values
def normalize_bbox(image_shape, bbox):
    """Express a corner-format box as center/size fractions of the image.

    Args:
        image_shape: Sequence whose first two entries are the image height
            and width, in that order; later entries are ignored.
        bbox: ``(x_min, y_min, x_max, y_max)``, or ``None``.

    Returns:
        ``(x_center, y_center, box_width, box_height)`` where the x values
        are divided by the image width and the y values by the image height,
        or ``None`` when ``bbox`` is ``None``.
    """
    if bbox is None:
        return None

    img_h, img_w = image_shape[:2]
    left, top, right, bottom = bbox
    return (
        (left + right) / 2 / img_w,
        (top + bottom) / 2 / img_h,
        (right - left) / img_w,
        (bottom - top) / img_h,
    )

# Class mappings: each box group gets a YOLO class id equal to its position
# in the tuple below (0, 1, 2).
class_map = {
    group_name: class_id
    for class_id, group_name in enumerate(("finger", "radius_ulna", "others"))
}

# Walk every patient folder and convert its JSON annotations to YOLO labels
for patient_id in os.listdir(label_dir):
    patient_label_path = os.path.join(label_dir, patient_id)
    if not os.path.isdir(patient_label_path):
        # Stray files next to the patient folders are ignored.
        continue
    patient_image_path = os.path.join(input_dir, patient_id)

    for json_file in os.listdir(patient_label_path):
        if not json_file.endswith(".json"):
            continue

        # The annotation is parsed before the image is looked up.
        with open(os.path.join(patient_label_path, json_file), "r") as f:
            data = json.load(f)

        # The image shares the annotation's file name, with a .png suffix.
        image_name = json_file.replace(".json", ".png")
        image_path = os.path.join(patient_image_path, image_name)
        if not os.path.isfile(image_path):
            continue

        # The decoded image is only needed for its shape; unreadable files
        # are skipped.
        image = cv2.imread(image_path)
        if image is None:
            continue

        # Pool the points of every annotation into one of three groups.
        # The key order here is also the order of the lines written below.
        points_by_group = {"finger": [], "radius_ulna": [], "others": []}
        for annotation in data['annotations']:
            label = annotation['label']
            points = annotation['points']

            if 'finger' in label.lower():
                group_name = "finger"
            elif label in ('Radius', 'Ulna'):
                group_name = "radius_ulna"
            else:
                group_name = "others"
            points_by_group[group_name].extend(points)

        # One YOLO line per non-empty group: "<class> <xc> <yc> <w> <h>".
        yolo_lines = []
        for group_name, group_points in points_by_group.items():
            pixel_box = calculate_bounding_box(group_points)
            if pixel_box is None:
                continue
            xc, yc, bw, bh = normalize_bbox(image.shape, pixel_box)
            yolo_lines.append(
                f"{class_map[group_name]} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}\n"
            )

        # The label file is prefixed with the patient id; it is created even
        # when no group produced a box.
        annotation_file = os.path.join(output_dir, f"{patient_id}_{image_name.replace('.png', '.txt')}")
        with open(annotation_file, "w") as f:
            f.writelines(yolo_lines)

        # Place a copy of the image next to its label under the same prefix.
        shutil.copy(image_path, os.path.join(output_dir, f"{patient_id}_{image_name}"))

print("Annotations and images have been successfully processed and saved in YOLO format.")


In [None]:
# Split into train/valid so that hands of the same patient never end up in both sets

import os
import shutil
import random

# Directories: the flat folder that is listed for YOLO .txt label files,
# and the two folders the split is copied into.
yolo_annotations_dir = "/data/ephemeral/home/MCG/data/yolo_dataFormat"
train_dir = "/data/ephemeral/home/MCG/data/yolo_dataset_split/train"
valid_dir = "/data/ephemeral/home/MCG/data/yolo_dataset_split/valid"

# Make sure both split folders exist before anything is copied into them
for split_dir in (train_dir, valid_dir):
    os.makedirs(split_dir, exist_ok=True)

# Collect every YOLO label file.
# The listing is sorted because os.listdir returns entries in arbitrary
# order; without sorting, the split below would depend on the filesystem.
annotation_files = [
    os.path.join(yolo_annotations_dir, file)
    for file in sorted(os.listdir(yolo_annotations_dir))
    if file.endswith(".txt")
]

# Group files by patient ID
# The ID is taken as everything before the first underscore of the file name
# (e.g. "ID001"). NOTE(review): this assumes patient IDs themselves contain
# no underscore -- confirm against the folder names used to build the labels.
patient_groups = {}
for file in annotation_files:
    patient_id = os.path.basename(file).split('_')[0]
    patient_groups.setdefault(patient_id, []).append(file)

# Shuffle patient IDs reproducibly.
# An unseeded shuffle gives a different split on every run; since files are
# only ever copied into the split folders, re-running could leave the same
# patient in both train and valid. A fixed seed on a private generator keeps
# the split stable for the same input and leaves the global RNG untouched.
SPLIT_SEED = 42
patient_ids = sorted(patient_groups)
random.Random(SPLIT_SEED).shuffle(patient_ids)

# Split patient IDs into train (85%) and valid (15%)
# The split is by patient, not by file, so one patient's files stay together.
split_index = int(len(patient_ids) * 0.85)
train_patient_ids = patient_ids[:split_index]
valid_patient_ids = patient_ids[split_index:]

# Collect files for train and valid
train_files = [file for pid in train_patient_ids for file in patient_groups[pid]]
valid_files = [file for pid in valid_patient_ids for file in patient_groups[pid]]

# Helper function to copy label files and their images into a split folder
def move_files(file_list, target_dir):
    """Copy each annotation file, and its image if present, into ``target_dir``.

    Despite the name, nothing is moved: sources are copied and left in place.

    Args:
        file_list: Paths of annotation files. For each one, an image with the
            same path but a ``.png`` extension is looked up.
        target_dir: Existing directory that receives the copies under their
            original base names.

    Returns:
        None. An annotation without a matching image is still copied; the
        missing image is skipped silently.
    """
    for file in file_list:
        # Copy the annotation file
        shutil.copy(file, os.path.join(target_dir, os.path.basename(file)))

        # Copy the corresponding image file.
        # Only the extension is swapped: str.replace(".txt", ".png") would
        # rewrite every ".txt" in the path, so a directory name containing
        # ".txt" produced a wrong image path and the image was skipped.
        image_file = os.path.splitext(file)[0] + ".png"
        if os.path.isfile(image_file):
            shutil.copy(image_file, os.path.join(target_dir, os.path.basename(image_file)))

# Copy each split's files into its folder (train first, then valid)
for split_files, split_target in ((train_files, train_dir), (valid_files, valid_dir)):
    move_files(split_files, split_target)

# Report how many annotation files went to each split
print("Train and validation datasets created.")
print(f"Train files: {len(train_files)}, Validation files: {len(valid_files)}")


In [None]:
# Generate the dataset YAML file

import os

# Locations referenced by, and used for, the dataset description file
dataset_dir = "/data/ephemeral/home/MCG/data/yolo_dataset_split"
train_dir = os.path.join(dataset_dir, "train")
valid_dir = os.path.join(dataset_dir, "valid")
yaml_file_path = "/data/ephemeral/home/MCG/data/yolo_dataset_split/data.yaml"

# Class names, listed in class-id order
classes = ["finger", "radius_ulna", "others"]

# Assemble the file line by line. The empty first and last entries give the
# text its leading and trailing newline once the lines are joined.
yaml_lines = [
    "",
    f"train: {train_dir}",
    f"val: {valid_dir}",
    "",
    "# Number of classes",
    f"nc: {len(classes)}",
    "",
    "# Class names",
    f"names: {classes}",
    "",
]
yaml_content = "\n".join(yaml_lines)

# Write data.yaml, overwriting any previous version
with open(yaml_file_path, "w") as handle:
    handle.write(yaml_content)

print(f"data.yaml file has been created at: {yaml_file_path}")
