In [None]:
import os
import shutil
import numpy as np
from sklearn.model_selection import GroupKFold
import random

# Seed both the stdlib and the NumPy global random generators
RANDOM_SEED = 21
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Dataset locations: source images, source JSON labels, and where the folds are written
IMAGE_ROOT = "/data/ephemeral/home/MCG/data/train/DCM"
LABEL_ROOT = "/data/ephemeral/home/MCG/data/train/outputs_json"
OUTPUT_DIR = "/data/ephemeral/home/MCG/data/groupKFold_seed21"

# Collect every PNG under IMAGE_ROOT and every JSON under LABEL_ROOT,
# stored as paths relative to their respective roots.
pngs = {
    os.path.relpath(os.path.join(root, fname), start=IMAGE_ROOT)
    for root, _dirs, files in os.walk(IMAGE_ROOT)
    for fname in files
    if os.path.splitext(fname)[1].lower() == ".png"
}

jsons = {
    os.path.relpath(os.path.join(root, fname), start=LABEL_ROOT)
    for root, _dirs, files in os.walk(LABEL_ROOT)
    for fname in files
    if os.path.splitext(fname)[1].lower() == ".json"
}

assert len(pngs) == len(jsons), "Mismatch between PNG and JSON files!"

pngs = sorted(pngs)
jsons = sorted(jsons)

# Pair each image with its label by extension-less relative path instead of by
# position in two independently sorted lists: equal counts alone do not
# guarantee that index i of both lists refers to the same sample.
json_by_stem = {os.path.splitext(rel_path)[0]: rel_path for rel_path in jsons}
unlabeled = [rel_path for rel_path in pngs if os.path.splitext(rel_path)[0] not in json_by_stem]
assert not unlabeled, f"PNG files without a matching JSON label: {unlabeled[:5]}"

# Parallel arrays: labelnames[i] is the label of filenames[i]
filenames = np.array(pngs)
labelnames = np.array([json_by_stem[os.path.splitext(rel_path)[0]] for rel_path in pngs])

# Group key = directory of the image relative to IMAGE_ROOT, so all images in
# the same folder always land in the same fold.
groups = [os.path.dirname(fname) for fname in filenames]

# Build the 5-fold group-aware splitter
gkf = GroupKFold(n_splits=5)

# Create the output root
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Files are flattened to their basename inside each fold directory, so two
# groups containing the same file name would silently overwrite each other.
for rel_paths, what in ((filenames, "image"), (labelnames, "label")):
    flat_names = [os.path.basename(rel_path) for rel_path in rel_paths]
    assert len(set(flat_names)) == len(flat_names), (
        f"Duplicate {what} basenames across groups would overwrite each other!"
    )


def _copy_split(indices, split_dir):
    """Copy the image/label pairs at `indices` into `split_dir`/Image and `split_dir`/Label.

    Reads the module-level `filenames`/`labelnames` arrays and the
    IMAGE_ROOT/LABEL_ROOT source directories; destination files keep only
    their basename.
    """
    image_dir = os.path.join(split_dir, "Image")
    label_dir = os.path.join(split_dir, "Label")
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    for idx in indices:
        shutil.copy2(
            os.path.join(IMAGE_ROOT, filenames[idx]),
            os.path.join(image_dir, os.path.basename(filenames[idx])),
        )
        shutil.copy2(
            os.path.join(LABEL_ROOT, labelnames[idx]),
            os.path.join(label_dir, os.path.basename(labelnames[idx])),
        )


# Write every fold to OUTPUT_DIR/fold<N>/{train,val}/{Image,Label}
for fold_idx, (train_idx, val_idx) in enumerate(gkf.split(filenames, groups=groups)):
    fold_dir = os.path.join(OUTPUT_DIR, f"fold{fold_idx + 1}")

    _copy_split(train_idx, os.path.join(fold_dir, "train"))
    _copy_split(val_idx, os.path.join(fold_dir, "val"))

    print(f"Fold {fold_idx + 1} saved: {len(train_idx)} train files, {len(val_idx)} val files.")

print(f"Data split completed and saved in {OUTPUT_DIR}")



In [None]:



import os
import json
import shutil
import numpy as np
import cv2
from PIL import Image

# Input fold directory and output dataset directory
source_dir = "/data/ephemeral/home/MCG/data/groupKFold_seed21/fold1"
target_dir = "/data/ephemeral/home/MCG/data/UNet3+Data"

# Class names in label order; the position in this list (1-based) is the
# pixel value written into the mask, leaving 0 for pixels no polygon covers.
CLASSES = [
    'finger-1',
    'finger-2',
    'finger-3',
    'finger-4',
    'finger-5',
    'finger-6',
    'finger-7',
    'finger-8',
    'finger-9',
    'finger-10',
    'finger-11',
    'finger-12',
    'finger-13',
    'finger-14',
    'finger-15',
    'finger-16',
    'finger-17',
    'finger-18',
    'finger-19',
    'Trapezium',
    'Trapezoid',
    'Capitate',
    'Hamate',
    'Scaphoid',
    'Lunate',
    'Triquetrum',
    'Pisiform',
    'Radius',
    'Ulna',
]
CLASS_MAPPING = dict(zip(CLASSES, range(1, len(CLASSES) + 1)))

def create_directories(base_dir):
    """Create the train/val x images/mask folder tree under `base_dir`.

    Existing directories are left untouched (``exist_ok=True``).
    """
    for split in ("train", "val"):
        for kind in ("images", "mask"):
            os.makedirs(os.path.join(base_dir, split, kind), exist_ok=True)

def parse_json_label(json_path, mask_shape):
    """Render the polygons of one JSON label file into a single-channel class mask.

    Args:
        json_path: Path to a JSON file with a top-level "annotations" list whose
            items carry a "label" (class name) and "points" (polygon vertices).
        mask_shape: Shape passed to ``np.zeros`` for the output mask.

    Returns:
        A ``PIL.Image`` built from a uint8 array in which each filled polygon
        has the value ``CLASS_MAPPING[label]`` and everything else is 0.

    Raises:
        KeyError: If "annotations" or an annotation's "points" key is missing.

    Notes:
        Polygons are drawn in file order onto the same array, so where two
        polygons overlap the later one overwrites the earlier one.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    mask = np.zeros(mask_shape, dtype=np.uint8)

    for obj in data["annotations"]:
        class_id = CLASS_MAPPING.get(obj.get("label", ""))
        if class_id is None:
            # Label is not one of the known classes; ignore it.
            continue

        polygon = np.array(obj["points"], dtype=np.int32)
        if polygon.size < 6:
            # Fewer than three (x, y) vertices is not a fillable area, and an
            # empty point list is not a valid fillPoly input.
            continue
        cv2.fillPoly(mask, [polygon], color=class_id)

    return Image.fromarray(mask)

def process_data(source_dir, target_dir, mask_shape=(2048, 2048)):
    """Copy each split's images into the UNet3+ layout and render one mask per label.

    For both "train" and "val", images are read from ``<source_dir>/<split>/Image``
    and JSON labels from ``<source_dir>/<split>/Label``. Image ``index`` (in sorted
    file-name order) is copied to ``<target_dir>/<split>/images/image_<index>_0.png``
    and its mask is saved to ``<target_dir>/<split>/mask/mask_<index>_0.png``.

    Args:
        source_dir: Fold directory containing the ``train`` and ``val`` splits.
        target_dir: Output root; the directory tree is created if missing.
        mask_shape: Shape forwarded to ``parse_json_label`` for every mask.

    Raises:
        AssertionError: If a split has a different number of images and labels,
            or if an image has no label file with the same stem.
    """
    create_directories(target_dir)

    image_exts = ('.png', '.jpg', '.jpeg')

    for split in ["train", "val"]:
        image_src_dir = os.path.join(source_dir, split, "Image")
        label_src_dir = os.path.join(source_dir, split, "Label")

        image_dest_dir = os.path.join(target_dir, split, "images")
        mask_dest_dir = os.path.join(target_dir, split, "mask")

        # Extension matching is case-insensitive so e.g. "X.PNG" is not dropped.
        image_files = sorted(f for f in os.listdir(image_src_dir) if f.lower().endswith(image_exts))
        label_files = sorted(f for f in os.listdir(label_src_dir) if f.lower().endswith('.json'))

        assert len(image_files) == len(label_files), "이미지와 라벨 파일 수가 맞지 않습니다."

        # Pair by file stem rather than by position: two independently sorted
        # lists of equal length can still line up the wrong image and label.
        label_by_stem = {os.path.splitext(f)[0]: f for f in label_files}
        missing = [f for f in image_files if os.path.splitext(f)[0] not in label_by_stem]
        assert not missing, f"Images without a matching JSON label in '{split}': {missing[:5]}"

        for index, img_file in enumerate(image_files):
            lbl_file = label_by_stem[os.path.splitext(img_file)[0]]

            # Output names follow the image_<index>_0 / mask_<index>_0 scheme
            image_filename = f"image_{index}_0.png"
            mask_filename = f"mask_{index}_0.png"

            # NOTE(review): the file is copied byte-for-byte, so a .jpg/.jpeg
            # source keeps its JPEG contents under a .png name.
            shutil.copy(os.path.join(image_src_dir, img_file), os.path.join(image_dest_dir, image_filename))

            # Render the label polygons into a mask image and save it
            json_path = os.path.join(label_src_dir, lbl_file)
            mask = parse_json_label(json_path, mask_shape)
            mask.save(os.path.join(mask_dest_dir, mask_filename))

# Run the conversion for the configured fold
process_data(
    source_dir=source_dir,
    target_dir=target_dir,
    mask_shape=(2048, 2048),
)



In [None]:



import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Mask to inspect. process_data() writes masks as "mask_<index>_0.png", so use
# the first training mask; the previous "image1661144206667.png" name is never
# produced by this notebook.
mask_path = "/data/ephemeral/home/MCG/data/UNet3+Data/train/mask/mask_0_0.png"

# Load the mask into an array; the context manager closes the file handle
with Image.open(mask_path) as mask:
    mask_array = np.array(mask)

# List the class IDs that actually occur in this mask
unique_values = np.unique(mask_array)
print("Unique class IDs in the mask:", unique_values)

# Visualise the mask
# NOTE(review): tab20 provides 20 colours while CLASS_MAPPING assigns IDs up to
# 29, so distinct classes can end up with the same colour.
plt.figure(figsize=(10, 10))
plt.title("Mask Visualization")
plt.imshow(mask_array, cmap='tab20', interpolation='nearest')  # nearest keeps class IDs un-blended
plt.colorbar(ticks=unique_values, label="Class ID")  # one tick per class ID present
plt.show()
