In [2]:
import os
import numpy as np
import nibabel as nib
import json

# 划分数据

In [3]:
# 获取数据集
ROOT_PATH = "D:/zlx/Medical_Image_Segmentation/data/patches/"
IMAGE_PATH = os.path.join(ROOT_PATH, "image")
LABEL_PATH = os.path.join(ROOT_PATH, "label")

image_files = [os.path.join(IMAGE_PATH, f) for f in os.listdir(IMAGE_PATH) if f.endswith(".nii.gz")]
label_files = [os.path.join(LABEL_PATH, f) for f in os.listdir(LABEL_PATH) if f.endswith(".nii.gz")]

# 确保图像和标签数量匹配
if len(image_files) != len(label_files):
    raise ValueError("图像和标签文件数量不匹配，请检查数据。")

# 设置交叉验证的折数
num_folds = 1
num_total = len(image_files)
indices = np.arange(num_total)
np.random.seed(42)
np.random.shuffle(indices)

# 创建多折数据集划分
fold_size = num_total // num_folds
folds = []
for i in range(num_folds):
    val_indices = indices[i * fold_size: (i + 1) * fold_size] if i != num_folds - 1 else indices[i * fold_size:]
    train_indices = np.setdiff1d(indices, val_indices)

    train_images = [image_files[idx] for idx in train_indices]
    train_labels = [label_files[idx] for idx in train_indices]
    val_images = [image_files[idx] for idx in val_indices]
    val_labels = [label_files[idx] for idx in val_indices]

    folds.append({
        'train_images': train_images,
        'train_labels': train_labels,
        'val_images': val_images,
        'val_labels': val_labels
    })

# 打印每一折的数据大小
for i, fold in enumerate(folds):
    print(f"折 {i + 1}:")
    print("训练集大小:", len(fold['train_images']))
    print("验证集大小:", len(fold['val_images']))

# 保存每一折的数据集列表到文件
OUTPUT_PATH = "D:/zlx/Medical_Image_Segmentation/data/splits/"
os.makedirs(OUTPUT_PATH, exist_ok=True)

for i, fold in enumerate(folds):
    split_file_path = os.path.join(OUTPUT_PATH, f"fold_{i + 1}.json")
    with open(split_file_path, 'w') as f:
        json.dump(fold, f, indent=4)
    print(f"保存折 {i + 1} 的数据列表到 {split_file_path}")

折 1:
训练集大小: 0
验证集大小: 7062
保存折 1 的数据列表到 D:/zlx/Medical_Image_Segmentation/data/splits/fold_1.json
