In [3]:
import os
import json
import random
from pathlib import Path

In [4]:
# Dataset layout and output location: the values a reader is expected to tune.
output_json = "dataset_splits.json"  # file the train/val/test manifest is written to
label_suffix = "_gt"                 # appended to an image's base name to form its label's name
image_dir = Path("./chgh")           # directory scanned for *.nii.gz volumes

In [5]:
# Collect every (image, label) pair found in image_dir. A label volume carries
# its image's base name plus label_suffix, e.g. "case01.nii.gz" is paired with
# "case01_gt.nii.gz". Images without a matching label file are left out.
nifti_ext = ".nii.gz"
shuffle_seed = 42  # fixed so Restart & Run All reproduces the same split

all_pairs = []
for img in sorted(image_dir.glob(f"*{nifti_ext}")):
    # Drop only the trailing extension; str.replace(".nii", "") would also
    # remove a ".nii" that happens to occur inside the base name.
    name = img.name[: -len(nifti_ext)]

    # Skip label volumes. The comparison uses the full suffix ("_gt"):
    # stripping the underscore would also discard any image whose name
    # merely ends in "gt".
    if name.endswith(label_suffix):
        continue

    label = image_dir / f"{name}{label_suffix}{nifti_ext}"
    if label.exists():
        all_pairs.append({"image": img.name, "label": label.name})

# A private, seeded RNG keeps the shuffle deterministic without touching the
# global random state that other cells may rely on.
random.Random(shuffle_seed).shuffle(all_pairs)
N = len(all_pairs)

In [6]:
# Split sizes: 75% training and 15% validation (both rounded down);
# the test set receives whatever remains (about 10%).
num_train = int(N * 0.75)
num_val = int(N * 0.15)
num_test = N - num_train - num_val

# The shuffled list is cut into three consecutive, non-overlapping slices.
val_end = num_train + num_val
train_pairs, val_pairs, test_pairs = (
    all_pairs[:num_train],
    all_pairs[num_train:val_end],
    all_pairs[val_end:],
)

In [7]:
# Bundle the three subsets under the keys stored in the JSON manifest.
data = dict(train=train_pairs, val=val_pairs, test=test_pairs)

# Serialise first, then write the text out in a single call.
with open(output_json, "w") as f:
    f.write(json.dumps(data, indent=4))

# One-line-per-item summary of the split that was just saved.
print(
    f"Total images: {N}",
    f"Train: {len(train_pairs)}",
    f"Val:   {len(val_pairs)}",
    f"Test:  {len(test_pairs)}",
    f"Saved JSON to {output_json}",
    sep="\n",
)

Total images: 59
Train: 44
Val:   8
Test:  7
Saved JSON to dataset_splits.json
