In [None]:
#!/usr/bin/env python3
import json
import random
import os

# reproducibility: seed the module-level RNG so the shuffle below is repeatable
random_seed = 42
random.seed(random_seed)

# task and paths
# The task name selects which annotation file is read and is embedded in the
# names of the split files written by save_split.
task = "segmentation"  # or "classification"
input_json_path = f'../../Annotations/aaai26_main_annotation_{task}.json'
output_dir = '../../Annotations/splits/subjects/'

# ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# load the full annotation
# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, which is not UTF-8 everywhere.
with open(input_json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# extract and shuffle subjects
subjects = data.get('subjects', [])
if not subjects:
    # Fail fast: continuing would overwrite any existing split files with
    # empty splits.
    raise ValueError(
        f"No subjects found under the 'subjects' key in {input_json_path}"
    )
# In-place shuffle; the order is determined by the seed set above and by the
# order of the subjects in the input file.
random.shuffle(subjects)

# compute split sizes (60% train / 20% val, both truncated towards zero)
n_total = len(subjects)
n_train = int(0.6 * n_total)
n_val   = int(0.2 * n_total)
# remainder goes to test, so it also absorbs the subjects lost to truncation
n_test  = n_total - n_train - n_val

# consecutive, non-overlapping slices of the shuffled list
train_subjects = subjects[:n_train]
val_subjects   = subjects[n_train:n_train+n_val]
test_subjects  = subjects[n_train+n_val:]

# helper to write a split file
def save_split(split_subjects, split_name):
    """Write one split to a JSON file in ``output_dir`` and report it.

    The file is named ``aaai26_<split_name>_split_<task>.json`` and holds a
    single object whose ``"subjects"`` key maps to ``split_subjects``.

    Args:
        split_subjects: The subjects belonging to this split.
        split_name: Label of the split, used in the file name and the message.
    """
    file_name = 'aaai26_{}_split_{}.json'.format(split_name, task)
    destination = os.path.join(output_dir, file_name)
    payload = {"subjects": split_subjects}
    with open(destination, 'w') as handle:
        json.dump(payload, handle, indent=4)
    count = len(split_subjects)
    print(f"Saved {split_name} split with {count} subjects to {destination}")

# write out each split, in train / val / test order
for _split_name, _split_subjects in (
    ('train', train_subjects),
    ('val', val_subjects),
    ('test', test_subjects),
):
    save_split(_split_subjects, _split_name)


Saved train split with 37 subjects to ../../Annotations/splits/subjects/aaai26_train_split_classification.json
Saved val split with 12 subjects to ../../Annotations/splits/subjects/aaai26_val_split_classification.json
Saved test split with 13 subjects to ../../Annotations/splits/subjects/aaai26_test_split_classification.json
