In [None]:
import json
import random
import os


# Seed Python's global RNG so that the shuffle performed later in this cell
# produces the same subject ordering on every run.
random_seed = 42
random.seed(random_seed)


# Locations of the source annotation file and of the generated split files
input_json_path = '../../Annotations/main_annotation.json'  # Replace with your input JSON file path
output_dir = '../../Annotations/splits/subjects'  # Output directory for split files



# Create the output directory (including missing parents). exist_ok=True
# replaces the separate exists() check, which could race with another process
# creating the directory between the check and the makedirs call.
os.makedirs(output_dir, exist_ok=True)

# Load the dataset. The encoding is given explicitly so the file is decoded as
# UTF-8 regardless of the platform's default locale encoding.
with open(input_json_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Split ratio for train, validation, and test
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

# The test split receives whatever is left after train and val are carved out,
# so test_ratio is only enforced through this check. Without it, ratios that do
# not sum to 1 would silently produce a test split of a different size (or an
# empty one when train_ratio + val_ratio exceeds 1).
if min(train_ratio, val_ratio, test_ratio) < 0:
    raise ValueError(
        f"Split ratios must be non-negative, got "
        f"train={train_ratio}, val={val_ratio}, test={test_ratio}"
    )
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError(
        f"Split ratios must sum to 1, got "
        f"{train_ratio} + {val_ratio} + {test_ratio} = "
        f"{train_ratio + val_ratio + test_ratio}"
    )

# Shuffle the subjects (in place) before splitting; the order depends on the
# RNG state at this point.
random.shuffle(data['subjects'])

# Determine the number of subjects for each split. Train and val counts are
# truncated towards zero; test absorbs the remainder so every subject is used.
num_subjects = len(data['subjects'])
num_train = int(train_ratio * num_subjects)
num_val = int(val_ratio * num_subjects)
num_test = num_subjects - num_train - num_val

# Slice the shuffled list into three contiguous, non-overlapping ranges
train_subjects = data['subjects'][:num_train]
val_subjects = data['subjects'][num_train:num_train + num_val]
test_subjects = data['subjects'][num_train + num_val:]

# Invariants: the three slices partition the subject list and the test slice
# has the size computed above.
assert len(test_subjects) == num_test
assert len(train_subjects) + len(val_subjects) + len(test_subjects) == num_subjects

# Wrap each split in the {"subjects": [...]} layout used by the split files.
# list(...) gives each set its own list object, separate from the slices.
train_set = {"subjects": list(train_subjects)}
val_set = {"subjects": list(val_subjects)}
test_set = {"subjects": list(test_subjects)}

# Save split JSON files
def save_split_file(split_data, split_name, directory=None):
    """Write one split to ``<directory>/<split_name>_split.json``.

    Args:
        split_data: JSON-serialisable object to write.
        split_name: Prefix of the output file name, e.g. ``'train'`` yields
            ``train_split.json``.
        directory: Folder to write into. When ``None`` (the default), the
            module-level ``output_dir`` is used, which matches the behaviour
            of the original two-argument call.

    Returns:
        None. The file is written with a 4-space indent.
    """
    target_dir = output_dir if directory is None else directory
    # Make the helper usable on its own: do not rely on the directory having
    # been created elsewhere in the notebook.
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, f'{split_name}_split.json'), 'w') as f:
        json.dump(split_data, f, indent=4)

# Write each split to its own JSON file, in train -> val -> test order
for split_name, split_payload in (
    ('train', train_set),
    ('val', val_set),
    ('test', test_set),
):
    save_split_file(split_payload, split_name)

# Report where the three files were written
print(f"Splits saved in directory: {output_dir}")


Splits saved in directory: ../../Annotations/splits/subjects
