# In [6]:  (Jupyter notebook cell prompt)
import os
import json
import random
import glob

def _dataset_entries(image_paths, hospital_ids):
    """Build the JSON records for a list of image paths of one hospital group."""
    return [
        {
            "image_path": path,
            "hospital_id": hospital_ids,
            # The label is the 5th underscore-separated token of the file name,
            # with the extension stripped. Adjust the index if the naming
            # scheme differs; a name with fewer tokens raises IndexError.
            "label": os.path.basename(path).split('_')[4].split('.')[0],
        }
        for path in image_paths
    ]


def create_dataset_json_multisite(root_dir, output_folder, val_fraction=0.3, seed=None):
    """Write one train/val JSON split per hospital group.

    All ``*.png`` files directly inside ``root_dir`` are collected, assigned to
    a hospital group when their file name contains ``_hospital_<id>_`` for one
    of the group's ids, shuffled, and split into "train" and "val" lists. Each
    group's split is written to its own JSON file in ``output_folder``.

    Args:
        root_dir: Directory containing the PNG images (not searched
            recursively). Update the glob pattern if another extension is used.
        output_folder: Directory receiving the JSON files; created if missing.
        val_fraction: Fraction of each group's images placed in "val"
            (rounded down). Defaults to 0.3, i.e. a 70-30 split.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1]``.
        IndexError: If a matching file name has fewer than five
            underscore-separated tokens (label extraction fails).
    """
    if not 0 <= val_fraction <= 1:
        raise ValueError(f"val_fraction must be between 0 and 1, got {val_fraction!r}")

    # Sorted so that a seeded shuffle is reproducible regardless of the
    # (arbitrary) order in which glob returns directory entries.
    subject_list = sorted(glob.glob(os.path.join(root_dir, '*.png')))

    # Group hospitals: comma-separated hospital ids -> output file name
    hospital_groups = {
        '43': 'hospital43.json',
        '55,33': 'hospital55_33.json',
        '54,18': 'hospital54_18.json',
    }

    shuffler = random if seed is None else random.Random(seed)

    os.makedirs(output_folder, exist_ok=True)

    for hospital_ids, output_filename in hospital_groups.items():
        # Match on the file name only, so that a directory component that
        # happens to contain the marker cannot pull in unrelated images.
        markers = tuple(f"_hospital_{hospital_id}_" for hospital_id in hospital_ids.split(','))
        filtered_images = [
            path
            for path in subject_list
            if any(marker in os.path.basename(path) for marker in markers)
        ]

        # Shuffle the images and split them into train and val sets.
        shuffler.shuffle(filtered_images)
        num_validation = int(len(filtered_images) * val_fraction)
        # Slice at an explicit index: negative slicing breaks when
        # num_validation == 0 because xs[:-0] is empty and xs[-0:] is everything.
        split_index = len(filtered_images) - num_validation

        dataset = {
            "train": _dataset_entries(filtered_images[:split_index], hospital_ids),
            "val": _dataset_entries(filtered_images[split_index:], hospital_ids),
        }

        # Write the dataset to JSON file
        with open(os.path.join(output_folder, output_filename), 'w', encoding='utf-8') as json_file:
            json.dump(dataset, json_file, indent=4)

        print(f"JSON file '{output_filename}' created for hospital IDs: {hospital_ids}")

# Example usage: source image directory and destination for the per-hospital
# JSON split files.
root_directory = "/local/data1/honzh073/data/8bit_down224"
output_folder = "/local/data1/honzh073/local_repository/FL/learner_json"

# Only generate the files when executed directly (script or notebook cell),
# so that importing this module does not write to disk as a side effect.
if __name__ == "__main__":
    create_dataset_json_multisite(root_directory, output_folder)


# Cell output:
# JSON file 'hospital43.json' created for hospital IDs: 43
# JSON file 'hospital55_33.json' created for hospital IDs: 55,33
# JSON file 'hospital54_18.json' created for hospital IDs: 54,18
