# MaxStyle preprocessed data

In [158]:
import os
import json
import shutil
import pandas as pd

from glob import glob
from tqdm import tqdm

In [159]:
site_dir = "/home/alvin/UltrAi/Datasets/ai_ready_datasets/other_datasets/MICCAI2022_multi_site_prostate_dataset/reorganized"
sites = glob(site_dir + "/*")
sites = [os.path.basename(site) for site in sites]
sites.sort()

# Separate images and labels into different folders
id = 0
orig_ids = []
new_ids = []
centers = []
for site in tqdm(sites):
    if site[0] not in ["A", "B", "C", "D", "E", "F", "G"]:
        continue
    print(site, end=": ")
    site_path = os.path.join(site_dir, site)
    patient_ids = glob(site_path + "/patient*")
    print("patients: ", len(patient_ids))

    for patient_id in patient_ids:
        img = os.path.join(patient_id, "t2_img_clipped.nii.gz")
        label = os.path.join(patient_id, "label_clipped.nii.gz")

        os.makedirs(os.path.join(site_dir, "imagesTr"), exist_ok=True)
        os.makedirs(os.path.join(site_dir, "labelsTr"), exist_ok=True)

        shutil.move(img, os.path.join(site_dir, "imagesTr", f"patient_{id}.nii.gz"))
        shutil.move(label, os.path.join(site_dir, "labelsTr", f"patient_{id}.nii.gz"))

        os.rmdir(patient_id)

        centers.append(site)
        orig_ids.append(os.path.basename(patient_id))
        new_ids.append(id)

        id += 1
    
    shutil.rmtree(site_path)

# Create csv of patient_ids and their centers
df = pd.DataFrame({"orig_id": orig_ids, "patientid": new_ids, "center": centers})
df.to_csv(os.path.join("raw_data", "prostate_patientid.csv"), index=False)
df

100%|██████████| 7/7 [00:00<00:00, 741.31it/s]

A-ISBI: patients:  30
B-ISBI_1.5: patients:  30
C-I2CVB: patients:  19
D-UCL: patients:  13
E-BIDMC: patients:  12
F-HK: patients:  12
G-MedicalDecathlon: patients:  32





Unnamed: 0,orig_id,patientid,center
0,patient_20,0,A-ISBI
1,patient_5,1,A-ISBI
2,patient_11,2,A-ISBI
3,patient_26,3,A-ISBI
4,patient_27,4,A-ISBI
...,...,...,...
143,patient_10,143,G-MedicalDecathlon
144,patient_25,144,G-MedicalDecathlon
145,patient_4,145,G-MedicalDecathlon
146,patient_28,146,G-MedicalDecathlon


## Convert to nnunet format

In [160]:
def convert_dataset(src_folder, dst_folder, imgs_dir, masks_dir, img_ext, test_data=False):
    images_folder = imgs_dir
    labels_folder = masks_dir

    imgs = glob(os.path.join(src_folder, images_folder , f"*{img_ext}"))
    # print("imgs: ", imgs)

    for i, img in enumerate(imgs):
        mask = os.path.join(src_folder, labels_folder, os.path.basename(img))
        # print("\timg: ", img)
        # print("\tmask: ", mask)
        img_fn = os.path.basename(img)
        id = int(img_fn.split(".")[0].split("_")[1])
        img_fn = f"patient_{str(id).zfill(4)}_0000{img_ext}"
        # print("\timg_fn: ", img_fn)
        os.rename(img, os.path.join(src_folder, images_folder, img_fn))

        mask_fn = os.path.basename(mask)
        mask_fn = f"patient_{str(id).zfill(4)}{img_ext}"
        # print("\tmask_fn: ", mask_fn)
        os.rename(mask, os.path.join(src_folder, labels_folder, mask_fn))

    return None

In [161]:
src_dir = site_dir
dst_dir = src_dir
img_ext = ".nii.gz"
imgs_dir = "imagesTr"
masks_dir = "labelsTr"

convert_dataset(src_dir, dst_dir, imgs_dir, masks_dir, img_ext)

# FedDG data

In [25]:
import os
import shutil

from glob import glob
from tqdm import tqdm

In [None]:
site_dir = "/home/alvin/UltrAi/Datasets/ai_ready_datasets/other_datasets/Processed_multisite_prostate_data_nii"
sites = os.listdir(site_dir)

# Separate images and labels into different folders
for site in tqdm(sites):
    print(site, end=": ")
    site_path = os.path.join(site_dir, site)
    cases = glob(site_path + "/*.nii.gz")
    cases = list(set([os.path.basename(case)[:6] for case in cases]))
    print("cases: ", len(cases))

    imgs = [os.path.join(site_path, case + ".nii.gz") for case in cases]
    labels = []
    for case in cases:
        label = os.path.join(site_path, case + "_segmentation.nii.gz")
        if not os.path.exists(label):
            label = os.path.join(site_path, case + "_Segmentation.nii.gz")
            if not os.path.exists(label):
                # print("Label for case {} not found".format(case))
                raise ValueError("Label for case {} not found".format(case))
        labels.append(label)

    os.makedirs(os.path.join(site_path, "images"), exist_ok=True)
    os.makedirs(os.path.join(site_path, "labels"), exist_ok=True)

    for img in imgs:
        shutil.move(img, os.path.join(site_path, "images", os.path.basename(img)))
    for label in labels:
        shutil.move(label, os.path.join(site_path, "labels", os.path.basename(label)))

# Rename label files to remove segmentation
for site in tqdm(sites):
    site_path = os.path.join(site_dir, site)
    labels = glob(os.path.join(site_path, "labels", "*.nii.gz"))
    for label in labels:
        new_label = label.replace("_segmentation", "").replace("_Segmentation", "")
        os.rename(label, new_label)

100%|██████████| 6/6 [00:00<00:00, 4389.64it/s]

BIDMC: cases:  0
BMC: cases:  0
UCL: cases:  0
RUNMC: cases:  0
HK: cases:  0
I2CVB: cases:  0



