In [1]:
import nibabel as nib
import numpy as np
import random
import json
import os
from tqdm import tqdm

In [5]:
synthetic_data_path = "./synthetic_data/prostate/"
gt_options = ["Low", "High"]

# Dedicated, seeded generator: re-running this cell rewrites the same case
# folders, so a fixed seed makes the regenerated dataset identical every time
# and leaves the global `random` state untouched. Change the seed to obtain a
# different dataset.
PROSTATE_SEED = 42
rng = random.Random(PROSTATE_SEED)

# Metadata templates, one row per profile:
#   (age bounds, histology_type, psa bounds in tenths,
#    neural_invasion, vascular_invasion, lymphatic_invasion)
# Bounds are inclusive (randint semantics). The sampled psa integer is divided
# by 10 when a record is drawn. The invasion flags are written as the strings
# "True"/"False", not as JSON booleans.
LOW_PROFILE_SPECS = [
    ((25, 50), "type_1", (20, 80), "False", "False", "False"),
    ((25, 40), "type_1", (20, 60), "False", "False", "True"),
    ((16, 40), "type_2", (10, 80), "False", "True", "False"),
    ((20, 50), "type_3", (20, 80), "True", "False", "False"),
    ((25, 50), "type_1", (20, 80), "False", "False", "False"),
    ((25, 40), "type_1", (20, 80), "False", "False", "False"),
    ((16, 40), "type_2", (10, 80), "True", "False", "False"),
    ((20, 30), "type_4", (20, 40), "True", "True", "True"),
]
HIGH_PROFILE_SPECS = [
    ((27, 75), "type_4", (60, 150), "True", "False", "True"),
    ((20, 60), "type_2", (40, 130), "True", "False", "True"),
    ((30, 55), "type_3", (50, 130), "True", "True", "False"),
    ((30, 55), "type_3", (50, 130), "False", "True", "True"),
    ((30, 75), "type_4", (60, 130), "True", "True", "True"),
    ((20, 60), "type_2", (40, 130), "False", "False", "True"),
    ((20, 55), "type_1", (30, 110), "False", "True", "False"),
]

# Per ground-truth label:
#   pool_weights     - weights for picking the (low, high) template pool
#   intensity_bounds - inclusive bounds for the two fixed image patches
LABEL_SETTINGS = {
    "Low": {"pool_weights": [0.9, 0.1], "intensity_bounds": (5, 22)},
    "High": {"pool_weights": [0.1, 0.9], "intensity_bounds": (13, 25)},
}


def _sample_prostate_metadata(spec, rng):
    """Draw one concrete metadata record from a profile template row.

    Args:
        spec: one row of LOW_PROFILE_SPECS / HIGH_PROFILE_SPECS.
        rng: `random.Random` instance used for the age and psa draws.

    Returns:
        dict with keys age, histology_type, psa, neural_invasion,
        vascular_invasion, lymphatic_invasion (in that order).
    """
    age_bounds, histology_type, psa_tenths, neural, vascular, lymphatic = spec
    return {
        "age": rng.randint(*age_bounds),
        "histology_type": histology_type,
        "psa": rng.randint(*psa_tenths) / 10,
        "neural_invasion": neural,
        "vascular_invasion": vascular,
        "lymphatic_invasion": lymphatic,
    }


def _make_prostate_image(intensity_bounds, rng):
    """Build a 240x240 zero array with constant-valued square patches.

    Args:
        intensity_bounds: inclusive (low, high) bounds for the two fixed patches.
        rng: `random.Random` instance used for all draws.

    Returns:
        numpy array of shape (240, 240).
    """
    image = np.zeros((240, 240))
    image[100:200, 100:200] = rng.randint(*intensity_bounds)
    # The second patch overlaps (and overwrites) the corner of the first one.
    image[150:200, 150:200] = rng.randint(*intensity_bounds)
    # Third patch: the same random start/stop is used on both axes. When
    # start >= stop the slice is empty and no patch is drawn; this behaviour is
    # kept unchanged. NOTE(review): confirm whether the bounds were meant to be
    # ordered.
    start, stop = rng.randint(100, 150), rng.randint(100, 150)
    image[start:stop, start:stop] = rng.randint(10, 20)
    return image


for k in tqdm(range(15000)):
    # The "00" is a literal prefix, not zero padding: case_000 ... case_0014999.
    case = f"case_00{k}"
    case_folder = os.path.join(synthetic_data_path, case)
    image_path = os.path.join(case_folder, f"{case}.nii.gz")
    metadata_path = os.path.join(case_folder, f"{case}.json")
    ground_truth_path = os.path.join(case_folder, f"{case}_ground_truth.json")

    gt = rng.choices(population=gt_options, weights=[0.60, 0.40], k=1)[0]
    settings = LABEL_SETTINGS[gt]

    # Pick the template pool first (mostly the one matching the label), then
    # sample only the single record and image that are actually written.
    profile_pool = rng.choices(
        population=[LOW_PROFILE_SPECS, HIGH_PROFILE_SPECS],
        weights=settings["pool_weights"],
        k=1,
    )[0]
    metadata = _sample_prostate_metadata(rng.choice(profile_pool), rng)
    image = nib.Nifti1Image(
        _make_prostate_image(settings["intensity_bounds"], rng), np.eye(4)
    )

    os.makedirs(case_folder, exist_ok=True)
    nib.save(image, image_path)
    with open(metadata_path, "w") as f:
        json.dump(metadata, f)
    with open(ground_truth_path, "w") as f:
        json.dump(gt, f)

100%|████████████████████████████████████████████████████████████████████████████| 15000/15000 [00:40<00:00, 367.91it/s]


In [5]:
synthetic_data_path = "./synthetic_data/lung/"
progression_options = [True, False]

# Dedicated, seeded generator: re-running this cell rewrites the same case
# folders, so a fixed seed makes the regenerated dataset identical every time
# and leaves the global `random` state untouched. Change the seed to obtain a
# different dataset.
LUNG_SEED = 42
rng = random.Random(LUNG_SEED)

# Metadata templates: inclusive age bounds and the weights used to draw
# smoking_status from ["yes", "no", "quit"] (in that order).
LOW_PROFILE = {"age_bounds": (20, 60), "smoking_weights": [0.1, 0.7, 0.2]}
HIGH_PROFILE = {"age_bounds": (30, 80), "smoking_weights": [0.4, 0.3, 0.3]}

# Per progression flag:
#   profiles / profile_weights - weighted choice of the metadata template
#   intensity_bounds           - inclusive bounds for the two fixed image patches
#   pfs_weights                - weights for the (50-100, 1-50) pfs candidates
PROGRESSION_SETTINGS = {
    True: {
        # NOTE(review): both entries are HIGH_PROFILE, so this weighted draw
        # always yields the high template and LOW_PROFILE is never used for
        # progressing cases. Kept as-is to preserve the generated
        # distribution; confirm whether [LOW_PROFILE, HIGH_PROFILE] (as in the
        # non-progression entry) was intended.
        "profiles": [HIGH_PROFILE, HIGH_PROFILE],
        "profile_weights": [0.9, 0.1],
        "intensity_bounds": (5, 22),
        "pfs_weights": [0.2, 0.8],
    },
    False: {
        "profiles": [LOW_PROFILE, HIGH_PROFILE],
        "profile_weights": [0.1, 0.9],
        "intensity_bounds": (13, 25),
        "pfs_weights": [0.9, 0.1],
    },
}


def _sample_lung_metadata(profile, rng):
    """Draw one metadata record (gender, age, smoking_status) from a template.

    Args:
        profile: LOW_PROFILE or HIGH_PROFILE.
        rng: `random.Random` instance used for all draws.

    Returns:
        dict with keys gender, age, smoking_status (in that order).
    """
    return {
        "gender": rng.choice(["male", "female"]),
        "age": rng.randint(*profile["age_bounds"]),
        "smoking_status": rng.choices(
            ["yes", "no", "quit"], weights=profile["smoking_weights"]
        )[0],
    }


def _make_lung_image(intensity_bounds, rng):
    """Build a 240x240 zero array with constant-valued square patches.

    Args:
        intensity_bounds: inclusive (low, high) bounds for the two fixed patches.
        rng: `random.Random` instance used for all draws.

    Returns:
        numpy array of shape (240, 240).
    """
    image = np.zeros((240, 240))
    image[100:200, 100:200] = rng.randint(*intensity_bounds)
    # The second patch overlaps (and overwrites) the corner of the first one.
    image[150:200, 150:200] = rng.randint(*intensity_bounds)
    # Third patch: the same random start/stop is used on both axes. When
    # start >= stop the slice is empty and no patch is drawn; this behaviour is
    # kept unchanged. NOTE(review): confirm whether the bounds were meant to be
    # ordered.
    start, stop = rng.randint(100, 150), rng.randint(100, 150)
    image[start:stop, start:stop] = rng.randint(10, 20)
    return image


def _sample_pfs(candidate_weights, age, rng):
    """Draw a pfs value and scale it down for older patients.

    One candidate is drawn from 50-100 and one from 1-50; `candidate_weights`
    decides which of the two is kept. If the kept value exceeds `100 - age` it
    is multiplied by `(100 - age) / 100`, which turns it into a float.

    Args:
        candidate_weights: weights for the (50-100, 1-50) candidates.
        age: the age stored in the case metadata.
        rng: `random.Random` instance used for all draws.

    Returns:
        int when unscaled, float when scaled.
    """
    pfs = rng.choices(
        population=[rng.randint(50, 100), rng.randint(1, 50)],
        weights=candidate_weights,
        k=1,
    )[0]
    if pfs > 100 - age:
        pfs = pfs * ((100 - age) / 100)
    return pfs


for k in tqdm(range(3000)):
    # The "00" is a literal prefix, not zero padding: case_000 ... case_002999.
    case = f"case_00{k}"
    case_folder = os.path.join(synthetic_data_path, case)
    image_path = os.path.join(case_folder, f"{case}.nii.gz")
    metadata_path = os.path.join(case_folder, f"{case}.json")
    ground_truth_path = os.path.join(case_folder, f"{case}_ground_truth.json")

    progression = rng.choices(
        population=progression_options, weights=[0.85, 0.15], k=1
    )[0]
    settings = PROGRESSION_SETTINGS[progression]

    # Sample only the single record and image that are actually written.
    profile = rng.choices(
        population=settings["profiles"],
        weights=settings["profile_weights"],
        k=1,
    )[0]
    metadata = _sample_lung_metadata(profile, rng)
    image = nib.Nifti1Image(
        _make_lung_image(settings["intensity_bounds"], rng), np.eye(4)
    )
    pfs = _sample_pfs(settings["pfs_weights"], metadata["age"], rng)

    gt = {'pfs': pfs, 'progression': progression}

    os.makedirs(case_folder, exist_ok=True)
    nib.save(image, image_path)
    with open(metadata_path, "w") as f:
        json.dump(metadata, f)
    with open(ground_truth_path, "w") as f:
        json.dump(gt, f)

100%|██████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:07<00:00, 386.42it/s]
