In [1]:
import numpy as np
import json
import copy
import os
import random
import matplotlib.pyplot as plt
from matplotlib import patches
from PIL import Image

This is the code we used to assign unified IDs across all datasets. It serves only as a basic guideline; the actual implementation depends on your specific scenario.
Change the paths and details accordingly.

In [2]:
# Per-dataset JSON annotation files. The merge cell reads the "images",
# "annotations" and "categories" entries of each one.
cell_json = r"./cell data/cell_data.json"
chest_json = r"./chest data/data.json"
brats_json = r"./BRATS_DATASET/instances.json"
flare_json = r"./FLARE_DATASET/instances.json"
word_json = r"./WORD_DATASET/instances.json"
xray_json = r"./x_ray_disease/out_image/xray_instances.json"

# Destination of the merged annotation file.
output_json = r"./combined.json"

In [3]:
# ID layout used throughout the merged file:
#   image id: XYYYYYY       X = dataset id, Y = image id
#   mask id:  XYYYYYYZZZZ   X = dataset id, Y = image id, Z = mask id

# Dataset ids (the leading digit X).
cell_id, chest_id, brats_id = 1, 2, 3
flare_id, word_id, xray_id = 4, 5, 6

# Base offset added to each dataset's image ids: the dataset digit followed by
# six zeros, leaving room for the six-digit image number Y.
cell_id_head = cell_id * 10**6
chest_id_head = chest_id * 10**6
brats_id_head = brats_id * 10**6
flare_id_head = flare_id * 10**6
word_id_head = word_id * 10**6
xray_id_head = xray_id * 10**6

In [5]:
def rename_files(image_dir, prefix, skip_prefixed=True):
    """Prepend ``prefix`` to the name of every regular file directly inside ``image_dir``.

    Sub-directories (and anything else that is not a regular file) are left
    untouched. Files are renamed in place with ``os.rename``.

    Args:
        image_dir: Directory whose files are renamed.
        prefix: String prepended to each file name.
        skip_prefixed: When True (default), a file whose name already starts
            with ``prefix`` is left alone, so running this cell a second time
            does not turn ``WORD_x.png`` into ``WORD_WORD_x.png``. Pass False
            to prefix every file unconditionally.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.rename`` (for example
            when ``image_dir`` does not exist).
    """
    # os.listdir returns a snapshot, so files renamed below are not revisited.
    for filename in os.listdir(image_dir):
        old_file = os.path.join(image_dir, filename)
        if not os.path.isfile(old_file):
            continue
        if skip_prefixed and filename.startswith(prefix):
            # Already handled by an earlier run of this cell.
            continue
        os.rename(old_file, os.path.join(image_dir, prefix + filename))

# Prefix the WORD and FLARE image files on disk; the merge cell prepends the same
# "WORD_" / "FLARE_" prefixes to the corresponding "file_name" entries
# (presumably so file names stay unique once all images share one folder).
# os.path.join is used instead of a hard-coded "\" so the paths are also valid
# on systems where backslash is not a path separator.
rename_files(os.path.join(r'<PATH_TO_WORD_DATASET>', 'image'), "WORD_")
rename_files(os.path.join(r'<PATH_TO_FLARE_DATASET>', 'image'), "FLARE_")

In [6]:
def load_reindexed_dataset(json_path, id_head, label, file_prefix="",
                           category_offset=0, fixed_category_id=None):
    """Load one dataset and renumber its images and annotations into the unified id scheme.

    Images are numbered ``id_head + 1, id_head + 2, ...`` in file order.
    Each annotation gets ``new_image_id * 10000 + k`` where ``k`` counts down
    from the number of annotations on that image to 1 (the first annotation
    listed for an image receives the largest ``k``).

    Args:
        json_path: Path of the dataset's JSON file; it must provide
            "images", "annotations" and "categories".
        id_head: Base offset of the dataset's image-id range.
        label: Heading printed before the image / bbox counts.
        file_prefix: If non-empty, prepended to every image "file_name".
        category_offset: Added to every annotation "category_id" and every
            category "id" (used when ``fixed_category_id`` is None).
        fixed_category_id: If given, every annotation gets this category id
            and the FIRST category entry gets this id; other category
            entries are left as they are.

    Returns:
        Tuple ``(images, annotations, categories)`` of the modified entries.

    Raises:
        KeyError: If an annotation refers to an image id that is not listed
            under "images".
    """
    with open(json_path, 'r') as file:
        data = json.load(file)

    # original image id -> [new image id, annotations of that image not yet numbered]
    image_state = {}
    for idx, img in enumerate(data["images"], start=1):
        new_image_id = id_head + idx
        image_state[img["id"]] = [new_image_id, 0]
        img["id"] = new_image_id
        if file_prefix:
            img["file_name"] = file_prefix + img["file_name"]

    # First pass: count the annotations of every image.
    for ant in data["annotations"]:
        image_state[ant["image_id"]][1] += 1

    # Second pass: hand out the per-image numbers, counting down.
    for ant in data["annotations"]:
        state = image_state[ant["image_id"]]
        new_image_id, remaining = state
        state[1] = remaining - 1
        ant["image_id"] = new_image_id
        # 10000 reserves the four-digit ZZZZ field of the mask id layout.
        ant["id"] = int(new_image_id) * 10000 + int(remaining)
        if fixed_category_id is None:
            ant["category_id"] += category_offset
        else:
            ant["category_id"] = fixed_category_id

    if fixed_category_id is None:
        for cat in data["categories"]:
            cat["id"] += category_offset
    else:
        data["categories"][0]["id"] = fixed_category_id

    print(f"{label}:")
    print(f"#Images: {len(data['images'])}, #bboxs: {len(data['annotations'])}")
    return data["images"], data["annotations"], data["categories"]


combined_images = []
combined_annotations = []
combined_categories = []

# Cell dataset: existing ids are shifted into the dataset's range and every box
# gets the single cell category. "categories" is a single dict here, not a list.
with open(cell_json, 'r') as file:
    data = json.load(file)
    for img in data["images"]:
        img["id"] += cell_id_head
        combined_images.append(img)
    for ant in data["annotations"]:
        ant["image_id"] += cell_id_head
        ant["id"] += cell_id_head*10000
        ant["category_id"] = cell_id
        combined_annotations.append(ant)
    data["categories"]["id"] = cell_id
    combined_categories.append(data["categories"])
    print("Cell:")
    print(f"#Images: {len(data['images'])}, #bboxs: {len(data['annotations'])}")

# Chest dataset: the annotation id is rebuilt from the shifted image id plus the
# last two digits of the original annotation id. "categories" is a single dict.
with open(chest_json, 'r') as file:
    data = json.load(file)
    for img in data["images"]:
        img["id"] += chest_id_head
        combined_images.append(img)
    for ant in data["annotations"]:
        # Uses the ORIGINAL image_id, so it must run before the shift below.
        ant["id"] = (ant["image_id"] + chest_id_head) * 10000 + (ant["id"]%100)
        ant["image_id"] += chest_id_head
        ant["category_id"] = chest_id
        combined_annotations.append(ant)
    data["categories"]["id"] = chest_id
    combined_categories.append(data["categories"])
    print("Chest:")
    print(f"#Images: {len(data['images'])}, #bboxs: {len(data['annotations'])}")

# The remaining datasets are renumbered from scratch. The category offsets
# (3 / 100 / 200) produce the intermediate ids that old_to_new_id_map in the
# category-unification cell expects as keys, so change both together.
reindexed_datasets = (
    dict(json_path=brats_json, id_head=brats_id_head, label="Brain tumor",
         fixed_category_id=brats_id),
    dict(json_path=flare_json, id_head=flare_id_head, label="FLARE",
         file_prefix="FLARE_", category_offset=3),
    dict(json_path=word_json, id_head=word_id_head, label="WORD",
         file_prefix="WORD_", category_offset=100),
    dict(json_path=xray_json, id_head=xray_id_head, label="XRAY",
         category_offset=200),
)
for dataset_spec in reindexed_datasets:
    images, annotations, categories = load_reindexed_dataset(**dataset_spec)
    combined_images.extend(images)
    combined_annotations.extend(annotations)
    combined_categories.extend(categories)

combined_images = sorted(combined_images, key=lambda x: x['id'])
combined_annotations = sorted(combined_annotations, key=lambda x: x['id'])
combined_categories = sorted(combined_categories, key=lambda x: x['id'])

json_info = {"info": None, "licenses": None, "images": combined_images, 
             "annotations": combined_annotations, "categories": combined_categories}


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    NumPy integers become ``int``, NumPy floats become ``float`` and
    ``ndarray`` objects become (nested) lists. Every other object is passed
    to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# with open(output_json, 'w') as file:
#     json.dump(json_info, file, indent=4, cls=NumpyEncoder)


Cell:
#Images: 229, #bboxs: 5509
Chest:
#Images: 566, #bboxs: 1132
Brain tumor:
#Images: 14720, #bboxs: 12984
FLARE:
#Images: 4794, #bboxs: 26802
WORD:
#Images: 9309, #bboxs: 27073
XRAY:
#Images: 4394, #bboxs: 19253


In [7]:
# Category names left out of the unified table because another entry already
# covers them: 'Adrenal' (intermediate id 112) is mapped onto 'Adrenal Gland'
# (unified id 10) by old_to_new_id_map.
merged_away_names = {'Adrenal'}

# Keep the first occurrence of every (supercategory, name) pair, in the order
# of combined_categories. Entries are deep-copied so that renumbering below
# does not touch combined_categories.
unique_pairs = set()
unique_categories = []

for category in combined_categories:
    # Filtering here, before ids are assigned, avoids removing items from the
    # list while indexing into it and works wherever the name appears
    # (including as the last entry or more than once).
    if category['name'] in merged_away_names:
        continue
    pair = (category['supercategory'], category['name'])
    if pair not in unique_pairs:
        unique_pairs.add(pair)
        unique_categories.append(copy.deepcopy(category))

# Consecutive unified ids, starting at 1.
for idx, category in enumerate(unique_categories, start=1):
    category['id'] = idx

In [8]:
# Show the last 20 entries of the unified category list to check the renumbered ids.
unique_categories[-20:]

[{'supercategory': 'Abdomen', 'id': 14, 'name': 'Duodenum'},
 {'supercategory': 'Abdomen', 'id': 15, 'name': 'Colon'},
 {'supercategory': 'Abdomen', 'id': 16, 'name': 'Intestine'},
 {'supercategory': 'Abdomen', 'id': 17, 'name': 'Rectum'},
 {'supercategory': 'Abdomen', 'id': 18, 'name': 'Bladder'},
 {'supercategory': 'Abdomen', 'id': 19, 'name': 'Head of femur'},
 {'supercategory': 'XRAY', 'id': 20, 'name': 'Aortic enlargement'},
 {'supercategory': 'XRAY', 'id': 21, 'name': 'Atelectasis'},
 {'supercategory': 'XRAY', 'id': 22, 'name': 'Calcification'},
 {'supercategory': 'XRAY', 'id': 23, 'name': 'Cardiomegaly'},
 {'supercategory': 'XRAY', 'id': 24, 'name': 'Consolidation'},
 {'supercategory': 'XRAY', 'id': 25, 'name': 'ILD'},
 {'supercategory': 'XRAY', 'id': 26, 'name': 'Infiltration'},
 {'supercategory': 'XRAY', 'id': 27, 'name': 'Lung Opacity'},
 {'supercategory': 'XRAY', 'id': 28, 'name': 'Nodule/Mass'},
 {'supercategory': 'XRAY', 'id': 29, 'name': 'Other lesion'},
 {'supercategory'

In [9]:
# Show the last 20 entries of the merged category list, which still carry the
# per-dataset offset ids (1xx, 2xx) assigned in the merge cell.
combined_categories[-20:]

[{'supercategory': 'Abdomen', 'id': 110, 'name': 'Colon'},
 {'supercategory': 'Abdomen', 'id': 111, 'name': 'Intestine'},
 {'supercategory': 'Abdomen', 'id': 112, 'name': 'Adrenal'},
 {'supercategory': 'Abdomen', 'id': 113, 'name': 'Rectum'},
 {'supercategory': 'Abdomen', 'id': 114, 'name': 'Bladder'},
 {'supercategory': 'Abdomen', 'id': 115, 'name': 'Head of femur'},
 {'supercategory': 'XRAY', 'id': 200, 'name': 'Aortic enlargement'},
 {'supercategory': 'XRAY', 'id': 201, 'name': 'Atelectasis'},
 {'supercategory': 'XRAY', 'id': 202, 'name': 'Calcification'},
 {'supercategory': 'XRAY', 'id': 203, 'name': 'Cardiomegaly'},
 {'supercategory': 'XRAY', 'id': 204, 'name': 'Consolidation'},
 {'supercategory': 'XRAY', 'id': 205, 'name': 'ILD'},
 {'supercategory': 'XRAY', 'id': 206, 'name': 'Infiltration'},
 {'supercategory': 'XRAY', 'id': 207, 'name': 'Lung Opacity'},
 {'supercategory': 'XRAY', 'id': 208, 'name': 'Nodule/Mass'},
 {'supercategory': 'XRAY', 'id': 209, 'name': 'Other lesion'},
 {

In [10]:
# unique_categories = [{'supercategory': 'cell', 'id': 1, 'name': 'cell'},
#                     {'supercategory': 'chest', 'id': 2, 'name': 'lung'},
#                     {'supercategory': 'Brain', 'id': 3, 'name': 'tumor'},
#                     {'supercategory': 'Abdomen', 'id': 4, 'name': 'Liver'},
#                     {'supercategory': 'Abdomen', 'id': 5, 'name': 'Kidney'},
#                     {'supercategory': 'Abdomen', 'id': 6, 'name': 'Spleen'},
#                     {'supercategory': 'Abdomen', 'id': 7, 'name': 'Pancreas'},
#                     {'supercategory': 'Abdomen', 'id': 8, 'name': 'Aorta'},
#                     {'supercategory': 'Abdomen', 'id': 9, 'name': 'Inferior Vena Cava(IVC)'},
#                     {'supercategory': 'Abdomen', 'id': 10, 'name': 'Adrenal Gland'},
#                     {'supercategory': 'Abdomen', 'id': 11, 'name': 'Gallbladder'},
#                     {'supercategory': 'Abdomen', 'id': 12, 'name': 'Esophagus'},
#                     {'supercategory': 'Abdomen', 'id': 13, 'name': 'Stomach'},
#                     {'supercategory': 'Abdomen', 'id': 14, 'name': 'Duodenum'},
#                     {'supercategory': 'Abdomen', 'id': 15, 'name': 'Colon'},
#                     {'supercategory': 'Abdomen', 'id': 16, 'name': 'Intestine'},
#                     {'supercategory': 'Abdomen', 'id': 17, 'name': 'Rectum'},
#                     {'supercategory': 'Abdomen', 'id': 18, 'name': 'Bladder'},
#                     {'supercategory': 'Abdomen', 'id': 19, 'name': 'Head of femur'},
                    # {'supercategory': 'XRAY', 'id': 20, 'name': 'Aortic enlargement'},
                    # {'supercategory': 'XRAY', 'id': 21, 'name': 'Atelectasis'},
                    # {'supercategory': 'XRAY', 'id': 22, 'name': 'Calcification'},
                    # {'supercategory': 'XRAY', 'id': 23, 'name': 'Cardiomegaly'},
                    # {'supercategory': 'XRAY', 'id': 24, 'name': 'Consolidation'},
                    # {'supercategory': 'XRAY', 'id': 25, 'name': 'ILD'},
                    # {'supercategory': 'XRAY', 'id': 26, 'name': 'Infiltration'},
                    # {'supercategory': 'XRAY', 'id': 27, 'name': 'Lung Opacity'},
                    # {'supercategory': 'XRAY', 'id': 28, 'name': 'Nodule/Mass'},
                    # {'supercategory': 'XRAY', 'id': 29, 'name': 'Other lesion'},
                    # {'supercategory': 'XRAY', 'id': 30, 'name': 'Pleural effusion'},
                    # {'supercategory': 'XRAY', 'id': 31, 'name': 'Pleural thickening'},
                    # {'supercategory': 'XRAY', 'id': 32, 'name': 'Pneumothorax'},
                    # {'supercategory': 'XRAY', 'id': 33, 'name': 'Pulmonary fibrosis'}]

# combined_categories = [{'supercategory': 'cell', 'id': 1, 'name': 'cell'},
#                         {'supercategory': 'chest', 'id': 2, 'name': 'lung'},
#                         {'supercategory': 'Brain', 'id': 3, 'name': 'tumor'},
#                         {'supercategory': 'Abdomen', 'id': 4, 'name': 'Liver'},
#                         {'supercategory': 'Abdomen', 'id': 5, 'name': 'Kidney'},
#                         {'supercategory': 'Abdomen', 'id': 6, 'name': 'Spleen'},
#                         {'supercategory': 'Abdomen', 'id': 7, 'name': 'Pancreas'},
#                         {'supercategory': 'Abdomen', 'id': 8, 'name': 'Aorta'},
#                         {'supercategory': 'Abdomen', 'id': 9, 'name': 'Inferior Vena Cava(IVC)'},
#                         {'supercategory': 'Abdomen', 'id': 10, 'name': 'Adrenal Gland'},
#                         {'supercategory': 'Abdomen', 'id': 12, 'name': 'Gallbladder'},
#                         {'supercategory': 'Abdomen', 'id': 13, 'name': 'Esophagus'},
#                         {'supercategory': 'Abdomen', 'id': 14, 'name': 'Stomach'},
#                         {'supercategory': 'Abdomen', 'id': 15, 'name': 'Duodenum'},
#                         {'supercategory': 'Abdomen', 'id': 101, 'name': 'Liver'},
#                         {'supercategory': 'Abdomen', 'id': 102, 'name': 'Spleen'},
#                         {'supercategory': 'Abdomen', 'id': 103, 'name': 'Kidney'},
#                         {'supercategory': 'Abdomen', 'id': 105, 'name': 'Stomach'},
#                         {'supercategory': 'Abdomen', 'id': 106, 'name': 'Gallbladder'},
#                         {'supercategory': 'Abdomen', 'id': 107, 'name': 'Esophagus'},
#                         {'supercategory': 'Abdomen', 'id': 108, 'name': 'Pancreas'},
#                         {'supercategory': 'Abdomen', 'id': 109, 'name': 'Duodenum'},
#                         {'supercategory': 'Abdomen', 'id': 110, 'name': 'Colon'},
#                         {'supercategory': 'Abdomen', 'id': 111, 'name': 'Intestine'},
#                         {'supercategory': 'Abdomen', 'id': 112, 'name': 'Adrenal'},
#                         {'supercategory': 'Abdomen', 'id': 113, 'name': 'Rectum'},
#                         {'supercategory': 'Abdomen', 'id': 114, 'name': 'Bladder'},
#                         {'supercategory': 'Abdomen', 'id': 115, 'name': 'Head of femur'}
                        # {'supercategory': 'XRAY', 'id': 200, 'name': 'Aortic enlargement'},
                        #  {'supercategory': 'XRAY', 'id': 201, 'name': 'Atelectasis'},
                        #  {'supercategory': 'XRAY', 'id': 202, 'name': 'Calcification'},
                        #  {'supercategory': 'XRAY', 'id': 203, 'name': 'Cardiomegaly'},
                        #  {'supercategory': 'XRAY', 'id': 204, 'name': 'Consolidation'},
                        #  {'supercategory': 'XRAY', 'id': 205, 'name': 'ILD'},
                        #  {'supercategory': 'XRAY', 'id': 206, 'name': 'Infiltration'},
                        #  {'supercategory': 'XRAY', 'id': 207, 'name': 'Lung Opacity'},
                        #  {'supercategory': 'XRAY', 'id': 208, 'name': 'Nodule/Mass'},
                        #  {'supercategory': 'XRAY', 'id': 209, 'name': 'Other lesion'},
                        #  {'supercategory': 'XRAY', 'id': 210, 'name': 'Pleural effusion'},
                        #  {'supercategory': 'XRAY', 'id': 211, 'name': 'Pleural thickening'},
                        #  {'supercategory': 'XRAY', 'id': 212, 'name': 'Pneumothorax'},
                        #  {'supercategory': 'XRAY', 'id': 213, 'name': 'Pulmonary fibrosis'}]

# Intermediate category id (as assigned in the merge cell) -> unified category id.
old_to_new_id_map = {1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10,12:11,13:12,14:13,15:14,
                     101:4,102:6,103:5,105:13,106:11,107:12,108:7,109:14,
                     110:15,111:16,112:10,113:17,114:18,115:19,
                     200:20,201:21,202:22,203:23,204:24,205:25,206:26,207:27,208:28,209:29,210:30,
                     211:31,212:32,213:33}

# Check every id before remapping anything, so a bad id is reported up front
# instead of failing half-way through.
unknown_category_ids = sorted(
    {annotation['category_id'] for annotation in combined_annotations}.difference(old_to_new_id_map)
)
if unknown_category_ids:
    raise KeyError(
        f"category ids without an entry in old_to_new_id_map: {unknown_category_ids}"
    )

# Remap into shallow copies instead of editing combined_annotations in place.
# The mapping is not its own inverse (e.g. 12 -> 11, and 11 is not a key), so an
# in-place remap would corrupt the data or fail if this cell were run twice.
# combined_annotations therefore keeps its intermediate ids.
remapped_annotations = [
    {**annotation, 'category_id': old_to_new_id_map[annotation['category_id']]}
    for annotation in combined_annotations
]

combined_images = sorted(combined_images, key=lambda x: x['id'])
combined_annotations = sorted(combined_annotations, key=lambda x: x['id'])
remapped_annotations.sort(key=lambda x: x['id'])

json_info = {"info": None, "licenses": None, "images": combined_images, 
             "annotations": remapped_annotations, "categories": unique_categories}


# NOTE(review): a class with the same name and behavior is also defined in the
# merge cell; consider keeping a single definition.
class NumpyEncoder(json.JSONEncoder):
    """``json`` encoder that converts NumPy integers, floats and arrays to built-in types."""

    def default(self, obj):
        """Convert ``obj`` to ``int`` / ``float`` / ``list``, or defer to the base class (TypeError)."""
        converters = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda array: array.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super(NumpyEncoder, self).default(obj)

# Write the merged dataset to output_json; NumpyEncoder converts any NumPy values.
with open(output_json, mode='w') as out_file:
    json.dump(json_info, out_file, cls=NumpyEncoder, indent=4)

Some code for visualization and verification:

In [None]:
image_folder = os.path.join(".", "combined_data")   # put all your images together
annotation_file = os.path.join(".", "combined.json")
# Only images with at least one box of this category are shown
# (19 is 'Head of femur' in the unified category table).
target_category_id = 19

with open(annotation_file, 'r') as f:
    instances = json.load(f)

categories_dict = {category["id"]: category["name"] for category in instances["categories"]}

# file name -> image id; the first entry wins if a file name is listed twice.
image_id_by_file = {}
for img in instances["images"]:
    image_id_by_file.setdefault(img["file_name"], img["id"])

# image id -> all of its annotations, built once instead of rescanning per image.
annotations_by_image = {}
for annotation in instances["annotations"]:
    annotations_by_image.setdefault(annotation["image_id"], []).append(annotation)

# Files that are present in the folder, listed in the JSON, and carry the target category.
candidate_files = [
    file_name for file_name in sorted(os.listdir(image_folder))
    if any(annotation["category_id"] == target_category_id
           for annotation in annotations_by_image.get(image_id_by_file.get(file_name), []))
]

if not candidate_files:
    # Random retries would never terminate in this situation, so report it instead.
    print(f"No image in {image_folder} has an annotation with category id {target_category_id}.")
else:
    # Uniform choice among the matching files.
    image_file = random.choice(candidate_files)
    matching_annotations = annotations_by_image[image_id_by_file[image_file]]

    # The context manager closes the image file once the figure has been drawn.
    with Image.open(os.path.join(image_folder, image_file)) as image:
        fig, ax = plt.subplots(1, figsize=(8,8))
        ax.imshow(image)
        for annotation in matching_annotations:
            # bbox is drawn as (x, y) corner plus width and height.
            bbox = annotation["bbox"]
            rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)

            category_info = categories_dict[annotation["category_id"]]
            ax.text(bbox[0], bbox[1], category_info, color='r')
        ax.set_title(image_file)
        plt.show()