In [None]:
import os
import openai
import json
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from retry import retry
import numpy as np
import random

# System prompt for the instruction-generation request. `process` hands it to
# `call_gpt4o`, which sends it as the "system" role message.
# NOTE(review): adjacent string literals are concatenated with no separator, so
# the first two fragments join as "...GPT-4 architecture.You will be given...".
# NOTE(review): the response format described here names the action keys
# 'tool name' / 'tool params' and says the API name should be "grounding dino"
# or None, while the few-shot assistant examples in this file use
# 'API name' / 'API params' with 'grounding dino + MedSAM' -- confirm which
# spelling is intended before changing either side.
system_message = (
        "You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture."
        "You will be given a medical image and one or multiple bounding box coordinates, "
        "These coordinates are in the form of bounding boxes, represented as (x1, y1, x2, y2) with floating numbers ranging from 0 to 1, "
        "as they are the coordinates of upper left corner and lower right corner of the bounding box, normalized by the size of original image.\n"
        "To accomplish this, you will use a detection model named grounding dino that can detect organ or any disease region in a medical image.\n"
        "You will first question yourself from a THIRD perspective as a USER about what do you need to do to address this task.\n"
        "And for this questioning, write an answer that you think would be appropriate. Your response's format should be:\n"
        "'Question: your question from a third perspective and the question should not contain things like 'using the provided coordinates' because the detection process will only condition on prompts \nAnswer: <thoughts> your thoughts about the question and the answer, and your intended approach including the tool you choose \n"
        "<actions> [{'tool name': API name, 'tool params': some parameters}]\n <values> the final results'. \n"
        "When questioning, you should mimic the user be questioning you how to achieve the goal.\n"
        "The API name in this case should be \"grounding dino\" or None. And you need to explicitly include the brackets like <actions> and strictly follow the template format.\n"
        "You will use these bounding boxes to tell the user the result of your detection.\n"
        "Ask diverse questions and give corresponding answers. Only include questions that have definite answers. Do not ask any questions that cannot be answered confidently.\n"
        "Feel free to have more variance in expression when asking the question, elaborating your thoughts, and stating the values, so long as they make sense and meet the previous requirements."
    )

# First few-shot "user" turn, keyed by case category. Each value is an image
# caption followed by "label: [x1, y1, x2, y2]" lines.
# `process` picks the "abdomen", "lung" or "tumor" entry (its "cell" branch also
# uses "abdomen"); the "disease" entry is not selected by `process` as written.
user_message_1_list = {
    "abdomen":(
    "An abdominal 2D CT scan showing the kidney and liver.\n\n"
    "kidney: [0.25, 0.55, 0.41, 0.68],\n"
    "kidney: [0.58, 0.54, 0.77, 0.68],\n"
    "liver: [0.48, 0.25, 0.55, 0.29]\n"
    ),
    "lung":"a 2D x-ray chest scan showing lungs\nlung: [0.1, 0.05, 0.47, 0.78]\nlung: [0.55, 0.05, 0.87, 0.8]\n",
    "tumor":"an mri brain image with tumor\ntumor: [0.35, 0.35, 0.52, 0.61]\n",
    "disease": "a 2D x-ray chest scan showing disease\ncardiomegaly: [0.39, 0.46, 0.79, 0.6]\n"
}
# Second few-shot "user" turn (same for every request). The caption mentions a
# liver but no liver box is listed; the paired assistant template answers that
# no liver was detected.
user_message_2 = (
        "An abdominal 2D CT scan showing spleen, kidney, stomach and liver.\n\n"
        "spleen: [0.61, 0.3, 0.74, 0.5]\n" 
        "kidney: [0.27, 0.54, 0.36, 0.63]\n" 
        "stomach: [0.42, 0.33, 0.62, 0.46]\n"
)
# Third few-shot "user" turn (same for every request): a single "cells:" line
# carrying twelve boxes, matching the count stated in the paired assistant
# template.
user_message_3 = (
        "A histology image containing many cells\n\n" 
        "cells:[0.56,0.57,0.75,0.78],[0.48,0.33,0.64,0.58],[0.32,0.0,0.45,0.12],[0.85,0.61,0.98,0.81],[0.31,0.31,0.49,0.53],[0.63,0.33,0.79,0.57],[0.3,0.11,0.41,0.26],[0.42,0.86,0.52,1.0],[0.0,0.19,0.06,0.39],[0.76,0.93,0.84,1.0],[0.85,0.38,0.94,0.5],[-0.0,0.0,0.05,0.1]"
    )

# Pools of example questions. `process` draws one entry with `random.choice`
# and substitutes it for the "<q>" placeholder of the matching assistant
# template, so the few-shot examples vary between requests.

# Questions for the abdominal (kidney + liver) first example.
# NOTE(review): the first entry contains a doubled "and and"; left untouched
# because these strings are sent to the model verbatim.
assist_abdomen_qlist = [
    "Can you locate the kidney and liver in this image and and segment them?",
    "Could you identify the positions of the kidney and liver and segment them in this image?",
    "Could you show where the kidney and liver are located in this image and segment them?",
    "Can you highlight the kidney and liver in this image and segment them?",
    "Identify the kidney and segment the liver in this image",
    "Detect the kidney and segment the liver in this image"
]

# Questions for the brain-tumor first example.
# NOTE(review): the first entry contains a doubled "and and".
assist_tumor_qlist = [
    "Can you locate the tumor in this brain image and and segment it?",
    "Could you identify the position of brain tumor and segment it in this image?",
    "Could you show where the tumor is located and segment it in this image?",
    "Can you highlight the brain tumor and segment it in this image?",
    "Identify the tumor and segment it in this brain image"
]

# Questions for the cardiomegaly example.
# NOTE(review): not referenced by `process` in the code visible here.
assist_disease_qlist = [
    "Can you locate the region of cardiomegaly in this image and confirm its presence?",
    "Could you identify the position of cardiomegaly in this chest image?",
    "Could you show where the cardiomegaly is located in this image?",
    "Can you highlight the cardiomegaly region in this image?",
    "Can you identify cardiomegaly in this xray image?"
]

# Questions for the chest x-ray (lungs) first example.
# NOTE(review): the third entry misspells "segmentation" as "segmenattion".
assist_lung_qlist = [
    "Can you locate and segment the lungs in this chest image?",
    "Could you identify the position of lungs and segment them in this xray image?",
    "Could you show where the lungs are located in this image and do a segmenattion?",
    "Can you highlight the lungs in this image and segment them?",
    "Can you identify the lungs in this xray image and segment them?"
]

# Questions paired with `user_message_2` (spleen / kidney / stomach / liver).
assist_2_qlist = [
    "Can you identify the spleen, kidney, stomach and liver and segment them in this abdominal 2D CT scan image?",
    "Is it possible to distinguish the spleen, kidney, stomach, and liver in this abdominal 2D CT scan and segment all of them?",
    "Could you point out the spleen, kidney, stomach, and liver in this 2D CT scan of the abdomen and do a segmentation?",
    "Identify the spleen, kidney, stomach and liver and segment them in this abdominal 2D CT scan image"
]

# Questions paired with `user_message_3` (histology cell counting).
assist_3_qlist = [
    "Can you detect the cells in this histology image, give me the total numbers and segment them? ",
    "Could you detect, count and segment all the cells present in this histology image?",
    "Is it feasible to enumerate the cells visible and segment them in this histology image?",
    "Can this histology image be processed to reveal the total number of cells and segment them?",
    "Could you examine this histology image, provide a count of the cells and segment them?",
]

# Few-shot "assistant" answers for the first example, keyed by case category.
# Each template follows the Question / Answer / <thoughts> / <actions> /
# <values> layout and contains two placeholders:
#   "<q>"         - replaced by `process` with a randomly chosen question;
#   "<imagepath>" - not substituted anywhere in the code visible here, so it is
#                   sent to the model literally.
# `process` selects 'abdomen', 'lung' or 'tumor' (its "cell" branch also uses
# 'abdomen'); the 'disease' template is not selected by `process` as written.
assistant_message_1_templates = {
    'abdomen': (
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts> It would be beneficial to utilize a detection model to identify the organ, kidney and liver and then use a medical segmentation tool to give a result, which will provide more accurate results.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['kidney', 'kidney', 'liver']}}]\n\n"
        "<values>"
        "Based on the bounding box coordinates detected, the image contains two kidneys and liver. Here’s a summary of their positions:\n\n"
        "1. The first kidney is located towards the left of the image, with bounding box coordinates of [  0.25,    0.55,  0.41,    0.68  ], the second kidney is showed on the right of the image, with bounding box coordinates of  [  0.58,    0.54,    0.77,   0.68   ], they are symmetrical along the midline in the image.\n\n"
        "2. The liver is located in the middle upper part of the image with bounding box coordinates [0.48,   0.25,   0.55,   0.29] and is relatively small, indicating that this image is taken at the beginning or end of the liver.\n\n"
        "Based on the output of grounding dino, the segmentation result is showed:"
    ),
    'tumor': (
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts> I will use grounding dino to detect the tumor and then use MedSAM to segment it.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['tumor']}}]\n\n"
        "<values>"
        "The detected tumor in the MRI brain image is located with bounding box coordinates [0.35, 0.35, 0.52, 0.61]. This indicates that the tumor is situated in the central region of the brain, slightly towards the right and extending downwards. This information is vital for medical evaluation and treatment planning.\n"
        "And I have segmented the tumor as below:"
    ),
    'disease': (
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts>  Using the grounding dino detection and then MedSAM model will help in identifying and segmenting the presence and region of cardiomegaly in the chest X-ray scan.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['cardiomegaly']}}]\n\n"
        "<values>"
        "Based on the grounding dino model output, the presence of cardiomegaly is indicated in the chest X-ray. The region of cardiomegaly is located with bounding box coordinates [0.39, 0.46, 0.79, 0.6]. This suggests an enlargement of the heart, as evidenced by the abnormal size and position within the chest cavity.\n\n"
        "The segmentation result is as below:"
    ),
    'lung': (
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts>  Use the grounding dino to ground the lung and then use MedSAM to segment it could help in a more accurate result.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['lung', 'lung']}}]\n\n"
        "<values>"
        "Based on the bounding box coordinates detected, the image contains two lungs. Here’s a summary of their positions:\n\n1. The first lung is located on the left side of the image, with bounding box coordinates of [0.1, 0.05, 0.47, 0.78].\n\n2. The second lung is positioned on the right side of the image, with bounding box coordinates of [0.55, 0.05, 0.87, 0.8].\n\nThis identification provides a clear understanding of the lungs' presence and their precise locations in the chest x-ray scan.\n\n"
        "And the segmentation result is as below: which may help you in further medical analysis."
    )
}

# Few-shot "assistant" answer paired with `user_message_2`. It reports the
# three listed organs and states that no liver was detected, matching the
# absence of a liver box in that user message.
assistant_message_2_template = (
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts> Employing the grounding dino detection model will precisely identify the spleen, kidney, and liver ,and then I will use the segmentation model to segment the label given the boundingbox. This will help in giving an accurate summary of the organs present in the image.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['spleen', 'kidney', 'stomach']}}]\n\n"
        "<values> "
        "Here’s a summary:\n\n"
        "1. The spleen is positioned towards the right middle of the image, with bounding box coordinates of [ 0.61, 0.3, 0.74, 0.5 ].\n\n"
        "2. There is one kidney located towards the left middle of the image with bounding box coordinates [0.27, 0.54, 0.36, 0.63]. This suggests that the subject might have only one kidney or there may be a disparity between the two kidneys.\n\n"
        "3. The stomach is present in the middle part of the image with bounding box coordinates [0.42, 0.33,  0.62, 0.46 ].\n\n"
        "However, there is no liver detected in this image. This identification is based on the provided coordinates and the detection outcome from the grounding dino model, which may help you do more medical analysis.\n\n"
        "Finally, the segmentation result is as below:"
)

# Few-shot "assistant" answer paired with `user_message_3`; the stated count of
# 12 equals the number of boxes listed in that user message.
assistant_message_3_template =(
        "Question:\n\n"
        "<q>\n\n"
        "Answer:\n\n"
        "<thoughts> Employing a detection model and then segmentation model will precisely identify and segment the cell. This will help in giving an accurate result.\n\n"
        "<actions> [{'API name': 'grounding dino + MedSAM', 'API params': {'image': '<imagepath>', 'prompts': ['cell']}}]\n\n"
        "<values>"
        "According to the output of grounding dino, there are 12 cells in this image.\n\n"
        "I've segment all of them:"
)

@retry(exceptions=openai.error.RateLimitError, tries=3, delay=2, backoff=2)
def call_gpt4o(system_message, user_message_1, assistant_message_1, user_message_2, assistant_message_2, user_message_3, assistant_message_3, user_message_final, temperature=0.95, max_tokens=2000, top_p=0.95, top_k=None):
    """Send a three-shot chat request to ``gpt-4o`` and return the reply text.

    The conversation is: one system message, three (user, assistant) example
    pairs, then the final user message the model should answer.

    Args:
        system_message: Content of the "system" role message.
        user_message_1, assistant_message_1: First few-shot example pair.
        user_message_2, assistant_message_2: Second few-shot example pair.
        user_message_3, assistant_message_3: Third few-shot example pair.
        user_message_final: The user message to be answered.
        temperature, max_tokens, top_p: Forwarded unchanged to the API.
        top_k: Added to the request only when it is not ``None``.

    Returns:
        The ``content`` of the first choice's message in the API response.

    The ``retry`` decorator re-invokes the call on
    ``openai.error.RateLimitError`` (3 tries, 2 s initial delay, doubling);
    any other exception propagates to the caller.
    """
    example_pairs = (
        (user_message_1, assistant_message_1),
        (user_message_2, assistant_message_2),
        (user_message_3, assistant_message_3),
    )

    conversation = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in example_pairs:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": user_message_final})

    payload = dict(
        model="gpt-4o",
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # Only include top_k when the caller explicitly asked for it.
    if top_k is not None:
        payload["top_k"] = top_k

    completion = openai.ChatCompletion.create(**payload)
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# (keyword looked for in the item's prompt, case category). The order mirrors
# the precedence of the original if/elif chain.
_PROMPT_KEYWORDS = (
    ("Kidney.", "abdomen"),
    ("lung", "lung"),
    ("cell", "cell"),
    ("tumor", "tumor"),
)

# Fixed caption line that opens the final user message for these categories.
_FIXED_CAPTIONS = {
    "lung": "a 2D x-ray chest scan showing lungs\n",
    "cell": "a histology image containing many cells\n",
    "tumor": "an mri brain image with tumor\n",
}


def _select_category(prompts):
    """Return the case category whose keyword occurs in ``prompts``, else None."""
    for keyword, category in _PROMPT_KEYWORDS:
        if keyword in prompts:
            return category
    return None


def _describe_detections(category, phrases, boxes):
    """Build the final user message: a caption plus one 'phrase: box' line each."""
    if category == "abdomen":
        # "a, b and c" for several organs; a lone organ must not be rendered
        # as " and organ".
        if len(phrases) > 1:
            listing = ", ".join(phrases[:-1]) + " and " + phrases[-1]
        else:
            listing = phrases[0]
        caption = "a 2D abdominal CT image containing " + listing + "\n"
    else:
        caption = _FIXED_CAPTIONS[category]
    # zip() pairs phrases with boxes and stops at the shorter sequence, so a
    # length mismatch cannot raise IndexError.
    return caption + "".join(
        f"{phrase}: {box}\n" for phrase, box in zip(phrases, boxes)
    )


def _first_example(category):
    """Return the (user, assistant) pair used as the first few-shot example."""
    # "cell" has no dedicated first example and reuses the abdominal one.
    example_key = "abdomen" if category == "cell" else category
    question_pools = {
        "abdomen": assist_abdomen_qlist,
        "lung": assist_lung_qlist,
        "tumor": assist_tumor_qlist,
    }
    question = random.choice(question_pools[example_key])
    assistant_turn = assistant_message_1_templates[example_key].replace("<q>", question)
    return user_message_1_list[example_key], assistant_turn


def process(item, case_counter, Stop_cnt):
    """Generate one instruction sample for a detection item via ``call_gpt4o``.

    Args:
        item: Dict with at least ``prompt``, ``phrases`` and ``boxes``; on
            success ``image_path``, ``logits`` and ``category`` are read too.
        case_counter: Dict mapping case category to the number of samples
            produced so far. It is incremented in place on success. When this
            function runs on several threads the dict is shared without
            locking, so ``Stop_cnt`` acts as a soft limit.
        Stop_cnt: Per-category cap; items of a category that has already
            reached it are skipped.

    Returns:
        A dict describing the generated sample, or ``None`` when the item is
        skipped (no boxes, no phrases, unknown category, cap reached) or the
        request failed.
    """
    prompts = item['prompt']
    phrases = item['phrases']
    boxes = item['boxes']

    # Nothing to describe without detections or without their labels.
    if not boxes or not phrases:
        return None

    cat = _select_category(prompts)
    if cat is None or case_counter[cat] >= Stop_cnt:
        return None

    user_message_final = _describe_detections(cat, phrases, boxes)
    user_message_1, assistant_message_1 = _first_example(cat)
    assistant_message_2 = assistant_message_2_template.replace("<q>", random.choice(assist_2_qlist))
    assistant_message_3 = assistant_message_3_template.replace("<q>", random.choice(assist_3_qlist))

    try:
        thinkings = call_gpt4o(system_message, user_message_1, assistant_message_1, user_message_2, assistant_message_2, user_message_3, assistant_message_3, user_message_final)
        case_counter[cat] += 1
        if sum(case_counter.values()) % 100 == 0:
            print(case_counter)
        return {
            "data": item["image_path"],
            "prompt": item["prompt"],
            "boxes": item["boxes"],
            "logits": item["logits"],
            "phrases": item["phrases"],
            "category": item['category'],
            "user_message": user_message_final,
            "response": thinkings,
        }
    except Exception as exc:
        # Best effort: a failed item is skipped, but the reason is reported
        # instead of being silently discarded. KeyboardInterrupt/SystemExit
        # are deliberately not caught.
        print(f"process: skipping {item.get('image_path')!r}: {exc!r}")
        return None

def gen_intruct(api_key, input_json='./dino_final_example.json',
                output_json='./dino_instruct_0529_1.json',
                backup_json='./dino_instruct_0529_1_backup.json',
                stop_cnt=750, max_workers=24):
    """Generate instruction samples for every unprocessed detection item.

    Loads the detection items from ``input_json``, resumes from any results
    already stored in ``output_json``, runs ``process`` on the remaining items
    in a thread pool, and rewrites ``output_json`` and ``backup_json`` after
    every new result so progress survives an interruption.

    Args:
        api_key: OpenAI API key, assigned to ``openai.api_key``.
        input_json: Path of the JSON list of detection items.
        output_json: Path of the results file (read for resuming, then
            rewritten).
        backup_json: Path of a second copy of the results.
        stop_cnt: Per-category cap forwarded to ``process``.
        max_workers: Size of the thread pool.

    Returns:
        None. Results are written to disk.
    """
    openai.api_key = api_key
    with open(input_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    results = []
    if os.path.exists(output_json):
        # The file is written as UTF-8 with ensure_ascii=False, so it must be
        # read back as UTF-8 regardless of the platform default encoding.
        with open(output_json, 'r', encoding='utf-8') as f:
            results = json.load(f)

    # Dataset category stored in each result -> case category counted for the
    # per-category cap.
    category_to_case = {
        "mri": "tumor",
        "word": "abdomen",
        "flare": "abdomen",
        "xray": "lung",
        "cell": "cell",
        "x_ray_disease": "disease",
    }
    case_counter = {"abdomen": 0, "lung": 0, "cell": 0, "tumor": 0, "disease": 0}
    already = set()  # image paths that already have a result
    for r in results:
        already.add(r["data"])
        case = category_to_case.get(r["category"])
        if case is not None:
            case_counter[case] += 1

    data_to_process = [d for d in data if d["image_path"] not in already]
    print(len(data), len(data_to_process))
    print(case_counter)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process, d, case_counter, stop_cnt) for d in data_to_process]
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processed", unit="iter"):
            try:
                result = future.result()
            except Exception as exc:
                # One malformed item must not abort the whole run.
                print(f"gen_intruct: worker failed: {exc!r}")
                continue
            if not result:
                continue
            results.append(result)
            # Checkpoint after every result; the backup copy guards against a
            # crash while the main file is being rewritten.
            for path in (output_json, backup_json):
                with open(path, 'w', encoding='utf-8') as f:
                    json.dump(results, f, ensure_ascii=False, indent=4)

In [None]:
# Set the OPENAI_API_KEY environment variable (preferred, keeps the key out of
# the notebook), or replace the "..." placeholder with your own API key.
api_key = os.environ.get("OPENAI_API_KEY", "...")
gen_intruct(api_key)