In [None]:
# !git clone https://github.com/EXL-Health-AI-Research/MEDIQA-OE-2025.git

!pip install loguru



Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: loguru
Successfully installed loguru-0.7.3


In [None]:
import os
import json
from pprint import pprint
from IPython.display import Markdown
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from typing import Optional, Union, Dict, Iterator, List, Literal
from abc import ABC, abstractmethod
import requests
from loguru import logger
from openai import OpenAI
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

In [None]:
# class MedicalOrderDataLoader:
#     def __init__(self, trs_json_path: str):
#         if not os.path.exists(trs_json_path):
#             raise FileNotFoundError(f"Dataset file not found: {trs_json_path}")

#         self._validate_json_structure(trs_json_path)

#         # Load the dataset from the local JSON file
#         dataset_dict = load_dataset("json", data_files=trs_json_path)

#         # Access the splits from the loaded dataset dictionary
#         self.ds = dataset_dict["train"]
#         self.ds_val = dataset_dict["dev"]


#     def _validate_json_structure(self, trs_json_path: str) -> None:
#         with open(trs_json_path, "r") as f:
#             data = json.load(f)

#         if not isinstance(data, dict):
#             raise ValueError("JSON root should be a dictionary")

#         required_fields = ['train', 'dev']
#         for field in required_fields:
#             if field not in data:
#                 raise ValueError(f"Missing required field: {field}")

#         for split_name, split_data in data.items():
#             if not isinstance(split_data, list):
#                 raise ValueError(f"Split '{split_name}' should be a list, got {type(split_data)}")

#     def get_pandas(self) -> Union[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
#         if isinstance(self.ds, Dataset) and isinstance(self.ds_val, Dataset):
#             return self.ds.to_pandas(), self.ds_val.to_pandas() # type: ignore

In [None]:
class MedicalOrderDataLoader:
    def __init__(self, trs_json_path: str):
        if not os.path.exists(trs_json_path):
            raise FileNotFoundError(f"Dataset file not found: {trs_json_path}")

        with open(trs_json_path, "r") as f:
            data = json.load(f)

        if not isinstance(data, dict):
            raise ValueError("JSON root should be a dictionary")

        required_fields = ['train', 'dev']
        # required_fields = ['test']
        for field in required_fields:
            if field not in data:
                raise ValueError(f"Missing required field: {field}")
            if not isinstance(data[field], list):
                raise ValueError(f"Split '{field}' should be a list, got {type(data[field])}")

        # Create Dataset objects from the loaded lists
        self.ds = Dataset.from_list(data["train"])
        self.ds_val = Dataset.from_list(data["dev"])
        # self.ds = Dataset.from_list(data["test"])

    def get_pandas(self) -> Union[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
        if isinstance(self.ds, Dataset) and isinstance(self.ds_val, Dataset):
            return self.ds.to_pandas(), self.ds_val.to_pandas() # type: ignore
        return None, None # Return None if datasets are not loaded

In [None]:
class BaseOrderExtractionLM(ABC):
    @abstractmethod
    def infer(self, messages: List, max_new_tokens: int = 2048) -> str | None:
        pass

    def get_device_info(self) -> str:
        return "N/A"

    def token_count(self, messages: List) -> int:
        raise NotImplementedError("Token counting not supported for this backend.")

    def infer_stream(self, messages: List, max_new_tokens: int = 2048) -> Iterator[str]:
        raise NotImplementedError("Streaming not supported for this backend.")

In [None]:
class LocalOrderExtractorLM(BaseOrderExtractionLM):
    def __init__(self, model_name_or_path: str, device_map=None, load_processor=False):
        dtype = (
            torch.bfloat16
            if torch.cuda.get_device_capability()[0] >= 8
            else torch.float16
        )
        logger.info(f"Loading local model {model_name_or_path} with dtype {dtype}")

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path, torch_dtype=dtype, device_map=device_map or "auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

        self.processor = None
        if load_processor:
            try:
                self.processor = AutoProcessor.from_pretrained(model_name_or_path)
            except Exception as e:
                logger.warning(f"Processor loading failed: {e}")

    def infer(self, messages: List, max_new_tokens: int = 2048) -> str:
        inputs = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)

        input_len = inputs["input_ids"].shape[-1]

        with torch.inference_mode():
            output = self.model.generate(
                **inputs,
                do_sample=False,
                temperature=0.1,
                max_new_tokens=max_new_tokens,
            )
            generated = output[0][input_len:]

        return self.tokenizer.decode(generated, skip_special_tokens=True)

    def get_device_info(self):
        return str(self.model.device)

    def token_count(self, messages: List) -> int:
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
        )["input_ids"]
        return input_ids.shape[-1]


class HostedOrderExtractionLM(BaseOrderExtractionLM):
    def __init__(self, model_name: str, api_base: str, api_key: str):
        self.model_name = model_name
        self.client = OpenAI(base_url=api_base, api_key=api_key)

    def infer(self, messages: List, max_new_tokens: int = 2048) -> str | None:
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            temperature=0.1,
            max_tokens=max_new_tokens,
        )
        return response.choices[0].message.content

    def infer_stream(self, messages: List, max_new_tokens: int = 2048) -> Iterator[str]:
        stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            temperature=0.1,
            max_tokens=max_new_tokens,
            stream=True,
        )
        for chunk in stream:
            if chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    def get_device_info(self):
        return f"Remote: {self.client.base_url}"

    def token_count(self, messages: List) -> int:
        return self._tokenize(messages)

    def _tokenize(self, prompt: List) -> int:
        # Assuming vLLM
        url = f"{str(self.client.base_url).replace('/v1', '')}/tokenize"
        headers = {"Authorization": f"Bearer {self.client.api_key}"}
        data = {"model": self.model_name, "prompt": prompt}
        resp = requests.post(url, json=data, headers=headers)
        if resp.status_code != 200:
            raise RuntimeError(f"Tokenization failed: {resp.text}")

        return resp.json()["count"]


class OrderExtractionLM:
    def __init__(
        self,
        backend: Literal["local", "openai"],
        model_name_or_path: str,
        **kwargs,
    ):
        if backend == "local":
            self.impl = LocalOrderExtractorLM(model_name_or_path, **kwargs)
        elif backend == "openai":
            self.impl = HostedOrderExtractionLM(model_name_or_path, **kwargs)
        else:
            raise ValueError(f"Unsupported backend: {backend}")

    def infer(self, messages: List, max_new_tokens: int = 2048):
        return self.impl.infer(messages, max_new_tokens)

    def infer_stream(self, messages: List, max_new_tokens: int = 2048) -> Iterator[str]:
        return self.impl.infer_stream(messages, max_new_tokens)

    def get_device_info(self):
        return self.impl.get_device_info()

    def token_count(self, messages: List):
        return self.impl.token_count(messages)


In [None]:
data_loader = MedicalOrderDataLoader(trs_json_path="/content/orders_data_transcript.json")


ds,ds_val = data_loader.ds, data_loader.ds_val
# ds = data_loader.ds

In [None]:
ds_val

Dataset({
    features: ['id', 'expected_orders', 'transcript'],
    num_rows: 100
})

In [None]:
# OPENAI_API_BASE = "https://tasks-vessels-advantages-broadway.trycloudflare.com/v1" # Note the /v1 at the end
# OPENAI_API_KEY = "sk-IrR7Bwxtin0haWagUnPrSri5PurnUz86" # Make sure to replace with the right one
# OPENAI_API_BASE="https://ib69u5a95i9zvw-8000.proxy.runpod.net/v1"
# OPENAI_API_KEY="sk-c395fbc853ccdf090320d2a16b88e092"
# OPENAI_API_BASE = 'https://lb9bckyr171rj7-8000.proxy.runpod.net/v1'
OPENAI_API_BASE = 'https://doug-endorsed-priced-comparing.trycloudflare.com/v1'
OPENAI_API_KEY = 'sk-IrR7Bwxtin0haWagUnPrSri5PurnUz86'


In [None]:
lm = OrderExtractionLM(
    backend="openai",
    model_name_or_path="",
    api_base=OPENAI_API_BASE,
    api_key=OPENAI_API_KEY
)

In [None]:
print([model.id for model in lm.impl.client.models.list().data])

['google/medgemma-4b-it']


In [None]:
test_msg = [
    {
        "role": "system",
        "content": "You are a medical AI assistant how answers in one sentence.",
    },
    {
        "role": "user",
        "content": "Hi, what kind of assistant are you?",
    },
]

out = lm.infer(
    messages=test_msg
)

Markdown(out)

I am a medical AI assistant designed to provide information and support related to health and medicine.


# sample

In [None]:
# # Robust Medical Order Classification Prompt

#                         You are a medical expert classifying orders into types based on clinical intent and medical practice patterns.

#                         ## 📋 CLASSIFICATION CATEGORIES

#                         ### 1. *medication* - Drug-related orders
#                         - Prescriptions, dosages, pharmacy instructions
#                         - Starting, stopping, or changing medications
#                         - Injections, inhalers, topical treatments
#                         - Medication administration instructions

#                         ### 2. *lab* - Laboratory and diagnostic testing
#                         - Blood tests, urinalysis, cultures
#                         - Screening tests, panels, titers
#                         - Diagnostic examinations requiring sample collection
#                         - Point-of-care testing

#                         ### 3. *imaging* - Radiological and imaging studies
#                         - X-rays, CT scans, MRI, ultrasounds
#                         - Echocardiograms, mammograms
#                         - Any visual diagnostic study requiring imaging equipment
#                         - Radiological procedures

#                         ### 4. *followup* - Appointments and care continuity
#                         - Return visits, follow-up appointments
#                         - Scheduling future care
#                         - Care coordination instructions

#                         ## 🎯 CLASSIFICATION RULES

#                         - *Use clinical intent*, not just keywords
#                         - *Each order gets exactly one type*
#                         - *Maintain original description and reason text*
#                         - *Consider medical context and purpose*

#                         ## 📝 EXTENSIVE CLASSIFICATION EXAMPLES

#                         ### ✅ *MEDICATION* Examples:

#                         "lasix 40 milligrams a day"
#                         "stop advil"
#                         "prednisone forty milligrams one tablet a day five days"
#                         "increase the lisinopril 40 milligrams once a day"
#                         "albuterol inhaler two puffs every four hours as needed"
#                         "nitroglycerin pill underneath your tongue up to three every five minutes"
#                         "daily baby aspirin eighty one milligrams once a day"
#                         "antihistamine"
#                         "steroid creams"
#                         "insulin"
#                         "tramadol 50 milligrams every six hours as needed"
#                         "metformin 1000 mg twice a day"
#                         "ibuprofen 600 milligrams four times a day with food"
#                         "cortisone injection"
#                         "hold off norvasc"
#                         "antibiotics"
#                         "emollients three four times a day for the next couple of week"


#                         ### ✅ *LAB* Examples:

#                         "blood tests"
#                         "a1c"
#                         "lipid panel"
#                         "urinalysis urine"
#                         "urine culture"
#                         "complete metabolic panel"
#                         "psa blood"
#                         "blood cholesterol panel"
#                         "thyroid panel"
#                         "autoimmune panel"
#                         "pregnancy test"
#                         "covid test"
#                         "blood liver enzymes"
#                         "urine sample"
#                         "swab rapid strep test"
#                         "blood white blood cells two to three weeks"
#                         "western blot pcr"
#                         "cbc"
#                         "glucose test"
#                         "culture"


#                         ### ✅ *IMAGING* Examples:

#                         "chest x-ray
#                         "mri"
#                         "heart echocardiogram"
#                         "ultrasound"
#                         "mammogram"
#                         "cat scan of your chest chest"
#                         "lumbar spine mri"
#                         "echo in about two months"
#                         "pet ct ct"
#                         "imaging"
#                         "annual basis ultrasound"
#                         "echocardiogram"


#                         ### ✅ *FOLLOWUP* Examples:

#                         "come back couple days"
#                         "follow-up in a week's time"
#                         "next two weeks follow up"
#                         "arrange a review in a week"
#                         "one month follow-up"
#                         "make an appointment"
#                         "book another appointment, to see you"
#                         "follow-up twenty four hours post procedur"
#                         "schedule an appointment four weeks"
#                         "after you get the blood tests after that's all done come in to see me"
#                         "followup six to nine months"
#                         "follow-up"
#                         "following up"


#                         ## 🔍 CLASSIFICATION DECISION TREE

#                         *Step 1: Identify Primary Action*
#                         - Is it prescribing/administering a substance? → *medication*
#                         - Is it collecting/testing a sample? → *lab*
#                         - Is it creating visual/diagnostic images? → *imaging*
#                         - Is it scheduling future care? → *followup*

#                         *Step 2: Consider Clinical Context*
#                         - What is the healthcare provider's main intent?
#                         - What medical process is being initiated?
#                         - What outcome is expected?

#                         *Step 3: Apply Medical Logic*
#                         - Would this typically be handled by pharmacy? → *medication*
#                         - Would this require lab technician? → *lab*
#                         - Would this need imaging equipment? → *imaging*
#                         - Would this involve appointment scheduling? → *followup*

#                         ## ⚡ EDGE CASE GUIDELINES

#                         *Complex Orders:*
#                         - "mammogram in april of 2022 before you come back to see me" → *imaging* (primary action is imaging)
#                         - "blood tests after that's all done come in to see me" → *lab* (primary action is testing)
#                         - "after you get the blood tests after that's all done come in to see me or doctor ruth" → *followup* (primary action is appointment)

#                         *Medication vs Lab:*
#                         - "insulin" → *medication* (treatment)
#                         - "glucose test" → *lab* (testing)

#                         *Imaging vs Lab:*
#                         - "heart echocardiogram" → *imaging* (visual study)
#                         - "blood heart enzymes" → *lab* (blood test)

#                         ## 📤 OUTPUT FORMAT

#                         Return a single valid JSON array:

#                         json
#                         [
#                         {
#                             "order_type": "medication",
#                             "description": "original description text",
#                             "reason": "original reason text",
#                             "provenance": [turn_ids]
#                         },
#                         {
#                             "order_type": "lab",
#                             "description": "original description text",
#                             "reason": "original reason text",
#                             "provenance": [turn_ids]
#                         }
#                         ]


#                         ## 🎯 FINAL CLASSIFICATION CHECKLIST

#                         Before finalizing classification:
#                         - [ ] Does the order_type match the primary clinical action?
#                         - [ ] Would a healthcare professional agree with this classification?
#                         - [ ] Is the classification based on intent, not just keywords?
#                         - [ ] Are all original text fields preserved exactly?
#                         - [ ] Is the JSON format valid and complete?

#                         ## 📌 CRITICAL REMINDERS

#                         - *One type per order* - No exceptions
#                         - *Preserve original text* - Never modify descriptions or reasons
#                         - *Clinical logic first* - Keywords are secondary
#                         - *Valid JSON only* - Return properly formatted array
#                         - *Context matters* - Consider the medical workflow implications

#                         """

# prompt_2

In [None]:
# SYSTEM_PROMPT = """You are a medical AI assistant specialized in extracting medical orders from doctor-patient conversations.

# Your task is to identify and extract all medical orders explicitly mentioned by the doctor, including:
# 1. Medications (prescriptions, dosage changes)
# 2. Laboratory tests
# 3. Imaging studies
# 4. Follow-up

# For each order, extract the following:
# - order_type: "medication", "lab", "imaging", "followup"
# - description: Clear and specific, ideally under 20 words. If the description is long, split into multiple relevant orders.
# - reason: Medical condition or symptom being addressed. Keep this under 20 words as well.
# - provenance: Turn numbers where this order is mentioned

# Always split overly long orders into multiple records for clarity and precision.

# Return the results as a JSON list of objects."""


In [None]:
# INSTRUCTION_TEMPLATE = """Please extract all medical orders from the following doctor-patient conversation:

# CONVERSATION:
# {conversation}

# Extract all medical orders and return them as a JSON list with the following format:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup|referral",
#     "description": "specific description of the order (max 20 words)",
#     "reason": "short medical condition or reason for the order (max 20 words)",
#     "provenance": [list of turn numbers where this order appears]
#   }}
# ]

# Split long or compound orders into multiple clear entries. Be precise with medical terminology and avoid redundancy.
# """


In [None]:
# SYSTEM_PROMPT = """You are a medical AI assistant specializing in extracting **explicit medical orders** from structured doctor-patient conversations.

# You will **reason step by step**, using the ReAct method:
# - First, read the conversation and identify any possible medical orders.
# - For each potential order, think aloud to explain:
#   - what the order is,
#   - what condition or symptom it is addressing,
#   - where in the transcript it was mentioned.
# - Then, act by returning the final output in strict JSON format, listing only clearly stated orders.

# A valid medical order must:
# - Come from the DOCTOR.
# - Be clearly and explicitly stated.
# - Fall into one of these types:
#   - "medication"
#   - "lab"
#   - "imaging"
#   - "followup"

# Each order should include:
# - "order_type": the category of order
# - "description": clear and specific text of the order
# - "reason": the medical reason or condition
# - "provenance": list of turn numbers where the order was mentioned
# """

# EXAMPLE_CONVERSATION = """
# Turn 126 - DOCTOR: so, for your first problem of your shortness of breath i think that you are in an acute heart failure exacerbation.
# Turn 127 - DOCTOR: i want to go ahead and, uh, put you on some lasix, 40 milligrams a day.
# Turn 138 - DOCTOR: for your second problem of your type i diabetes, um, let's go ahead... i wanna order a hemoglobin a1c for, um, uh, just in a, like a month or so.
# """

# EXAMPLE_THINK_ACT = """
# THOUGHT:
# - Turn 126 mentions "shortness of breath" and "acute heart failure".
# - Turn 127 includes an explicit medication order: "lasix, 40 milligrams a day".
# - Therefore, this is a medication order for heart failure.

# - Turn 138 mentions "type i diabetes" and ordering a "hemoglobin a1c".
# - This is a clear lab test order to manage diabetes.

# ACTION:
# [
#   {
#     "order_type": "medication",
#     "description": "lasix 40 milligrams a day",
#     "reason": "shortness of breath acute heart failure exacerbation",
#     "provenance": [126, 127]
#   },
#   {
#     "order_type": "lab",
#     "description": "hemoglobin a1c",
#     "reason": "type i diabetes",
#     "provenance": [138]
#   }
# ]
# """

# def build_prompt(conversation):
#     return f"""
# EXAMPLE CONVERSATION:
# {EXAMPLE_CONVERSATION}

# EXAMPLE THINKING AND EXTRACTION:
# {EXAMPLE_THINK_ACT}

# NOW APPLY THE SAME THINKING AND EXTRACTION TO THIS CONVERSATION:

# {conversation}

# Start with your THOUGHT, then give the ACTION as the final JSON output.
# """

# messages = [
#     {
#         "role": "system",
#         "content": SYSTEM_PROMPT,
#     },
#     {
#         "role": "user",
#         "content": build_prompt(conv),
#     }
# ]


# prompt 3

In [None]:
# 🔬 LAB EXAMPLES:
# - description: "hemoglobin A1C", reason: "type i diabetes"
# - description: "urinalysis", reason: "difficulty urinating"
# - description: "lipid panel", reason: "coronary artery disease"
# - description: "cbc with differential", reason: "hyperlipidemia"
# - description: "urine culture", reason: "possible urinary tract infection"
# - description: "blood PSA", reason: "prostate cancer screening"
# - description: "autoimmune panel", reason: "lupus flare concern"

# 💊 MEDICATION EXAMPLES:
# - description: "lasix 40 milligrams daily", reason: "shortness of breath from heart failure"
# - description: "metformin 500mg twice daily", reason: "type 2 diabetes"
# - description: "atorvastatin 20mg", reason: "high cholesterol"

# 🩻 IMAGING EXAMPLES:
# - description: "chest x-ray", reason: "evaluate fluid in lungs"
# - description: "CT abdomen", reason: "abdominal pain"
# - description: "MRI lumbar spine", reason: "back pain"

# 📅 FOLLOW-UP EXAMPLES:
# - description: "follow-up visit in 3 months", reason: "monitor blood pressure and cholesterol"
# - description: "cardiology referral", reason: "heart failure"
# - description: "endocrinology follow-up", reason: "diabetes management"

In [None]:
# SYSTEM_PROMPT = """
# You are a clinical AI assistant extracting structured medical orders from doctor-patient conversations.

# Your task is to extract all *explicit* medical orders suggested or prescribed by the doctor only. These include:
# 1. Medications
# 2. Laboratory tests
# 3. Imaging studies
# 4. Follow-up plans (including referrals)

# For each order, extract:
# - order_type: One of "medication", "lab", "imaging", or "followup"
# - description: Clear and concise medical order (max 20 words). Normalize terms (e.g., "a1c" → "hemoglobin A1C").
# - reason: Clinical reason or medical condition (max 20 words)
# - provenance: List of turn numbers where the order is mentioned

# Always split compound orders into multiple simple entries. Avoid vague entries like "labs" or "testing" unless context provides clarity.

# Use these examples as guidance:

# ## 📝 EXTENSIVE CLASSIFICATION EXAMPLES

#       ### ✅ *MEDICATION* Examples:

#       "lasix 40 milligrams a day"
#       "stop advil"
#       "prednisone forty milligrams one tablet a day five days"
#       "increase the lisinopril 40 milligrams once a day"
#       "albuterol inhaler two puffs every four hours as needed"
#       "nitroglycerin pill underneath your tongue up to three every five minutes"
#       "daily baby aspirin eighty one milligrams once a day"
#       "antihistamine"
#       "steroid creams"
#       "insulin"
#       "tramadol 50 milligrams every six hours as needed"
#       "metformin 1000 mg twice a day"
#       "ibuprofen 600 milligrams four times a day with food"
#       "cortisone injection"
#       "hold off norvasc"
#       "antibiotics"
#       "emollients three four times a day for the next couple of week"


#       ### ✅ *LAB* Examples:

#       "blood tests"
#       "a1c"
#       "lipid panel"
#       "urinalysis urine"
#       "urine culture"
#       "complete metabolic panel"
#       "psa blood"
#       "blood cholesterol panel"
#       "thyroid panel"
#       "autoimmune panel"
#       "pregnancy test"
#       "covid test"
#       "blood liver enzymes"
#       "urine sample"
#       "swab rapid strep test"
#       "blood white blood cells two to three weeks"
#       "western blot pcr"
#       "cbc"
#       "glucose test"
#       "culture"

#       ### ✅ *IMAGING* Examples:

#       "chest x-ray
#       "mri"
#       "heart echocardiogram"
#       "ultrasound"
#       "mammogram"
#       "cat scan of your chest chest"
#       "lumbar spine mri"
#       "echo in about two months"
#       "pet ct ct"
#       "imaging"
#       "annual basis ultrasound"
#       "echocardiogram"


#       ### ✅ *FOLLOWUP* Examples:

#       "come back couple days"
#       "follow-up in a week's time"
#       "next two weeks follow up"
#       "arrange a review in a week"
#       "one month follow-up"
#       "make an appointment"
#       "book another appointment, to see you"
#       "follow-up twenty four hours post procedur"
#       "schedule an appointment four weeks"
#       "after you get the blood tests after that's all done come in to see me"
#       "followup six to nine months"
#       "follow-up"
#       "following up"


# Instructions:
# - includes order which are suggested by prescribed by doctor
# - Ignore vague statements like “we’ll keep checking” unless a clear order is given
# - Normalize spelling variations (e.g., "a1c", "a1 c", "hemoglobin a1c" → "hemoglobin A1C")
# - Avoid repeating identical tests unless the timing or reason is different
# - Return only structured JSON data
# - Don't include past history of patient medication, labs, or imaging
# """


In [None]:
SYSTEM_PROMPT = """
You are a clinical AI assistant that extracts structured medical orders from doctor-patient conversations using ReAct (Reasoning and Acting) methodology.

TASK: Extract ALL explicit medical orders that the doctor suggests, prescribes, or recommends during the conversation.

CATEGORIES & EXAMPLES:
1. MEDICATION: "lasix 40 milligrams daily", "stop advil", "prednisone 40mg once daily 5 days", "increase lisinopril 40mg once daily", "albuterol inhaler 2 puffs q4h prn", "daily baby aspirin 81mg", "hold norvasc", "antibiotics", "insulin", "tramadol 50mg q6h prn"

2. LAB: "hemoglobin A1C", "lipid panel", "complete metabolic panel", "urinalysis", "urine culture", "PSA", "thyroid panel", "pregnancy test", "COVID test", "CBC", "glucose test", "blood culture", "Western blot PCR"

3. IMAGING: "chest x-ray", "MRI", "echocardiogram", "ultrasound", "mammogram", "CT scan chest", "lumbar spine MRI", "PET CT", "annual ultrasound"

4. FOLLOWUP: "follow-up in 1 week", "come back in 2 days", "schedule appointment in 4 weeks", "follow-up in 24 hours post-procedure", "follow-up 6-9 months", "make appointment", "arrange review in 1 week"


EXTRACTION RULES:
- Only extract orders given BY THE DOCTOR (not patient history or past medications)
- Include explicit orders with clear clinical intent
- Split compound orders into separate entries
- Use EXACT words from the transcript - do not rephrase or normalize
- Include orders even if conditional ("if symptoms persist, then...")
- Focus on actionable, specific instructions
- Limit provenance to maximum 5 turn numbers
- Keep description and reason under 20 words each using exact transcript words

AVOID:
- Vague statements without clear orders
- Patient's past medical history
- General discussions without specific instructions
- Repeated identical orders unless timing/reason differs

EXTRACTION RULES FOR DESCRIPTION AND REASON:
1. DESCRIPTIONS: Only clear, clinically actionable medical orders related to:
- Medications (e.g., "start metformin", "continue omeprazole 20 milligrams daily")
- Lab tests (e.g., "order a hemoglobin a1c", "check white blood cell count")
- Imaging studies (e.g., "schedule a chest x-ray", "get an MRI of the brain")
- Follow-ups or Referrals (e.g., "see endocrinologist", "come back in 2 weeks")
2. REASONS: Only clinically meaningful problems, diagnoses, or symptoms that clearly explain why the above order is needed (e.g., "for diabetes", "due to shortness of breath")

ReAct METHODOLOGY:
Use Chain of Thought (COT) reasoning with the following structure:
1. THOUGHT: Analyze the conversation to identify potential medical orders
2. ACTION: Extract specific orders with exact words from transcript
3. OBSERVATION: Verify extraction accuracy and completeness
4. FINAL OUTPUT: Return only the JSON array

OUTPUT FORMAT:
- order_type: "medication", "lab", "imaging", or "followup"
- description: Extract exact words from transcript, max 20 words
- reason: Only clinically meaningful problems, diagnoses, or symptoms that clearly explain why the above order is needed (e.g., "for diabetes", "due to shortness of breath"), max 20 words
- provenance: Turn numbers where order appears, max 5 entries
"""

INSTRUCTION_TEMPLATE = """
Extract all medical orders from this doctor-patient conversation using ReAct methodology:

CONVERSATION: {conversation}

Follow this ReAct process:

THOUGHT: Read through the conversation and identify all turns where the doctor gives explicit medical orders. Consider what type each order is (medication, lab, imaging, followup) and what the clinical reason is based on the conversation context.

ACTION: For each identified order, extract the exact words from the transcript for both description and reason. Ensure each order has:
- Correct order_type classification
- Description using exact transcript words (max 20 words)
- Reason using exact transcript words (max 20 words)
- Provenance list with relevant turn numbers (max 5)

OBSERVATION: Review the extracted orders to ensure:
- All explicit doctor orders are captured
- No patient history or past medications included
- Exact words used from transcript without paraphrasing
- Word limits respected (20 words max for description/reason)
- Provenance limited to 5 turn numbers maximum
- Compound orders are split into separate entries

FINAL OUTPUT: Return ONLY the JSON array with no other text:
[
  {{
    "order_type": "medication|lab|imaging|followup",
    "description": "exact words from transcript max 20 words",
    "reason": "exact words from transcript max 20 words",
    "provenance": [max 5 turn numbers]
  }}
]

CRITICAL: Output ONLY the JSON array after completing your reasoning. No explanations, no additional text, no formatting - just the JSON structure.
"""

def format_messages(conv):
    instruction = INSTRUCTION_TEMPLATE.format(conversation=conv)

    # ReAct example with Chain of Thought
    react_example = f"""
EXAMPLE CONVERSATION:
Turn 42 - DOCTOR: Your blood pressure is quite elevated today at 180/95.
Turn 43 - DOCTOR: I'm going to prescribe amlodipine 5mg once daily to help control this.
Turn 44 - DOCTOR: We should also check your kidney function with a creatinine level.
Turn 45 - DOCTOR: Please come back in 2 weeks so we can recheck your pressure.

EXPECTED ReAct PROCESS:

THOUGHT: I need to analyze this conversation for medical orders. I can see the doctor is addressing high blood pressure. In turn 43, there's a medication order for amlodipine. In turn 44, there's a lab order for creatinine. In turn 45, there's a follow-up appointment order. Each has a clear clinical reason mentioned in the conversation.

ACTION: Extracting orders with exact words:
1. Medication order: "amlodipine 5mg once daily" for "blood pressure is quite elevated" (turns 42, 43)
2. Lab order: "creatinine level" to "check your kidney function" (turn 44)
3. Follow-up order: "come back in 2 weeks" to "recheck your pressure" (turn 45)

OBSERVATION: I have identified 3 explicit medical orders from the doctor. All use exact words from the transcript. Word counts are within limits. Provenance is accurate and under 5 entries each. No patient history included.

FINAL OUTPUT:
[
  {{
    "order_type": "medication",
    "description": "amlodipine 5mg once daily",
    "reason": "blood pressure is quite elevated",
    "provenance": [42, 43]
  }},
  {{
    "order_type": "lab",
    "description": "creatinine level",
    "reason": "check your kidney function",
    "provenance": [44]
  }},
  {{
    "order_type": "followup",
    "description": "come back in 2 weeks",
    "reason": "recheck your pressure",
    "provenance": [45]
  }}
]

NOW EXTRACT FROM THIS CONVERSATION:
---
{instruction}
"""

    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": instruction,
        }
    ]

    return messages

# Validation function for ReAct output
def validate_react_output(response):
    """
    Validates that the ReAct response contains only JSON output
    """
    try:
        # Find JSON array in response
        import json
        import re

        # Look for JSON array pattern
        json_pattern = r'\[.*\]'
        matches = re.search(json_pattern, response, re.DOTALL)

        if matches:
            json_str = matches.group(0)
            parsed = json.loads(json_str)

            # Validate structure
            validated_orders = []
            for order in parsed:
                if all(key in order for key in ['order_type', 'description', 'reason', 'provenance']):
                    # Check constraints
                    if (order['order_type'] in ['medication', 'lab', 'imaging', 'followup'] and
                        len(order['description'].split()) <= 20 and
                        len(order['reason'].split()) <= 20 and
                        len(order['provenance']) <= 5):
                        validated_orders.append(order)

            return validated_orders

        return []
    except:
        return []

In [None]:
# SYSTEM_PROMPT = """
# You are a clinical AI assistant that extracts structured medical orders from doctor-patient conversations.

# TASK: Extract ALL explicit medical orders that the doctor suggests, prescribes, or recommends during the conversation.

# CATEGORIES & EXAMPLES:
# 1. MEDICATION: "lasix 40 milligrams daily", "stop advil", "prednisone 40mg once daily 5 days", "increase lisinopril 40mg once daily", "albuterol inhaler 2 puffs q4h prn", "daily baby aspirin 81mg", "hold norvasc", "antibiotics", "insulin", "tramadol 50mg q6h prn"

# 2. LAB: "hemoglobin A1C", "lipid panel", "complete metabolic panel", "urinalysis", "urine culture", "PSA", "thyroid panel", "pregnancy test", "COVID test", "CBC", "glucose test", "blood culture", "Western blot PCR"

# 3. IMAGING: "chest x-ray", "MRI", "echocardiogram", "ultrasound", "mammogram", "CT scan chest", "lumbar spine MRI", "PET CT", "annual ultrasound"

# 4. FOLLOWUP: "follow-up in 1 week", "come back in 2 days", "schedule appointment in 4 weeks", "follow-up in 24 hours post-procedure", "follow-up 6-9 months", "make appointment", "arrange review in 1 week"

# EXTRACTION RULES:
# - Only extract orders given BY THE DOCTOR (not patient history or past medications)
# - Include explicit orders with clear clinical intent
# - Split compound orders into separate entries
# - Normalize medical terminology and abbreviations
# - Include orders even if conditional ("if symptoms persist, then...")
# - Focus on actionable, specific instructions

# AVOID:
# - Vague statements without clear orders
# - Patient's past medical history
# - General discussions without specific instructions
# - Repeated identical orders unless timing/reason differs

# OUTPUT FORMAT:
# Return ONLY the JSON array structure. No other text, explanations, or formatting.
# - order_type: "medication", "lab", "imaging", or "followup"
# - description: Normalized, concise order (focus on key medical terms)
# - reason: Primary clinical indication or condition
# - provenance: Turn numbers where order appears
# """

# INSTRUCTION_TEMPLATE = """
# Extract all medical orders from this doctor-patient conversation:

# CONVERSATION: {conversation}

# Return ONLY a JSON array with this exact structure:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup",
#     "description": "clear medical order with key terms",
#     "reason": "clinical indication or condition",
#     "provenance": [turn_numbers]
#   }}
# ]

# CRITICAL: Output ONLY the JSON array. No explanations, no additional text, no formatting - just the JSON structure.
# """

# def format_messages(conv):
#     instruction = INSTRUCTION_TEMPLATE.format(conversation=conv)

#     # Enhanced example that shows better provenance tracking and normalization
#     enhanced_example = f"""
# EXAMPLE CONVERSATION:
# Turn 126 - DOCTOR: For your shortness of breath, I think you're having an acute heart failure exacerbation.
# Turn 127 - DOCTOR: I want to start you on Lasix 40 milligrams daily for the fluid retention.
# Turn 138 - DOCTOR: For your diabetes, let's order an A1C.

# EXPECTED OUTPUT:
# [
#   {{
#     "order_type": "medication",
#     "description": "lasix 40 milligrams daily",
#     "reason": "acute heart failure exacerbation fluid retention",
#     "provenance": [126, 127]
#   }},
#   {{
#     "order_type": "lab",
#     "description": "hemoglobin A1C",
#     "reason": "diabetes",
#     "provenance": [138]
#   }}
# ]

# NOW EXTRACT FROM THIS CONVERSATION:
# ---
# {instruction}
# """

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": instruction,
#         }
#     ]

#     return messages

# # Additional helper function for post-processing if needed
# def validate_extracted_orders(orders):
#     """
#     Optional validation function to ensure extracted orders meet quality standards
#     """
#     validated_orders = []

#     for order in orders:
#         # Skip if missing required fields
#         if not all(key in order for key in ['order_type', 'description', 'reason', 'provenance']):
#             continue

#         # Normalize order_type
#         if order['order_type'] not in ['medication', 'lab', 'imaging', 'followup']:
#             continue

#         # Ensure provenance is a list
#         if not isinstance(order['provenance'], list):
#             continue

#         # Basic length validation
#         if len(order['description']) > 100 or len(order['reason']) > 100:
#             continue

#         validated_orders.append(order)

#     return validated_orders

In [None]:
# INSTRUCTION_TEMPLATE = """Please extract all medical orders from the following doctor-patient conversation:

# CONVERSATION:
# {conversation}

# Extract all explicit medical orders and return them as a JSON list using this format:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup",
#     "description": "clear and normalized order name (max 20 words)",
#     "reason": "brief clinical justification (max 20 words)",
#     "provenance": [list of turn numbers where this order appears]
#   }}
# ]

# Strictly follow these Rules:
# - Give the orders which are of suggestion or prescribed by the doctor
# - Don't want any orders which are past history medication, labs, or imaging

# Rules:
# - Split compound instructions into separate orders
# - Normalize medical names and abbreviations
# - Drop vague or unclear instructions
# - Keep both `description` and `reason` concise


# Output only valid structured orders — no extra text.
# """


# React prompt 2

In [None]:
# SYSTEM_PROMPT = """You are a medical AI assistant specialized in extracting medical orders from doctor-patient conversations using a ReAct (Reasoning and Acting) approach.

# You will analyze conversations systematically by:
# 1. REASONING about what you observe in each turn
# 2. ACTING to identify and extract medical orders
# 3. REFLECTING on your findings to ensure completeness

# Medical orders include:
# - Medications (prescriptions, dosage changes)
# - Laboratory tests
# - Imaging studies
# - Follow-up appointments

# Use the following format for your analysis:

# THOUGHT: [Your reasoning about what you're analyzing]
# ACTION: [What you're doing - scanning, identifying, extracting]
# OBSERVATION: [What you found in the current section]
# REFLECTION: [Whether you need to continue or if you found something important]

# Continue this process until you've analyzed the entire conversation, then provide your final JSON output."""

# INSTRUCTION_TEMPLATE = """Please extract all medical orders from the following doctor-patient conversation using the ReAct approach:

# CONVERSATION:
# {conversation}

# Follow this systematic process:

# 1. SCAN: Read through the conversation and identify all doctor turns
# 2. ANALYZE: For each doctor turn, reason about potential medical orders
# 3. EXTRACT: Identify specific orders with their details
# 4. VALIDATE: Ensure all orders are captured with correct provenance

# Use this format for your analysis:

# THOUGHT: [What am I looking for in this turn?]
# ACTION: [Scanning turn X for medical orders]
# OBSERVATION: [What medical orders or relevant information did I find?]
# REFLECTION: [Is this a complete order? Do I need more context?]

# After completing your analysis, provide the final JSON output:

# [
#   {{
#     "order_type": "medication|lab|imaging|followup",
#     "description": "specific description of the order",
#     "reason": "medical condition or reason for the order",
#     "provenance": [list of turn numbers where this order appears]
#   }}
# ]

# Remember to:
# - Focus only on explicit orders given by the doctor
# - Be precise with medical terminology
# - Track turn numbers accurately for provenance
# - Consider context from multiple turns for complete orders"""

# def format_messages(conv):
#     instruction = INSTRUCTION_TEMPLATE.format(conversation=conv)

#     example_section = """
# EXAMPLE ANALYSIS:

# CONVERSATION:
# Turn 126 - DOCTOR: so, for your first problem of your shortness of breath i think that you are in an acute heart failure exacerbation.
# Turn 127 - DOCTOR: i want to go ahead and, uh, put you on some lasix, 40 milligrams a day.
# Turn 138 - DOCTOR: for your second problem of your type i diabetes, um, let's go ahead... i wanna order a hemoglobin a1c for, um, uh, just in a month or so.

# EXPECTED REACT ANALYSIS:

# THOUGHT: I need to systematically analyze each doctor turn to identify medical orders.
# ACTION: Scanning turn 126 for medical orders
# OBSERVATION: Doctor is explaining diagnosis of acute heart failure exacerbation for shortness of breath. This is diagnostic reasoning, not an order yet.
# REFLECTION: This provides context but no direct order. Need to check next turns.

# THOUGHT: Continuing to look for orders related to the heart failure diagnosis.
# ACTION: Scanning turn 127 for medical orders
# OBSERVATION: Doctor explicitly states "i want to go ahead and, uh, put you on some lasix, 40 milligrams a day" - this is a clear medication order.
# REFLECTION: Found complete medication order with dosage. Need to link to reason from turn 126.

# THOUGHT: Moving to next section to look for additional orders.
# ACTION: Scanning turn 138 for medical orders
# OBSERVATION: Doctor says "i wanna order a hemoglobin a1c" - this is a laboratory test order for diabetes management.
# REFLECTION: Found complete lab order. Timeline mentioned is "in a month or so" but this is still an order.

# FINAL EXTRACTION:
# [
#   {{
#     "order_type": "medication",
#     "description": "lasix 40 milligrams a day",
#     "reason": "shortness of breath acute heart failure exacerbation",
#     "provenance": [126, 127]
#   }},
#   {{
#     "order_type": "lab",
#     "description": "hemoglobin a1c",
#     "reason": "type i diabetes",
#     "provenance": [138]
#   }}
# ]

# NOW ANALYZE THIS CONVERSATION:
# ---
# """

#     full_instruction = example_section + instruction

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": full_instruction,
#         }
#     ]

#     return messages

# # Alternative more structured ReAct approach
# def format_messages_structured(conv):
#     """More structured ReAct approach with explicit action types"""

#     structured_instruction = f"""
# Using the ReAct framework, analyze this doctor-patient conversation step by step:

# CONVERSATION:
# {conv}

# ANALYSIS FRAMEWORK:
# Use these specific action types in your reasoning:

# ACTION_TYPES:
# - SCAN_TURN: Examine a specific conversation turn
# - IDENTIFY_ORDER: Recognize potential medical order language
# - EXTRACT_DETAILS: Pull out specific order information
# - LINK_CONTEXT: Connect orders to medical reasons from other turns
# - VALIDATE_ORDER: Confirm this is a complete, explicit order

# REASONING PROCESS:
# For each turn, follow this pattern:

# THOUGHT: [What am I analyzing and why?]
# ACTION: [SCAN_TURN/IDENTIFY_ORDER/EXTRACT_DETAILS/LINK_CONTEXT/VALIDATE_ORDER]
# OBSERVATION: [What did I find?]
# REFLECTION: [What does this mean for my extraction task?]

# Continue until you've processed all turns, then provide your JSON output.

# Focus on explicit doctor orders only. Be thorough but precise.
# """

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": structured_instruction,
#         }
#     ]

#     return messages

# react prompt 2

In [None]:
# SYSTEM_PROMPT = """You are a medical AI assistant specialized in extracting medical orders from doctor-patient conversations.

# Use systematic ReAct reasoning internally but output ONLY the final JSON result.

# Medical orders include:
# - Medications (prescriptions, dosage changes, discontinuations)
# - Laboratory tests
# - Imaging studies
# - Follow-up appointments

# Key Analysis Principles:
# 1. Orders may span multiple turns - link context across turns
# 2. Medical reasons may be mentioned earlier in the conversation
# 3. Timing/scheduling details should be included in descriptions
# 4. Some orders may have null reasons
# 5. Look for explicit ordering language: "order", "get", "do", "check", "come back"

# Output ONLY valid JSON array. No reasoning shown."""

# INSTRUCTION_TEMPLATE = """Extract medical orders from this conversation. Use systematic analysis to:

# 1. IDENTIFY: Scan for ordering language and medical terms
# 2. CONTEXTUALIZE: Link orders to medical reasons from any turn
# 3. EXTRACT: Capture complete order details including timing
# 4. VALIDATE: Ensure provenance includes all relevant turns

# CONVERSATION:
# {conversation}

# Output format:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup|referral",
#     "description": "complete description including timing/details",
#     "reason": "medical condition/reason OR null if none",
#     "provenance": [all relevant turn numbers]
#   }}
# ]

# Return [] if no orders found."""

# def format_messages(conv):
#     instruction = INSTRUCTION_TEMPLATE.format(conversation=conv)

#     examples = """
# EXAMPLE 1 - MULTI-TURN CONTEXT LINKING:

# CONVERSATION:
# Turn 8 - DOCTOR: she was febrile and had significantly elevated white blood cell count of 23,000.
# Turn 83 - DOCTOR: we could do some complete blood work including your white blood cells and a full panel of cholesterol, since it's been a while.
# Turn 86 - DOCTOR: if i put some labs in you could come in in two to three weeks?

# OUTPUT:
# [
#   {
#     "order_type": "lab",
#     "description": "blood white blood cells two to three weeks",
#     "reason": "significantly elevated white blood cell count of 23,000",
#     "provenance": [8, 83, 86]
#   },
#   {
#     "order_type": "lab",
#     "description": "blood panel of cholesterol two to three weeks",
#     "reason": null,
#     "provenance": [83, 86]
#   }
# ]

# EXAMPLE 2 - MEDICATION WITH TIMING:

# CONVERSATION:
# Turn 15 - DOCTOR: Your blood pressure is concerning.
# Turn 16 - DOCTOR: Let's start you on lisinopril 10mg daily.
# Turn 17 - DOCTOR: Take it in the morning with food.

# OUTPUT:
# [
#   {
#     "order_type": "medication",
#     "description": "lisinopril 10mg daily in the morning with food",
#     "reason": "concerning blood pressure",
#     "provenance": [15, 16, 17]
#   }
# ]

# EXAMPLE 3 - IMAGING WITH SCHEDULING:

# CONVERSATION:
# Turn 22 - DOCTOR: Given your chest pain history.
# Turn 23 - DOCTOR: I want to get a chest X-ray done.
# Turn 24 - DOCTOR: Can you do that this week?

# OUTPUT:
# [
#   {
#     "order_type": "imaging",
#     "description": "chest X-ray this week",
#     "reason": "chest pain history",
#     "provenance": [22, 23, 24]
#   }
# ]

# EXAMPLE 4 - FOLLOW-UP APPOINTMENT:

# CONVERSATION:
# Turn 30 - DOCTOR: We need to monitor your diabetes control.
# Turn 31 - DOCTOR: Come back in 3 months.
# Turn 32 - DOCTOR: We'll check your A1C then.

# OUTPUT:
# [
#   {
#     "order_type": "followup",
#     "description": "follow-up appointment in 3 months to check A1C",
#     "reason": "monitor diabetes control",
#     "provenance": [30, 31, 32]
#   }
# ]



# EXAMPLE 5 - MEDICATION CHANGES:

# CONVERSATION:
# Turn 60 - DOCTOR: Your current dose isn't controlling symptoms.
# Turn 61 - DOCTOR: Increase your metformin to 1000mg twice daily.
# Turn 62 - DOCTOR: Stop the glipizide completely.

# OUTPUT:
# [
#   {
#     "order_type": "medication",
#     "description": "increase metformin to 1000mg twice daily",
#     "reason": "current dose isn't controlling symptoms",
#     "provenance": [60, 61]
#   },
#   {
#     "order_type": "medication",
#     "description": "stop glipizide completely",
#     "reason": "current dose isn't controlling symptoms",
#     "provenance": [60, 62]
#   }
# ]

# NOW ANALYZE:
# ---
# """

#     full_instruction = examples + instruction

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": full_instruction,
#         }
#     ]

#     return messages

# # Alternative function for complex cases
# def format_messages(conv):
#     """Enhanced version for complex multi-turn scenarios"""

#     instruction = """
# ADVANCED MEDICAL ORDER EXTRACTION

# Apply these ReAct principles internally:

# CONTEXT ANALYSIS:
# - Scan entire conversation for medical context before extracting orders
# - Medical reasons may appear many turns before the actual order
# - Orders may be confirmed/modified across multiple turns

# PATTERN RECOGNITION:

# MEDICATION ORDERS:
# - 'start', 'begin', 'initiate', 'prescribe', 'put on', 'add'
# - 'increase', 'decrease', 'adjust', 'titrate', 'reduce', 'up the dose'
# - 'switch to', 'change to', 'discontinue', 'stop', 'wean off'
# - 'twice daily', 'BID', 'TID', 'QID', 'PRN', 'as needed'
# - 'mg', 'units', 'tablets', 'capsules', 'ml', 'drops'
# - 'refill', 'continue', 'maintain current dose'

# IMAGING ORDERS:
# - 'get a', 'order', 'schedule', 'need', 'obtain'
# - 'CT', 'MRI', 'X-ray', 'ultrasound', 'PET scan', 'DEXA'
# - 'with contrast', 'without contrast', 'bilateral', 'unilateral'
# - 'chest', 'abdomen', 'pelvis', 'brain', 'spine', 'extremity'
# - 'follow up imaging', 'repeat in', 'serial imaging'

# LABORATORY ORDERS:
# - 'check', 'draw', 'get labs', 'blood work', 'urine', 'stool'
# - 'CBC', 'BMP', 'CMP', 'lipids', 'A1C', 'TSH', 'PSA'
# - 'fasting', 'random', 'trough level', 'peak level'
# - 'culture', 'sensitivity', 'biopsy', 'pathology'
# - 'in the morning', 'pre-op', 'post-op', 'baseline'

# FOLLOW-UP ORDERS:
# - 'see you', 'come back', 'return', 'follow up', 'recheck'
# - 'in X weeks', 'next month', '3 months', 'annually'
# - 'sooner if', 'PRN', 'as needed', 'if symptoms worsen'
# - 'with me', 'with cardiology', 'with specialist'
# - 'bring results', 'after labs', 'post-procedure'

# GENERAL ORDER INDICATORS:
# - 'could do', 'put in', 'order', 'get done', 'arrange'
# - 'let's', 'we should', 'I want', 'going to', 'plan to'
# - 'urgent', 'stat', 'emergent', 'routine', 'elective'
# - Timing phrases: 'today', 'tomorrow', 'this week', 'soon'

# EXTRACTION STRATEGY:
# - Include ALL turns that contribute to the complete order
# - Merge timing details into descriptions
# - Link distant context when medically relevant

# OUTPUT FORMAT REQUIREMENTS:
# - Use ONLY these 4 keys: order_type, description, reason, provenance
# - description: Complete order details including timing, dosage, specific procedures
# - reason: Medical justification (use null if routine/preventive)
# - provenance: Array of turn numbers that support this order
# - NO other fields allowed (no procedure_name, timing, details, etc.)




# Output only JSON array with exactly these 4 keys per order:
# [
#   {{
#     'order_type': 'string',
#     'description': 'complete order description with all details',
#     'reason': 'medical justification or null',
#     'provenance': [turn_numbers where order has been discussed]
#   }}
# ]
# CONVERSATION:
# """

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": instruction+conv,
#         }
#     ]

#     return messages

# model

In [None]:
# SYSTEM_PROMPT = """You are a medical AI assistant specialized in extracting medical orders from doctor-patient conversations.

# Your task is to identify and extract all medical orders mentioned by the doctor, including:
# 1. Medications (prescriptions, dosage changes)
# 2. Laboratory tests
# 3. Imaging studies
# 4. Follow-up appointments

# For each order, extract:
# - order_type: "medication", "lab", "imaging", "followup"
# - description: Clear description of what is being ordered
# - reason: Medical condition or symptom being addressed
# - provenance: Turn numbers where this order is mentioned

# Return the results as a JSON list of objects."""


# INSTRUCTION_TEMPLATE = """Please extract all medical orders from the following doctor-patient conversation:

# CONVERSATION:
# {conversation}

# Extract all medical orders and return them as a JSON list with the following format:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup|referral",
#     "description": "specific description of the order",
#     "reason": "medical condition or reason for the order",
#     "provenance": [list of turn numbers where this order appears]
#   }}
# ]

# Focus on explicit orders given by the doctor. Be precise with medical terminology."""

In [None]:
# SYSTEM_PROMPT = """
# You are a medical AI assistant specialized in extracting EXPLICIT medical orders from doctor-patient conversations.

# CRITICAL RULES:
# 1. Extract ONLY orders explicitly stated by the doctor
# 2. Do NOT infer or assume orders that aren't clearly mentioned
# 3. Provenance must be EXACT turn numbers where orders appear
# 4. Be balanced - i.e precision and recall on level terms
# 5. If the doctor orders multiple DISTINCT items (e.g., 'get a covid test and blood test'), create separate order objects for each item - never merge them into one combined description.

# Order Types:
# - medication: Prescriptions, dosage instructions, medication changes
# - lab: Blood tests, urine tests, specific diagnostic tests
# - imaging: X-rays, MRI, CT scans, ultrasounds
# - followup: Scheduled return visits, check-ups (these must be explicitly stated by the doctor)

# For each order extract:
# - order_type: One of the 4 types above
# - description: EXACT medical terminology used by doctor
# - reason: Specific condition/symptom mentioned by doctor
# - provenance: ONLY turn numbers where this exact order is mentioned"""


# INSTRUCTION_TEMPLATE = """Please extract all medical orders from the following doctor-patient conversation:

# CONVERSATION:
# {conversation}

# Extract all medical orders and return them as a JSON list with the following format:
# [
#   {{
#     "order_type": "medication|lab|imaging|followup|referral",
#     "description": "specific description of the order",
#     "reason": "medical condition or reason for the order",
#     "provenance": [list of turn numbers where this order appears]
#   }}
# ]

# Focus on explicit orders given by the doctor. Be precise with medical terminology."""

In [None]:
# def format_messages(conv):
#     instruction = INSTRUCTION_TEMPLATE.format(
#         conversation=conv,
#     )
#     instruction = f"""EXAMPLE CONVERSATION:
# Turn 126 - DOCTOR: so, for your first problem of your shortness of breath i think that you are in an acute heart failure exacerbation.
# Turn 127 - DOCTOR: i want to go ahead and, uh, put you on some lasix, 40 milligrams a day.
# Turn 138 - DOCTOR: for your second problem of your type i diabetes, um, let's go ahead... i wanna order a hemoglobin a1c for, um, uh, just in a, like a month or so.

# EXPECTED OUTPUT:
# [
#   {{
#     "order_type": "medication",
#     "description": "lasix 40 milligrams a day",
#     "reason": "shortness of breath acute heart failure exacerbation",
#     "provenance": [126, 127]
#   }},
#   {{
#     "order_type": "lab",
#     "description": "hemoglobin a1c",
#     "reason": "type i diabetes",
#     "provenance": [138]
#   }}
# ]

# NOW EXTRACT FROM THIS CONVERSATION:

# ---

# {instruction}
# """

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": instruction,
#         }
#     ]

#     return messages

# React prompt final

In [None]:
SYSTEM_PROMPT = """
You are a clinical AI assistant that extracts structured medical orders from doctor-patient conversations using ReAct (Reasoning and Acting) methodology.

TASK: Extract ALL explicit medical orders that the doctor suggests, prescribes, or recommends during the conversation.

CATEGORIES & EXAMPLES:
1. MEDICATION: "lasix 40 milligrams daily", "stop advil", "prednisone 40mg once daily 5 days", "increase lisinopril 40mg once daily", "albuterol inhaler 2 puffs q4h prn", "daily baby aspirin 81mg", "hold norvasc", "antibiotics", "insulin", "tramadol 50mg q6h prn"

2. LAB: "hemoglobin A1C", "lipid panel", "complete metabolic panel", "urinalysis", "urine culture", "PSA", "thyroid panel", "pregnancy test", "COVID test", "CBC", "glucose test", "blood culture", "Western blot PCR"

3. IMAGING: "chest x-ray", "MRI", "echocardiogram", "ultrasound", "mammogram", "CT scan chest", "lumbar spine MRI", "PET CT", "annual ultrasound"

4. FOLLOWUP: "follow-up in 1 week", "come back in 2 days", "schedule appointment in 4 weeks", "follow-up in 24 hours post-procedure", "follow-up 6-9 months", "make appointment", "arrange review in 1 week"


EXTRACTION RULES:
- Only extract orders given BY THE DOCTOR (not patient history or past medications)
- Include explicit orders with clear clinical intent
- Split compound orders into separate entries
- Use EXACT words from the transcript - do not rephrase or normalize
- Include orders even if conditional ("if symptoms persist, then...")
- Focus on actionable, specific instructions
- Limit provenance to maximum 5 turn numbers
- Keep description and reason under 20 words each using exact transcript words

AVOID:
- Vague statements without clear orders
- Patient's past medical history
- General discussions without specific instructions
- Repeated identical orders unless timing/reason differs

EXTRACTION RULES FOR DESCRIPTION AND REASON:
1. DESCRIPTIONS: Only clear, clinically actionable medical orders related to:
- Medications (e.g., "start metformin", "continue omeprazole 20 milligrams daily")
- Lab tests (e.g., "order a hemoglobin a1c", "check white blood cell count")
- Imaging studies (e.g., "schedule a chest x-ray", "get an MRI of the brain")
- Follow-ups or Referrals (e.g., "see endocrinologist", "come back in 2 weeks")
2. REASONS: Only clinically meaningful problems, diagnoses, or symptoms that clearly explain why the above order is needed (e.g., "for diabetes", "due to shortness of breath")

ReAct METHODOLOGY:
Use Chain of Thought (COT) reasoning with the following structure:
1. THOUGHT: Analyze the conversation to identify potential medical orders
2. ACTION: Extract specific orders with exact words from transcript
3. OBSERVATION: Verify extraction accuracy and completeness
4. FINAL OUTPUT: Return only the JSON array

OUTPUT FORMAT:
- order_type: "medication", "lab", "imaging", or "followup"
- description: Extract exact words from transcript, max 20 words
- reason: Only clinically meaningful problems, diagnoses, or symptoms that clearly explain why the above order is needed (e.g., "for diabetes", "due to shortness of breath"), max 20 words
- provenance: Turn numbers where order appears, max 5 entries
"""

INSTRUCTION_TEMPLATE = """
Extract all medical orders from this doctor-patient conversation using ReAct methodology:

CONVERSATION: {conversation}

Follow this ReAct process:

THOUGHT: Read through the conversation and identify all turns where the doctor gives explicit medical orders. Consider what type each order is (medication, lab, imaging, followup) and what the clinical reason is based on the conversation context.

ACTION: For each identified order, extract the exact words from the transcript for both description and reason. Ensure each order has:
- Correct order_type classification
- Description using exact transcript words (max 20 words)
- Reason using exact transcript words (max 20 words)
- Provenance list with relevant turn numbers (max 5)

OBSERVATION: Review the extracted orders to ensure:
- All explicit doctor orders are captured
- No patient history or past medications included
- Exact words used from transcript without paraphrasing
- Word limits respected (20 words max for description/reason)
- Provenance limited to 5 turn numbers maximum
- Compound orders are split into separate entries

FINAL OUTPUT: Return ONLY the JSON array with no other text:
[
  {{
    "order_type": "medication|lab|imaging|followup",
    "description": "exact words from transcript max 20 words",
    "reason": "exact words from transcript max 20 words",
    "provenance": [max 5 turn numbers]
  }}
]

CRITICAL: Output ONLY the JSON array after completing your reasoning. No explanations, no additional text, no formatting - just the JSON structure.
"""

In [None]:
def format_messages(conv):
    instruction = INSTRUCTION_TEMPLATE.format(conversation=conv)

    # ReAct example with Chain of Thought
    react_example = f"""
EXAMPLE CONVERSATION:
Turn 42 - DOCTOR: Your blood pressure is quite elevated today at 180/95.
Turn 43 - DOCTOR: I'm going to prescribe amlodipine 5mg once daily to help control this.
Turn 44 - DOCTOR: We should also check your kidney function with a creatinine level.
Turn 45 - DOCTOR: Please come back in 2 weeks so we can recheck your pressure.

EXPECTED ReAct PROCESS:

THOUGHT: I need to analyze this conversation for medical orders. I can see the doctor is addressing high blood pressure. In turn 43, there's a medication order for amlodipine. In turn 44, there's a lab order for creatinine. In turn 45, there's a follow-up appointment order. Each has a clear clinical reason mentioned in the conversation.

ACTION: Extracting orders with exact words:
1. Medication order: "amlodipine 5mg once daily" for "blood pressure is quite elevated" (turns 42, 43)
2. Lab order: "creatinine level" to "check your kidney function" (turn 44)
3. Follow-up order: "come back in 2 weeks" to "recheck your pressure" (turn 45)

OBSERVATION: I have identified 3 explicit medical orders from the doctor. All use exact words from the transcript. Word counts are within limits. Provenance is accurate and under 5 entries each. No patient history included.

FINAL OUTPUT:
[
  {{
    "order_type": "medication",
    "description": "amlodipine 5mg once daily",
    "reason": "blood pressure is quite elevated",
    "provenance": [42, 43]
  }},
  {{
    "order_type": "lab",
    "description": "creatinine level",
    "reason": "check your kidney function",
    "provenance": [44]
  }},
  {{
    "order_type": "followup",
    "description": "come back in 2 weeks",
    "reason": "recheck your pressure",
    "provenance": [45]
  }}
]

NOW EXTRACT FROM THIS CONVERSATION:
---
{instruction}
"""

    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": instruction,
        }
    ]

    return messages

In [None]:
def _format_conv(turns, max_turns=-1, only_last_n=False):
    formatted = []

    if max_turns > 0:
        turns = turns[-max_turns:] if only_last_n else turns[:max_turns]

    for turn in turns:
        speaker = turn['speaker']
        text = turn['transcript']
        turn_id = turn['turn_id']
        formatted.append(f"Turn {turn_id} - {speaker}: {text}")

    return "\n".join(formatted)

In [None]:
# def format_messages(conv):
#     example_conversation = """Turn 126 - DOCTOR: so, for your first problem of your shortness of breath i think that you are in an acute heart failure exacerbation.
# Turn 127 - DOCTOR: i want to go ahead and, uh, put you on some lasix, 40 milligrams a day.
# Turn 138 - DOCTOR: for your second problem of your type i diabetes, um, let's go ahead... i wanna order a hemoglobin a1c for, um, uh, just in a, like a month or so."""

#     example_reasoning = """THOUGHT:
# - Turn 126 describes shortness of breath and acute heart failure.
# - Turn 127 includes an explicit medication order: "lasix, 40 milligrams a day".
# - So this is a medication order addressing acute heart failure.

# - Turn 138 references type I diabetes and ordering "hemoglobin a1c".
# - So this is a lab test order for diabetes monitoring.

# ACTION:
# [
#   {
#     "order_type": "medication",
#     "description": "lasix 40 milligrams a day",
#     "reason": "shortness of breath acute heart failure exacerbation",
#     "provenance": [126, 127]
#   },
#   {
#     "order_type": "lab",
#     "description": "hemoglobin a1c",
#     "reason": "type i diabetes",
#     "provenance": [138]
#   }
# ]"""

#     instruction = f"""
# EXAMPLE CONVERSATION:
# {example_conversation}

# EXAMPLE THINKING AND EXTRACTION:
# {example_reasoning}

# NOW APPLY THE SAME THINKING AND EXTRACTION TO THIS CONVERSATION:

# {conv}

# Start with your THOUGHT, then give the ACTION as the final JSON output.
# """

#     messages = [
#         {
#             "role": "system",
#             "content": SYSTEM_PROMPT,
#         },
#         {
#             "role": "user",
#             "content": instruction,
#         }
#     ]
#     return messages


In [None]:
sample_data = ds[4]

sample_conv = _format_conv(sample_data["transcript"])
prompt = format_messages(conv=sample_conv)

In [None]:
response = ""

for chunk in lm.infer_stream(prompt):
    response += chunk
    print(chunk, end="", flush=True)


```json
[
  {
    "order_type": "medication",
    "description": "meloxicam 15 mg once a day",
    "reason": "for back pain",
    "provenance": [
      "124",
      "125",
      "126",
      "127",
      "128"
    ]
  },
  {
    "order_type": "medication",
    "description": "increase metformin to 1000 mg twice a day",
    "reason": "for diabetes",
    "provenance": [
      "133",
      "134",
      "135",
      "136",
      "137"
    ]
  },
  {
    "order_type": "medication",
    "description": "order lisinopril 20 mg daily",
    "reason": "for hypertension",
    "provenance": [
      "140",
      "141",
      "142",
      "143",
      "147"
    ]
  },
  {
    "order_type": "followup",
    "description": "repeat a hemoglobin a1c in about 6 months",
    "reason": "for diabetes",
    "provenance": [
      "135",
      "136",
      "137",
      "138",
      "139"
    ]
  },
  {
    "order_type": "followup",
    "description": "refer you to physical therapy",
    "reason": "for back pain"

In [None]:
def infer_sample(sample, max_seqlen=8192):
    sample["pred"] = None
    if not sample["transcript"]:
        print(f"Transcript is None, skipping...")
        return sample

    sample_conv = _format_conv(sample["transcript"])
    prompt = format_messages(conv=sample_conv)

    token_count = lm.token_count(prompt[-1]['content'])
    if token_count > 0.9 * max_seqlen:
        print(f"Token length {token_count} exceeded max_seqlen {max_seqlen}, skipping...")
        return sample

    try:
        out = lm.infer(messages=prompt, max_new_tokens=2048)
        # print(out)
        sample["pred"] = out
    except Exception as e:
        print(f"Error in LLM call -> {e}")
        return sample


    return sample

In [None]:
ds_val = ds_val.map(infer_sample, num_proc=3)



Map (num_proc=3):   0%|          | 0/100 [00:00<?, ? examples/s]

Error in LLM call -> Error code: 400 - {'object': 'error', 'message': "This model's maximum context length is 8128 tokens. However, you requested 8153 tokens (6105 in the messages, 2048 in the completion). Please reduce the length of the messages or completion. None", 'type': 'BadRequestError', 'param': None, 'code': 400}
Error in LLM call -> Error code: 400 - {'object': 'error', 'message': "This model's maximum context length is 8128 tokens. However, you requested 8283 tokens (6235 in the messages, 2048 in the completion). Please reduce the length of the messages or completion. None", 'type': 'BadRequestError', 'param': None, 'code': 400}
Error in LLM call -> Error code: 400 - {'object': 'error', 'message': "This model's maximum context length is 8128 tokens. However, you requested 8636 tokens (6588 in the messages, 2048 in the completion). Please reduce the length of the messages or completion. None", 'type': 'BadRequestError', 'param': None, 'code': 400}


In [None]:
ds_val

Dataset({
    features: ['id', 'expected_orders', 'transcript', 'pred'],
    num_rows: 100
})

In [None]:
ds_val.filter(lambda x: x["transcript"] is None).num_rows, ds_val.filter(lambda x: x["pred"] is None).num_rows

Filter:   0%|          | 0/100 [00:00<?, ? examples/s]

Filter:   0%|          | 0/100 [00:00<?, ? examples/s]

(0, 3)

In [None]:
pred = ds_val.to_pandas()

In [52]:
df = pred[['id', 'pred']]

In [65]:
import pandas as pd
import re

# Assuming your DataFrame is named `df`
# and the column with the text is named 'notes_with_action'
count = 0
# Function to extract the ACTION JSON string using regex
def extract_action_json(text):
    # First try: triple-backtick JSON block
    if text:
      match_1 = re.search(r"\s*```json\s*(.*?)\s*```", text, re.DOTALL)
      if match_1:
        return match_1.group(1)
      # If not found, fall back to raw JSON block
      if not match_1:
          match_2 = re.search(r"\s*```json\s*(\[.*)",text, re.DOTALL)
      if match_2:

        return match_2.group(1)
      # print()
      return None
    return None

# (Optional) If you want to parse the JSON into Python objects (dicts/lists)
import json
import ast

def parse_json(json_str):
    try:
        return json.loads(json_str)
    except:
        return ast.literal_eval(json_str)

In [66]:
# Apply the function to create a new column
df["pred_json"] = df["pred"].apply(extract_action_json)

In [67]:
df["pred_json_parse"] = df["pred_json"].apply(parse_json)

SyntaxError: '{' was never closed (<unknown>, line 314)

In [58]:
df.iloc[0]

Unnamed: 0,0
id,acibench_D2N182_virtscribe_clef_taskC_test3
pred,"```json\n[\n {\n ""order_type"": ""medication..."
pred_json,"[\n {\n ""order_type"": ""medication"",\n ""..."
pred_json_parse,


In [39]:
 df.to_excel('pred_medgemma_4b_prompt_1.xlsx')

In [48]:
id_to_pred_dict = dict(zip(df['id'], df['pred_json_parse']))

In [None]:
# If pred_json_parse column has strings that are valid Python lists, you may want to parse them:
import ast

id_to_pred_dict = {
    row['id']: ast.literal_eval(row['pred_json_parse']) if isinstance(row['pred_json_parse'], str) else row['pred_json_parse']
    for _, row in df.iterrows()
}

In [49]:
with open('./pred_orders_react_4b_1.json','w')as f:
  json.dump(id_to_pred_dict,f,indent =2)