In [None]:
# Print the NVIDIA GPU status for this runtime (shell command).
!nvidia-smi

In [None]:
# Install the terminal extension with %pip so it lands in the environment of the running kernel.
%pip install colab-xterm

In [None]:
# Load the colabxterm IPython extension (installed by the colab-xterm package above).
%load_ext colabxterm

In [None]:
# Open a terminal inside the notebook.
# NOTE(review): presumably used to install/start the Ollama server that ChatOllama talks to — confirm.
%xterm

In [None]:
# Open another terminal inside the notebook.
# NOTE(review): presumably a second shell for pulling the gemma3:4b model — confirm it is not a leftover duplicate.
%xterm

In [None]:
# Install the LangChain packages used by this notebook into the running kernel's environment.
%pip install langchain langchain-core langchain-community langchain-ollama

In [None]:
import os
import json
import uuid
from typing import Union, List, Optional
from langchain_ollama import ChatOllama

# Chat model shared by every cell below: the "gemma3:4b" model via ChatOllama, temperature 0.7.
# NOTE(review): presumably requires a running Ollama server with this model already pulled — confirm.
gemma3 = ChatOllama(model="gemma3:4b",temperature=0.7)
# Same model with format="json" bound; used where the reply is parsed with json.loads.
gemma3_json = gemma3.bind(format="json")

# Smoke test: ask the plain model for a joke only computer scientists would get and print the reply.
resp = gemma3.invoke("說一個電腦科學家才懂的笑話")
print(resp.content)

In [None]:
# Load the table that maps an ICD-10 code to the text used to build a diagnosis string.
# The encoding is given explicitly so the result does not depend on the platform default.
with open("icd10cm_mapping.json", "r", encoding="utf-8") as reader:
    ICD_MAPPING = json.load(reader)
# Spot-check the table by printing the entry for code I10.
print(ICD_MAPPING["I10"])

In [None]:
# Load the collections of ICD-10 codes; each collection becomes one virtual patient.
# The encoding is given explicitly so the result does not depend on the platform default.
with open("random_icd10_collections.json", "r", encoding="utf-8") as reader:
    icd_collections = json.load(reader)

# Uncomment to try a single hand-picked collection instead of the whole file:
# icd_collections = [["G52.3", "I10", "E11.9"]]

In [None]:
def generate_virtual_patient_single(diagnosis: str, max_attempts: int = 3) -> Optional[dict]:
    """
    Generate a full virtual patient case for a diagnosis with a single prompt.

    The JSON-mode model (``gemma3_json``) is asked for one JSON object containing
    six fixed sections. The reply is accepted only when it parses as a JSON
    object (dict) that contains every required section; otherwise the request is
    repeated.

    :param diagnosis: Diagnosis text inserted at the end of the prompt.
    :param max_attempts: How many times the model is asked before giving up.
    :return: The parsed case report, or ``None`` when no attempt produced a
        valid report.
    """
    required_keys = ["基本背景", "過去病史與危險因子", "現病史與症狀", "臨床檢查與檢驗", "治療與病程", "預後與後續計畫"]
    # The JSON skeleton shown to the model is derived from required_keys so the
    # prompt and the validation below cannot drift apart.
    json_form = "\n{\n" + ",\n".join(f'  "{key}": ___' for key in required_keys) + "\n}\n"
    prompt = f"""
你是醫學專業文本生成器。會根據診斷內容，生成一份完整的台灣病患虛擬設定。
依照以下步驟生成一份完整的病歷內容：
1. 根據疾病生成病人背景，包括：年齡、性別、職業、家族史、社會背景。
2. 整理病人的既往病史、用藥史、生活習慣與危險因子。
3. 描述現病史與症狀，包括主訴、病程、臨床檢查與檢驗發現。
4. 說明治療過程、住院或門診經過、病情變化與追蹤。

最後輸出JSON格式：
{json_form}

診斷內容:
{diagnosis}
"""
    for attempt in range(max_attempts):
        try:
            resp = gemma3_json.invoke(prompt)
            case_report = json.loads(resp.content)
        except (json.JSONDecodeError, TypeError) as e:
            print(f"Attempt {attempt + 1} failed with error: {e}. Retrying...")
            continue

        # Valid JSON is not necessarily an object: a bare string would pass the
        # `key in ...` test below through substring matching, so reject it here.
        if not isinstance(case_report, dict):
            print(f"Attempt {attempt + 1} failed: JSON root is not an object. Retrying...")
            continue

        if all(key in case_report for key in required_keys):
            return case_report
        print(f"Attempt {attempt + 1} failed: Missing keys in JSON. Retrying...")

    print(f"Failed to generate valid patient data for diagnosis '{diagnosis}' after {max_attempts} attempts.")
    return None

In [None]:
# Generate one virtual patient per ICD-10 code collection and save each as data/<uuid>.json.
os.makedirs("data", exist_ok=True)

for icd_codes in icd_collections:
    # A code without an entry in ICD_MAPPING cannot be described; skip that collection
    # instead of aborting the whole batch with a KeyError.
    unknown_codes = [code for code in icd_codes if code not in ICD_MAPPING]
    if unknown_codes:
        print(f"Skipping collection with unknown ICD-10 codes: {unknown_codes}")
        continue

    # Every description is followed by a single space, including the last one.
    diagnosis = "".join(f"{ICD_MAPPING[code]} " for code in icd_codes)
    print(diagnosis)

    case_report = generate_virtual_patient_single(diagnosis)
    if not case_report:
        print(f"Skipping diagnosis due to generation failure: {diagnosis}")
        continue

    file_path = os.path.join("data", f"{uuid.uuid4()}.json")
    full_case = {"icd10": icd_codes, "diagnosis": diagnosis, "report": case_report}
    # ensure_ascii=False writes non-ASCII text verbatim, so the file is opened as UTF-8
    # explicitly; Patient._load_case_file reads these files back as UTF-8.
    with open(file_path, "w", encoding="utf-8") as writer:
        json.dump(full_case, writer, indent=4, ensure_ascii=False)
    print(file_path)

In [None]:
# List the contents of the data directory.
!ls data

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt for the simulated patient. The text instructs the model to answer in the first person,
# to use only what is written in the case file, to answer only specific questions, and to give
# no diagnosis or interpretation. {case_file} and {query} are filled in by Patient.handle_query.
PATIENT_TEMPLATE = """
您將扮演一個醫療案例中的病患。請您以第一人稱的視角回答問題，如同在描述自己的情況。
您的規則：
1. 僅限病歷內容：您說的每句話都必須嚴格來自下方病歷檔案。請勿杜撰超出書面內容的細節。
2. 明確問題原則：只有當醫師詢問具體問題時，您才回答。如果問題含糊或過於籠統（例如：談談您自己），請禮貌地拒絕，並請對方提出更具體的問題。
3. 禁止診斷或解讀：您不得提供診斷、解讀檢驗結果或給予建議。只能陳述您病歷上記載的經歷、症狀和病史。
4. 病患口吻：請始終使用第一人稱（例如：「我胸痛兩天了」、「我昨天吐了三次」）。請保持陳述的真實性，並與病歷檔案一致。避免使用非醫療專業人員不常使用的醫學術語。

病歷檔案:
{case_file}

醫師的詢問:
{query}
"""

def nested_dict_to_string(data_dict, indent=0):
    """
    Render a (possibly nested) dictionary as indented text.

    Each entry becomes ``key: value`` on its own line. A value that is itself a
    dictionary becomes ``key:`` followed by its own entries, indented two more
    spaces per nesting level. Non-dict values are formatted as-is.

    :param data_dict: Dictionary to render.
    :param indent: Nesting level of ``data_dict`` (two spaces per level).
    :return: The rendered lines joined with newlines.
    """
    prefix = '  ' * indent
    rendered = [
        f"{prefix}{name}:\n{nested_dict_to_string(child, indent + 1)}"
        if isinstance(child, dict)
        else f"{prefix}{name}: {child}"
        for name, child in data_dict.items()
    ]
    return '\n'.join(rendered)

class Patient:
    """
    Simulated patient for medical diagnostic dialogues.

    Answers are produced by feeding PATIENT_TEMPLATE, filled with the case file
    text and the incoming question, to the shared ``gemma3`` model.
    """
    def __init__(self, case_file_path: str):
        """
        Load the case file and assemble the prompt -> model -> string parser chain.

        :param case_file_path: Path to the case file.
        :raises FileNotFoundError: If ``case_file_path`` does not exist.
        """
        file_is_missing = not os.path.exists(case_file_path)
        if file_is_missing:
            raise FileNotFoundError(f"找不到檔案: {case_file_path}")
        self.case_file_content = self._load_case_file(case_file_path)
        self.prompt_template = self._get_prompt_template()
        self.chain = self.prompt_template | gemma3 | StrOutputParser()

    def _load_case_file(self, file_path: str) -> str:
        """
        Read the case JSON and return its "report" section as indented text.

        The rendered text is also printed.
        """
        with open(file_path, 'r', encoding='utf-8') as handle:
            report = json.load(handle)["report"]
        rendered = nested_dict_to_string(report)
        print(rendered)
        return rendered

    def _get_prompt_template(self) -> ChatPromptTemplate:
        """Build the chat prompt template from PATIENT_TEMPLATE."""
        return ChatPromptTemplate.from_template(PATIENT_TEMPLATE)

    def handle_query(self, query: str) -> str:
        """
        Answer one question as the patient.

        :param query: The doctor's question.
        :return: The model's reply as a string.
        """
        chain_inputs = {
            "case_file": self.case_file_content,
            "query": query,
        }
        return self.chain.invoke(chain_inputs)

In [None]:
# Prefer the case file named here. Generated files get random UUID names, so on a fresh run
# this exact file will not exist; in that case fall back to the first generated case in data/.
case_file = 'data/2a0e02ab-ee87-48f7-b510-2e404cb2e8a0.json'
if not os.path.exists(case_file):
    available_cases = sorted(
        name
        for name in (os.listdir("data") if os.path.isdir("data") else [])
        if name.endswith(".json")
    )
    if not available_cases:
        raise FileNotFoundError("找不到任何病歷檔案，請先執行病歷生成步驟。")
    case_file = os.path.join("data", available_cases[0])
    print(f"使用病歷檔案: {case_file}")
agent = Patient(case_file_path=case_file)

# Interactive chat: the user plays the doctor, the model plays the patient.
try:
    print("請輸入您的詢問。輸入 'exit' 來結束。")

    while True:
        user_query = input("醫師: ")
        if user_query.lower() == 'exit':
            break
        response = agent.handle_query(user_query)
        print(f"病患: {response}")
except Exception as e:
    print(f"發生未預期的錯誤: {e}")

In [None]:
# Prompt for the simulated doctor. The text instructs the model to keep questioning the patient
# for a differential diagnosis, to ask one question at a time and explain why, and to announce
# the diagnosis and treatment plan once it has enough information.
# {dialogue} is filled in by Doctor.ask with the conversation so far.
DOCTOR_TEMPLATE = """
您將扮演一個醫師。請持續詢問病患問題，收集資訊並進行鑑別診斷。
當資訊足夠做出最後診斷時，請向病患說明您的判斷並宣布治療計畫。
一次只能詢問一個問題，並且要向病患說明您詢問此問題的原因。

過去對話:
{dialogue}
"""

class Doctor:
    """
    Simulated doctor for medical diagnostic dialogues.

    Each reply is produced by feeding DOCTOR_TEMPLATE, filled with the
    conversation so far, to the shared ``gemma3`` model.
    """
    def __init__(self):
        """Assemble the prompt -> model -> string parser chain (takes no arguments)."""
        self.prompt_template = self._get_prompt_template()
        self.chain = self.prompt_template | gemma3 | StrOutputParser()

    def _get_prompt_template(self) -> ChatPromptTemplate:
        """Build the chat prompt template from DOCTOR_TEMPLATE."""
        return ChatPromptTemplate.from_template(DOCTOR_TEMPLATE)

    def ask(self, dialogue: str) -> str:
        """
        Produce the doctor's next utterance.

        :param dialogue: The conversation so far, inserted into the prompt.
        :return: The model's reply as a string.
        """
        chain_inputs = {"dialogue": dialogue}
        return self.chain.invoke(chain_inputs)

In [None]:
# Interactive chat: the user plays the patient, the model plays the doctor.
dialogue = []
agent = Doctor()

try:
    print("請輸入您的詢問。輸入 'exit' 來結束。")

    while True:
        user_query = input("病人: ")
        if user_query.lower() == 'exit':
            break
        dialogue.append({"speaker": "Patient", "text": user_query})
        # Doctor.ask takes the history as one string. Build a speaker-labelled transcript
        # (the same layout run_simulation uses) instead of passing the raw list, whose
        # repr would reach the prompt without any indication of who said what.
        dialogue_history = "\n".join(f"{turn['speaker']}: {turn['text']}" for turn in dialogue)
        response = agent.ask(dialogue_history)
        dialogue.append({"speaker": "Doctor", "text": response})
        print(f"醫師: {response}")
except Exception as e:
    print(f"發生未預期的錯誤: {e}")

In [None]:
import uuid

def run_simulation(case_file_path: str, output_dir: str, max_turns: int = 20):
    """
    Run one simulated doctor-patient conversation and save the transcript.

    The doctor opens with a fixed greeting. In every turn the patient answers
    the doctor's latest utterance, then the doctor responds to the full
    transcript so far. The conversation stops as soon as a doctor utterance
    contains "治療計畫", or after ``max_turns`` turns. The transcript is written
    to ``<output_dir>/simulation_<uuid>.json`` as a list of
    ``{"speaker": ..., "text": ...}`` entries.

    :param case_file_path: Path to the patient's case file. If it does not
        exist, an error is printed and nothing else happens.
    :param output_dir: Directory for the transcript; created when missing.
    :param max_turns: Maximum number of patient/doctor exchanges.
    """
    if not os.path.exists(case_file_path):
        print(f"錯誤: 找不到病歷檔案 {case_file_path}")
        return

    doctor = Doctor()
    patient = Patient(case_file_path=case_file_path)

    dialogue = []

    print("--- 開始模擬對話 ---")

    # The doctor's opening line is fixed rather than generated.
    doctor_response = "您好，請問有什麼可以協助您的嗎？"
    print(f"醫師: {doctor_response}")
    dialogue.append({"speaker": "Doctor", "text": doctor_response})

    for turn_count in range(1, max_turns + 1):
        print(f"--- 第 {turn_count} 輪 ---")

        # The patient only sees the doctor's latest utterance.
        patient_response = patient.handle_query(doctor_response)
        print(f"病患: {patient_response}")
        dialogue.append({"speaker": "Patient", "text": patient_response})

        # The doctor sees the whole speaker-labelled transcript.
        dialogue_history = "\n".join([f"{d['speaker']}: {d['text']}" for d in dialogue])
        doctor_response = doctor.ask(dialogue_history)
        print(f"醫師: {doctor_response}")
        dialogue.append({"speaker": "Doctor", "text": doctor_response})

        if "治療計畫" in doctor_response:
            print("--- 對話結束: 醫師提到治療計畫 ---")
            break
    else:
        # Reached only when the loop was not left via `break`, so finishing on the very
        # last allowed turn is not also reported as hitting the turn limit.
        print("--- 對話結束: 已達最大輪數 ---")

    os.makedirs(output_dir, exist_ok=True)

    file_name = f"simulation_{uuid.uuid4()}.json"
    output_path = os.path.join(output_dir, file_name)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(dialogue, f, ensure_ascii=False, indent=4)

    print(f"對話已儲存至 {output_path}")

In [None]:
# -p keeps this cell re-runnable: plain mkdir reports an error when the directory already exists.
!mkdir -p simulations

In [None]:
# Run one automated doctor-patient simulation on `case_file` (set in the interactive patient
# chat cell) and store the transcript in the simulations directory.
output_directory = 'simulations'
run_simulation(case_file_path=case_file, output_dir=output_directory)