# Answering MIMIC questions

### Step 1: Import all required libraries

In [1]:
#############################################
### Step 1: Import all required libraries ###
#############################################

print("-== STEP 1: Importing all required libraries ==-")

# Data initial processing
import os
import re
import json
import time
import requests
import duckdb
import pandas as pd
from functools import lru_cache
from thefuzz import process as fuzzy_process

# Formatting fields / structures
from pydantic import BaseModel, Field
from typing import List, Dict, Any, TypedDict, Annotated, Sequence, Optional

# Agent/tool-related libraries
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langchain_core.messages import BaseMessage, FunctionMessage, HumanMessage, ToolMessage, AIMessage
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
from transformers import pipeline


-== STEP 1: Importing all required libraries ==-


### Step 2: Loading Data from MIMIC III DuckDB Database

In [2]:
############################################################
### Step 2: Loading Data from MIMIC III DuckDB Database  ###
############################################################

print("-== STEP 2.1: Function for loading data into pandas DataFrame from DuckDB ==-")

DB_FILE = 'MIMICIII.duckdb'

def execute_sql(query: str, params: tuple = (), read_only: str = True) -> pd.DataFrame:
    with duckdb.connect(DB_FILE, read_only=read_only) as con:
        res = con.execute(query, params).df()
    return res

print("[DONE] DuckDB dataset loading function initialized.")
print("")


print("-== STEP 2.2: Helper function to get all disease names (with caching) ==-")
@lru_cache(maxsize=1)
def get_all_disease_long_titles() -> List[str]:
    """Fetches and caches the list of all unique disease long titles from the database."""
    query = "SELECT DISTINCT LONG_TITLE FROM D_ICD_DIAGNOSES WHERE LONG_TITLE IS NOT NULL;"
    result_df = execute_sql(query)
    if "error" in result_df.columns or result_df.empty:
        return []
    return result_df['LONG_TITLE'].tolist()

print("[DONE] Helper function initialized.")

-== STEP 2.1: Function for loading data into pandas DataFrame from DuckDB ==-
[DONE] DuckDB dataset loading function initialized.

-== STEP 2.2: Helper function to get all disease names (with caching) ==-
[DONE] Helper function initialized.


### Step 3: Creating Tools (based on MIMIC III database)

In [82]:
#################################################################
### Step 3: Creating Tools for manipulation of MIMIC III data ###
#################################################################

print("-== STEP 3: Creating Tools for manipulation of MIMIC III data ==-")

# Extra logging
verbose_logging = False


# Models:
class DiseaseInput(BaseModel):
    disease_name: str = Field(description="The common name of a disease, e.g., 'heart failure' or a typo like 'hart failure'.")
    
class ICDCodeInput(BaseModel):
    icd_codes: List[str] = Field(description="A list of ICD-9 codes to query for.")

class NoteInput(BaseModel):
    clinical_note: str = Field(description="A string containing free-text from a clinical note.")


### TOOL 1 - Getting ICD9 codes from list of disease names
@tool("get_icd_codes_for_disease", args_schema=DiseaseInput)
def get_icd_codes_for_disease(disease_name: str) -> List[str]:
    """
    Searches for and returns a list of relevant ICD-9 codes for a common disease name.
    This tool uses a hybrid approach: a fast fuzzy search to find likely candidates, followed by an LLM
    to select the best match. This makes it robust to typos and synonyms.
    Use this tool first to find the correct codes for a disease.
    """
    all_diseases = get_all_disease_long_titles()
    
    ### Fuzzy local search for top 50 related names
    candidates = fuzzy_process.extract(disease_name, all_diseases, limit=50)
    candidate_names = [name for name, score in candidates]

    if verbose_logging:
        print(f" -----> Looking for candidate names using fuzzy search.\n     ---> Candidate names found:\n{candidate_names}")
    
    ### Using local LLM Studio (or Lemonade Server) to get final match for disease with LLM help
    api_url = "http://localhost:8000/v1/chat/completions"
    headers = {'Content-Type': 'application/json'}
    disease_schema = {
        "name": "disease_response",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "disease": {"type": "array"}
            },
            "required": ["disease"]
        }
    }
    
    prompt = f"""
    From the following list of candidate medical diagnoses:
    {json.dumps(candidate_names)}

    Select the single best match for the user's search term: "{disease_name}"

    Return list of best matching strings from the provided list, and nothing else. Do not add any extra commentary or analysis.
    """

    payload = {
        #"model": "deepseek/deepseek-r1-0528-qwen3-8b", 
        # "model": "qwen-3-14b-gemini-v0.1",
        # "model": "mistralai/devstral-small-2507",
        "model": "oh-dcft-v3.1-claude-3-5-haiku-20241022-qwen",
        "messages": [
            {"role": "system", "content": "You are an expert at finding the best match for a search term in a list of options. Your only task is to answer in JSON list of closest options from the provided list. Do not add any extra commentary or analysis."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0,
        "response_format": {
            "type": "json_schema",
            "json_schema": disease_schema
        }
    }

    try:
        response = requests.post(api_url, headers=headers, data=json.dumps(payload))
        response.raise_for_status()
        result = response.json()
        if result.get('choices') and result['choices'][0].get('message'):
            json_str = result['choices'][0]['message']['content']
            parsed_json = json.loads(json_str)
            matched_diseases = ','.join([f"'{d.strip().replace('"', '')}'" for d in parsed_json.get("disease", [])])

            if verbose_logging:
                print(f" -----> Getting final matching for diagnos from LLM.\n     ---> Best matched names found:\n{matched_diseases}")
            
            query = f"SELECT DISTINCT ICD9_CODE FROM D_ICD_DIAGNOSES WHERE LONG_TITLE in ({matched_diseases});"
            result_df = execute_sql(query)
            res = sorted(set(result_df['ICD9_CODE'].tolist()))
            
            if verbose_logging:
                print(f" -----> The list of ICD9 codes found:\n{res}")

            return res    
    
    except Exception as e:
        print(f"Error in get_icd_codes_for_disease: {e}")
    
    return []


### TOOL 2 - Getting Patients count for ICD9 codes
@tool("get_patient_count", args_schema=ICDCodeInput)
def get_patient_count(icd_codes: List[str]) -> int:
    """Returns the total number of unique patients for a given list of ICD-9 codes."""
    if not icd_codes:
        return 0
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            COUNT(DISTINCT SUBJECT_ID)
        FROM 
            DIAGNOSES_ICD
        WHERE 
            ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty:
        return 0
    
    return result_df.iloc[0, 0].item()


### TOOL 3 - Getting Average Length of Stay for ICD9 codes
@tool("get_average_length_of_stay", args_schema=ICDCodeInput)
def get_average_length_of_stay(icd_codes: List[str]) -> float:
    """Calculates the statistical mean (average) hospital length of stay (in days) for patients with any of the specified ICD-9 codes."""
    if not icd_codes:
        return 0.0
        
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            AVG(a.DISCHTIME::DATE - a.ADMITTIME::DATE) as LOS
        FROM 
            ADMISSIONS a
            JOIN DIAGNOSES_ICD d ON a.HADM_ID = d.HADM_ID
        WHERE
            d.ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return 0.0
    
    return round(result_df.iloc[0, 0].item(), 2)


### TOOL 4 - Getting Median Length of Stay for ICD9 codes
@tool("get_median_length_of_stay", args_schema=ICDCodeInput)
def get_median_length_of_stay(icd_codes: List[str]) -> float:
    """Calculates the statistical median (the middle value) of length of stay in days for a given diagnosis."""
    if not icd_codes:
        return 0.0
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            MEDIAN(a.DISCHTIME::DATE - a.ADMITTIME::DATE) as LOS
        FROM 
            ADMISSIONS a
            JOIN DIAGNOSES_ICD d ON a.HADM_ID = d.HADM_ID
        WHERE
            d.ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return 0.0
    
    return round(result_df.iloc[0, 0].item(), 2)


### TOOL 5 - Getting Mode Length of Stay for ICD9 codes
@tool("get_mode_length_of_stay", args_schema=ICDCodeInput)
def get_mode_length_of_stay(icd_codes: List[str]) -> float:
    """Calculates the statistical mode (most frequent value) of length of stay in days for a given diagnosis."""
    if not icd_codes:
        return 0.0
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            MODE(a.DISCHTIME::DATE - a.ADMITTIME::DATE) as LOS
        FROM 
            ADMISSIONS a
            JOIN DIAGNOSES_ICD d ON a.HADM_ID = d.HADM_ID
        WHERE 
            d.ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return 0.0
    
    return round(result_df.iloc[0, 0].item(), 2)


### TOOL 6 - Getting In-hospital Mortality Rate for ICD9 codes
@tool("get_mortality_rate", args_schema=ICDCodeInput)
def get_mortality_rate(icd_codes: List[str]) -> str:
    """Calculates the in-hospital mortality rate (as a percentage) for patients with a specific diagnosis."""
    if not icd_codes:
        return "0.0%"
        
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            CAST(SUM(CASE WHEN a.HOSPITAL_EXPIRE_FLAG = 1 THEN 1 ELSE 0 END) AS REAL) * 100 / COUNT(DISTINCT a.HADM_ID)
        FROM 
            ADMISSIONS a
            JOIN DIAGNOSES_ICD d ON a.HADM_ID = d.HADM_ID
        WHERE 
            d.ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return "0.0%"
    
    return f"{round(result_df.iloc[0, 0].item(), 2)}%"

    
### TOOL 6 - Getting Demographic Breakdown for ICD9 codes
@tool("get_demographic_breakdown", args_schema=ICDCodeInput)
def get_demographic_breakdown(icd_codes: List[str]) -> Dict[str, Any]:
    """Returns a breakdown of patients for a given diagnosis by gender, insurance type, and marital status."""
    if not icd_codes:
        return {}

    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT 
            p.GENDER,
            a.INSURANCE,
            a.MARITAL_STATUS,
            COUNT(DISTINCT a.SUBJECT_ID) as patient_count
        FROM 
            ADMISSIONS a
            JOIN PATIENTS p ON a.SUBJECT_ID = p.SUBJECT_ID
        WHERE 
            a.HADM_ID IN (SELECT DISTINCT HADM_ID FROM DIAGNOSES_ICD WHERE ICD9_CODE IN ({placeholders}))
        GROUP BY
            1, 2, 3
        ORDER BY
            patient_count DESC;
    """
    result_df = execute_sql(query, tuple(icd_codes))
    return result_df.to_dict(orient='records')


### TOOL 7 - Getting Average Age of Patients for ICD9 codes
@tool("get_average_patient_age", args_schema=ICDCodeInput)
def get_average_patient_age(icd_codes: List[str]) -> float:
    """Calculates the average age of patients with a specific diagnosis at the time of their hospital admission."""
    if not icd_codes:
        return 0.0
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT
            AVG(DATE_DIFF('year', p.DOB, a.ADMITTIME))
        FROM
            PATIENTS p
            JOIN ADMISSIONS a ON p.SUBJECT_ID = a.SUBJECT_ID
            JOIN DIAGNOSES_ICD d ON a.HADM_ID = d.HADM_ID
        WHERE
            d.ICD9_CODE IN ({placeholders});
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return 0.0
    
    return round(result_df.iloc[0, 0].item(), 1)


### TOOL 8 - Getting Age Distribution of Patients for ICD9 codes
@tool("get_age_distribution", args_schema=ICDCodeInput)
def get_age_distribution(icd_codes: List[str]) -> List[Dict[str, Any]]:
    """Provides a breakdown of patient counts by age group for a given diagnosis."""
    if not icd_codes:
        return []
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        WITH PATIENT_AGE AS (
            SELECT
                p.SUBJECT_ID,
                MIN(DATE_DIFF('year', p.DOB, a.ADMITTIME)) as Age
            FROM 
                PATIENTS p
                JOIN ADMISSIONS a ON p.SUBJECT_ID = a.SUBJECT_ID
            WHERE
                a.HADM_ID IN (SELECT DISTINCT HADM_ID FROM DIAGNOSES_ICD WHERE ICD9_CODE IN ({placeholders}))
            GROUP BY
                p.SUBJECT_ID
        )
        SELECT
            CASE
                WHEN Age <= 18 THEN '0-18 (Pediatric)'
                WHEN Age > 18 AND Age <= 40 THEN '19-40 (Young Adult)'
                WHEN Age > 40 AND Age <= 65 THEN '41-65 (Adult)'
                ELSE '65+ (Senior)'
            END AS age_group,
            COUNT(SUBJECT_ID) as patient_count
        FROM 
            PATIENT_AGE
        GROUP BY
            age_group
        ORDER BY
            age_group;
    """
    result_df = execute_sql(query, tuple(icd_codes))

    return result_df.to_dict(orient='records')


### TOOL 9 - Getting Admission Types Breakdown of Patients for ICD9 codes
@tool("get_admission_type_breakdown", args_schema=ICDCodeInput)
def get_admission_type_breakdown(icd_codes: List[str]) -> List[Dict[str, Any]]:
    """Gets a breakdown of admission types (e.g., EMERGENCY, ELECTIVE) for a given diagnosis."""
    if not icd_codes:
        return []
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        SELECT
            a.ADMISSION_TYPE,
            COUNT(DISTINCT a.HADM_ID) as number_of_admissions
        FROM 
            ADMISSIONS a
        WHERE 
            a.HADM_ID IN (SELECT DISTINCT HADM_ID FROM DIAGNOSES_ICD WHERE ICD9_CODE IN ({placeholders}))
        GROUP BY 
            a.ADMISSION_TYPE
        ORDER BY 
            number_of_admissions DESC;
    """
    result_df = execute_sql(query, tuple(icd_codes))
    
    return result_df.to_dict(orient='records')


### TOOL 10 - Getting percentage of 30-day readmission of Patients for ICD9 codes
@tool("get_30_day_readmission_rate", args_schema=ICDCodeInput)
def get_30_day_readmission_rate(icd_codes: List[str]) -> str:
    """Calculates the 30-day hospital readmission rate for patients with a specific diagnosis."""
    if not icd_codes:
        return "0.0%"
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        WITH PATIENT_ADMISSION AS (
            SELECT
                a.SUBJECT_ID,
                a.ADMITTIME,
                a.DISCHTIME,
                LEAD(a.ADMITTIME, 1) OVER(PARTITION BY a.SUBJECT_ID ORDER BY a.ADMITTIME) as next_admittime
            FROM 
                ADMISSIONS a
            WHERE 
                a.SUBJECT_ID IN (SELECT DISTINCT d.SUBJECT_ID FROM DIAGNOSES_ICD d WHERE d.ICD9_CODE IN ({placeholders}))
        )
        SELECT
            CAST(COUNT(CASE WHEN DATE_DIFF('day', DISCHTIME, next_admittime) <= 30 THEN 1 END) AS REAL) * 100 /
            CAST(COUNT(next_admittime) AS REAL) as readmission_rate
        FROM
            PATIENT_ADMISSION;
    """
    result_df = execute_sql(query, tuple(icd_codes))
    if result_df.empty or result_df.iloc[0, 0] is None:
        return "0.0%"
    
    return f"{round(result_df.iloc[0, 0].item(), 2)}%"


### TOOL 11 - Getting 10 most common co-occurring diagnoses of Patients for ICD9 codes
@tool("get_top_comorbidities", args_schema=ICDCodeInput)
def get_top_comorbidities(icd_codes: List[str]) -> List[Dict[str, Any]]:
    """Finds the top 10 most common co-occurring diagnoses (comorbidities) for a given initial diagnosis."""
    if not icd_codes:
        return []
    
    placeholders = ', '.join(['?'] * len(icd_codes))
    query = f"""
        WITH TARGET_ADMISSION AS (
            SELECT DISTINCT
                HADM_ID
            FROM
                DIAGNOSES_ICD
            WHERE
                ICD9_CODE IN ({placeholders})
        )
        SELECT
            d_diag.LONG_TITLE,
            COUNT(DISTINCT d_icd.SUBJECT_ID) as patient_count
        FROM 
            DIAGNOSES_ICD d_icd
            JOIN D_ICD_DIAGNOSES d_diag ON d_icd.ICD9_CODE = d_diag.ICD9_CODE
        WHERE
            d_icd.HADM_ID IN (SELECT HADM_ID FROM TARGET_ADMISSION)
            AND d_icd.ICD9_CODE NOT IN ({placeholders})
        GROUP BY
            d_diag.LONG_TITLE
        ORDER BY
            patient_count DESC
        LIMIT 10;
    """
    result_df = execute_sql(query, tuple(icd_codes) + tuple(icd_codes))

    return result_df.to_dict(orient='records')


### TOOL 12 - Getting List of diagnoses from clinical notes
@tool("extract_primary_diagnoses_from_text", args_schema=NoteInput)
def extract_primary_diagnoses_from_text(clinical_note: str) -> List[str]:
    """
    Scans a clinical note for disease and disorder mentions using a BioClinical-BERT NER model.
    Use this tool first when you need to find primary diagnoses mentioned in unstructured text.
    Returns a list of found disease names.
    """

    if verbose_logging:
        print("-== EXTRACTING DIAGNOSES FROM NOTE ==-\n")
    
    try:
        cleaned_note = clinical_note.strip()
        ner_pipeline = pipeline(
            'ner',
            model='d4data/biomedical-ner-all',
            aggregation_strategy='simple'
        )

        entities = ner_pipeline(cleaned_note)
        if verbose_logging:
            print(f"\n-----> Found entities:\n {pd.DataFrame(entities)}")
        
        # Removing duplicates and sorting final result
        diagnoses = sorted(set([entity['word'] for entity in entities if entity['entity_group'] == 'Disease_disorder']))

        if verbose_logging:
            print(f"\n-----> Found primary diagnoses:\n {diagnoses}\n")
        
        return diagnoses

    except Exception as e:
        print(f"Error during NER processing: {e}")
        return ["Error processing note."]


### TOOL 13 - Getting List of history diagnoses from clinical notes
@tool("extract_historical_diagnoses_from_text", args_schema=NoteInput)
def extract_historical_diagnoses_from_text(clinical_note: str) -> List[str]:
    """
    Scans a clinical note for disease and disorder mentions using a BioClinical-BERT NER model.
    Use this tool first when you need to find historical diagnoses mentioned in unstructured text.
    Returns a list of found disease names.
    """

    if verbose_logging:
        print("-== EXTRACTING DIAGNOSES FROM NOTE ==-\n")
    
    try:
        cleaned_note = clinical_note.strip()
        ner_pipeline = pipeline(
            'ner',
            model='d4data/biomedical-ner-all',
            aggregation_strategy='simple'
        )

        entities = ner_pipeline(cleaned_note)
        if verbose_logging:
            print(f"\n-----> Found entities:\n {pd.DataFrame(entities)}")
        
        # Removing duplicates and sorting final result
        diagnoses = sorted(set([entity['word'] for entity in entities if entity['entity_group'] == 'History']))

        if verbose_logging:
            print(f"\n-----> Found historical diagnoses:\n {diagnoses}\n")
        
        return diagnoses

    except Exception as e:
        print(f"Error during NER processing: {e}")
        return ["Error processing note."]

print("\n[DONE] Tools initialized successfully.")

-== STEP 3: Creating Tools for manipulation of MIMIC III data ==-

[DONE] Tools initialized successfully.


### Step 4: Testing Tools (_invoking directly_)

In [29]:
#############################
### Step 4: Testing Tools ###
#############################

print("-== STEP 4: Testing Tools with manipulation of MIMIC III data ==-\n")

# Extra logging
verbose_logging = True

try:
    print("\n-== STEP 4.1: Testing 'get_icd_codes_for_disease' Tool with a typo: 'sistolic hart failure'... ==-")
    hf_codes = get_icd_codes_for_disease.invoke({"disease_name": "sistolic hart failure"})
    print(f" => Found codes: \n{hf_codes}")
    print("----------------------")
    
    print("\n-== STEP 4.2: Testing 'get_patient_count' Tool for heart failure ==-")   
    patient_count = get_patient_count.invoke({"icd_codes": hf_codes})
    print(f" => Patient count: \n{patient_count}")
    print("----------------------")
    
    print("\n-== STEP 4.3: Testing 'get_mortality_rate' Tool for heart failure ==-")   
    mortality = get_mortality_rate.invoke({"icd_codes": hf_codes})
    print(f" => Mortality rate: \n{mortality}")
    print("----------------------")
    
    print("\n-== STEP 4.4: Testing 'get_demographic_breakdown' Tool for heart failure (top 10 results) ==-")  
    demographics = get_demographic_breakdown.invoke({"icd_codes": hf_codes})
    print(f" => Demographic Breakdown: \n{pd.DataFrame(demographics).head(10)}")
    print("----------------------")

    print("\n-== STEP 4.5: Testing 'get_top_comorbidities' Tool for heart failure ==-")   
    comorbidities = get_top_comorbidities.invoke({"icd_codes": hf_codes})
    print(f" => 10 most common co-occurring diagnoses: \n{pd.DataFrame(comorbidities)}")
    print("----------------------")

    print("\n-== STEP 4.6: Testing 'get_30_day_readmission_rate' Tool for heart failure ==-")   
    readmission = get_30_day_readmission_rate.invoke({"icd_codes": hf_codes})
    print(f" => 30 day readmission rate: \n{readmission}")
    print("----------------------")

    print("\n-== STEP 4.7: Testing 'get_age_distribution' Tool for heart failure ==-")   
    age_distribution = get_age_distribution.invoke({"icd_codes": hf_codes})
    print(f" => Age distribution rate: \n{pd.DataFrame(age_distribution)}")
    print("----------------------")

    print("\n-== STEP 4.8: Testing 'get_average_patient_age' Tool for heart failure ==-")   
    average_age = get_average_patient_age.invoke({"icd_codes": hf_codes})
    print(f" => Average Age: \n{average_age}")
    print("----------------------")

except Exception as e:
    print(f"Test failed with error: '{e}'")

try:
    print("\n-== STEP 4.9: Testing 'extract_primary_diagnoses_from_text' Tool ==-")
    clinical_note = """
    Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
    presenting to the emergency department with acute shortness of breath and a productive cough.
    Chest X-ray confirms a diagnosis of pneumonia
    """
    diagnoses = extract_primary_diagnoses_from_text.invoke({"clinical_note": clinical_note})
    cn_codes = get_icd_codes_for_disease.invoke({"disease_name": diagnoses[0]})
    print(f" => Found codes: \n{cn_codes}")
    print("----------------------")

    print("\n-== STEP 4.10: Testing 'extract_historical_diagnoses_from_text' Tool ==-")
    clinical_note = """
    Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
    presenting to the emergency department with acute shortness of breath and a productive cough.
    Chest X-ray confirms a diagnosis of pneumonia
    """
    diagnoses = extract_historical_diagnoses_from_text.invoke({"clinical_note": clinical_note})
    cn_codes = get_icd_codes_for_disease.invoke({"disease_name": diagnoses[0]})
    print(f" => Found codes: \n{cn_codes}")
    print("----------------------")
    
except Exception as e:
    print(f"Test failed with error: '{e}'")


-== STEP 4: Testing Tools with manipulation of MIMIC III data ==-


-== STEP 4.1: Testing 'get_icd_codes_for_disease' Tool with a typo: 'sistolic hart failure'... ==-
 -----> Looking for candidate names using fuzzy search.
     ---> Candidate names found:
['Post-osseointegration mechanical failure of dental implant', 'Failure of lactation, delivered, with mention of postpartum complication', 'Acute kidney failure with lesion of renal cortical necrosis', 'Unspecified abortion, complicated by renal failure, unspecified', 'Unspecified hypertensive heart disease with heart failure', 'Hypertensive heart and chronic kidney disease, malignant, without heart failure and with chronic kidney disease stage V or end stage renal disease', 'Systolic heart failure, unspecified', 'Diastolic heart failure, unspecified', 'Failure in dosage in electroshock or insulin-shock therapy', 'Mechanical failure of instrument or apparatus during heart catheterization', 'Unspecified hypertensive heart disease witho

Device set to use cpu



-----> Found entities:
               entity_group     score                  word  start  end
0                      Age  0.998372       68 - year - old     13   24
1                      Sex  0.999397                  male     25   29
2                  History  0.997994          hypertension     48   60
3                  History  0.994775                type 2     65   71
4           Clinical_event  0.999548            presenting     86   96
5   Nonbiological_location  0.999750  emergency department    104  124
6     Detailed_description  0.999973                 acute    130  135
7             Sign_symptom  0.999907   shortness of breath    136  155
8     Detailed_description  0.999969            productive    162  172
9     Biological_structure  0.999893                 chest    184  189
10    Diagnostic_procedure  0.987177               x - ray    190  195
11        Disease_disorder  0.999842             pneumonia    220  229

-----> Found primary diagnoses:
 ['pneumonia']

 --

Device set to use cpu



-----> Found entities:
               entity_group     score                  word  start  end
0                      Age  0.998372       68 - year - old     13   24
1                      Sex  0.999397                  male     25   29
2                  History  0.997994          hypertension     48   60
3                  History  0.994775                type 2     65   71
4           Clinical_event  0.999548            presenting     86   96
5   Nonbiological_location  0.999750  emergency department    104  124
6     Detailed_description  0.999973                 acute    130  135
7             Sign_symptom  0.999907   shortness of breath    136  155
8     Detailed_description  0.999969            productive    162  172
9     Biological_structure  0.999893                 chest    184  189
10    Diagnostic_procedure  0.987177               x - ray    190  195
11        Disease_disorder  0.999842             pneumonia    220  229

-----> Found historical diagnoses:
 ['hypertension'

### Step 5: Building the LangGraph Agent

In [83]:
############################################
### Step 5: Building the LangGraph Agent ###
############################################

print("-== STEP 5: Building the LangGraph Agent with local models ==-\n")

# Extra logging
verbose_logging = False


### Define an Expanded Agent State
class AgentState(TypedDict):
    original_query: str
    query_category: Optional[str]
    rephrased_query: Optional[str]
    question_type: str
    disease_names: Optional[List[str]]
    icd_codes: Optional[list[str]]
    tool_output: Optional[str]
    final_answer: Optional[str]


### Initialize LLM and Tools
llm = ChatOpenAI(
    openai_api_base="http://localhost:8000/v1",
    openai_api_key="not-needed",
    #model_name="deepseek/deepseek-r1-0528-qwen3-8b",
    # model_name="qwen/qwq-32b",
    # model_name="mistralai/devstral-small-2507",
    model_name="oh-dcft-v3.1-claude-3-5-haiku-20241022-qwen",
    # model_name="qwen-3-14b-gemini-v0.1",
    # model_name="amdevraj_-_mistral-7b-ift",
    temperature=0.0
)
tools = {tool.name: tool for tool in [
    extract_primary_diagnoses_from_text,
    extract_historical_diagnoses_from_text,
    get_icd_codes_for_disease,
    get_patient_count,
    get_average_length_of_stay,
    get_mortality_rate,
    get_demographic_breakdown,
    get_median_length_of_stay,
    get_mode_length_of_stay,
    get_average_patient_age,
    get_age_distribution,
    get_admission_type_breakdown,
    get_30_day_readmission_rate,
    get_top_comorbidities
]}

##########################
### Define Graph Nodes ###
##########################
print("-== STEP 5.1: Defining Graph Nodes ==-\n")

# Node 1: Initial Router. Determine Query Type
def determine_query_type(state: AgentState):
    """The entry point. Determines the query type and updates the state."""
    
    if verbose_logging:
        print("\n-== DETERMINING QUERY TYPE ==-")
    else:
        print("..", end="")
        
    prompt = f"""You are a routing assistant. Your job is to determine if the user's query contains a clinical note or unstructured text that needs to be processed first.

User Question: "{state['original_query']}"

Does this query contain a clinical note that should be analyzed to find diseases? Answer with only 'yes' or 'no'.
"""
    
    response = llm.invoke(prompt)
    if "yes" in response.content.lower():
        if verbose_logging:
            print(" ----> Category: Clinical Note Detected.")
        return {"query_category": "ner_path"}
    else:
        if verbose_logging:
            print(" ----> Category: Simple Query.")
        return {"query_category": "direct_path"}


def route_query(state: AgentState) -> str:
    """Reads the query_category from the state and returns the next node's name."""
    
    if verbose_logging:
        print("\n-== ROUTING QUERY ==-")
    else:
        print("..", end="")
   
    category = state.get("query_category")
    if category == "ner_path":
        if verbose_logging:
            print(" ----> Path: Routing to NER.")
        return "run_ner_and_rephrase"
    else:
        if verbose_logging:
            print(" ----> Path: Routing to classification.")
        return "classify_question"
        

# Node 2: Getting correct NER tool and process it
def run_ner_and_rephrase_query(state: AgentState):
    """
    First, determines which NER tool to use (primary vs. historical).
    Then, calls that tool to extract diagnoses.
    Finally, uses an LLM to rephrase the original query with the results.
    """

    if verbose_logging:
        print("\n-== RUNNING DYNAMIC NER AND REPHRASING QUERY ==-")
    else:
        print("..", end="")
        
    original_query = state['original_query']

    if verbose_logging:
        print(" ----> Choosing NER tool...")
        
    ner_tool_prompt = f"""You are a tool-choosing assistant. Based on the user's query, decide which of the following tools is most appropriate.

    Available Tools:
    - extract_primary_diagnoses_from_text: Use this to find the main, active diagnoses for the current medical event.
    - extract_historical_diagnoses_from_text: Use this to find past diseases from the patient's history.

    User Query: "{original_query}"

    Which tool should be used? Respond with ONLY the tool name.
    """
    tool_name_response = llm.invoke(ner_tool_prompt)
    chosen_tool_name = tool_name_response.content.strip()
    if verbose_logging:
        print(f" ----> Chosen NER tool: {chosen_tool_name}")

    if chosen_tool_name in tools:
        ner_tool = tools[chosen_tool_name]
        extracted_diseases = ner_tool.invoke({"clinical_note": original_query})
    else:
        extracted_diseases = ["Could not determine which NER tool to use."]

    if verbose_logging:
        print(f" ----> Extracted Diseases for Rephrasing: {extracted_diseases}")
        
    rephrase_prompt = f"""You are a query rephrasing assistant. Your goal is to simplify a complex query.
Given an original query and a list of diseases extracted from it, rephrase it into a simple, direct question.

**Original Query**: "{original_query}"
**Extracted Diseases**: {extracted_diseases}

**Rephrased Question**:
"""
    rephrase_response = llm.invoke(rephrase_prompt)
    rephrased_query = rephrase_response.content.strip()
    
    if verbose_logging:
        print(f" ----> Rephrased Query: {rephrased_query}")
        
    return {"rephrased_query": rephrased_query}
        

# Node 3: Router. Classify the User's Question
def classify_question(state: AgentState):
    """Uses the LLM to classify the question. It will use the rephrased_query if available."""

    if verbose_logging:
        print("\n-== CLASSIFYING QUESTION ==-")
    else:
        print("..", end="")

    # Use the rephrased query if it exists, otherwise use the original.
    query_to_classify = state.get("rephrased_query") or state.get("original_query")

    prompt = f"""You are a query classifier. Your job is to analyze the user's question and extract two pieces of information:
1. The 'question_type', which must be one of the following tool names: {', '.join(tools.keys())}
2. The 'disease_names', which must be a list of all diseases mentioned in the query.

User Question: "{query_to_classify}"

CRITICAL: Respond with ONLY a markup of JSON object with the keys 'question_type' and 'disease_names'.

**Example**:
"```json
{{
  "question_type": "get_top_comorbidities",
  "disease_names": ["hypertension", "type 2 diabetes", "pneumonia"]
}}
```"
"""
    
    response = llm.invoke(prompt)
    response_content = response.content.strip()
    parsed_json = None
    match = re.search(r"```json\s*(\{.*?\})\s*```", response_content, re.DOTALL)
    if match:
        try:
            parsed_json = json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            print(f" ---> [ERROR]: Found JSON block, but failed to parse: {match.group(1).strip()}")
    else:
        try:
            parsed_json = json.loads(response_content.split("```json")[-1].strip("`").strip().replace("```json", ""))
        except json.JSONDecodeError:
            print(f" ---> [ERROR]: Could not find JSON block and failed to parse raw content: {response_content}")

    if parsed_json:
        if verbose_logging:
            print(f" ----> Classification: \n{parsed_json}")
        return {
            "question_type": parsed_json["question_type"],
            "disease_names": parsed_json["disease_names"]
        }
    else:
        return {"question_type": "error"}


# Node 4: ICD Codes. Node to get ICD codes
def get_codes(state: AgentState):
    """Loops through a list of disease names and aggregates their ICD codes."""

    if verbose_logging:
        print("\n-== GETTING ICD CODES ==-")
    else:
        print("..", end="")
        
    disease_names = state["disease_names"]
    all_codes = set()
    for disease in disease_names:
        codes = get_icd_codes_for_disease.invoke({"disease_name": disease})
        if codes:
            all_codes.update(codes)

    if verbose_logging:
        print(f" ----> Found codes: \n{sorted(all_codes)}")
    
    return {
        "icd_codes": sorted(all_codes),
        "tool_output": f"Found codes for {disease_names}: {sorted(all_codes)}"
    }


# Node 5: Summary. A conditional node to run the correct final tool
def should_run_final_tool(state: AgentState) -> str:
    """Determines which path to take after getting ICD codes."""

    if verbose_logging:
        print("\n-== ROUTING ==-")
    else:
        print("..", end="")
        
    if state["question_type"] == "get_icd_codes_for_disease":
        if verbose_logging:
            print(" ----> Path: Direct lookup. Generating response.")
        return "generate_response"
    else:
        if verbose_logging:
            print(" ----> Path: Analysis. Running final tool.")
        return "run_final_tool"


def run_final_tool(state: AgentState):
    """
    Based on the classification, runs the appropriate final tool.
    This node acts as a code-based router.
    """

    if verbose_logging:
        print("\n-== RUNNING FINAL TOOL ==-")
    else:
        print("..", end="")
        
    question_type = state["question_type"]
    icd_codes = state["icd_codes"]
    if question_type not in tools or not icd_codes:
        return {"tool_output": "Error: Could not find a valid tool or ICD codes."}

    tool_to_run = tools[question_type]
    if verbose_logging:
        print(f" ----> Calling tool: \n{question_type}\n with codes: \n{icd_codes}")
    
    output = tool_to_run.invoke({"icd_codes": icd_codes})
    return {"tool_output": str(output)}


# Node 6: Final. Generate the final response
def generate_response(state: AgentState):
    """Uses the LLM to synthesize a final, human-readable answer."""

    if verbose_logging:
        print("\n-== GENERATING RESPONSE ==-")
    else:
        print("..", end="")
        
    prompt = f"""You are a helpful medical research assistant. The user asked the following question:
"{state['original_query']}"

You have performed a series of steps and have the final result: {state['tool_output']}

Please formulate a clear, concise, and friendly final answer for the user based on this result.
"""
    
    response = llm.invoke(prompt)
    if verbose_logging:
        print(f" ----> Final Answer: \n{response.content}")
    return {"final_answer": response.content}


#######################
### Build the Graph ###
#######################
print("-== STEP 5.2: Adding Edges ==-\n")
workflow = StateGraph(AgentState)

# Add all nodes
workflow.add_node("determine_query_type", determine_query_type)
workflow.add_node("run_ner_and_rephrase", run_ner_and_rephrase_query)
workflow.add_node("classify_question", classify_question)
workflow.add_node("get_codes", get_codes)
workflow.add_node("run_final_tool", run_final_tool)
workflow.add_node("generate_response", generate_response)

# The entry point
workflow.set_entry_point("determine_query_type")

# Add edges
workflow.add_conditional_edges(
    "determine_query_type",
    route_query,
    {
        "run_ner_and_rephrase": "run_ner_and_rephrase",
        "classify_question": "classify_question"
    }
)
workflow.add_edge("run_ner_and_rephrase", "classify_question")
workflow.add_edge("classify_question", "get_codes")
workflow.add_conditional_edges(
    "get_codes",
    should_run_final_tool,
    {
        "run_final_tool": "run_final_tool",
        "generate_response": "generate_response"
    }
)
workflow.add_edge("run_final_tool", "generate_response")
workflow.add_edge("generate_response", END)


#####################################
### Compile / visualize the graph ###
#####################################
print("-== STEP 5.3: Compiling Graph + visualization ==-\n")
app = workflow.compile()
print("[DONE] Advanced workflow agent compiled successfully.")

print("\n-== Displaying LangGraph in TXT mode... ==-")
print(app.get_graph().draw_ascii())

print("\n-== Saving LangGraph in PNG with Graphviz... ==-")
png_bytes = app.get_graph().draw_mermaid_png()
with open("agent_graph.png", "wb") as f:
    f.write(png_bytes)
print("\n[DONE] LangGraph image saved to agent_graph.png")


-== STEP 5: Building the LangGraph Agent with local models ==-

-== STEP 5.1: Defining Graph Nodes ==-

-== STEP 5.2: Adding Edges ==-

-== STEP 5.3: Compiling Graph + visualization ==-

[DONE] Advanced workflow agent compiled successfully.

-== Displaying LangGraph in TXT mode... ==-
                      +-----------+            
                      | __start__ |            
                      +-----------+            
                             *                 
                             *                 
                             *                 
                 +----------------------+      
                 | determine_query_type |      
                 +----------------------+      
                    ..               ...       
                 ...                    ..     
               ..                         ...  
+----------------------+                     ..
| run_ner_and_rephrase |                  ...  
+----------------------+                ..

### Step 6: Testing Agent

In [75]:
#############################
### Step 6: Testing Agent ###
#############################

print("-== STEP 6: Testing Agent with manipulation of MIMIC III data ==-\n")

# Extra logging
verbose_logging = False

def invoke_agent(query) -> str:
    """Helper Function to run the agent query"""
    
    inputs = {"original_query": query}
    final_state = app.invoke(inputs)
    final_answer = final_state.get('final_answer', 'No final answer was generated.').split("</think>")[-1].strip()
    return final_answer
    

### Testing Agent
print("-== STEP 6.1: Testing analytical questions ==-")

queries = [
    "What is the average age for patients with sepsis?",
    "Show me the age distribution for patients with type 2 diabetes.",
    "What is the average patient age for Acute Myocardial Infarction?",
    "What is the median length of stay for patients with Congestive Heart Failure?",
    "What are the most common comorbidities for patients with diabetes?",
    "Show me the admission type breakdown for stroke.",
    "What is the 30 day readmission rate for heart failure?",
]

for idx, query in enumerate(queries):
    print(f"______________________________\n\u2753 Query {idx + 1}: {query}")
    print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
    print("\n====================")


print("\n-== STEP 6.2: Testing complex queries based on Clinical notes ==-")

clinical_note = """
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"""
queries = [
    f"""
Given the following clinical note, what are the top comorbidities for the patient's primary diagnosis?

Note: "{clinical_note}"
""",
    f"""
Given the following clinical note, what are the ICD9 codes for all historical diseases mentioned?

Note: "{clinical_note}"
"""
]

for idx, query in enumerate(queries):
    print(f"______________________________\n\u2753 Query {idx + 1}: {query}")
    print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
    print("\n====================")


-== STEP 6: Testing Agent with manipulation of MIMIC III data ==-

-== STEP 6.1: Testing analytical questions ==-
______________________________
❓ Query 1: What is the average age for patients with sepsis?
..............
✅ Final Answer: 
The average age for patients with sepsis in our dataset is approximately 78.8 years.

______________________________
❓ Query 2: Show me the age distribution for patients with type 2 diabetes.
..............
✅ Final Answer: 
Here's the age distribution for patients with type 2 diabetes:

- **19-40 (Young Adult)**: 26 patients
- **41-65 (Adult)**: 257 patients  
- **65+ (Senior)**: 284 patients

Would you like any additional analysis or visualization of this data?

______________________________
❓ Query 3: What is the average patient age for Acute Myocardial Infarction?
..............
✅ Final Answer: 
The average patient age for Acute Myocardial Infarction is approximately 77.3 years.

______________________________
❓ Query 4: What is the median length o

Device set to use cpu


..........
✅ Final Answer: 
Based on the clinical note you provided, here are the top comorbidities associated with pneumonia (the patient's primary diagnosis):

1. **Unspecified essential hypertension** - 1642 patients
2. **Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled** - 845 patients  
3. **Congestive heart failure, unspecified** - 1732 patients
4. **Acute respiratory failure** - 1552 patients
5. **Atrial fibrillation** - 1337 patients
6. **Acute kidney failure, unspecified** - 1330 patients
7. **Unspecified septicemia** - 865 patients  
8. **Coronary atherosclerosis of native coronary artery** - 847 patients
9. **Severe sepsis** - 752 patients
10. **Urinary tract infection, site not specified** - 746 patients

These comorbidities are commonly associated with pneumonia cases based on the data available. The patient's mentioned history of hypertension and type 2 diabetes aligns well with these findings.

___________________

Device set to use cpu


........
✅ Final Answer: 
Based on the clinical note provided, I found ICD-9 codes for the historical diseases mentioned:

- **Hypertension**: 401.9
- **Type 2 diabetes**:
  - 250.02 (with ketoacidosis)
  - 250.72 (with neurological manifestations)  
  - 250.90 (without complication)

These codes represent the historical conditions mentioned in the patient's history.



In [76]:
####################################################
### Step 6.3: Testing Agent with detailed output ###
####################################################

# Extra logging
verbose_logging = True

clinical_note = """
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"""

query = f"""
Given the following clinical note, what are the top comorbidities for the patient's primary diagnosis?

Note: "{clinical_note}"
"""

print(f"______________________________\n\u2753 Query {idx + 1}: {query}")
print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
print("\n====================")

______________________________
❓ Query 2: 
Given the following clinical note, what are the top comorbidities for the patient's primary diagnosis?

Note: "
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"


-== DETERMINING QUERY TYPE ==-
 ----> Category: Clinical Note Detected.

-== ROUTING QUERY ==-
 ----> Path: Routing to NER.

-== RUNNING DYNAMIC NER AND REPHRASING QUERY ==-
 ----> Choosing NER tool...
 ----> Chosen NER tool: extract_primary_diagnoses_from_text
-== EXTRACTING DIAGNOSES FROM NOTE ==-



Device set to use cpu



-----> Found entities:
               entity_group     score                  word  start  end
0                      Age  0.998523       68 - year - old    125  136
1                      Sex  0.999522                  male    137  141
2                  History  0.998385          hypertension    160  172
3                  History  0.997866                type 2    177  183
4           Clinical_event  0.999422            presenting    194  204
5   Nonbiological_location  0.999645  emergency department    212  232
6     Detailed_description  0.999972                 acute    238  243
7             Sign_symptom  0.999898   shortness of breath    244  263
8     Detailed_description  0.999947            productive    270  280
9     Biological_structure  0.999876                 chest    288  293
10    Diagnostic_procedure  0.836773               x - ray    294  299
11        Disease_disorder  0.999825             pneumonia    324  333

-----> Found primary diagnoses:
 ['pneumonia']

 --

In [77]:
###################################################
### Step 6.4: Testing Agent with reduced output ###
###################################################

# Extra logging
verbose_logging = False

clinical_note = """
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"""

query = f"""
Given the following clinical note, what are the ICD9 codes for all historical diseases mentioned?

Note: "{clinical_note}"
"""

print(f"______________________________\n\u2753 Query {idx + 1}: {query}")
print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
print("\n====================")

______________________________
❓ Query 2: 
Given the following clinical note, what are the ICD9 codes for all historical diseases mentioned?

Note: "
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"

......

Device set to use cpu


........
✅ Final Answer: 
Based on the clinical note provided, I found ICD-9 codes for the historical diseases mentioned:

- **Hypertension**: 401.9
- **Type 2 diabetes**:
  - 250.02 (with ketoacidosis)
  - 250.72 (with neurological manifestations)  
  - 250.90 (without complication)

These codes represent the historical conditions mentioned in the patient's history.



### Step 7: DEMO

In [84]:
########################################
### Step 7.1: DEMO - Simple Question ###
########################################

# Extra logging
verbose_logging = True

query = "What is the median length of stay for patients with Congestive Heart Failure?"
print(f"______________________________\n\u2753 Query: {query}")
print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
print("\n====================")

______________________________
❓ Query: What is the median length of stay for patients with Congestive Heart Failure?

-== DETERMINING QUERY TYPE ==-
 ----> Category: Simple Query.

-== ROUTING QUERY ==-
 ----> Path: Routing to classification.

-== CLASSIFYING QUESTION ==-
 ----> Classification: 
{'question_type': 'get_median_length_of_stay', 'disease_names': ['congestive heart failure']}

-== GETTING ICD CODES ==-
 -----> Looking for candidate names using fuzzy search.
     ---> Candidate names found:
['Rheumatic heart failure (congestive)', 'Congestive heart failure, unspecified', 'Post-osseointegration mechanical failure of dental implant', 'Abnormality in fetal heart rate or rhythm, delivered, with or without mention of antepartum condition', 'Abnormality in fetal heart rate or rhythm during labor', 'Failure of lactation, delivered, with mention of postpartum complication', 'Acute kidney failure with lesion of renal cortical necrosis', 'Unspecified abortion, complicated by renal fa

In [85]:
#########################################
### Step 7.2: DEMO - Complex Question ###
#########################################

# Extra logging
verbose_logging = True

clinical_note = """
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"""

query = f"""
Given the following clinical note, what are the ICD9 codes for all historical diseases mentioned?

Note: "{clinical_note}"
"""

print(f"______________________________\n\u2753 Query {idx + 1}: {query}")
print(f"\n\u2705 Final Answer: \n{invoke_agent(query)}")
print("\n====================")

______________________________
❓ Query 2: 
Given the following clinical note, what are the ICD9 codes for all historical diseases mentioned?

Note: "
Patient is a 68-year-old male with a history of hypertension and type 2 diabetes,
presenting to the emergency department with acute shortness of breath and a productive cough.
Chest X-ray confirms a diagnosis of pneumonia
"


-== DETERMINING QUERY TYPE ==-
 ----> Category: Clinical Note Detected.

-== ROUTING QUERY ==-
 ----> Path: Routing to NER.

-== RUNNING DYNAMIC NER AND REPHRASING QUERY ==-
 ----> Choosing NER tool...
 ----> Chosen NER tool: extract_historical_diagnoses_from_text
-== EXTRACTING DIAGNOSES FROM NOTE ==-



Device set to use cpu



-----> Found entities:
               entity_group     score                  word  start  end
0     Diagnostic_procedure  0.832262                    ic     48   50
1                  History  0.285574                  ##d9     50   52
2                      Age  0.998370       68 - year - old    120  131
3                      Sex  0.999338                  male    132  136
4                  History  0.998856          hypertension    155  167
5                  History  0.998948                type 2    172  178
6           Clinical_event  0.999422            presenting    189  199
7   Nonbiological_location  0.999708  emergency department    207  227
8     Detailed_description  0.999970                 acute    233  238
9             Sign_symptom  0.999844   shortness of breath    239  258
10    Detailed_description  0.999968            productive    265  275
11    Biological_structure  0.999905                 chest    283  288
12    Diagnostic_procedure  0.962815               x