In [1]:
##################################
###       SET UP LOGGER        ###
##################################

import logging
logging.basicConfig(filename='logs.txt',
                    filemode='a',
                    format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
                    datefmt="%H:%M:%S",
                    level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(level=logging.DEBUG)

In [8]:
##################################
###       DEPENDENCIES         ###
##################################

# general
import pandas as pd
import datetime
import yaml
import re

# H20 GPT client
import ast
import gradio_client

# FHIR client
from fhirclient.client import FHIRClient
from fhirclient.models.observation import Observation
from fhirclient.models.condition import Condition
from fhirclient.models.medicationstatement import MedicationStatement
from fhirclient.models.medication import Medication
from fhirclient.models.procedure import Procedure
from fhirclient.models.annotation import Annotation
from fhirclient.models.patient import Patient

# EBI client (ontology)
from ols_client import EBIClient

In [3]:
##################################
###       SET UP               ###
##################################

# get the config
with open("config.yml", "r") as ymlfile:
    config = yaml.safe_load(ymlfile)

# FHIR client settings
fhir_settings = {
    "app_id": config["FHIR"]["app_id"],
    "api_base": config["FHIR"]["api_base"],
}
fhir_client = FHIRClient(settings=fhir_settings)

# set up H2O GPT client
llm_client = gradio_client.Client(config["H2O"]["HOST_URL"])
logger.info("H2O GPT client set up")

# set up EBI client
ebi_client = EBIClient()

# get test data
data = pd.read_csv(config["DATA"]["path"], sep=config["DATA"]["sep"])
logger.info("Test data on {} paients loaded".format(len(data)))

Loaded as API: http://localhost:7860/ ✔


In [4]:
##################################
###     UPLOAD PATIENTS        ###
##################################

# create a patient
patient = Patient({
    "name": [{"text": "John Dae"}]
})
patient.create(fhir_client.server)

# create a medication
medication = Medication({
    "status": "active"
})
medication.create(fhir_client.server)

# convert each observation to FHIR and write to the FHIR server
# as a preliminary observation linked to the patient John Dae
for ind, raw in data.iterrows():
    observation = Observation({
        "status": "registered",
        "code": {"text": data.iloc[ind,1]},
        "subject": {
            "reference": "Patient/1",
            "display": "John Dae"
        },
    })
    observation.create(fhir_client.server)

In [12]:
##################################
###   EXTRACT DATA USING LLM   ###
##################################

# llm call function
def run_llm(prompt):

    # string of dict for input
    kwargs = dict(instruction_nochat=prompt)

    # run LLM
    logger.info(f"Prompt: {prompt}")
    res = llm_client.predict(str(dict(kwargs)), api_name="/submit_nochat_api")
    
    # extract response
    response = ast.literal_eval(res)["response"]
    logger.info(f"Response: {response}")
    return response

# get the prompt for the given template
def get_completion_prompt(text, config = config):
    """Get the prompt for the given template."""

    # concatinate the prompt with doctor"s note
    prompt = config["LLM"]["system_prompt"] + text

    return prompt

# process LLM response
def process_llm_response(ann_text):
    
    # process response string
    ann_text = ann_text.strip() # strip
    ann_text = ann_text.lower() # lower
    ann_text = ann_text[ann_text.find("conditions"):] # delete intro
    ann_text = ann_text.replace("*",";") # remove stars
    ann_text = ann_text.replace("\n"," ") # remove new lines
    # add new lines to sections
    ann_text = ann_text.replace("observations:","\nobservations:")
    ann_text = ann_text.replace("medications:","\nmedications:")
    ann_text = ann_text.replace("procedures:","\nprocedures:")

    # check that annotations has exactly 4 lines
    anns = ann_text.split("\n")
    if len(anns) != 4:
        logger.error("The response not 4 lines")
        return None
        
    # convert to dict
    ann_dict = {}
    for ann in anns:

        # split the key and value
        ann_key, ann_values = ann.split(":", 1)

        # split the value into a list
        ann_values = [a.strip() for a in ann_values.strip().split(";") if a != ""]

        # if there is an ignore phrase, reject the value
        for ignore_phrase in config["LLM"]["ignore_phrases"]:
            ann_values = [a for a in ann_values if ignore_phrase not in a]

        # extract entity and parameter from each value
        value_dicts = []
        for i in range(len(ann_values)):

            # regular expression to find if there are brackets with parameters as in "entity (parameter)"
            m = re.search(r"\(([A-Za-z0-9._\-\/ ]+)\)", ann_values[i])

            # if there is a match, extract the entity and parameter
            if m is not None and ann_key in config["LLM"]["headers_with_parameters"]:

                # extract the entity from the value
                entity = ann_values[i][:m.span()[0]].strip()
                parameter = m.group(1).strip()

            else:
                
                # otherwise the entity is the whole value and the parameter is empty
                entity = ann_values[i].strip(),
                parameter = ""
        
            # if entity is tuple, convert to string
            if type(entity) == tuple:
                entity = entity[0]

            # ground the entity in ontology
            ontology = annotate_term_with_ontology(entity)

            # define the value dictionary with entity, parameter and ontology only if ontology was detected
            if ontology is not None:
                value_dicts.append({
                    "entity": entity,
                    "parameter": parameter,
                    "coding": ontology
                })

        # write to the dict
        ann_dict[ann_key] = value_dicts

    # check that annotation dict has each of the expected headers 
    for i in range(4):
        if config["LLM"]["headers"][i] not in ann_dict.keys():
            logger.error("The response does not start with the correct header")
            return None

    return ann_dict

# annotate observation
def annotate_observation(observation, fhir_client, note_id, update = True):
    
    # generate the prompt
    prompt = get_completion_prompt(
        text = observation.code.text
    )
    
    # LLM call to extract entities 
    ann_text = run_llm(prompt)

    # preprocess annotation text
    ann_dict = process_llm_response(ann_text)

    # update FHIR records
    if update:

        # update the observation
        observation.note = [Annotation({
            "authorString" : "Raw annotations from LLama2",
            "text": str(ann_dict)
        })]
        observation.status = "final"
        observation.update(fhir_client.server)

        # write derived conditions
        for ind, condition_json in enumerate(ann_dict["conditions"]):
            
            # add unique note id and processing id
            condition_json['note_id'] = note_id
            condition_json['condition_id'] = ind
            
            # write the condition
            condition = Condition({
                "clinicalStatus": {"text": "preliminary"},
                "code": {"coding": [condition_json["coding"]]},
                "subject": {
                    "reference": "Patient/1",
                    "display": "John Dae"
                },
                "note": [{
                    "authorString" : "Raw annotations from LLama2",
                    "text": str(condition_json)
                }]
            })
            condition.create(fhir_client.server)

        # write derived observations
        for ind, observation_json in enumerate(ann_dict["observations"]):
            
            # add unique note id and processing id
            observation_json['note_id'] = note_id
            observation_json['observation_id'] = ind
            
            # write the observation
            observation = Observation({
                "status": "preliminary",
                "code": {"coding": [observation_json["coding"]]},
                "valueString": observation_json["parameter"],
                "subject": {
                    "reference": "Patient/1",
                    "display": "John Dae"
                },
                "note": [{
                    "authorString" : "Raw annotations from LLama2",
                    "text": str(observation_json)
                }]
            })
            observation.create(fhir_client.server)

        # write derived conditions
        for ind, medication_json in enumerate(ann_dict["medications"]):
            
            # add unique note id and processing id
            medication_json['note_id'] = note_id
            medication_json['medication_id'] = ind

            # write the medication
            medication = MedicationStatement({
                "status": "preliminary",
                "category": {"coding": [medication_json["coding"]]},
                "medication": {
                    "reference": "Medication/2",
                    "display": medication_json["entity"]
                },
                "dosage": [{
                    "text": medication_json["parameter"]
                }],
                "subject": {
                    "reference": "Patient/1",
                    "display": "John Dae"
                },
                "note": [{
                    "authorString" : "Raw annotations from LLama2",
                    "text": str(medication_json)
                }]
            })
            medication.create(fhir_client.server)

        # write derived conditions
        for ind, procedure_json in enumerate(ann_dict["procedures"]):

            # add unique note id and processing id
            procedure_json['note_id'] = note_id
            procedure_json['procedure_id'] = ind

            # write the procedure
            procedure = Procedure({
                "status": "preliminary",
                "code": {"coding": [procedure_json["coding"]]},
                "subject": {
                    "reference": "Patient/1",
                    "display": "John Dae"
                },
                "note": [{
                    "authorString": "Raw annotations from LLama2",
                    "text": str(procedure_json)
                }]
            })
            procedure.create(fhir_client.server)

    return ann_dict

# annotate using ontology
def annotate_term_with_ontology(text):
    try:
        results = ebi_client.search(text)
        if len(results) > 0:
            return {
                "system": results[0]["iri"],
                "code": results[0]["short_form"],
                "display": results[0]["description"][0]
            }
        else:
            return None
    except:
        return None

# annotate clinical notes
job_not_done = True
note_id = 0
while job_not_done:

    # get observation batch
    local_fhir_client = FHIRClient(settings=fhir_settings)
    search = Observation.where(struct={'status': 'registered'})
    observations = search.perform_resources(local_fhir_client.server)

    # check that it is not empty (job not finished)
    if len(observations) == 0:
        job_not_done = False
    else:

        # annotate the batch of observations
        for observation in observations:
            annotate_observation(observation, local_fhir_client, note_id, update = True)
            note_id += 1

HTTPError: 400 Client Error:  for url: http://localhost:8080/fhir/Procedure