In [None]:
# Adjustments:
# - outputs
#   --> max_new_tokens is now 1500 (was 1000)
#   --> temperature is now 0.7
# - merged cells: torch.cuda tests


In [None]:
# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
#!pip install accelerate

import torch
from transformers import pipeline
from pymongo import MongoClient


# Text-generation pipeline used for every dialogue and summary request below.
# device_map="auto" leaves device placement to the library.
pipe = pipeline(
    task="text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    device_map="auto",
)

# MongoDB handle; all ED-* collections and the NLP-EVAL output collection are read
# from / written to the 'MIMIC-IV' database on the local server.
client = MongoClient(host='mongodb://localhost:27017/')
db = client['MIMIC-IV']

In [None]:
import torch


def torch_environment_report():
    """Collect the torch/CUDA diagnostics shown by this cell.

    Returns:
        list: six entries, in print order: CUDA availability (bool), CUDA device
        count (int), name of device 0, a one-element tensor moved to CUDA, the
        torch package path and the torch version. When CUDA is not available the
        two device-dependent entries are replaced by explanatory strings, because
        querying device 0 or calling ``.cuda()`` would raise and hide the
        remaining diagnostics.
    """
    cuda_ready = torch.cuda.is_available()
    report = [cuda_ready, torch.cuda.device_count()]
    if cuda_ready:
        report.append(torch.cuda.get_device_name(0))
        report.append(torch.zeros(1).cuda())
    else:
        report.append('no CUDA device available')
        report.append('skipped CUDA tensor allocation')
    report.append(torch.__path__)
    report.append(torch.__version__)
    return report


for entry in torch_environment_report():
    print(entry)

In [None]:
# Collections whose first record for the stay is fetched and returned as provenance.
_SOURCE_COLLECTIONS = (
    'ED-EdStays',
    'ED-Triage',
    'ED-VitalSigns',
    'ED-Diagnosis',
    'ED-Pyxis',
    'ED-Medrecon',
)

# Sampling settings shared by every call to the generation pipeline.
_SAMPLING_KWARGS = {
    'do_sample': True,
    'temperature': 0.7,
    'top_k': 50,
    'top_p': 0.95,
    'return_full_text': False,
}
_DIALOGUE_MAX_NEW_TOKENS = 1500
_SUMMARY_MAX_NEW_TOKENS = 100

_NOT_SPECIFIED = 'not specified'


def _format_measurement(record, key, as_integer=True):
    """Return record[key] formatted for a prompt.

    Values that look like a non-negative decimal number are rendered as an integer
    string (or with one decimal place when ``as_integer`` is False). A missing key,
    None or NaN yields 'not specified'. Any other value is returned unchanged.
    """
    value = record.get(key, _NOT_SPECIFIED)
    # NaN is the only value that is not equal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return _NOT_SPECIFIED
    if str(value).replace('.', '', 1).isdigit():
        number = float(value)
        return str(int(number)) if as_integer else f"{number:.1f}"
    return value


def _extract_vitals(record):
    """Map a triage / vital-signs record to prompt-ready measurement values."""
    return {
        'temperature': _format_measurement(record, 'temperature', as_integer=False),
        'heartrate': _format_measurement(record, 'heartrate'),
        'resprate': _format_measurement(record, 'resprate'),
        'o2sat': _format_measurement(record, 'o2sat'),
        'sbp': _format_measurement(record, 'sbp'),
        'dbp': _format_measurement(record, 'dbp'),
        # Pain is passed through as stored; it is not converted to a number.
        'pain': record.get('pain', _NOT_SPECIFIED),
    }


def _is_highest_acuity(acuity):
    """True when acuity equals 1, whether stored as "1.0000", "1", 1 or 1.0."""
    # NOTE(review): the stored type of 'acuity' is not visible here; the original
    # code only matched the exact string "1.0000". Confirm against the collection.
    try:
        return float(acuity) == 1.0
    except (TypeError, ValueError):
        return False


def _generate_text(prompt, max_new_tokens):
    """Run the global pipeline on ``prompt`` and return only the generated text."""
    outputs = pipe(prompt, max_new_tokens=max_new_tokens, **_SAMPLING_KWARGS)
    return outputs[0]["generated_text"]


def _generate_dialogue(system_prompt, user_prompt):
    """Generate one dialogue segment from a system/user prompt pair."""
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {"role": "user", "content": user_prompt},
    ]
    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    return _generate_text(prompt, _DIALOGUE_MAX_NEW_TOKENS)


def _summarize_dialogue(input_text):
    """Ask the model for a short summary of a generated dialogue segment.

    The summary prompt is sent as-is (no chat template), as in the original code.
    """
    summary_prompt = f"""
        Your task is to extract the relevant information from a \
        dialog withing an ambulance car.
        From the dialogue text below, delimited by triple 
        backticks, extract the information that is related to medical condition and how they interacted
        in at most 200 words 
     
    Review: ```{input_text}```
    """
    return _generate_text(summary_prompt, _SUMMARY_MAX_NEW_TOKENS)


def generate_patient_dialogue(subject_id, stay_id):
    """Generate a four-act synthetic ambulance dialogue for one ED stay.

    Reads the first record for ``stay_id`` from each ED-* collection of the global
    ``db`` and uses the global ``pipe`` to generate four dialogue segments:
    loading the patient, triage measurements, monitoring/medication and arrival
    at the hospital. Between acts the previous segment is summarized and the
    summary text is fed into the next system prompt. Missing records no longer
    raise; the affected act falls back to a prompt without the missing data.

    Args:
        subject_id: Patient identifier; only printed for traceability.
        stay_id: ED stay identifier used to look up all source records.

    Returns:
        tuple: ``(full_dialogue, json_data_used)`` where ``full_dialogue`` is the
        four segments joined by newlines and ``json_data_used`` maps each source
        collection name to the record that was read (or None if there was none).
    """
    print('subject_id:')
    print(subject_id)
    print('stay_id:')
    print(stay_id)

    # Fetch every source record exactly once so that the returned provenance is
    # the very data the prompts were built from.
    stay_filter = {'stay_id': stay_id}
    json_data_used = {}
    for collection_name in _SOURCE_COLLECTIONS:
        json_data_used[collection_name] = db[collection_name].find_one(stay_filter)

    edstay_info = json_data_used['ED-EdStays'] or {}
    triage_info = json_data_used['ED-Triage']
    vitalsigns_info = json_data_used['ED-VitalSigns']
    diagnosis_info = json_data_used['ED-Diagnosis']
    pyxis_info = json_data_used['ED-Pyxis']
    medrecon_info = json_data_used['ED-Medrecon']

    # ************************ First Act: Patient is loaded into ambulance car ************************

    # Convert gender codes "M" and "F" to "male" and "female"
    gender_code = edstay_info.get('gender', '')
    gender = 'male' if gender_code == 'M' else 'female' if gender_code == 'F' else 'the patient\'s gender'

    # Chief complaint as reported at triage.
    chief_complaint = (triage_info or {}).get('chiefcomplaint', 'unknown complaint')

    # Medication the patient reported having taken before.
    if medrecon_info:
        medication_patientreported_name = medrecon_info.get('name', 'none')
        medication_patientreported_reason = medrecon_info.get('etcdescription', 'none')
    else:
        medication_patientreported_name = 'none'
        medication_patientreported_reason = 'any'

    # Create scenario templates
    scenario_1 = f"""Your Taks is to generate text like an audio to text transcript of the ambulance crew's utterances as they load
                a new {gender} patient into the ambulance and plays before they do any interventions, 
                the patient is unconscious. 
                The ambulance crew discuss the patient's main complaint '{chief_complaint} ' which has been reported to them.
                Remember only give back utterances of the ambulance crew. 
                """

    scenario_2 = f"""Generate text representing a standard interaction between the ambulance crew and between them and a {gender} patient, 
                    where the patient's condition is {chief_complaint} . 
                    The dialogue should be professional and focus on assessing the patient's condition in a non-critical context. 
                    Remember that the patient responses are in a colloquial manner 
                    and the ambulance emergency medical crew does not perform any measuerement or examinations yet, but asks if
                    the patient has taken any medication before, to which the patient answers that the had taken {medication_patientreported_name}
                    for {medication_patientreported_reason} reasons."""

    # Scenario 1 is for patients with severe status that might be unresponsive.
    # Without a triage record there is nothing to base that on, so scenario 2 is used.
    acuity = triage_info.get('acuity') if triage_info else None
    is_critical = bool(triage_info) and (
        triage_info.get('pain') == 'unable' or _is_highest_acuity(acuity)
    )
    if is_critical:
        scenario = scenario_1
        print("Scenario 1 is applied.")
    else:
        scenario = scenario_2  # Default to scenario 2
        print("Default Scenario 2 is applied")

    print('Acuitiy:')
    print(acuity)
    print('Scenario input prompt:')
    print(scenario)

    system_prompt_act_1 = "You are a speech-to-text simulator that generates transcrips of utterances recorded in an ambulance car, you're task is to generate spoken utterances from and between the ambulance emergency medical service crew and from the crew to the patient."
    dialogue_segments = [_generate_dialogue(system_prompt_act_1, scenario)]

    # ************************ Second Act: Triage First & Measurements ************************
    # Only the generated text is kept; the raw pipeline output is a list of dicts
    # and must not be interpolated into the next prompt.
    summary_text = _summarize_dialogue(dialogue_segments[-1])
    print('summary_output:')
    print(summary_text)

    if triage_info:
        vitals = _extract_vitals(triage_info)
        temperature, heartrate, resprate = vitals['temperature'], vitals['heartrate'], vitals['resprate']
        o2sat, sbp, dbp, pain = vitals['o2sat'], vitals['sbp'], vitals['dbp'], vitals['pain']

        # Prepare the prompt with the fetched and converted values
        user_prompt_act_2 = f"""
        Your task is to continue the dialogue transcript based on the condition of the patient. 
        Include a question from the ambulance crew to the {gender} patient about how severe the pain is on a scale between 0-10, 
        and let the {gender} patient answer that their pain is {pain}. 
        Include the following measurements: 
        temperature {temperature}, 
        heartrate {heartrate}, 
        resprate {resprate}, 
        bloodpressure {sbp} and {dbp} 
        and oxygen saturation {o2sat}.
        """
        print(user_prompt_act_2)
    else:
        print(f"No ED-Triage record found for stay_id: {stay_id}")
        # Fallback so the act can still be generated without triage data.
        user_prompt_act_2 = f"""
        Your task is to continue the dialogue transcript based on the condition of the patient. 
        Include a question from the ambulance crew to the {gender} patient about how severe the pain is on a scale between 0-10. 
        No triage measurements are available, so do not state any measurement values.
        """

    system_prompt_act_2 = f"""
        You are a speech-to-text simulator that generates transcripts of utterances recorded in 
        in an ambulance. 
        Your task is to generate spoken utterances between the ambulance crew and the {gender} patient. 
        crew and from the crew to the patient.
        Your task is to follow the events in the ambulance on the basis of these transcripts: {summary_text}.
        """
    print(system_prompt_act_2)

    dialogue_segments.append(_generate_dialogue(system_prompt_act_2, user_prompt_act_2))
    print(dialogue_segments[-1])

    # ************************ Third Act: Monitoring and Medication************************
    summary_text = _summarize_dialogue(dialogue_segments[-1])
    print(summary_text)

    # Medication administered during the stay and the recorded diagnosis.
    medication = pyxis_info.get('name', _NOT_SPECIFIED) if pyxis_info is not None else _NOT_SPECIFIED
    diagnosis = diagnosis_info.get('icd_title', _NOT_SPECIFIED) if diagnosis_info is not None else _NOT_SPECIFIED

    if vitalsigns_info:
        vitals = _extract_vitals(vitalsigns_info)
        temperature, heartrate, resprate = vitals['temperature'], vitals['heartrate'], vitals['resprate']
        o2sat, sbp, dbp = vitals['o2sat'], vitals['sbp'], vitals['dbp']

        # Prepare the prompt with the fetched values
        user_prompt_act_3 = f"""
        Your task is to continue the dialogue transcript based on the condition of the {gender} patient. 
        Include the following measurements: temperature {temperature}, 
        heartrate {heartrate}, 
        resprate {resprate}, 
        bloodpressure {sbp} and {dbp}, 
        and oxygen saturation {o2sat},
        and let the ambulance emergency medical service crew administer the medication: {medication},
        also let the ambulance crew discuss or suspect about the {gender} patients diagnosis: {diagnosis}.                            
        """
    else:
        print(f"No ED-VitalSigns record found for stay_id: {stay_id}")
        # Fallback so the act can still be generated without monitoring data.
        user_prompt_act_3 = f"""
        Your task is to continue the dialogue transcript based on the condition of the {gender} patient. 
        No monitoring measurements are available, so do not state any measurement values,
        and let the ambulance emergency medical service crew administer the medication: {medication},
        also let the ambulance crew discuss or suspect about the {gender} patients diagnosis: {diagnosis}.
        """

    system_prompt_act_3 = f"""
    You are a speech-to-text simulator that generates transcripts of utterances recorded in in an ambulance. 
    Your task is to generate spoken utterances between the ambulance crew and from the crew to the {gender} patient.
    Your task is to follow the events in the ambulance on the basis of these transcripts: {summary_text} explicitly 
    mention if the patient was unconscious.
                     """

    print('system_prompt_act_3:')
    print(system_prompt_act_3)
    print('user_prompt_act_3:')
    print(user_prompt_act_3)

    dialogue_segments.append(_generate_dialogue(system_prompt_act_3, user_prompt_act_3))
    print(dialogue_segments[-1])

    # ************************ LAST ACT: Arriving at Hospital ************************
    summary_text = _summarize_dialogue(dialogue_segments[-1])
    print(summary_text)

    system_prompt_act_last = f"""
    You are a speech-to-text simulator that generates transcripts of utterances recorded in in an ambulance. 
    Your task is to generate spoken utterances between the ambulance crew and from the crew to the {gender} patient.
    Your task is to follow the events in the ambulance on the basis of these transcripts: {summary_text}. 
    Please remember to exclude any descriptions of actions, scene transitions, or non-verbal elements.
    """

    user_prompt_act_last = f"""
    Generate text representing a standard interaction between the ambulance crew and between them and the {gender} patient, 
    where they are on the way to the hospital and eventually arriving at the hospital.
    Generate only the spoken dialogue as it occurs.
    """

    print(system_prompt_act_last)
    print(user_prompt_act_last)

    dialogue_segments.append(_generate_dialogue(system_prompt_act_last, user_prompt_act_last))
    print(dialogue_segments[-1])

    # Joining segments:
    full_dialogue = '\n'.join(dialogue_segments)

    return full_dialogue, json_data_used

In [None]:
def main(number_of_patients):
    """Generate and store dialogues for a random sample of ambulance arrivals.

    Samples up to ``number_of_patients`` ED stays whose ``arrival_transport`` is
    'AMBULANCE', generates a dialogue for each via ``generate_patient_dialogue``
    and inserts one document per stay into the 'NLP-EVAL' collection of the
    global ``db``.

    Args:
        number_of_patients: Requested sample size passed to ``$sample``.

    Returns:
        int: Number of stays actually processed. This can be smaller than the
        requested size when fewer matching stays exist, which is why the
        completion message reports this count rather than the request.
    """
    dialogues_collection = db['NLP-EVAL']

    ed_stays_cursor = db['ED-EdStays'].aggregate([
        {'$match': {'arrival_transport': 'AMBULANCE'}},
        {'$sample': {'size': number_of_patients}}
    ])

    processed_count = 0
    for ed_stay in ed_stays_cursor:
        subject_id = ed_stay['subject_id']
        stay_id = ed_stay['stay_id']

        full_dialogue, json_data_used = generate_patient_dialogue(subject_id, stay_id)

        document = {
            'stay_id': stay_id,
            'dialogue': full_dialogue,
            'json_data_used': json_data_used
        }
        dialogues_collection.insert_one(document)
        processed_count += 1

    print(f"Completed generating and uploading data for {processed_count} patients.")
    return processed_count

In [None]:
# Script entry point: run the generation for a fixed number of sampled patients.
if __name__ == "__main__":
    patient_count = 1  # Specify the number of patients here
    main(patient_count)