In [1]:
import re

import pandas as pd
import torch
import tqdm
import transformers
# Rebinds `tqdm` to the callable progress bar that the generation functions invoke as `tqdm(...)`.
from tqdm.auto import tqdm
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

In [None]:
import os
# Process-level settings: the GPU ids exposed to this kernel and the vLLM log level.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0,3,4,5",
        'VLLM_LOGGING_LEVEL': "ERROR",
    }
)

In [None]:
# Checkpoint directory used for both the tokenizer and the vLLM engine.
# Other checkpoints that were listed here (kept commented out):
#   "/local/vietdq/Llama-3-70B-Instruct-Gradient-524k/snapshots/4dcdcc504104ddb927a5275d910e30a869877cdd/"
#   "/local/vietdq/Qwen2-72B-Instruct/"
model_id = "/local/vietdq/Meta-Llama-3.1-70B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

In [None]:
# Generation stops on the tokenizer's EOS id or on the id of the `<|eot_id|>` token.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
# Sampling configuration shared by every `generate` call in this notebook.
sampling_params = SamplingParams(
    max_tokens=3000,
    stop_token_ids=terminators,
    temperature=0.6,
    top_p=0.9,
)

In [None]:
# vLLM engine; tensor_parallel_size=4 matches the four device ids listed in CUDA_VISIBLE_DEVICES.
model = LLM(
    model=model_id,
    seed=5,
    tensor_parallel_size=4,
)

In [None]:
def load_retrieved_hadm_id(path: str = "/nethome/vietdq/Projects/test_retrievedHadmID_tv_oneshot.csv") -> pd.DataFrame:
    """Load the retrieval table that pairs each test admission with an example admission.

    Args:
        path: CSV file to read. Defaults to the location this notebook has always used,
            so existing zero-argument calls behave as before.

    Returns:
        The CSV contents as a DataFrame. The RAG generation function in this notebook
        reads its ``hadm_id`` and ``retrieved_hadm_id`` columns.
    """
    return pd.read_csv(path)

def load_train_valid_discharge(base_path: str = "/nethome/vietdq/discharge_data") -> pd.DataFrame:
    """Load the train and validation discharge notes and extract their target sections.

    Reads ``<base_path>/valid/discharge.csv`` and ``<base_path>/train/discharge.csv``,
    combines them with an outer merge on all note columns, renames ``text`` to
    ``discharge_summary`` and adds three columns:

    * ``discharge_instructions`` - list of regex matches for the text between
      "Discharge Instructions:" and "Followup Instruction".
    * ``brief_hospital_course`` - list of regex matches for the text after
      "Brief Hospital Course:" up to the next section header.
    * ``json`` - each row (including the two columns above) serialized with ``to_json``.

    Args:
        base_path: directory holding the ``valid`` and ``train`` sub-directories.
            Defaults to the location this notebook has always used.

    Returns:
        The combined DataFrame described above.
    """
    note_columns = ['note_id', 'subject_id', 'hadm_id', 'note_type', 'note_seq', 'charttime', 'storetime', 'text']
    # Compiled once instead of being re-parsed for every row.
    di_pattern = re.compile(r'Discharge Instructions:\n(.*?)Followup Instruction', re.DOTALL)
    bhc_pattern = re.compile(r'Brief Hospital Course:\s*\n{0,2}(.*?)(?=\n\s*\n{0,2}\s*[A-Z_]+[^\n:]+:\n)', re.DOTALL)

    def _find_sections(pattern, text) -> list:
        # A missing note is read by pandas as a float NaN; return "no matches" instead of raising.
        return pattern.findall(text) if isinstance(text, str) else []

    discharge_val = pd.read_csv(f"{base_path}/valid/discharge.csv")
    discharge_train = pd.read_csv(f"{base_path}/train/discharge.csv")

    data = pd.merge(discharge_val, discharge_train, how='outer', on=note_columns)
    data = data.rename(columns={'text': 'discharge_summary'})
    data['discharge_instructions'] = data['discharge_summary'].apply(lambda text: _find_sections(di_pattern, text))
    data['brief_hospital_course'] = data['discharge_summary'].apply(lambda text: _find_sections(bhc_pattern, text))
    # Serialized last so the JSON also carries the two extracted section columns.
    data['json'] = data.apply(lambda row: row.to_json(), axis=1)
    return data

def load_test_discharge(base_path: str = "/nethome/vietdq/discharge_data") -> pd.DataFrame:
    """Load the phase-2 test notes with the two target sections removed.

    Reads ``<base_path>/test_phase_2/discharge.csv``, renames ``text`` to
    ``discharge_summary`` and then, in this order:

    1. deletes the span from "Discharge Instructions:" up to "Followup Instruction";
    2. deletes the "Brief Hospital Course:" section up to the next section header;
    3. extracts ``history_present_illness`` and ``discharge_medication`` (lists of
       regex matches) from the already-redacted summary;
    4. serializes every row, including the extracted columns, into ``json``.

    Args:
        base_path: directory holding the ``test_phase_2`` sub-directory.
            Defaults to the location this notebook has always used.

    Returns:
        The prepared test DataFrame.
    """
    def _section_pattern(header: str):
        # A section runs from its header to the next line that looks like "Some Header:".
        return re.compile(header + r':\s*\n{0,2}(.*?)(?=\n\s*\n{0,2}\s*[A-Z_]+[^\n:]+:\n)', re.DOTALL)

    di_pattern = re.compile(r'Discharge Instructions:\n(.*?)Followup Instruction', re.DOTALL)
    bhc_pattern = _section_pattern('Brief Hospital Course')
    hpi_pattern = _section_pattern('History of Present Illness')
    medication_pattern = _section_pattern('Discharge Medications')

    def _redact(text):
        # Non-string cells (e.g. NaN for a missing note) are passed through untouched.
        if not isinstance(text, str):
            return text
        return bhc_pattern.sub('', di_pattern.sub('', text))

    def _find_sections(pattern, text) -> list:
        return pattern.findall(text) if isinstance(text, str) else []

    discharge_test = pd.read_csv(f"{base_path}/test_phase_2/discharge.csv")
    discharge_test = discharge_test.rename(columns={'text': 'discharge_summary'})
    discharge_test['discharge_summary'] = [_redact(text) for text in discharge_test['discharge_summary']]
    discharge_test['history_present_illness'] = discharge_test['discharge_summary'].apply(lambda text: _find_sections(hpi_pattern, text))
    discharge_test['discharge_medication'] = discharge_test['discharge_summary'].apply(lambda text: _find_sections(medication_pattern, text))
    discharge_test['json'] = discharge_test.apply(lambda row: row.to_json(), axis=1)
    return discharge_test

In [None]:
# Reference sections for the test set; the generation functions read its
# `hadm_id`, `discharge_instructions` and `brief_hospital_course` columns.
test_phase_2_discharge_target = pd.read_csv(
    filepath_or_buffer="/nethome/vietdq/discharge_data/test_phase_2/discharge_target.csv"
)

In [None]:
def generate_response_test_no_rag(pipeline, terminator, test_sample, test_retrieved_hadm_id_tv, train_valid_discharge, test_phase_2_discharge_target) -> dict:
    """Generate discharge instructions and brief hospital courses with one fixed example.

    One note is drawn from ``train_valid_discharge`` with a fixed seed and reused as the
    in-context example for every row of ``test_sample``. Prompts are sent to
    ``pipeline.generate`` in batches of 16 (discharge-instruction prompts first, then
    brief-hospital-course prompts); a final partial batch is flushed after the loop.

    Uses the notebook-level names ``tokenizer``, ``sampling_params`` and ``tqdm``.

    Args:
        pipeline: object exposing ``generate(prompts, sampling_params)``; each result
            carries its text in ``.outputs[0].text``.
        terminator: not used by this function; kept so existing calls stay valid.
        test_sample: DataFrame with at least ``hadm_id`` and ``json`` columns.
        test_retrieved_hadm_id_tv: not used by this function; kept so existing calls stay valid.
        train_valid_discharge: DataFrame with ``discharge_instructions``,
            ``brief_hospital_course`` and ``json`` columns plus the note columns.
        test_phase_2_discharge_target: DataFrame with ``hadm_id``,
            ``discharge_instructions`` and ``brief_hospital_course`` columns.

    Returns:
        dict of equal-length lists keyed by ``hadm_id``,
        ``expected_discharge_instruction``, ``generated_discharge_instruction``,
        ``expected_brief_hospital_course`` and ``generated_brief_hospital_course``.

    Raises:
        IndexError: if a test ``hadm_id`` has no row in ``test_phase_2_discharge_target``.
    """
    batch_size = 16  # prompts sent to `pipeline.generate` per call
    system_message = {"role": "system", "content": "You are a helpful doctor"}

    dict_to_csv = {
        'hadm_id': [],
        'expected_discharge_instruction': [],
        'generated_discharge_instruction': [],
        'expected_brief_hospital_course': [],
        'generated_brief_hospital_course': [],
    }

    pre_prompts_di, pre_prompts_bhc = [], []

    def _section_text(value) -> str:
        # Section cells may hold a list of regex matches; join them into plain text.
        if isinstance(value, (list, tuple)):
            return "\n".join(str(part) for part in value)
        return "" if value is None else str(value)

    def _as_chat_prompt(user_content: str) -> str:
        # add_generation_prompt=True makes the rendered prompt end with the opening of
        # the assistant turn, so the model answers instead of continuing the user turn.
        return tokenizer.apply_chat_template(
            [system_message, {"role": "user", "content": user_content}],
            tokenize=False,
            add_generation_prompt=True,
        )

    def _generate_texts(prompts: list) -> list:
        resp = pipeline.generate(prompts, sampling_params)
        assert len(resp) == len(prompts)
        return [r.outputs[0].text for r in resp]

    def _flush_batch() -> None:
        # Shared by the in-loop full batches and the trailing partial batch.
        if not pre_prompts_di:
            return
        dict_to_csv['generated_discharge_instruction'].extend(_generate_texts(pre_prompts_di))
        dict_to_csv['generated_brief_hospital_course'].extend(_generate_texts(pre_prompts_bhc))
        pre_prompts_di.clear()
        pre_prompts_bhc.clear()

    # Take the scalar cells of the sampled row. Formatting the one-row Series itself
    # would put its repr in the prompt: long values cut off with "..." plus index and
    # "Name: ..., dtype: object" lines.
    example_row = train_valid_discharge.sample(n=1, random_state=42).iloc[0]
    sample_output_di = _section_text(example_row['discharge_instructions'])
    sample_output_bhc = _section_text(example_row['brief_hospital_course'])
    # The example input must not contain the answers (nor the JSON that embeds them).
    sample_input = example_row.drop(labels=['discharge_instructions', 'brief_hospital_course', 'json']).to_json()

    for _, row in tqdm(test_sample.iterrows(), total=test_sample.shape[0]):
        hadm_id = row['hadm_id']

        query_input = str(row['json'])

        prompt_di = f'''
        ###Instructions###
        Generate `DISCHARGE INSTRUCTION` for the patient based on the following data:{query_input}. \n

        The `DISCHARGE INSTRUCTION` must provide actionable information to help them take care of themselves when they leave the hospital. \n

        ###Example###
        The following is just a sample data, containing an `EXAMPLE_DISCHARGE INSTRUCTION` for the `EXAMPLE_INPUT`. \n `EXAMPLE_INPUT`: {sample_input} `EXAMPLE_DISCHARGE INSTRUCTION`: {sample_output_di} \n
    

        ###Content###
        The `DISCHARGE INSTRUCTION` of each patient of ‘subject_id’ and ‘hadm_id’ should be an overview of the conditions of the corresponding patient. \n

        The content of `DISCHARGE INSTRUCTION` must contain information from ‘discharge_summary’.\n

        Look into ‘history_present_illness’, ‘past_medical_history’ and ‘discharge_medication’ sentence by sentence to extract information on diagnosis and treatments administered. \n

        Avoid using any place holders. \n

        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n

        The `DISCHARGE INSTRUCTION` must be written in simple terms. \n

        Provide information about medical jargon \n

        Start your response with: `Dear Ms. /Mr` followed by `____`, which is based on the ‘gender’ from the `EXAMPLE_INPUT`. \n

        Give an answer to a given question in a natural, human-like manner. \n

        Avoid adding any personal information about the patient such as name and date of birth. \n'''
        
        prompt_bhc = f'''
        ###Instructions###
        Generate `BRIEF HOSPITAL COURSE` for the patient based on the following data:{query_input}. \n

        The `BRIEF HOSPITAL COURSE` must be brief overview of what happened during a patient's stay in the hospital. \n

        ###Example###
        The following is just a sample data, containing an `EXAMPLE_BRIEF HOSPITAL COURSE` for the `EXAMPLE_INPUT`. \n `EXAMPLE_INPUT`: {sample_input} `EXAMPLE_BRIEF HOSPITAL COURSE`: {sample_output_bhc} \n
        
        ###Content###
        The `BRIEF HOSPITAL COURSE` of each patient of ‘subject_id’ and ‘hadm_id’ must be brief overview of the treatment provided and patient's response during a patient's stay in the hospital. \n

        The content of `BRIEF HOSPITAL COURSE` must contain details about the treatments or procedures the patient underwent, how the patient responded to these treatments, and any significant changes in the patient's health condition.\n
        
        Look into ‘history_present_illness’ and ‘past_medical_history’ to extract information on diagnosis, treatments administered, and discharge medications. \n
        
        The `BRIEF HOSPITAL COURSE` must be written in medical terms so that doctors can understand about the overall care of a patient. \n
        
        Avoid using any place holders. \n
        
        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n
        
        Provide information about medical jargon. \n
        
        Start your response with: `Mr/Ms. ____ was` , which is based on the ‘gender’ from the `EXAMPLE_INPUT`. \n

        Avoid adding any personal information about the patient such as name and date of birth. \n
        '''

        pre_prompts_di.append(_as_chat_prompt(prompt_di))
        pre_prompts_bhc.append(_as_chat_prompt(prompt_bhc))

        # Look the reference row up once and read both expected sections from it.
        target_rows = test_phase_2_discharge_target.loc[test_phase_2_discharge_target['hadm_id'] == hadm_id]
        dict_to_csv['hadm_id'].append(hadm_id)
        dict_to_csv['expected_discharge_instruction'].append(target_rows['discharge_instructions'].iloc[0])
        dict_to_csv['expected_brief_hospital_course'].append(target_rows['brief_hospital_course'].iloc[0])

        if len(pre_prompts_di) == batch_size:
            _flush_batch()

    # outside loop - the leftover prompts needs to be taken care of
    _flush_batch()

    return dict_to_csv

In [None]:
# Load the example pool, the redacted test notes and the retrieval table.
train_valid_discharge, test_discharge, retrieved_hadm_id = (
    load_train_valid_discharge(),
    load_test_discharge(),
    load_retrieved_hadm_id(),
)
# This run uses every test note (no subsampling).
test_sample = test_discharge

In [None]:
from tqdm.auto import tqdm

# One-shot generation with a fixed example (no retrieval) over `test_sample`.
output = generate_response_test_no_rag(
    pipeline=model,
    terminator=terminators,
    test_sample=test_sample,
    test_retrieved_hadm_id_tv=retrieved_hadm_id,
    train_valid_discharge=train_valid_discharge,
    test_phase_2_discharge_target=test_phase_2_discharge_target,
)

In [None]:
# Sanity check: all result columns should have the same number of entries.
print(
    *(
        len(output[column])
        for column in (
            'hadm_id',
            'generated_discharge_instruction',
            'expected_discharge_instruction',
            'generated_brief_hospital_course',
            'expected_brief_hospital_course',
        )
    ),
    sep="\n",
)

In [None]:
# Persist the no-RAG results; one column per key of `output`.
df_output = pd.DataFrame(data=output)
df_output.to_csv(
    path_or_buf='/nethome/vietdq/Projects/output_one_shot_without_rag_llama-3-1-70b-all-test.csv'
)

In [None]:
def generate_response_test_rag_corrected(pipeline, terminator, test_sample, test_retrieved_hadm_id_tv, train_valid_discharge, test_phase_2_discharge_target) -> dict:
    """Generate both sections with a retrieved example, then ask the model to correct them.

    For every row of ``test_sample`` the paired example admission is looked up in
    ``test_retrieved_hadm_id_tv`` and its note from ``train_valid_discharge`` is used as
    the in-context example. Work is done in batches of 16, each batch running four
    ``pipeline.generate`` calls in this order: discharge instructions, brief hospital
    courses, corrected brief hospital courses, corrected discharge instructions.
    A final partial batch is flushed after the loop.

    Uses the notebook-level names ``tokenizer``, ``sampling_params`` and ``tqdm``.

    Args:
        pipeline: object exposing ``generate(prompts, sampling_params)``; each result
            carries its text in ``.outputs[0].text``.
        terminator: not used by this function; kept so existing calls stay valid.
        test_sample: DataFrame with at least ``hadm_id`` and ``json`` columns.
        test_retrieved_hadm_id_tv: DataFrame with ``hadm_id`` and ``retrieved_hadm_id`` columns.
        train_valid_discharge: DataFrame with ``hadm_id``, ``discharge_instructions``,
            ``brief_hospital_course`` and ``json`` columns plus the note columns.
        test_phase_2_discharge_target: DataFrame with ``hadm_id``,
            ``discharge_instructions`` and ``brief_hospital_course`` columns.

    Returns:
        dict of equal-length lists keyed by ``hadm_id`` and, for each of the two
        sections, ``expected_*``, ``generated_*`` and ``corrected_*``.

    Raises:
        IndexError: if a test ``hadm_id`` is missing from the retrieval table or from
            ``test_phase_2_discharge_target``.
    """
    batch_size = 16  # prompts sent to `pipeline.generate` per call
    system_message = {"role": "system", "content": "You are a helpful doctor"}

    dict_to_csv = {
        'hadm_id': [],
        'expected_discharge_instruction': [],
        'generated_discharge_instruction': [],
        'corrected_discharge_instruction': [],
        'expected_brief_hospital_course': [],
        'generated_brief_hospital_course': [],
        'corrected_brief_hospital_course': [],
    }

    pre_prompts_di, pre_prompts_bhc, correcting_prompts_di, correcting_prompts_bhc = [], [], [], []

    def _section_text(value) -> str:
        # Section cells may hold a list of regex matches; join them into plain text.
        if isinstance(value, (list, tuple)):
            return "\n".join(str(part) for part in value)
        return "" if value is None else str(value)

    def _example_for(example_hadm_id) -> tuple:
        # Returns (example input JSON, example DI text, example BHC text) for one admission.
        matches = train_valid_discharge.loc[train_valid_discharge['hadm_id'] == example_hadm_id]
        if matches.empty:
            # Retrieved admission is not in the example pool: prompt without example text.
            return "", "", ""
        # Take the scalar cells of the first matching row. Formatting the Series itself
        # would put its repr in the prompt: long values cut off with "..." plus index
        # and "Name: ..., dtype: object" lines.
        example_row = matches.iloc[0]
        example_input = example_row.drop(labels=['discharge_instructions', 'brief_hospital_course', 'json']).to_json()
        return (
            example_input,
            _section_text(example_row['discharge_instructions']),
            _section_text(example_row['brief_hospital_course']),
        )

    def _as_chat_prompt(user_content: str) -> str:
        # add_generation_prompt=True makes the rendered prompt end with the opening of
        # the assistant turn, so the model answers instead of continuing the user turn.
        return tokenizer.apply_chat_template(
            [system_message, {"role": "user", "content": user_content}],
            tokenize=False,
            add_generation_prompt=True,
        )

    def _generate_texts(prompts: list) -> list:
        resp = pipeline.generate(prompts, sampling_params)
        assert len(resp) == len(prompts)
        return [r.outputs[0].text for r in resp]

    def _flush_batch() -> None:
        # Shared by the in-loop full batches and the trailing partial batch.
        if not pre_prompts_di:
            return
        generated_di = _generate_texts(pre_prompts_di)
        generated_bhc = _generate_texts(pre_prompts_bhc)

        # Each correction prompt is paired with the generation of the SAME row and the
        # SAME section: DI templates get DI text, BHC templates get BHC text.
        corrs_dis = [
            _as_chat_prompt(template.replace('{generated_di}', text))
            for template, text in zip(correcting_prompts_di, generated_di)
        ]
        corrs_bhc = [
            _as_chat_prompt(template.replace('{generated_bhc}', text))
            for template, text in zip(correcting_prompts_bhc, generated_bhc)
        ]

        corrected_bhc = _generate_texts(corrs_bhc)
        corrected_di = _generate_texts(corrs_dis)

        dict_to_csv['generated_discharge_instruction'].extend(generated_di)
        dict_to_csv['generated_brief_hospital_course'].extend(generated_bhc)
        dict_to_csv['corrected_brief_hospital_course'].extend(corrected_bhc)
        dict_to_csv['corrected_discharge_instruction'].extend(corrected_di)

        for pending in (pre_prompts_di, pre_prompts_bhc, correcting_prompts_di, correcting_prompts_bhc):
            pending.clear()

    for _, row in tqdm(test_sample.iterrows(), total=test_sample.shape[0]):
        hadm_id = row['hadm_id']

        retrieved_hadm_id = test_retrieved_hadm_id_tv.loc[test_retrieved_hadm_id_tv['hadm_id'] == hadm_id, 'retrieved_hadm_id'].iloc[0]
        sample_input, sample_output_di, sample_output_bhc = _example_for(retrieved_hadm_id)

        query_input = str(row['json'])

        prompt_di = f'''
        ###Instructions###
        Generate `DISCHARGE INSTRUCTION` for the patient based on the following data:{query_input}. \n

        The `DISCHARGE INSTRUCTION` must provide actionable information to help them take care of themselves when they leave the hospital. \n

        ###Example###
        The following is just a sample data, containing an `EXAMPLE_DISCHARGE INSTRUCTION` for the `EXAMPLE_INPUT`. \n `EXAMPLE_INPUT`: {sample_input} `EXAMPLE_DISCHARGE INSTRUCTION`: {sample_output_di} \n
    

        ###Content###
        The `DISCHARGE INSTRUCTION` of each patient of ‘subject_id’ and ‘hadm_id’ should be an overview of the conditions of the corresponding patient. \n

        The content of `DISCHARGE INSTRUCTION` must contain information from ‘discharge_summary’.\n

        Look into ‘history_present_illness’, ‘past_medical_history’ and ‘discharge_medication’ sentence by sentence to extract information on diagnosis and treatments administered. \n

        Avoid using any place holders. \n

        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n

        The `DISCHARGE INSTRUCTION` must be written in simple terms. \n

        Provide information about medical jargon \n

        Start your response with: `Dear Ms. /Mr` followed by `____`, which is based on the ‘gender’ from the `EXAMPLE_INPUT`. \n

        Give an answer to a given question in a natural, human-like manner. \n

        Avoid adding any personal information about the patient such as name and date of birth. \n'''
        
        prompt_bhc = f'''
        ###Instructions###
        Generate `BRIEF HOSPITAL COURSE` for the patient based on the following data:{query_input}. \n

        The `BRIEF HOSPITAL COURSE` must be brief overview of what happened during a patient's stay in the hospital. \n

        ###Example###
        The following is just a sample data, containing an `EXAMPLE_BRIEF HOSPITAL COURSE` for the `EXAMPLE_INPUT`. \n `EXAMPLE_INPUT`: {sample_input} `EXAMPLE_BRIEF HOSPITAL COURSE`: {sample_output_bhc} \n
        
        ###Content###
        The `BRIEF HOSPITAL COURSE` of each patient of ‘subject_id’ and ‘hadm_id’ must be brief overview of the treatment provided and patient's response during a patient's stay in the hospital. \n

        The content of `BRIEF HOSPITAL COURSE` must contain details about the treatments or procedures the patient underwent, how the patient responded to these treatments, and any significant changes in the patient's health condition.\n
        
        Look into ‘history_present_illness’ and ‘past_medical_history’ to extract information on diagnosis, treatments administered, and discharge medications. \n
        
        The `BRIEF HOSPITAL COURSE` must be written in medical terms so that doctors can understand about the overall care of a patient. \n
        
        Avoid using any place holders. \n
        
        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n
        
        Provide information about medical jargon. \n
        
        Start your response with: `Mr/Ms. ____ was` , which is based on the ‘gender’ from the `EXAMPLE_INPUT`. \n

        Avoid adding any personal information about the patient such as name and date of birth. \n
        '''

        pre_prompts_di.append(_as_chat_prompt(prompt_di))
        pre_prompts_bhc.append(_as_chat_prompt(prompt_bhc))

        # The doubled braces leave a literal `{generated_di}` / `{generated_bhc}` marker
        # in the text; `_flush_batch` replaces it once the first-pass output exists.
        correcting_prompt_di = f'''
        ###Instructions###
        Generate a `CORRECTED DISCHARGE INSTRUCTION` with symptoms for the patient based on the `GENERATED DISCHARGE INSTRUCTION` {{generated_di}} of the following data:{query_input}. \n

        The `CORRECTED DISCHARGE INSTRUCTION` is an information-corrected summary of the patient's medical condition. \n

        ###Example###
        The information about the patient's medical condition in the `GENERATED DISCHARGE INSTRUCTION` {{generated_di}} may not be correct.\n

        ###Content###
        The `CORRECTED DISCHARGE INSTRUCTION` of each `GENERATED DISCHARGE INSTRUCTION` should be an overview of the conditions of the corresponding patient. \n

        Look into 'History of Present Illness': 'history_present_illness' and 'Past Medical History': 'past_medical_history' to correct the false information on diagnosis, treatments administered, and discharge medications of the `GENERATED DISCHARGE INSTRUCTION. \n

        The `GENERATED DISCHARGE INSTRUCTION` may contain 'Discharge Medications' and is sometimes not needed in the 'DISCHARGE INSTRUCTION'. \n
        Check again the 'sample_input' to see if the 'sample_output' has 'Discharge Medications'. \n

        Avoid using any placeholders. \n

        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n

        '''

        # The "Look into ..." sentence below names the brief hospital course; it used to
        # point at the discharge instruction, which this prompt never shows the model.
        correcting_prompt_bhc = f'''
        ###Instructions###
        Generate a `CORRECTED BRIEF HOSPITAL COURSE` with symptoms for the patient based on the `GENERATED BRIEF HOSPITAL COURSE` {{generated_bhc}} of the following data:{query_input}. \n

        The `CORRECTED BRIEF HOSPITAL COURSE` is a brief overview of what happened during a patient's stay in the hospital. \n

        ###Example###
        The information about the patient's medical condition in the `GENERATED BRIEF HOSPITAL COURSE` {{generated_bhc}} may not be correct.\n

        ###Content###
        The `CORRECTED BRIEF HOSPITAL COURSE` of each `GENERATED BRIEF HOSPITAL COURSE` should be an overview of the conditions of the corresponding patient. \n

        The `CORRECTED BRIEF HOSPITAL COURSE` must be written in medical terms so that doctors can understand a patient's overall care. \n

        Look into 'History of Present Illness': 'history_present_illness' and 'Past Medical History': 'past_medical_history' to correct any false information on diagnosis, treatments administered, and discharge medications of the `GENERATED BRIEF HOSPITAL COURSE`. \n

        Avoid using any placeholders. \n

        Avoid using '[insert diagnosis(s) e.g., pneumonia, heart failure]', instead provide specific diagnoses and treatments. \n

        '''
        
        correcting_prompts_di.append(correcting_prompt_di)
        correcting_prompts_bhc.append(correcting_prompt_bhc)

        # Look the reference row up once and read both expected sections from it.
        target_rows = test_phase_2_discharge_target.loc[test_phase_2_discharge_target['hadm_id'] == hadm_id]
        dict_to_csv['hadm_id'].append(hadm_id)
        dict_to_csv['expected_discharge_instruction'].append(target_rows['discharge_instructions'].iloc[0])
        dict_to_csv['expected_brief_hospital_course'].append(target_rows['brief_hospital_course'].iloc[0])

        if len(pre_prompts_di) == batch_size:
            _flush_batch()

    # outside loop - the leftover prompts needs to be taken care of
    _flush_batch()

    return dict_to_csv

In [None]:
# Reload the example pool, the redacted test notes and the retrieval table.
train_valid_discharge, test_discharge, retrieved_hadm_id = (
    load_train_valid_discharge(),
    load_test_discharge(),
    load_retrieved_hadm_id(),
)
# The RAG + correction run uses a fixed-seed subsample of 3200 test notes.
test_sample = test_discharge.sample(random_state=42, n=3200)

In [None]:
# Retrieval-based one-shot generation followed by a correction pass over `test_sample`.
output = generate_response_test_rag_corrected(
    pipeline=model,
    terminator=terminators,
    test_sample=test_sample,
    test_retrieved_hadm_id_tv=retrieved_hadm_id,
    train_valid_discharge=train_valid_discharge,
    test_phase_2_discharge_target=test_phase_2_discharge_target,
)

In [None]:
# Sanity check: the listed result columns should have the same number of entries.
print(
    *(
        len(output[column])
        for column in (
            'hadm_id',
            'generated_discharge_instruction',
            'expected_discharge_instruction',
            'generated_brief_hospital_course',
            'expected_brief_hospital_course',
        )
    ),
    sep="\n",
)

In [None]:
# Persist the RAG + correction results; one column per key of `output`.
df_output = pd.DataFrame(data=output)
df_output.to_csv(
    path_or_buf='/nethome/vietdq/Projects/output_one_shot_rag_corrected_llama-3-1-70b-all-test.csv'
)