In [49]:
import os
import openai
import json
import random
import time
import re
import pickle
import traceback
from transformers import LlamaTokenizer
from tqdm import tqdm
import pandas as pd
# Azure OpenAI connection settings for the legacy (0.x) `openai` SDK.
# The secret key is deliberately NOT stored in the notebook: it is read from
# the AZURE_OPENAI_API_KEY environment variable so it cannot leak through
# version control or shared notebook files. A missing variable raises
# KeyError here, which is clearer than a later authentication failure.
openai.api_type = "azure"
openai.api_base = os.environ.get(
    "AZURE_OPENAI_ENDPOINT", "https://test-openai-celesh.openai.azure.com/"
)
openai.api_version = "2023-03-15-preview"
openai.api_key = os.environ["AZURE_OPENAI_API_KEY"]

## Prompt

In [50]:
# Prompt template used by generate_inst_prompt() to ask the model for
# instructions/questions about a discharge summary. It is filled with
# str.format, so it exposes three placeholders: {note}, {num_inst_per_note}
# and {tasks}; any literal brace added to the text would have to be doubled.
inst_prompt = """

\"\"\"
{note}
\"\"\"

Using above patient's discharge summary, you want to ask a well-trained model to help your clinical decision making.
Below are high-level NLP categories that you might want to ask about.
Tell me {num_inst_per_note} different instructions that you will ask to the model.

Tasks :

\"\"\"
{tasks}
\"\"\"

Here are requirements:
1. The words used in the instructions should be diverse to maximize the diversity.
2. A GPT language model should be able to complete the instruction. For example, do not ask the model to create other than any textual output.
3. The question type should be diverse such as one-word answer question, open-ended question, multiple-choice question, and yes/no question.
4. Concat the used NLP categories on front the questions such as "" (Text Classification) 'The question' "".
"""

# Prompt template used by generate_ans_prompt() to ask the model to answer
# questions about a discharge summary. It is filled with str.format and exposes
# two placeholders: {note} and {questions}; any literal brace added to the text
# would have to be doubled. The template instructs the model to reply with a
# JSON object whose single top-level key is "qas"; the generation loop later
# parses that reply with json.loads.
ans_prompt = """

\"\"\"
{note}
\"\"\"

Using above patient's discharge summary, answer to the following questions.
The question is given with their own categoreis, with colon-concatenated form.

Questions:

\"\"\"
{questions}
\"\"\"

Here are requirements:

1. Note that each question is independent.
2. Number your responses.
3. Ensure that each answer is complete and does not raise another question.
4. Answers can span multiple lines if needed.
5. Answer all questions!
6. Standardize the terminology used to describe the patient's condition?
7. The output must be in JSON format, Do not add additional information which is not in the JSON format to the output.
8. Do not anwser additional questions that are not asked.

format the output in JSON format (with only one key "qas")：

├── "qas"
  └── [i]
      ├── "answers"
      │   └── [j]
      │       ├── "answer_start"
      │       │
      │       └── "text"
      │ 
      ├── "id" 
      │ 
      └── "question"


The top-level key is "qas", which has a value that is a list of elements, denoted by [i]. Each element in the list is an object with three keys: "answers", "id", and "question".

The "answers" key has a value that is a list of elements, denoted by [j]. Each element in the list is an object with two keys: "answer_start" and "text".

If there are multiple answers or the answers have multiple sections or the answers have multiple sentences in parallel such as `1..., 2..., 3...` ,`A..., B...,C....`,`1)..., 2)..., 3)....` and so on, make sure to divide the whole answer into multiple "answer_start"-"text" pairs under the "answers" key, such as

── "answers"
   └── [0]
   |   ├── "answer_start"
   |   │
   |   └── "text": section 1 of the answer or answer 1
   └── [1]
   |   ├── "answer_start"
   |   │
   |   └── "text": section 2 of the answer or answer 2
   └── [2]
       ├── "answer_start"
       │
       └── "text": section 3 of the answer or answer 3
       

The "answer_start" key has a value representing the index of the first character of the answer string in the context.  If you can not find the answer in the context, set the value to -1.

The "text" key has a value representing the answer text which contain some information and can't be empty.

The "id" key has a value representing the identifier of the question. It can be found in the begining of each question.

"question": This is a string representing the question being asked. Just copy the input question under this key. Include the NLP categories on front in this format "" (Text Classification) 'The question' "".
"""

# NLP task categories offered to the model in inst_prompt's {tasks} slot.
# generate_inst_prompt() either samples from this list or passes it whole,
# joining the chosen names with newlines.
tasks = [
    "Question Answering",
    "Natural Language Generation",
    "Text Summarization",
    "Text Classification",
    "Temporal Information Extraction",
    "Relation Extraction",
    "Named Entity Recognition",
    "Paraphrasing",
    "Clinical Concept Normalization",
    "Keyword Extraction",
    "Abbreviation expansion",
    "Feature extraction",
    "Content extraction"
]

In [51]:
# Full catalogue of fixed questions that can be asked about every note.
# Each entry is prefixed with its NLP category in parentheses, the format the
# prompts ask the model to echo back.
all_question_list = [

    "(Text Summarization) What is the patient's primary diagnosis?",

    "(Content extraction) Which sentences in the note describe the diagnosis?",

    "(Text Summarization) What are the comorbidities of the patient?",

    "(Content extraction) Which sentences in the note describe the comorbidities?",

    "(Named Entity Recognition) List all the medications mentioned in the discharge summary.",

    "(Content extraction) Which sentences in the note describe the medications?",

    "(Named Entity Recognition) List all the labs mentioned in the discharge summary.",

    "(Content extraction) Which sentences in the note describe the labs?",

    "(Text Summarization) Provide a brief summary of the history treatment plan for the patient?",

    "(Content extraction) Which sentences in the note describe the history treatment plan for the patient?",

    "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?",

    "(Content extraction) Which sentences in the note describe the future treatment plan for the patient?",

    "(Feature extraction) What medications are the patient currently on?",

    "(Text Summarization) List all the diagnoses and determine whether they are from recent onsite visits or past medical history."

]

# Questions actually used in this run: only the four treatment-plan questions.
# Previously the catalogue above was assigned to `question_list` and then
# immediately overwritten by a second literal, which hid the fact that only a
# subset was active. Deriving the subset keeps a single source of truth.
question_list = [q for q in all_question_list if "treatment plan" in q]
print(len(question_list))

4


In [52]:
## Functions to generate prompts 
def generate_inst_prompt(note, num_inst_per_note, sample = True):
    """Build the chat messages that ask the model to propose instructions.

    Args:
        note: Discharge-summary text inserted into the template.
        num_inst_per_note: Number of instructions requested; when ``sample``
            is true it is also the number of task categories drawn.
        sample: If true, draw ``num_inst_per_note`` categories from ``tasks``
            with ``random.sample``; otherwise offer every category.

    Returns:
        A two-element list of chat messages (system, then user).
    """
    chosen_tasks = random.sample(tasks, num_inst_per_note) if sample else tasks

    user_text = inst_prompt.format(
        note=note,
        num_inst_per_note=num_inst_per_note,
        tasks="\n".join(chosen_tasks),
    )

    system_message = {"role":"system","content":"You are a healthcare professional."}
    user_message = {"role": "user", "content": user_text}
    return [system_message, user_message]

def generate_ans_prompt(note, questions):
    """Build the chat messages that ask the model to answer ``questions``.

    Both arguments are substituted into the ``ans_prompt`` template; the
    result is returned as a system message followed by a user message.
    """
    user_text = ans_prompt.format(note=note, questions=questions)
    system_message = {"role":"system","content":"You are a healthcare professional."}
    user_message = {"role": "user", "content": user_text}
    return [system_message, user_message]
    
def generate_con_prompt():
    """Return a fresh system + user message pair whose user text is "Continue"."""
    messages = []
    messages.append({"role":"system","content":"You are a healthcare professional."})
    messages.append({"role": "user", "content": "Continue"})
    return messages

## Functions to get completion
def get_completion(message,temperature=0.6):
    """Request a chat completion, retrying on failure.

    Args:
        message: List of chat messages (dicts with "role" and "content").
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text content of the first choice, or the single-space string
        " " when all 10 attempts failed (callers treat that as unparsable
        output and retry at their own level).
    """
    max_num_tries = 10

    for num_tries in range(max_num_tries):
        try:
            print("try get_completion...")
            response = openai.ChatCompletion.create(
                engine="chat-test",
                messages = message,
                temperature=temperature,
                max_tokens=4096,
                top_p=0.95,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None)

        # Catch Exception rather than using a bare `except:` so Ctrl-C
        # (KeyboardInterrupt) and SystemExit can still stop a long run, and
        # report the error so repeated failures can be diagnosed.
        except Exception as exc:
            print(f"Error occurred in get_completion(), retrying (try {num_tries + 1})... {exc!r}")
            time.sleep(5)
            continue

        print(f"Success! get_completion() completed. tried {num_tries}")
        return response["choices"][0]["message"]["content"]

    print("⚠Error: maximum number of tries reached in function get_completion().")
    return " "

## Functions to process the questions
def add_question_list(questions, question_list):
    """Prepend the fixed questions, numbered from 1, to generated ``questions``.

    The integers in ``questions`` are shifted by ``len(question_list)`` via
    ``add_numbers`` so the generated numbering continues after the fixed block.
    """
    offset = len(question_list)
    fixed_block = "".join(
        str(position) + ". " + text + "\n"
        for position, text in enumerate(question_list, start=1)
    )
    return fixed_block + add_numbers(questions, offset)

def split_questions(questions,m):
    """Split a numbered question list into chunks of at most ``m`` questions.

    A question starts at ``<number>.`` followed by whitespace and runs until
    the next whitespace-separated ``<number>. `` marker or the end of the
    text. Digits inside a question (years, doses, "parainfluenza 3", ...) no
    longer cut the question short, which the previous ``[^\\d]+`` pattern did.

    Args:
        questions: Text such as "1. First question\\n2. Second question".
        m: Maximum number of questions per chunk; must be >= 1.

    Returns:
        A list of strings, each holding up to ``m`` questions with one
        question per line (every question is followed by "\\n"). An input
        with no numbered questions yields an empty list.

    Raises:
        ValueError: If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError("m must be a positive integer")

    # Lazy body + lookahead: stop right before the next "<ws><number>.<ws>"
    # marker (or trailing whitespace at the end) without consuming it.
    # "2.5" is not treated as a marker because the dot must be followed by
    # whitespace.
    pattern = r'\d+\.\s+.*?(?=\s+\d+\.\s|\s*\Z)'
    numbered = re.findall(pattern, questions, flags=re.DOTALL)

    return [
        "".join(item + "\n" for item in numbered[start:start + m])
        for start in range(0, len(numbered), m)
    ]

def find_substring(string):
    """Return the slice from the first "{" through the last "}" of ``string``.

    The slice bounds come straight from ``str.find`` / ``str.rfind``, so a
    missing brace contributes -1 to the corresponding bound.
    """
    first_open, last_close = string.find("{"), string.rfind("}")
    stop = last_close + 1
    return string[first_open:stop]

def add_numbers(string,num):
    """Add ``num`` to every standalone integer found in ``string``.

    "Standalone" means a run of digits delimited by word boundaries; every
    such integer is shifted, not only list numbering at the start of a line.
    """
    def _shift(found):
        return str(int(found.group(1)) + num)

    standalone_integer = re.compile(r'\b(\d+)\b')
    return standalone_integer.sub(_shift, string)

## Functions to process the data
def DataClean(data,labels_path,data_path=None,write_path=None,delete=None):
    """Split discharge notes into labelled sections.

    Each note is cut at lines that look like "Heading: ..." headers, every
    section heading is mapped to a category listed in the labels file, and
    consecutive sections that share a category are merged.

    Args:
        data: DataFrame with at least 'text' and 'note_id' columns. When it is
            None or empty the notes are read from ``data_path`` instead.
        labels_path: Text file with one category per line; a line may list
            several aliases separated by "/". Matching is case-insensitive.
            The category 'begin_title' is used for sections that appear before
            any recognised heading, so it is looked up in this file too
            (NOTE(review): a note whose first heading matches nothing raises
            KeyError unless the labels file contains 'begin_title').
        data_path: CSV path used only when ``data`` is None or empty.
        write_path: If given, the result is pickled to this path.
        delete: Optional collection of original headings; merged sections
            whose 'label' value is in this collection are dropped.

    Returns:
        dict mapping each note_id to a DataFrame with columns
        'title' (matched category), 'text' (merged section text) and
        'label' (original heading of the first section in the merged run).

    Raises:
        ValueError: If neither usable ``data`` nor ``data_path`` is supplied.
    """

    print('\n\nImporting and filtering database...')

    if data is not None and not data.empty:
        notes = data
    elif data_path is not None:
        notes = pd.read_csv(data_path)
    else:
        raise ValueError("DataClean needs a non-empty `data` frame or a `data_path`")

    ints_str = '0123456789-#[]' # characters that usually main categories don't start with

    print('\n\nSplitting each note into sections:\n\n')

    notes_sections = {}

    for note_index in tqdm(range(notes.shape[0])):
        # Collapse runs of blank lines and treat 5-space gaps as line breaks.
        note = notes['text'][note_index].replace('\n\n\n\n','\n').replace('\n\n\n','\n').replace('     ','\n')
        paragraphs = note.split('\n')

        subsections, new_section = [], ' '
        for p in paragraphs:
            line = p.strip()
            colon_at = line.find(':')
            # A header line contains a colon, and neither its first character
            # nor the character just before the colon is a digit/list marker
            # (which filters out times, numbered items and the like).
            is_header = (
                len(line) > 0
                and colon_at != -1
                and line[colon_at - 1] not in ints_str
                and line[0] not in ints_str
            )
            if is_header:
                subsections.append(new_section.strip())
                new_section = p + ' '
            else:
                new_section += p + ' '
        subsections.append(new_section)
        # The first entry is whatever preceded the first header; discard it.
        subsections.pop(0)

        note_sect_tit,note_sect_par = [],[]
        for sect in subsections:
            split_at = sect.find(':')
            note_sect_tit.append(sect[0:split_at])
            note_sect_par.append(sect[split_at + 1:].strip())
        note_df = pd.DataFrame({'title':note_sect_tit,'category':'','text':note_sect_par, 'label':''})
        notes_sections[notes['note_id'][note_index]] = note_df

    # Map every category alias (lower-cased) to the 1-based line number it
    # appears on. `with` guarantees the handle is closed even on error.
    obj_label_dict = {}
    with open(labels_path, 'r') as f:
        for line_number, raw in enumerate(f, start=1):
            for item in raw.strip('\n').lower().split('/'):
                obj_label_dict[item] = line_number

    for key in tqdm(list(notes_sections.keys())):
        sections = notes_sections[key]
        buffer = 'begin_title'
        categories, label_ids = [], []
        for title in sections['title']:
            lowered = title.lower()
            for item in obj_label_dict:
                if item in lowered and len(lowered)>2:
                    buffer = item
                    break
            # Headings that match nothing inherit the previous category.
            categories.append(buffer)
            label_ids.append(obj_label_dict[buffer])
        # Assign whole columns at once; element-wise `df[col][idx] = value`
        # is chained indexing and may silently write to a temporary copy.
        sections['category'] = categories
        sections['label'] = label_ids

    notes_sections_output = {}
    for key in tqdm(notes_sections.keys()):
        sections = notes_sections[key]
        buffer = ''
        note_sect_tit, note_sect_par, note_sect_lab = [], [], []
        for category, title, text in zip(sections['category'], sections['title'], sections['text']):
            if buffer != category:
                # A new category starts a new merged section.
                buffer = category
                note_sect_tit.append(buffer)
                note_sect_lab.append(title)
                note_sect_par.append(text)
            else:
                # Same category as the previous section: fold heading and
                # body into the running paragraph.
                note_sect_par[-1] = note_sect_par[-1] + ' ' + title + ' ' + text
        note_df = pd.DataFrame({'title': note_sect_tit, 'text': note_sect_par, 'label': note_sect_lab})
        notes_sections_output[key] = note_df

    notes_sections = notes_sections_output

    if delete is not None:
        for key in list(notes_sections.keys()):
            merged = notes_sections[key]
            notes_sections[key] = merged[~merged['label'].isin(delete)]

    if write_path is not None:
        with open(write_path,"wb") as f:
            pickle.dump(notes_sections,f)

    return notes_sections

## Import Data

In [53]:
# Read the discharge-note CSV (path relative to the working directory) into df.
df = pd.read_csv('mimiciv_discharge.csv')


Unnamed: 0,note_id,subject_id,hadm_id,note_type,note_seq,charttime,storetime,text
0,10034049-DS-19,10034049,20053563,DS,19,2158-03-02 00:00:00,2158-03-02 18:36:00,\nName: ___ Unit No: ___\n \nAd...
1,10034049-DS-20,10034049,22185456,DS,20,2158-04-06 00:00:00,2158-04-06 16:12:00,\nName: ___ Unit No: ___\n \nAd...
2,10053872-DS-7,10053872,21627804,DS,7,2162-06-23 00:00:00,2162-06-27 20:01:00,\nName: ___ Unit No: __...
3,10071869-DS-19,10071869,20895291,DS,19,2111-01-12 00:00:00,2111-01-12 02:41:00,\nName: ___ Unit No: _...
4,10099592-DS-14,10099592,26777174,DS,14,2135-04-28 00:00:00,2135-04-29 09:05:00,\nName: ___ Unit No: ___\n...
5,10002131-DS-16,10002131,24065018,DS,16,2128-03-19 00:00:00,2128-03-19 16:38:00,\nName: ___ Unit No: __...
6,10015785-DS-16,10015785,23058424,DS,16,2150-05-13 00:00:00,2150-05-13 17:09:00,\nName: ___ Unit No: __...
7,10015785-DS-17,10015785,23958054,DS,17,2150-12-09 00:00:00,2150-12-09 14:50:00,\nName: ___ Unit No: __...
8,10020640-DS-8,10020640,27984218,DS,8,2153-02-20 00:00:00,2153-02-20 18:10:00,\nName: ___ Unit No: ___...
9,10024913-DS-19,10024913,27058919,DS,19,2162-11-15 00:00:00,2162-11-20 11:57:00,\nName: ___ ___ No: ___...


In [54]:
# Reload the notes and keep only the row labelled 3 for this run, renumbered
# from 0 (earlier experiments used rows [3,8,12,9,5,19] and [8,12,9,5,19]).
df = pd.read_csv('mimiciv_discharge.csv').loc[[3]].reset_index(drop=True)

# LLaMA tokenizer; used here to measure each note's length in tokens and
# later by SectionSelect, which reads it as a global.
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-13b-hf")
length = df['text'].map(lambda text: len(tokenizer.tokenize(text)))

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [55]:
# Display the per-note token counts computed with the LLaMA tokenizer.
length

0    5974
Name: text, dtype: int64

In [60]:
# Split the selected notes into labelled sections, pickle the result to
# notes.pkl, and drop merged sections whose original heading is one of the
# listed administrative fields (DataClean filters on its 'label' column).
NotesSections = DataClean(data=df,labels_path="labels.txt",write_path="notes.pkl",delete=['Name','Admission Date','Discharge Date','Date of Birth','Followup Instructions'])



Importing and filtering database...


Splitting each note into sections:




100%|██████████| 1/1 [00:00<00:00, 415.61it/s]
100%|██████████| 1/1 [00:00<00:00, 64.14it/s]
100%|██████████| 1/1 [00:00<00:00, 889.75it/s]


In [57]:
def SectionSelect(data, max_tokens=1024):
    """Join a note's sections into one string that fits a token budget.

    Every row is rendered as "<label>: <text>". While the joined text exceeds
    ``max_tokens`` (measured with the module-level ``tokenizer``), one randomly
    chosen section is dropped and the text is re-measured.

    Unlike the previous version, the caller's DataFrame is left untouched:
    no helper 'temp' column is written into it.

    Args:
        data: DataFrame with string columns 'label' and 'text'.
        max_tokens: Maximum number of tokens allowed in the result.

    Returns:
        The remaining sections joined with single spaces (possibly "").
    """
    sections = data['label'] + ": " + data['text']

    # The emptiness guard keeps random.randint from being called with an
    # empty range when nothing is left to drop.
    while len(sections) > 0 and len(tokenizer.tokenize(' '.join(sections))) > max_tokens:
        random_index = random.randint(0, len(sections) - 1)
        sections = sections.drop(sections.index[random_index])

    return ' '.join(sections)

In [41]:
# Build a SQuAD-style dataset: for every note, ask each fixed question,
# parse the model's JSON reply and collect the "qas" entries. The dataset is
# re-written to tmp5.json after every note so partial progress survives.
data = {}
data["version"] = "mimic4"
data["data"] = []
rep = 1

for row,key in enumerate(NotesSections.keys()):

    time.sleep(5)

    print("========================================= row:", row,"rep:",rep,"=========================================")

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    ## Split the note and select the section
    note = SectionSelect(NotesSections[key])

    # Model-generated questions are currently disabled (generate_inst_prompt /
    # add_question_list / split_questions); only the fixed list is asked,
    # one question per request.
    question_str_list = question_list
    m = 1

    # NOTE(review): assumes df rows are in the same order as NotesSections
    # keys, and that column 0 identifies the note and the last column is its
    # text — confirm against the CSV layout.
    new_data = {}
    new_data["title"] = df.iloc[row,0]
    new_data["paragraphs"] = [{"context": df.iloc[row,-1], "qas": []}]

    for question_str in question_str_list:

        num_tries = 0
        max_num_tries = 20
        answers = ""

        while num_tries < max_num_tries:
            new_qas = None
            try:
                answer_message = generate_ans_prompt(note, question_str)
                answers = get_completion(answer_message)
                print("-----------------------------------------")
                print(answers)
                print("-----------------------------------------")
                new_qas = json.loads(answers)

            except Exception as e:

                if "Expecting property name enclosed in double quotes" in str(e):
                    # The reply looks cut off: ask the model to continue,
                    # giving it the conversation so far, then parse the
                    # concatenation. (Previously the original prompt was
                    # re-sent and the combined text was never re-parsed, so
                    # `new_qas` was stale or undefined.)
                    continuation_message = answer_message + [
                        {"role": "assistant", "content": answers},
                        generate_con_prompt()[-1],
                    ]
                    answers = answers + get_completion(continuation_message)

                    print(answers)

                    try:
                        new_qas = json.loads(answers)
                    except Exception:
                        new_qas = None

            if new_qas is None:

                num_tries += 1

                print("-----------------------------------------")
                print(f"    Error occurred in json.loads(), retrying (try {num_tries})...")
                print("-----------------------------------------")

                print("-----------------------------------------")
                print(answers)
                print("-----------------------------------------")
                continue

            print(f"    Success! json.loads() completed. tried {num_tries}")

            # Valid JSON can still have the wrong shape (no "qas" list, or
            # the wrong number of questions). Count it as a failed try so
            # the loop is bounded by max_num_tries.
            qas = new_qas.get("qas") if isinstance(new_qas, dict) else None
            if not isinstance(qas, list) or len(qas) != m:

                num_tries += 1
                generated = len(qas) if isinstance(qas, list) else 0

                print(f"    Error! Answer additional questions. {generated} questions generated, {m} expected. Retrying...")

                continue

            new_data["paragraphs"][0]["qas"].extend(qas)

            print(len(new_data["paragraphs"][0]["qas"]))

            break


        if num_tries == max_num_tries:
            print("    ⚠Error: maximum number of tries reached in function json.loads().")
            print(question_str)
            print(answers)

    data["data"].append(new_data)
    print(len(new_data['paragraphs'][0]['qas']))
    print("\n")


    with open('tmp5.json', 'w') as f:
        json.dump(data, f, indent=2)


2023-05-26 22:30:57
try get_completion...
Success! get_completion() completed. tried 0
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        {
          "answer_start": 130,
          "text": "The patient is an elderly man with a history of CHF, Afib, Enterococcus Bacteremia and Osteomyelitis on Ampicillin, Rheumatoid Arthritis on Prednisone, Myelodysplastic syndrome, H/O GI Bleed d/t NSAIDs, H/O Prostate CA, and H/O Left TKR. He presented to the hospital due to respiratory distress and was found to have Acute hypoxemic respiratory failure secondary to parainfluenza 3 pneumonia, Acute on chronic diastolic heart failure, Deep vein thrombosis, Atrial fibrillation, Rheumatoid arthritis, and Osteomyelitis."
        }
      ],
      "id": "(Text Summarization) Provide a brief summary of the history treatment plan for the patient?",
      "question": "(Text Summarization) Provide a brief summary of the history treatment plan for the patient?"
    }
  ]
}
---

In [42]:
# List the note ids currently held in NotesSections (the enumerate index is unused).
for row,key in enumerate(NotesSections.keys()):
    print(key)

10071869-DS-19


In [62]:
# Display the section table DataClean produced for note 10071869-DS-19.
NotesSections['10071869-DS-19']

Unnamed: 0,title,text,label
4,sex,M,Sex
5,service,MEDICINE,Service
6,allergies,No Known Allergies / Adverse Drug Reactions,Allergies
7,attending,___.,Attending
8,chief complaint,hypoxemia,Chief Complaint
9,major surgical or invasive procedure,none,Major Surgical or Invasive Procedure
10,history of present illness,"___ who has chart history of dementia, recent ...",History of Present Illness
11,past medical history,- CHF (unclear type or EF) - Afib on warfarin ...,Past Medical History
12,social history,___,Social History
13,family history,"-- ""heart problems"" per wife in ___ family",Family History


In [None]:
# One-off experiment: ask a single diagnosis question about note
# 10099592-DS-14 with an extra formatting hint appended to the question.
# NOTE(review): this cell raises KeyError unless that note is in
# NotesSections; the recorded run only loaded row 3 (note 10071869-DS-19),
# so the lookup failed. Re-run the row selection with this note included.
import json
note = SectionSelect(NotesSections['10099592-DS-14'])
question_str = "(Text Summarization) List all the diagnoses and determine whether they are from recent onsite visits or past medical history.  If the answers have multiple sections in parallel, make sure to divide the whole answer into multiple answer_start-text pairs under the answers part."
answer_message = generate_ans_prompt(note, question_str)
answers = get_completion(answer_message)

KeyError: '10099592-DS-14'

In [58]:
answer_message = [{'role': 'system', 'content': 'You are a healthcare professional.'}, {'role': 'user', 'content': '\n\n"""\nSex: F Service: MEDICINE Allergies: Patient recorded as having No Known Allergies to Drugs Attending: ___. Chief Complaint: Bright red blood per rectum Major Surgical or Invasive Procedure: None History of Present Illness: The patient is a ___ with a history of HTN, rheumatoid arthritis  on methotrexate and chronic NSAID, DM type II, dementia  secondary to ___ Body disease who presents to ED following 2  episodes of painless BRBPR yesterday. She reports using the  toilet to urinate yesterday when had episode of painless bright  red blood per rectum independent of bowel movement. This was  followed some time later by a smaller episode of BRBPR. She  denied dizziness, palpitations, chest pain, DOE. She endorses an  approximately ___ pound weight loss over the past 6 months,  which she attributes to knee surgery rehabilitation. She denies  any change in bowel movements recently. She denies any history  of melana, previous BRBPR, hematemesis. Her most recent  colonoscopy was performed in ___, notable for internal  hemorrhoids.  . In the ED, initial VS 150/70 HR: 62 RR18 O2:100%RA. Her  hematocrit was 26.9 (baseline low ___. She was gauiac positive  on exam. 2PIV were placed. She was given 1L NS and 80mg IV  protonix.  .   Currently, she is without complaint. She denies  dizziness/CP/abdominal pain/SOB/N/V.  . ROS Denies fever, chills, night sweats, headache, rhinorrhea,  congestion, sore throat, cough, shortness of breath, chest pain,  abdominal pain, nausea, vomiting, diarrhea, constipation,   melena, hematochezia, dysuria, hematuria. 
Past Medical History: - Diabetes Type II (recently D/C metformin, HBA1C 5.4) - Rheumatoid Arthritis - Hypertension - Hyperlipidemia - ___ Body Dementia - Internal hemorrhoids - Hypesterectomy - Right knee arthroscopy - Right breast lumpectomy Social History: ___ Family History: NC Physical Exam:  GENERAL awake, alert, NAD HEENT anicteric, moist mucus membranes NECK JVP flat CARDIAC S1, S2 II/VI systolic murmur RUSB LUNG respirations unlabored, decreased BS left base, no  wheezes ABDOMEN surgical scar midline, soft, NTND, no gaurding EXT RLE swelling > LLE, painful to palpation NEURO AOx3, CNII-XII intact, strength ___ bilaterally Pertinent Results: ___ 12:10PM BLOOD WBC-6.3 RBC-2.63* Hgb-8.8* Hct-26.9*  MCV-102* MCH-33.5* MCHC-32.9 RDW-15.8* Plt ___ ___ 06:45PM BLOOD Hct-23.9* ___ 06:10AM BLOOD WBC-4.8 RBC-2.92* Hgb-9.1* Hct-28.9*  MCV-99* MCH-31.3 MCHC-31.7 RDW-16.8* Plt ___ ___ 03:20PM BLOOD Hct-29.3* ___ 06:15AM BLOOD WBC-4.0 RBC-3.04* Hgb-10.0* Hct-29.9*  MCV-99* MCH-33.0* MCHC-33.5 RDW-16.5* Plt ___ ___ 03:00PM BLOOD Hct-30.6* ___ 09:00AM BLOOD WBC-5.8 RBC-3.31* Hgb-10.8* Hct-32.5*  MCV-98 MCH-32.5* MCHC-33.2 RDW-16.7* Plt ___ ___ 12:10PM BLOOD Neuts-52.2 ___ Monos-8.3 Eos-4.9*  Baso-0.9 ___ 12:10PM BLOOD Glucose-94 UreaN-33* Creat-1.4* Na-144  K-4.1 Cl-112* HCO3-24 AnGap-12 ___ 12:10PM BLOOD ALT-13 AST-19 AlkPhos-71 TotBili-0.3 ___ 12:10PM BLOOD Albumin-3.6 Calcium-8.9 Phos-3.6 Mg-2.1 ___ 06:10AM BLOOD VitB12-289 Folate-6.0 ___ 06:10AM BLOOD TSH-3.8 ___ 10:03AM BLOOD METHYLMALONIC ACID-PND ___ U/S FINDINGS Grayscale and color Doppler sonogram of the  right common femoral,  superficial femoral, and popliteal veins were obtained. There is  normal  compressibility, flow and augmentation without evidence of DVT.  Color flow  was demonstrated to the calf veins, without evidence of  thrombus. IMPRESSION No evidence of DVT of the right lower extremity. Brief Hospital Course: ___ admitted with painless bright red blood per rectum.   #GI Bleed. 
During hospitalization there was no evidence of  active bleeding, the patient remained hemodynamically stable,  and her HCT, which was serially trended, remained stable.   During hospitalization she was transfused 2 units of pRBC and  her HCT responded appropriately increasing from 23.9 on  admission to 32.5 at discharge. The suspicion was highest for  lower GI bleed. GI was consulted and performed a flexible  sigmoidoscopy which demonstrated diffuse non-bleeding  diverticula and grade III non-bleeding internal hemorrhoids.   These were thought to be the sources of her BI bleed.  As an  outpatient, it may be beneficial to get evaluated for  hemorrroidal banding.  Right ___ swelling. The patient endorsed worsening right ___  swelling with correlation of swelling and tenderness on exam,  which was concerning for DVT. However, ___ ultrasound was  negative for DVT. The swelling was believed to be due to  post-surgical changes following a knee operation earlier this  year.   Rheumatoid Arthritis. The patient\'s mobic and ASA were held in  the setting of her GI Bleed. Pain control with tylenol was  adequate. Methotrexate was continued.  These can be restarted  after discussion with her primary care doctor.   Mild Dementia. The patient was well oriented, appropriate  behavior, had capacity for medical decision making, with normal  neurological exam. Donepezil was continued.   Anemia w/ elevated MCV. Baseline HCT variable, appears to be  approximately low ___. However, B12, tsh, folate were normal  consistent with previous work-up.  This should be continued to  be monitored as an outpatient.  Hypertension. The patient remained normotensive. Losartan was  held in setting of GIB, but was restarted prior to discharge. 
Medications on Admission: - Donepezil 10mg Qday - Fosamax 35mg Qweek - Losartan 50mg Qday - Meloxicam 15mg Qday - Methotrexate 2.5mg Qday for 6 days a week - Ranitidine 150mg BID - Timolol maleate 0.5% QAM - Latanoprost 0.005% QAM - Senna - Milk of Magnesia - Aspirin prn ~4 per day Discharge Medications: 1. Donepezil 5 mg Tablet Sig: Two (2) Tablet PO HS (at bedtime).    2. Fosamax 35 mg Tablet Sig: One (1) Tablet PO once a week.   3. Losartan 50 mg Tablet Sig: One (1) Tablet PO once a day.   4. Methotrexate Sodium 2.5 mg Tablet Sig: One (1) Tablet PO  DAILY (Daily).   5. Ranitidine HCl 150 mg Tablet Sig: One (1) Tablet PO BID (2  times a day).   6. Timolol Maleate 0.5 % Drops Sig: One (1) Drop Ophthalmic  DAILY (Daily).   7. Latanoprost 0.005 % Drops Sig: One (1) Drop Ophthalmic HS (at  bedtime).   8. Senna 8.6 mg Tablet Sig: One (1) Tablet PO BID (2 times a  day) as needed for constipation.   9. Anusol-HC 2.5 % Cream Sig: One (1)  Rectal three times a day. Disp *1 * Refills:*2* Discharge Disposition: Home Discharge Diagnosis:  Primary Gastrointestinal bleeding  Colonic diverticula Interal hemorrhoids Secondary Diabetes Type II Rheumatoid arthritis Hyperlipidemia ___ Body Dementia Discharge Condition: Stable, afebrile Discharge Instructions: You were admitted to ___ on ___ after having 2 episodes  of bright red blood for rectum. Your red blood cell counts were  stable during the hospitalization and there were no further  episodes of bleeding. You were given 2 units of blood during  your hospitalization. You underwent a flexible sigmoidoscopy to  evaluate for a source of the bleeding.  This showed diverticular  disease and internal hemorrhoids. You tolerated the procedure  well. Please follow up with your primary care physician this  week and with the GI doctors in one month. Appointments have  been made for you below.   Please continue taking your home medications, with one addition. 1. START APPLYING Anusol as directed for your hemorrhoids 2. 
START following a high fiber diet  Please return to the emergency department for any more episodes  of bleeding from the rectum, dizziness, palpitations, chest  pain, or worsening symptoms.\n"""\n\nUsing above patient\'s discharge summary, answer to the following questions.\nThe question is given with their own categoreis, with colon-concatenated form.\n\nQuestions:\n\n"""\n24. (Keyword Extraction) Extract all keywords related to the patient\'s GI bleed.\n\n\n"""\n\nHere are requirements:\n\n1. Note that each question is independent.\n2. Number your responses.\n3. Ensure that each answer is complete and does not raise another question.\n4. Answers can span multiple lines if needed.\n5. Answer all questions!\n6. Standardize the terminology used to describe the patient\'s condition?\n7. The output must be in JSON format, Do not add additional information which is not in the JSON format to the output.\n8. Do not anwser additional questions that are not asked.\n\nformat the output in JSON format (with only one key "qas")：\n\n├── "qas"\n  └── [i]\n      ├── "answers"\n      │   └── [j]\n      │       ├── "answer_start"\n      │       │\n      │       └── "text"\n      │ \n      ├── "id" \n      │ \n      └── "question"\n\n\nThe top-level key is "qas", which has a value that is a list of elements, denoted by [i]. Each element in the list is an object with three keys: "answers", "id", and "question".\n\nThe "answers" key has a value that is a list of elements, denoted by [j]. 
Each element in the list is an object with two keys: "answer_start" and "text".\n\nImportant: If there are multiple answers or the answers have multiple sections or the answers have multiple sentences in parallel such as `1..., 2..., 3...` ,`A..., B...,C....`,`1)..., 2)..., 3)....` and so on, make sure to divide the whole answer into multiple "answer_start"-"text" pairs under the "answers" key, such as\n\n── "answers"\n   └── [0]\n   |   ├── "answer_start"\n   |   │\n   |   └── "text": section 1 of the answer or answer 1\n   └── [1]\n   |   ├── "answer_start"\n   |   │\n   |   └── "text": section 2 of the answer or answer 2\n   └── [2]\n       ├── "answer_start"\n       │\n       └── "text": section 3 of the answer or answer 3\n       \n\nThe "answer_start" key has a value representing the index of the first character of the answer string in the context.  If you can not find the answer in the context, set the value to -1.\n\nThe "text" key has a value representing the answer text which contain some information and can\'t be empty.\n\nThe "id" key has a value representing the identifier of the question. It can be found in the begining of each question.\n\n"question": This is a string representing the question being asked. Just copy the input question under this key. Include the NLP categories on front in this format "" (Text Classification) \'The question\' "".\n'}]

# Send the hand-built answer_message directly (bypassing get_completion and
# its retries) so the full response object can be inspected.
response = openai.ChatCompletion.create(
    engine="chat-test",
    messages=answer_message,
    max_tokens=4096,
    temperature=0.5,
    top_p=0.95,
    presence_penalty=0,
    frequency_penalty=0,
    stop=None,
)

In [59]:
# Display the raw completion object returned by the call above.
response

<OpenAIObject chat.completion id=chatcmpl-7KTId0oG0h861JGjqweETB4mqW7Lo at 0x7fe78a991d10> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "{\n  \"qas\": [\n    {\n      \"answers\": [\n        {\n          \"answer_start\": 184,\n          \"text\": \"bright red blood per rectum\"\n        },\n        {\n          \"answer_start\": 201,\n          \"text\": \"painles\"\n        },\n        {\n          \"answer_start\": 215,\n          \"text\": \"2 episodes\"\n        },\n        {\n          \"answer_start\": 236,\n          \"text\": \"urinate yesterday when had episode of painless bright  red blood per rectum independent of bowel movement\"\n        },\n        {\n          \"answer_start\": 301,\n          \"text\": \"weight loss\"\n        },\n        {\n          \"answer_start\": 338,\n          \"text\": \"knee surgery rehabilitation\"\n        },\n        {\n          \"answer_start\": 759,\n          \"text

In [None]:
# Scratch check: substring membership test on string literals.
'ab' in 'abc'

True

In [None]:
# Scratch cell: a bare literal; it does not change any notebook state.
0