In [2]:
import os
import openai
import json
import random
import time
import re
import pickle
import traceback
from transformers import LlamaTokenizer
from tqdm import tqdm
import pandas as pd
openai.api_type = "azure"
openai.api_base = "https://test-openai-celesh.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
openai.api_key = "24a7f6c60aed4aae9c9729229709857b"

## Prompt

In [3]:
inst_prompt = """

\"\"\"
{note}
\"\"\"

Using above patient's discharge summary, you want to ask a well-trained model to help your clinical decision making.
Below are high-level NLP categories that you might want to ask about.
Tell me {num_inst_per_note} different instructions that you will ask to the model.

Tasks :

\"\"\"
{tasks}
\"\"\"

Here are requirements:
1. The words used in the instructions should be diverse to maximize the diversity.
2. A GPT language model should be able to complete the instruction. For example, do not ask the model to create other than any textual output.
3. The question type should be diverse such as one-word answer question, open-ended question, multiple-choice question, and yes/no question.
4. Concat the used NLP categories on front the questions such as  "" (Text Classification) 'The question' "".
"""

ans_prompt = """

\"\"\"
{note}
\"\"\"

Using above patient's discharge summary, answer to the following questions.
The question is given with their own categoreis, with colon-concatenated form.

Here are questions asked in this time:
\"\"\"
{questions}
\"\"\"

Here are requirements:
1. Note that each question is independent.
2. Number your responses.
3. Ensure that each answer is complete and does not raise another question.
4. Answers can span multiple lines if needed.
5. Answer all the questions!
6. Do not anwser additional questions that are not asked in this time!
7. Standardize the terminology used to describe the patient's condition!
8. The output must be in JSON format, Do not add additional information which is not in the JSON format to the output.


format the output in JSON format (with only one key "qas")：

├── "qas"
  └── [i]
      ├── "answers"
      │ 
      ├── "id" 
      │ 
      └── "question"


The top-level key is "qas", which has a value that is a list of elements, denoted by [i]. Each element in the list is an object with three keys: "answers", "id", and "question".

The "answers" key has a value that is a list of elements, denoted by [j]. The element in the list is the answer text to the question. If there are multiple answers or the answers have multiple sections or the answers have multiple sentences in parallel such as `1..., 2..., 3...` ,`A..., B...,C....`,`1)..., 2)..., 3)....` and so on, please divide the whole answer into multiple elements, such as `[section 1 of the answer or answer 1, section 2 of the answer or answer 2, section 3 of the answer or answer 3]`

The "id" key has a value representing the identifier of the question. It can be found in the begining of each question.
"question": This is a string representing the question being asked. Just copy the input question under this key. Include the NLP categories on front in this format "" (Text Classification) 'The question' "".
"""

tasks = [
    "Question Answering",
    "Natural Language Generation",
    "Text Summarization",
    "Text Classification",
    "Temporal Information Extraction",
    "Relation Extraction",
    "Named Entity Recognition",
    "Paraphrasing",
    "Clinical Concept Normalization",
    "Keyword Extraction",
    "Abbreviation expansion",
    "Feature extraction",
    "Content extraction"
]

In [4]:
question_list = [
    
    "(Text Summarization) What is the patient's primary diagnosis?",
    
    "(Content extraction) Which sentences in the note describe the diagnosis?",
    
    "(Text Summarization) What are the comorbidities of the patient?",
    
    "(Content extraction) Which sentences in the note describe the comorbidities?",
    
    "(Named Entity Recognition) List all the medications mentioned in the discharge summary.",
    
    "(Content extraction) Which sentences in the note describe the medications?",

    "(Named Entity Recognition) List all the labs mentioned in the discharge summary.",

    "(Content extraction) Which sentences in the note describe the labs?",
    
    "(Text Summarization) Provide a brief summary of the history treatment plan for the patient?",

    "(Content extraction) Which sentences in the note describe the history treatment plan for the patient?",
    
    "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?",

    "(Content extraction) Which sentences in the note describe the future treatment plan for the patient?",
    
    "(Content extraction) What medications are the patient currently on?",
    
    "(Text Summarization) List all the diagnoses and determine whether they are from recent onsite visits or past medical history."
    
]

print(len(question_list))

14


In [5]:
## Functions to generate prompts 
def generate_inst_prompt(note, num_inst_per_note, sample = True):
    if sample:
        selected_tasks = random.sample(tasks, num_inst_per_note)
    else: selected_tasks = tasks
    
    return [{"role":"system","content":"You are a healthcare professional."},
        {
            "role": "user",
            "content": inst_prompt.format(
                note=note,
                num_inst_per_note=num_inst_per_note,
                tasks="\n".join(selected_tasks),
            ),
        }
    ]

def generate_ans_prompt(note, questions):
    return [
        {"role":"system","content":"You are a healthcare professional."},
        {"role": "user", "content": ans_prompt.format(note=note, questions=questions)}
    ]
    
def generate_con_prompt():
    return [
        {"role":"system","content":"You are a healthcare professional."},
        {"role": "user", "content": "Continue"}
    ]

## Functions to get completion
def get_completion(message,temperature=0.6):
    
    num_tries = 0
    max_num_tries = 5

    while num_tries < max_num_tries:
        try:
            print("try get_completion...")
            response = openai.ChatCompletion.create(
                engine="chat-test",
                messages = message,
                temperature=temperature,
                max_tokens=4096,
                top_p=0.95,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None)
        except Exception as e:
            
            print("-----------------------------------------")
            traceback.print_exc()
            print(f"get_completion Error:",e)
            print("-----------------------------------------")
            
            num_tries += 1
            print(f"Error occurred in get_completion(), retrying (try {num_tries})...")
            time.sleep(5)
            continue
        
        print(f"Success! get_completion() completed. tried {num_tries}")     
        break
    
    if num_tries == max_num_tries:
        print("⚠Error: maximum number of tries reached in function get_completion().") 
        return " "
     
    return response["choices"][0]["message"]["content"]

## Functions to process the questions
def add_question_list(questions, question_list):
    num = len(question_list);temp = ""
    for i in range(len(question_list)):
        temp += str(i + 1) + ". " + question_list[i]+ "\n"
    questions = temp + add_numbers(questions,num)
    return questions

def split_questions(questions,m):
    question_list = re.findall(r'\d+\.\s+[^\d]+', questions)
    question_str = ""
    question_str_list = []
    for i in range(len(question_list)):
        if i % m == 0 and i != 0:
            question_str_list.append(question_str)
            question_str = ""
        question_str += question_list[i] + "\n"
    question_str_list.append(question_str)
    return question_str_list

def find_substring(string):
    start = string.find("{")
    end = string.rfind("}")
    return string[start:end+1]

def add_numbers(string,num):
    pattern = r'\b(\d+)\b'
    replacement = lambda match: str(int(match.group(1)) + num)
    result = re.sub(pattern, replacement, string)
    return result

def get_tokensnum(message,tokenizer):
    
    if type(message) == str:
        return len(tokenizer.tokenize(message))
    else:
        return len(tokenizer.tokenize(message[0]['content'])) + len(tokenizer.tokenize(message[1]['content']))

## Functions to process the data
def DataClean(data,labels_path,data_path=None,write_path=None,delete=None):
    
    print('\n\nImporting and filtering database...')
    
    if data is not None and not data.empty:
        notes = data
    else:
        notes = pd.read_csv(data_path)
        
    ints_str = '0123456789-#[]' # characters that usually main categories don't start with
    
    print('\n\nSplitting each note into sections:\n\n')
    
    notes_sections = {}
    
    for note_index in tqdm(range(notes.shape[0])):
        note = notes['text'][note_index].replace('\n\n\n\n','\n').replace('\n\n\n','\n').replace('     ','\n')
        paragraphs = note.split('\n')
        
        subsections, new_section = [], ' '
        for p in paragraphs:
            line = p.strip()
            if len(line)>0 and ':' in line and not (line[line.find(':')-1] in ints_str) and not(line[0] in ints_str):
                subsections.append([new_section.strip()])
                new_section = p + ' '
            else:
                new_section += p + ' '
        subsections.append([new_section])
        subsections.pop(0)

        note_sect_tit,note_sect_par = [],[]
        for sect in subsections:
            note_sect_tit += [str(*sect)[0:str(*sect).find(':')]]
            note_sect_par += [str(*sect)[str(*sect).find(':')+1:].strip()]
        note_df = pd.DataFrame({'title':note_sect_tit,'category':'','text':note_sect_par, 'label':''})
        notes_sections[notes['note_id'][note_index]] = note_df
    
    f = open(labels_path, 'r')
    obj_label = f.readlines()
    obj_label_dict = {}
    i = 0
    for s in obj_label:
        i += 1
        if '/' in s:
            buffer = s.strip('\n').lower().split('/')
            for item in buffer:
                obj_label_dict[item] = i
        else:
            obj_label_dict[s.strip('\n').lower()] = i
    f.close()

    for key in tqdm(list(notes_sections.keys())):
        buffer = 'begin_title'
        t = list(notes_sections[key]['title'])
        for idx in range(len(t)):
            for item in list(obj_label_dict.keys()):
                if item in t[idx].lower() and len(t[idx].lower())>2:
                    buffer = item
                    notes_sections[key]['category'][idx] = buffer
                    notes_sections[key]['label'][idx] = obj_label_dict[buffer]
                    break
            notes_sections[key]['category'][idx] = buffer
            notes_sections[key]['label'][idx] = obj_label_dict[buffer]

    notes_sections_output = {}
    row_id  = notes_sections.keys()
    for key in tqdm(row_id):
        buffer = ''
        note_sect_tit, note_sect_par, note_sect_lab = [], [], []
        for i in range(len(notes_sections[key]['category'])):
            if buffer != notes_sections[key]['category'][i]:
                buffer = notes_sections[key]['category'][i]
                note_sect_tit.append(buffer)
                note_sect_lab.append(notes_sections[key]['title'][i])
                note_sect_par.append(notes_sections[key]['text'][i])
                # if buffer == 'followup instruction' or buffer == 'follow up' or buffer == 'follow-up':
                #     break
            else:
                note_sect_par[-1] = note_sect_par[-1] + ' ' + notes_sections[key]['title'][i] + ' ' + notes_sections[key]['text'][i]
        note_df = pd.DataFrame({'title': note_sect_tit, 'text': note_sect_par, 'label': note_sect_lab})
        notes_sections_output[key] = note_df


    notes_sections = notes_sections_output
    
    if delete != None:
        for key,value in notes_sections.items():
            notes_sections[key] = notes_sections[key][~notes_sections[key]['label'].isin(delete)]

    if write_path != None:
        f = open(write_path,"wb")
        pickle.dump(notes_sections,f)
        f.close()
    
    return notes_sections

## Import Data

In [6]:
df = pd.read_csv('mimiciv_discharge.csv')

In [7]:
df = pd.read_csv('mimiciv_discharge.csv')
df = df.loc[[2,5,7,3,8,12,9,19,18,16],].reset_index(drop=True)

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
length = df['text'].apply(lambda x: len(tokenizer.tokenize(x)))

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [8]:
NotesSections = DataClean(data=df,labels_path="labels.txt",write_path="notes.pkl",delete=['Name','Admission Date','Discharge Date','Date of Birth','Followup Instructions'])

100%|██████████| 10/10 [00:00<00:00, 1744.86it/s]
100%|██████████| 10/10 [00:00<00:00, 62.51it/s]



Importing and filtering database...


Splitting each note into sections:





100%|██████████| 10/10 [00:00<00:00, 1146.77it/s]


In [34]:
def SectionSelect(data):
    
    data['temp'] = data['label'] + ": " + data['text']
    while len(tokenizer.tokenize(' '.join(data['temp']))) > 1400:
        random_index = random.randint(0, len(data) - 1)
        data = data.drop(data.index[random_index])
    return ' '.join(data['temp']),{'section_used':data['label'].tolist()}

In [36]:
for key in NotesSections.keys():
    note,section_used = SectionSelect(NotesSections[key])
    print(len(tokenizer.tokenize(note)))
    print(section_used)

917
{'section_used': ['History of Present Illness', 'Brief Hospital Course', 'Discharge Condition']}
871
{'section_used': ['Sex', 'Attending', 'Chief Complaint', 'Major Surgical or Invasive Procedure', 'Review of systems', 'Social History', 'Physical Exam', 'Medications on Admission', 'Discharge Disposition']}
1163
{'section_used': ['Service', 'Allergies', 'Attending', 'History of Present Illness', 'Review of systems', 'Social History', 'Family History', 'Physical Exam', 'Discharge Medications', 'Discharge Disposition', 'Facility', 'Discharge Diagnosis', 'Discharge Condition']}
915
{'section_used': ['Service', 'Attending', 'Major Surgical or Invasive Procedure', 'History of Present Illness', 'Past Medical History', 'Physical Exam', 'Discharge Medications', 'Discharge Disposition', 'Discharge Condition', 'Discharge Instructions']}
493
{'section_used': ['Sex', 'Past Medical History', 'Family History', 'Medications on Admission', 'Facility']}
1058
{'section_used': ['Discharge Medications'

In [45]:
data = {}
data["version"] = "mimic4"
data["data"] = []
num_tokens = 0
note_record = {}

for row,key in enumerate(NotesSections.keys()):
    
    time.sleep(5)
    print("========================================= row:", row,"=========================================")
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    
    
    ## Split the note and select the section and record the note
    note_record[key] = []
    note,section_used = SectionSelect(NotesSections[key])
    note_record[key].append(section_used)
    note_record[key].append({"question_record":[]})
    note_record[key].append({"question_error":[]})
    
    n = 16
    
    question_message = generate_inst_prompt(note, num_inst_per_note = n, sample = False)
    num_tokens += get_tokensnum(question_message,tokenizer)
    questions = get_completion(question_message)
    num_tokens += get_tokensnum(questions,tokenizer)
    
    questions = add_question_list(questions, question_list)
    
    print("```")
    print(questions)
    print("```")
    
    m = 5
    question_str_list = split_questions(questions, m)
    
    new_data = {}
    new_data["title"] = df.iloc[row,0]
    new_data["paragraphs"] = [{"context": df.iloc[row,-1], "qas": []}]

    for question_str in question_str_list:
        
        time.sleep(5)
        num_tries = 0
        max_num_tries = 5
        note_record[key][1]["question_record"].append(question_str)
        
        while num_tries < max_num_tries:
            
            try:
                answer_message = generate_ans_prompt(note, question_str)
                num_tokens += get_tokensnum(answer_message,tokenizer)
                answers = get_completion(answer_message)
                num_tokens += get_tokensnum(answers,tokenizer)
                
                print("-----------------------------------------")
                print(answers)
                print("-----------------------------------------")
                new_qas = json.loads(answers)

            except Exception as e:
                
                num_tries += 1
                
                print("-----------------------------------------")
                traceback.print_exc()
                print(f"    Error:",e)
                print("-----------------------------------------")
                
                ## add the continue part
                if "Expecting property name enclosed in double quotes" in str(e):
                    
                    continue_message = generate_con_prompt()
                    num_tokens += get_tokensnum(continue_message,tokenizer)
                    temp = get_completion(continue_message)
                    num_tokens += get_tokensnum(temp,tokenizer)
                    
                    answers = answers + temp
                    print(answers)
                
                else: 
                    
                    print("-----------------------------------------")
                    print(f"    Error occurred in json.loads(), retrying (try {num_tries})...")
                    print("-----------------------------------------")
                    
                    print("-----------------------------------------")
                    print(answers)
                    print("-----------------------------------------")
                    continue
                
            print(f"    Success! json.loads() completed. tried {num_tries}")
            
            if len(new_qas["qas"]) != m:
                num_tries += 1
                print(f"    Error! Answer additional questions. {len(new_qas['qas'])} questions generated, {m} expected. Retrying...")
                continue
            
            for foo in new_qas["qas"]:
                new_data["paragraphs"][0]["qas"].append(foo)
            print(len(new_data["paragraphs"][0]["qas"]))        
            break
        
        if num_tries == max_num_tries:
            print("    ⚠Error: maximum number of tries reached in function json.loads().")
            print(question_str)
            print(answers)
            note_record[key][2]["question_error"].append(question_str)
        
    data["data"].append(new_data)
    print(len(new_data['paragraphs'][0]['qas']))
    print("\n")
    with open('tmp5.json', 'w') as f:      
        json.dump(data, f, indent=2)
    with open('log.json', 'w') as f:      
        json.dump(note_record, f, indent=2)

2023-05-29 23:00:46
try get_completion...
Success! get_completion() completed. tried 0
```
1. (Text Summarization) What is the patient's primary diagnosis?
2. (Content extraction) Which sentences in the note describe the diagnosis?
3. (Text Summarization) What are the comorbidities of the patient?
4. (Content extraction) Which sentences in the note describe the comorbidities?
5. (Named Entity Recognition) List all the medications mentioned in the discharge summary.
6. (Content extraction) Which sentences in the note describe the medications?
7. (Named Entity Recognition) List all the labs mentioned in the discharge summary.
8. (Content extraction) Which sentences in the note describe the labs?
9. (Text Summarization) Provide a brief summary of the history treatment plan for the patient?
10. (Content extraction) Which sentences in the note describe the history treatment plan for the patient?
11. (Text Summarization) Provide a brief summary of the future treatment plan for the patient?
1

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "question": "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?",
      "answers": [
        "The patient will have post-procedure follow-up arranged by interventional radiology, and she has been discharged with prescriptions for oxycodone and ibuprofen to manage pain. She has also been prescribed lactulose to manage constipation.",
        "The future treatment plan for the patient includes post-procedure follow-up, prescriptions for pain management and constipation, and instructions to contact her primary care physician if she is unable to have a bowel movement within a certain timeframe."
      ],
      "id": 11
    },
    {
      "question": "(Content extraction) Which sentences in the note describe the future treatment plan for the patient?",
      "answers": [
        "Post-procedure follow up to be arranged 

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2240430983.py", line 58, in <cell line: 7>
    new_qas = json.loads(answers)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 91 column 7 (char 5387)


Success! get_completion() completed. tried 0
-----------------------------------------
{
  "qas": [
    {
      "question": "(Text Summarization) What is the patient's primary diagnosis?",
      "id": 1,
      "answers": [
        "The patient's primary diagnosis is acute hypoxemic respiratory failure secondary to parainfluenza 3 pneumonia."
      ]
    },
    {
      "question": "(Content extraction) Which sentences in the note describe the diagnosis?",
      "id": 2,
      "answers": [
        "On day  of arrival at ___ patient acutely hypoxemic with inc O2  requirement, CXR at Rehab showed ?Opacity of R Lung.",
        "In the ED, initial vitals Afebrile, normotensive, HR 70-100,  80% on 6L, 98% on NRB - Exam notable for: intermittentl oriented elderly man with  tachypnea who had SaO2 mid ___ on 6L - Labs were notable for:   ABG: pH ___ INR 5.0, Hgb 7.3 / Hct 23, LFTs wnl, Alb 2.3, Lactate 1.9, BUN  23, Cr 0.6, Na 148, UA normal,  - Imaging:  CXR shows Significant opacification of t

Traceback (most recent call last):
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 714, in urlopen
    httplib_response = self._make_request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 466, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 461, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/http/client.py", line 289, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteD

try get_completion...
-----------------------------------------
get_completion Error: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
-----------------------------------------
Error occurred in get_completion(), retrying (try 2)...


Traceback (most recent call last):
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 714, in urlopen
    httplib_response = self._make_request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 403, in _make_request
    self._validate_conn(conn)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1053, in _validate_conn
    conn.connect()
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py", line 419, in connect
    self.sock = ssl_wrap_socket(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 449, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 493, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname

try get_completion...
Success! get_completion() completed. tried 2
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "Hypotension"
      ],
      "id": 1,
      "question": "(Text Summarization) What is the patient's primary diagnosis?"
    },
    {
      "answers": [
        "He was found by his PCP to be hypotensive with SBP 66 and sent to the ED.",
        "labs notable for creatinine of 1.8, lipase of 146, HCT 33 (baseline 36). EKG - nsr, <1mm STE V1-v3: no sig change prior. Will add on troponin. CXR was unremarkable. U/A not done.",
        "Primary diagnosis Hypotension"
      ],
      "id": 2,
      "question": "(Content extraction) Which sentences in the note describe the diagnosis?"
    },
    {
      "answers": [
        "3VD",
        "DM",
        "Alzheimer's"
      ],
      "id": 3,
      "question": "(Text Summarization) What are the comorbidities of the patient?"
    },
    {
      "answers": [
        "3VD, DM, Alzheimer's",
      

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "The future treatment plan for the patient includes continuing home medications, adding Percocet as needed for pain, and using hot packs as needed to the area of pain. The patient is also advised to return to the emergency department if they develop numbness or weakness of the legs, or severe, intractable pain that limits walking."
      ],
      "id": "11",
      "question": "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?"
    },
    {
      "answers": [
        "No changes were made to your home medications, we have only  added Percocet as needed for pain, as well as recommending the use of hot packs as needed to the area of pain."
      ],
      "id": "12",
      "question": "(Content extraction) Which sentences in the note describe the future treatment plan for the patient?"
    },
    

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "The patient's ESRD is being treated with HD (hemodialysis) which was continued upon discharge."
      ],
      "id": "21",
      "question": "(Relation Extraction) What is the relationship between the patient's ESRD and their current treatment?"
    },
    {
      "answers": [
        "The duration of the patient's hospital stay is not mentioned in the summary."
      ],
      "id": "22",
      "question": "(Temporal Information Extraction) Can you extract the duration of the patient's hospital stay from the summary?"
    },
    {
      "answers": [
        "Metoprolol Succinate 50 mg Tablet Sustained Release 24 hr, Aspirin 81 mg Tablet, Delayed Release (E.C.), Omeprazole 20 mg Capsule, Delayed Release(E.C.), Calcium Acetate 667 mg Capsule, Sevelamer HCl 800 mg Tablet, and Docusate Sodium 100 mg Capsule."
      ],
      "id": "23",


Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
```
1. (Text Summarization) What is the patient's primary diagnosis?
2. (Content extraction) Which sentences in the note describe the diagnosis?
3. (Text Summarization) What are the comorbidities of the patient?
4. (Content extraction) Which sentences in the note describe the comorbidities?
5. (Named Entity Recognition) List all the medications mentioned in the discharge summary.
6. (Content extraction) Which sentences in the note describe the medications?
7. (Named Entity Recognition) List all the labs mentioned in the discharge summary.
8. (Content extraction) Which sentences in the note describe the labs?
9. (Text Summarization) Provide a brief summary of the history treatment plan for the patient?
10. (Content extraction) Which sentences in the note describe the history treatment plan for the patient?
11. (Text Summarization) Provide a brief summary of the future treatment plan for the patient?
12. (Content extracti

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
-----------------------------------------
get_completion Error: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-03-15-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.
-----------------------------------------
Error occurred in get_completion(), retrying (try 2)...


Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 2
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "Patient is advised to follow up with all appointments as advised."
      ],
      "id": "11",
      "question": "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?"
    },
    {
      "answers": [
        "All follow-up appointments advised."
      ],
      "id": "12",
      "question": "(Content extraction) Which sentences in the note describe the future treatment plan for the patient?"
    },
    {
      "answers": [
        "Lasix", 
        "Lopressor", 
        "insulin"
      ],
      "id": "13",
      "question": "(Content extraction) What medications are the patient currently on?"
    },
    {
      "answers": [
        "Obesity", 
        "Fatty liver", 
        "GERD", 
        "Colonic adenomas", 
        "Small focus of follicular lymphoma", 
        "Hypertension", 
        "LVH",

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2240430983.py", line 58, in <cell line: 7>
    new_qas = json.loads(answers)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 77 column 7 (char 3079)


Success! get_completion() completed. tried 0
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "The future treatment plan for the patient includes follow-up appointments and continued management of his pre-existing medical conditions such as hypertension, hyperlipidemia, obesity, cardiomyopathy, and chronic lower back pain. The patient was started on Lasix and Lopressor and required insulin therapy during his admission. He is also advised to continue taking his medications for GERD and colonic adenomas. "
      ],
      "id": "11",
      "question": "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?"
    },
    {
      "answers": [
        "Following surgery, he was transferred to the CVICU for invasive monitoring in stable condition. He was initially hypoxic, requiring high peep. He weaned from sedation, awoke neurologically intact and extubated early POD2. He was started on Lasix and Lopressor. Nitro gtt w

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "The patient will follow up with Hem/Onc for work up for Factor V Leiden Deficiency and antiphospholipid syndrome. The patient will also follow up with Dr. ___ in ___ clinic. The patient will continue taking xarelto for 3 weeks and then switch to 20 mg daily. He will also continue gabapentin, tramadol, and simvastatin. He will use home oxygen for sleep and ambulation."
      ],
      "id": "11",
      "question": "(Text Summarization) Provide a brief summary of the future treatment plan for the patient?"
    },
    {
      "answers": [
        "The sentences that describe the future treatment plan for the patient are: 'Plan for outpatient follow up with Hem/Onc to address hypercoagulable work up with Factor V Leiden and antiphospholipid syndrome. Patient also with plan to follow up with Dr. ___ in ___ clinic. Following this, the pati

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "shortness of breath, clamminess, lung pain, back pain, weakness in lower extremities"
      ],
      "id": "21",
      "question": "(Keyword Extraction) What are the main symptoms the patient reported?"
    },
    {
      "answers": [
        "shortness of breath"
      ],
      "id": "22",
      "question": "(Abbreviation Expansion) What does \"SOB\" stand for?"
    },
    {
      "answers": [
        "Hypercoagulable workup with Factor V Leiden Deficiency"
      ],
      "id": "23",
      "question": "(Feature Extraction) What treatment plan was recommended for the patient's hypercoagulable workup?"
    },
    {
      "answers": [
        "Home oxygen requirement for sleep and ambulation"
      ],
      "id": "24",
      "question": "(Content Extraction) What is the patient's current oxygen requirement?"
    },
    {
      "answer

Traceback (most recent call last):
  File "/var/folders/nt/61qr68k1095348fy48d1yhf80000gp/T/ipykernel_37696/2648004828.py", line 39, in get_completion
    response = openai.ChatCompletion.create(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 230, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", line 624, in _interpret_response
    self._interpret_response_line(
  File "/Users/sinianzhang/opt/anaconda3/lib/python3.9/site-packages/openai/api_requestor.py", lin

try get_completion...
Success! get_completion() completed. tried 1
-----------------------------------------
{
  "qas": [
    {
      "answers": [
        "shortness of breath, clamminess, back pain, weakness in both lower extremities"
      ],
      "id": "21",
      "question": "(Keyword Extraction) What are the main symptoms the patient reported?"
    },
    {
      "answers": [
        "Shortness of Breath"
      ],
      "id": "22",
      "question": "(Abbreviation Expansion) What does \"SOB\" stand for?"
    },
    {
      "answers": [
        "Hypercoagulable workup with Factor V Leiden deficiency and antiphospholipid syndrome."
      ],
      "id": "23",
      "question": "(Feature Extraction) What treatment plan was recommended for the patient's hypercoagulable workup?"
    },
    {
      "answers": [
        "Home oxygen requirement for sleep and ambulation."
      ],
      "id": "24",
      "question": "(Content Extraction) What is the patient's current oxygen requirement?"


In [47]:
print("The number of tokens: ",num_tokens)

The number of tokens:  175734


In [37]:
section = [
        "Allergies",
        "Attending",
        "History of Present Illness",
        "Past Medical History",
        "Social History",
        "Physical Exam",
        "Brief Hospital Course",
        "Medications on Admission",
        "Discharge Disposition"
]

In [44]:
temp = NotesSections["10053872-DS-7"][NotesSections["10053872-DS-7"]['label'].isin(section)]

In [45]:
''.join(temp['temp'])

'Allergies: Sulfa (Sulfonamides) / Erythromycin BaseAttending: ___.History of Present Illness: Patient is a ___ year old female status post uterine artery  fibroid embolization by ___ on ___ who was discharged ___ and  around midnight awoke with right groin pain. The patient noticed  that her right groin appeared swollen and felt slightly firm.  This area of swelling seemed to increase in size. The patient  also felt pain radiating across her lower abdomen, causing her  to "double over in pain." She also felt it difficult to  straighten her right leg without pain. She called the  interventional radiology physician\'s assistant, who instructed  her to come to the emergency department.  In the ED, patient was found by ultrasound to have a right groin  pseudoaneurysm. Patient was admitted for thrombin injection of  pseudoaneurysm.  On the floor, patient reported right groin pain of ___. She had  no other complaints. Review of sytems (+) Per HPI (-) Denies fever, chills, night sweats, rece

In [46]:
len(tokenizer.tokenize(''.join(temp['temp'])))

1314