In [None]:
! pip install medspacy==1.2.0

In [None]:
! pip install openai==1.55.0

In [3]:
import sys
# Shared project directory; the config file and prompt files loaded later are read from here.
# NOTE(review): absolute cluster path -- presumably also where the custom helper modules live; confirm.
shared_path = '/gpfs/data/majorlab/biasaudit'
# Insert at position 0 so this directory is searched first when importing.
sys.path.insert(0, shared_path)

In [None]:
from bson import ObjectId
from pymongo import MongoClient
from pymongo.errors import *

#### Install

In [None]:
import yaml
import datetime
import time
import pytz
import os
import json
import requests
import logging
import gc

#import random
from preprocess import preprocess_dcnarrative # custom
from note_helpers import * # custom
#from interconnect import push_set_of_sdes, push_success_flowsheet # not needed for testing

## time stuff
## timezone first
est = pytz.timezone('America/New_York')
last_request = datetime.datetime(2024, 1, 1) # init
## get now and round down to the minute
#now = datetime.datetime.now(tz=est)
#now_s = now.replace(second=0).strftime("%Y-%m-%dT%H:%M:%SZ")

# set up logging
def timetz(*args):
    """Return an Eastern-time ``struct_time`` for ``logging.Formatter.converter``.

    ``logging`` calls the converter with the record's creation time (seconds
    since the epoch).  Because the function is installed on the Formatter
    class, the formatter instance may be passed in front of it, so the
    timestamp is taken from the last positional argument.  Honouring it makes
    ``%(asctime)s`` show when the record was created rather than when it was
    formatted.  When no numeric timestamp is supplied, the current time is used.
    """
    secs = args[-1] if args and isinstance(args[-1], (int, float)) else None
    if secs is None:
        return datetime.datetime.now(est).timetuple()
    return datetime.datetime.fromtimestamp(secs, est).timetuple()
# Install timetz as the converter for every Formatter so log times use the Eastern timezone.
logging.Formatter.converter = timetz
LOG_FORMAT = 'PatientFriendly | GPTRunner | %(asctime)s | %(levelname)s | %(message)s'
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)
logger = logging.getLogger("SCDS logger")

In [5]:
import argparse

# A throwaway -f option is declared so that parse_args() accepts the -f argument
# present when this runs inside Jupyter.
parser = argparse.ArgumentParser(description='SDE push script')
parser.add_argument("-f", help = "Dummy argument to trick jupyter...")
args = parser.parse_args()

# Hard-coded overrides. These were previously read from the environment:
# ENV (falling back to 'POC') and IC_APP (falling back to 'noteconcept').
args.epic_env = 'PRD'
args.app = 'noteconcept'

logger.info('starting up GPT runner in ENV={} using interconnect app={}'.format(args.epic_env, args.app))

print("Operating in Epic's {} environment, using the {} app".format(args.epic_env, args.app))

## collapse the granular Epic env into prod vs non-prod and pick the matching Mongo setting
is_prod = args.epic_env == 'PRD'
args.epic_env_class = 'PRD' if is_prod else 'nonPRD'
args.mongo_version = 'MongoProd_Owner' if is_prod else 'MongoDev_Owner'
print(args.epic_env_class)

## load the shared YAML configuration
config_path = os.path.join(shared_path, "gpt_config.yml")
with open(config_path, "r") as cfg:
    config = yaml.safe_load(cfg)

kong_cfg = config.get('kong')
credentials = config.get('credentials')

min_s = int(kong_cfg.get('delta'))
args.app_user = credentials.get('user', 'none')
args.app_pass = credentials.get('password', 'none')
## report (but carry on) when either credential fell back to the 'none' placeholder
if 'none' in (args.app_user, args.app_pass):
    print('One or both of the user or password is empty, panic!')

sdes = config.get('sdes')

# JSON is UTF-8 by specification and the few-shot examples contain non-ASCII
# characters, so decode explicitly instead of relying on the locale default.
with open(os.path.join(shared_path, 'prompts/ChatSetup-DC.json'), encoding='utf-8') as j:
    setup = json.load(j)

# Conversation prefix: the system prompt, then each few-shot example as a
# user message followed by the assistant reply.
chat_payload = [{'role': 'system',
                 'content': setup.get('systemPrompt')}]

for ex in setup.get('fewShotExamples') or []:  # tolerate a missing/null example list
    chat_payload.append({'role': 'user',
                         'content': ex.get('userInput')})
    chat_payload.append({'role': 'assistant',
                         'content': ex.get('chatbotResponse')})

gpt_settings = setup.get('chatParameters')

# Prompt template; later cells substitute the note text for the '{text}' placeholder.
# NOTE(review): assumes the template file is UTF-8 encoded -- confirm.
with open(os.path.join(shared_path, 'prompts/DC Prompt.txt'), encoding='utf-8') as p:
    prompt = p.read()

PatientFriendly | GPTRunner | 2025-01-31 08:41:00 | INFO | starting up GPT runner in ENV=PRD using interconnect app=noteconcept


Operating in Epic's PRD environment, using the noteconcept app
PRD


## Testing

In [6]:
payload = json.dumps({"temperature": 0,
                      "top_p" : 0.95,
                      "max_tokens": 800,
                      "messages": [
                          {"role": "system",
                           "content": "You are a helpful assistant."},
                          {"role": "user",
                           "content": "Select a random number between 1 and 100,000"}] })

headers = {"apikey": config.get('kong').get('apikey'),
           'Content-Type': 'application/json'}
url = "https://genai-api.prod1.nyumc.org/gpt-4o/v1.0.0/chat/completions"

# A timeout stops a stalled endpoint from hanging the kernel indefinitely.
res = requests.post(url, headers=headers, data=payload, timeout=120)

# Reset first: otherwise a failed call would raise NameError at the final print
# (fresh kernel) or silently show the result of an earlier run of this cell.
result = None
if res.status_code != 200:
    print('error')
else:
    choices = res.json().get('choices')  # decode the body once
    if not choices:
        print('bad payload')
    else:
        if len(choices) > 1:
            print('more than one result')
        for c in choices:
            result = c.get('message').get('content')
print(result)

Sure, here is a random number between 1 and 100,000: 47,583.


You can change anything there: the parameters (the Patient Friendly defaults are {"temperature": 0, "top_p" : 0.95, "max_tokens": 800}) or the chain of messages. A conversation is built by adding 'layers' to that nested list: a system message first, then a user message, then the assistant's reply, then the next user message, and so on.

#### Check Patient Friendly chat parameters

In [7]:
chat_payload
# The current setup is a custom system message plus
# two-shot learning examples: user message of d/c narrative -> assistant output of a patient-friendly summary.

[{'role': 'system',
  'content': 'You are an AI assistant for a health system called NYU Langone Health who will provide a summary of the hospital course in plain, easy-to-understand language for a patient who is discharged from hospital.'},
 {'role': 'user',
  'content': 'DISCHARGE NARRATIVE\n\xa0\nAdmit date: 7/9/2023\n\xa0\nExpected Discharge date: \u200b \u200b\u200b\n\xa0\nAttending Authorizing Discharge: Dr. Alexey Yanilshtein\nAdmission Diagnoses: Right perinephric Hematoma\n\xa0\nDischarge Diagnoses/Hospital Problems:\nActive Problems:\n  Renal mass, right\nResolved Problems:\n  * No resolved hospital problems. *\n\xa0\n\xa0\n\u200b\xa0\n\xa0\nIndication for Admission: Right Perinephric hematoma with acute extravasation\n\xa0\nHistory of Present Illness: \nThis is a\xa062 y.o.\xa0male\xa0with past urological history significant for renal colic x2\xa0(both stones passed spontaneously without surgical intervention)\xa0and right renal mass which has been followed by his private ur

In [8]:
# The chat parameters are:
print(gpt_settings) # note the different naming from the Azure side vs the API side
print({"temperature": 0, "top_p": 0.95, "max_tokens": 800}) # API-friendly naming
# You can override any of these and feed them into the request above.

{'deploymentName': 'GPT4', 'maxResponseLength': 800, 'temperature': 0, 'topProbablities': 0.95, 'stopSequences': None, 'pastMessagesToInclude': 10, 'frequencyPenalty': 0, 'presencePenalty': 0}
{'temperature': 0, 'top_p': 0.95, 'max_tokens': 800}


#### Testing PF DC generation

In [9]:
### Updating the gpt function call

def request_gpt(content, config, settings=None, timeout=120):
    """POST a chat-completion request to the GenAI API and return the decoded reply.

    Parameters
    ----------
    content : list of dict
        Chat messages, sent as the ``"messages"`` field of the request body.
    config : dict
        Loaded configuration; the API key is read from ``config['kong']['apikey']``.
    settings : dict, optional
        Overrides merged over the defaults (temperature 0, top_p 0.95,
        max_tokens 800).  Keys are sent in the request body exactly as given.
    timeout : float, optional
        Seconds to wait for the endpoint; ``requests`` raises a timeout
        exception once exceeded instead of blocking forever.

    Returns
    -------
    dict
        The decoded JSON body.  An empty dict is returned when the body is not
        a JSON object, so callers that check ``.get('choices')`` report a bad
        payload instead of crashing.
    """
    payload = {
        "temperature": 0,
        "top_p": 0.95,
        "max_tokens": 800,
        "messages": content,
    }
    if settings:  # i.e. not empty or default
        payload.update(settings)  # override the defaults above with whatever was specified

    headers = {
        'apikey': config.get('kong').get('apikey'),
        'Content-Type': 'application/json',
    }
    # Alternative API version kept for reference: api-version=2023-07-01-preview
    url = "https://genai-api.prod1.nyumc.org/openai/deployments/GPT4/chat/completions?api-version=2023-03-15-preview"

    res = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout)
    if res.status_code != 200:
        # Still try to decode: an error body may carry useful details for the caller.
        print('GPT request returned HTTP {}'.format(res.status_code))

    try:
        response = res.json()
    except ValueError:
        # Covers every JSON decode error variant raised by requests / json.
        print('GPT response was not valid JSON (HTTP {})'.format(res.status_code))
        return {}

    if not isinstance(response, dict):
        print('GPT response was not a JSON object')
        return {}

    return response

In [10]:
note_text_test = '''
"DISCHARGE NARRATIVE\n\n \n\n\nAdmit date: 7/9/2024\n\n \n\n\nExpected Discharge date: 7/12/2024 ‚Äã‚Äã\n\n \n\n\nAttending Authorizing Discharge: Dr. Jeffrey Samuel\n\n \n\n\nAdmission Diagnoses: Anemia due to acute blood loss [D62]\n\nCoffee ground emesis [K92.0]\n\nGastrointestinal hemorrhage, unspecified gastrointestinal hemorrhage type [K92.2]\n\nHematemesis, unspecified whether nausea present [K92.0]\n\n \n\n \n\n\nDischarge Diagnoses/Hospital Problems:\n\nPrincipal Problem:\n\nAnemia due to acute blood loss\n\nActive Problems:\n\nCoffee ground emesis\n\nHematemesis\n\nResolved Problems:\n\n* No resolved hospital problems. *\n\n \n\n \n\n \n\n \n\n \n\n\nIndication for Admission: Anemia due to acute blood loss [D62]\n\nCoffee ground emesis [K92.0]\n\nGastrointestinal hemorrhage, unspecified gastrointestinal hemorrhage type [K92.2]\n\nHematemesis, unspecified whether nausea present [K92.0]\n\n \n\n \n\n\nHistory of Present Illness:   \nSee below\n\n| |   \n---|---|---  \n\nHospital Course:   \n77 year-old White male with PMHx of A-fib (Xarelto d/c'd due to GIB), HTN, HLD, CKD, Type 2 DM, Asthma who presented to the ED due to hematemesis and melena s/p outpatient EGD with findings of gastritis and non-bleeding gastric ulcer which was biopsied 07/09. Patient states he has a hx of GIB - was hospitalized last year - and had an EGD at that time which was notable for erosive gastritis, erosive duodenitis and duodenal ulcer. He follows with Dr. Rizzo (GI) and was planned for EGD 07/09 to monitor these findings. Per chart review the EGD was notable for non-bleeding gastric ulcer with pigmented material - which was biopsied. There was also findings of gastritis. Patient stated he went home and had a hot and sour soup and thereafter had 2 episodes of dark bowel movements and one episode of vomiting bright red blood. \n\nPatient then called his gastroenterologist and was told to come into the ED. 
\n\n \n\nED Course\n\nWhile in the ED the patient's course is notable for large volume hematemesis and hypotension 90/50's. Labs were notable for Hg at lactate of 5.3, BUN of 56, Hg of 11.4 (with previous baseline 03/2024 in 14.5) s/p 1U. GI evaluated the patient - s/p urgent EGD with findings of clotted blood seen in the stomach, bright red blood seen in the duodenal bulb, and 5mm cratered ulcer at the pre-pyloric area s/p 2 clips.\n\n \n\nAt time of exam patient denied recent fevers, chills, SOB, chest pain, abdominal pain. Patient stated he was having melena and hematemesis after the endoscopy and a large volume of hematemesis in the ED with associated nausea. He denied any current nausea at the time of evaluation. He also states that at home prior to these episodes today - he was not having melena or hematemesis - he states he's been in his normal state of health. Last use of ASA was a few days ago. \n\n \n\nHoag 1\n\n7/11 Cleared by GI to resume ASA, unheld cardiovascular meds. Hemoglobin remains stable. Diet was advanced. \n\n7/12 Due to having one episode of bright red blood per rectum and experiencing dizziness, cardiovascular meds were reheld. Hemoglobin is slowly declining but still above transfusion threshold. PM CBC stable. Was orthostatics positive, pending repeat. \n\n7/13 hemoglobin has stabilized, low suspicion for active bleeding at this time. Orthostatics positive in AM, pending repeat.\n\n \n\nProcedures/\nDisposition: Home/ Self Care\n\n \n\n \n\nHome Health Face-to-Face Encounter Certification, Name of Physician for Home Care Follow Up: Peter Spiegler, MD, Medical condition which is related to the primary reason the patient needs home care (Diagnosis: GI bleed and Debility related to hospitalization. 
The following clinical findings support that the patient is homebound due to the need for help of another person to leave home and that the patient needs intermittent skilled nursing and/or therapy New diagnosis ., Skilled Nursing Needs (perform, assess and/or instruct):, RN as needed for change in clinical status, PT, OT, Speech Therapy/Language Pathology (perform, assess and/or instruct):, PT and/or OT for weakness or deconditioning, Physician Documentation:, I, a licensed physician, had a face to face encounter with the above-named patient on 7/10/2024 for the above medical condition(s) which is related to the primary reason the patient needs home care., I certify that the patient is under my care and I have initiated the establishment of the home health plan of care and that this patient will be followed by a physician who will periodically review the plan of care., Additional Comments or Instructions:\n\n‚Äã"
'''

In [None]:
temp = chat_payload.copy() # copy from the one set from config
# Alternative user content for a quick check: 'Select a random number between 1 and 100,000'
temp.append({'role': 'user',
             'content': prompt.replace('{text}', note_text_test)})
gpt_result = request_gpt(temp, config) # supplying no third argument uses default parameters

# Reset so a failed call cannot leave a stale `result` from an earlier cell
# for the post-processing cell to pick up.
result = None
reason = None

choices = gpt_result.get('choices')
if not choices:
    print('bad payload')
else:
    if len(choices) > 1:
        print('more than one result')
    for c in choices:
        result = c.get('message').get('content')
        reason = c.get('finish_reason')
        print(reason)


In [None]:
postprocess_gpt_outputs(result)

#### Running GPT calls
Re-generate the patient-friendly discharge summaries from the discharge narratives whose demographic values have been changed.

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import os
import json
import re

In [16]:
counter_test = pd.read_csv('Perturbing/20250109_pertub_test.csv')

In [46]:
counter_test_short = counter_test.head(10).copy()

In [18]:
def gen_PF_sum_test(note_text, delay=20):
    """Generate a post-processed patient-friendly summary for one note.

    Parameters
    ----------
    note_text : str
        Note text substituted for the ``{text}`` placeholder in ``prompt``.
        Missing values (``None``/NaN), non-strings and blank strings are
        skipped without calling the API.
    delay : float, optional
        Seconds to pause after each request (default 20, as before).

    Returns
    -------
    The value of ``postprocess_gpt_outputs`` for the first choice's content,
    or ``None`` when the note is skipped, the request fails, or the reply has
    no usable content.  Returning ``None`` rather than raising lets a
    ``.apply`` over a whole column finish, so the ``isna()`` retry cells can
    fill the gaps later.
    """
    if not isinstance(note_text, str) or not note_text.strip():
        print('skipping empty or non-text note')
        return None

    # NOTE(review): only the prompt is sent here; the system prompt and few-shot
    # examples held in chat_payload are not included -- confirm this is intended.
    temp = [{'role': 'user', 'content': prompt.replace('{text}', note_text)}]
    try:
        gpt_result = request_gpt(temp, config)
    except requests.exceptions.RequestException as err:
        # One network failure must not abort the whole batch.
        print('request failed: {}'.format(err))
        gpt_result = {}
    time.sleep(delay)

    choices = gpt_result.get('choices') if isinstance(gpt_result, dict) else None
    if not choices:
        print('bad payload')
        return None

    # Only the first choice is used.
    first = choices[0]
    result = (first.get('message') or {}).get('content')
    print(first.get('finish_reason'))

    if result is None:
        print("No valid content in choices")
        return None

    return postprocess_gpt_outputs(result)


In [47]:
counter_test_short_list = counter_test_short['race_1_change'].to_list()

In [None]:
counter_output_test = gen_PF_sum_test(counter_test_short_list[1])

In [70]:
counter_output_test

{'answers': ['What brought me to the hospital?',
  'You came to the hospital because you were having seizures and feeling unwell. You also had a hard time doing your daily tasks.',
  'Why was I hospitalized?',
  'You were in the hospital because you have a glioma and seizure-like activity. A glioma is a type of brain tumor. Seizure-like activity means you were having seizures, which are sudden, uncontrolled electrical disturbances in the brain.',
  'What happened in the hospital?',
  'While in the hospital, you had more seizures. The doctors found a glioma in your brain using a special picture called an MRI. They also found that you have cerebral venous sinus thrombosis, which is a blood clot in the brain, and psychogenic nonepileptic seizures, which are seizures that are not caused by abnormal brain activity. The doctors stopped some of your seizure medicine and changed your blood thinner medicine. They also talked to you, your mom, and your other doctors about your care plan.',
  'Wh

In [None]:
counter_test['PF_race_bl_2'] = counter_test['race_eth'].apply(gen_PF_sum_test)

In [None]:
# Retry only the rows whose summary is still missing.
needs_summary = counter_test['PF_race_bl_2'].isna()
counter_test.loc[needs_summary, 'PF_race_bl_2'] = (
    counter_test.loc[needs_summary, 'race_eth'].apply(gen_PF_sum_test)
)

In [34]:
counter_test.to_csv('20250131_counter_bl_2.csv')

In [None]:
# Generate a summary for every 'race_3_change' note and store it in 'PF_race_3'.
# NOTE(review): counter_test3 is not created in any cell visible here -- confirm where it is defined.
counter_test3['PF_race_3'] = counter_test3['race_3_change'].apply(gen_PF_sum_test)

In [None]:
# Retry only the baseline rows whose summary is still missing.
# NOTE(review): counter_master is not created in any cell visible here -- confirm where it is defined.
needs_summary = counter_master['PF_race_bl'].isna()
counter_master.loc[needs_summary, 'PF_race_bl'] = (
    counter_master.loc[needs_summary, 'race_eth'].apply(gen_PF_sum_test)
)


In [None]:
# Retry only the 'race_1_change' rows whose summary is still missing.
needs_summary = counter_master['PF_race_1'].isna()
counter_master.loc[needs_summary, 'PF_race_1'] = (
    counter_master.loc[needs_summary, 'race_1_change'].apply(gen_PF_sum_test)
)

In [None]:
# Retry only the 'race_2_change' rows whose summary is still missing.
needs_summary = counter_master['PF_race_2'].isna()
counter_master.loc[needs_summary, 'PF_race_2'] = (
    counter_master.loc[needs_summary, 'race_2_change'].apply(gen_PF_sum_test)
)

In [None]:
# Retry only the 'race_3_change' rows whose summary is still missing.
needs_summary = counter_master['PF_race_3'].isna()
counter_master.loc[needs_summary, 'PF_race_3'] = (
    counter_master.loc[needs_summary, 'race_3_change'].apply(gen_PF_sum_test)
)

In [34]:
counter_master.to_csv('20250113_counter_master_complete.csv')

In [35]:
counter_gender = pd.read_csv('20250112counter_gender.csv')

In [None]:
# Retry only the gender-counterfactual rows whose summary is still missing.
needs_summary = counter_gender['PF_gender'].isna()
counter_gender.loc[needs_summary, 'PF_gender'] = (
    counter_gender.loc[needs_summary, 'gender_counter'].apply(gen_PF_sum_test)
)

In [79]:
def extract_answers(cell):
    """Return the ``'answers'`` entry of a GPT output cell, or ``None``.

    Accepts the output dict itself or its text form.  After a round trip
    through ``to_csv`` / ``read_csv`` a stored dict comes back as the string
    of its Python literal, which a plain ``isinstance(cell, dict)`` check
    would discard; such strings are parsed back first.

    Parameters
    ----------
    cell : object
        A dict, the string form of a dict, or anything else (e.g. NaN).

    Returns
    -------
    The value under ``'answers'``, or ``None`` when ``cell`` is not, and cannot
    be parsed into, a dict, or has no such key.
    """
    import ast  # local import: only needed for cells reloaded from CSV

    if isinstance(cell, str):
        try:
            # literal_eval only evaluates literals, so it is safe on file contents.
            cell = ast.literal_eval(cell)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return None

    if isinstance(cell, dict):
        return cell.get('answers')
    return None

In [80]:
# Replace each 'PF_gender' cell with whatever extract_answers pulls out of it.
# NOTE(review): counter_gender_short is not created in any cell visible here -- confirm where it is defined.
counter_gender_short['PF_gender'] = counter_gender_short['PF_gender'].apply(extract_answers)