## How to use?

Install the libraries below from a terminal in JupyterLab, then restart this notebook.
1. `pip install awscli==1.32.4` 
2. `pip install boto3==1.34.3`

You will need the following credentials in order to run this notebook:
```
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["BEDROCK_ASSUME_ROLE"] = ""  
os.environ["BEDROCK_ENDPOINT_URL"] = ""
```
Once you have set these environment variables, you can push your data and run the Bedrock model to get the results.

Run the 00 notebook first if you want to pull data from the DB. Because DB queries are not supported directly without the config, the 00 notebook needs to be run before this one.

In [None]:
from bedrock import get_bedrock_client
import os
import numpy as np
import pandas as pd
import shutil
import glob
from tqdm import tqdm
import ast
import json

from omegaconf import OmegaConf

from saiva.model.shared.load_raw_data import fetch_training_data, fetch_training_cache_data
from saiva.model.shared.database import DbEngine
from saiva.model.shared.constants import LOCAL_TRAINING_CONFIG_PATH
from saiva.training.utils import load_config
from saiva.model.shared.utils import get_client_class
# from saiva.model.shared.constants import CLIENT
from eliot import to_file, log_message
# Route eliot log messages to this notebook's stdout.
# `sys` is imported here because it is missing from the import list above;
# without it the `to_file(sys.stdout)` call raises NameError.
import sys

to_file(sys.stdout)


# Client name constant, defined locally in place of the commented-out
# `from saiva.model.shared.constants import CLIENT` import.
CLIENT = "avante"

In [None]:
# Fill in your Bedrock credentials here before running the cells below.
# Every value is an empty placeholder; all four variables are (over)written
# in the current process environment.

os.environ.update({
    "AWS_ACCESS_KEY_ID": "",
    "AWS_SECRET_ACCESS_KEY": "",
    "BEDROCK_ASSUME_ROLE": "",
    "BEDROCK_ENDPOINT_URL": "",
})

In [None]:
# Load the local training config so we can connect to the DB.
# Its `training_config` section carries the `organization_configs` that are
# iterated later to build the per-organization DB connections.

config = load_config(LOCAL_TRAINING_CONFIG_PATH)
# Keep a direct handle on the training section; the rest of `config` is not used here.
training_config = config.training_config

In [None]:
# Pull progress notes for avante from the client SQL Server database.
# The client itself is selected by the 00 notebook config files, not in this cell.
#
# For every organization config this cell:
#   1. opens the DB engines and verifies connectivity,
#   2. lists the non-deleted SNF facilities,
#   3. loads the patient -> master patient id mapping for those facilities,
#   4. loads the progress notes created inside the date window and joins the
#      master patient id / MRN onto them.
#
# NOTE(review): `mpIds` and `notes_df` are reassigned on every iteration, so with
# more than one organization config only the last organization's data is kept.

# Loop-invariant settings, hoisted out of the loop.
client = "avante"  # previously `"avante",` -- the trailing comma made this a 1-tuple
train_start_date = "2023-01-01"
test_end_date = "2023-12-01"
facilities_query = "SELECT FacilityID FROM view_ods_facility WHERE LineOfBusiness = 'SNF' AND Deleted='N'"

for organization_config in training_config.organization_configs:
    engine = DbEngine()
    saiva_engine = engine.get_postgresdb_engine()
    client_sql_engine = engine.get_sqldb_engine(
        db_name=organization_config.datasource.source_database_name,
        credentials_secret_id=organization_config.datasource.source_database_credentials_secret_id,
        query={"driver": "ODBC Driver 17 for SQL Server"}
    )

    # verify connectivity
    engine.verify_connectivity(client_sql_engine)

    facility_ids = pd.read_sql(facilities_query, con=client_sql_engine)["FacilityID"].tolist()
    if not facility_ids:
        # An empty list would render as `IN ()`, which is invalid SQL; fail with a clear message instead.
        raise ValueError(
            "No active SNF facilities found in view_ods_facility for database "
            f"{organization_config.datasource.source_database_name!r}"
        )
    # Comma-separated id list that is interpolated into the IN (...) clauses below.
    facilities = ",".join(str(facility_id) for facility_id in facility_ids)

    mpid_query = f'''
                SELECT DISTINCT patientid, facilityid, masterpatientid, allergies, patientmrn
                FROM view_ods_facility_patient
                WHERE facilityid IN ({facilities}) and ((patientdeleted is NULL or patientdeleted='N') and 
                (masterpatientdeleted  is NULL or masterpatientdeleted='N'))
                '''
    mpIds = pd.read_sql(mpid_query, con=client_sql_engine)

    notes_query = f"""select patientid, facilityid, progressnoteid, progressnotetype, createddate, effectivedate, sectionsequence,
                        section, notetextorder, notetext, highrisk, showon24hr, showonshift
                        from view_ods_progress_note
                        where createddate between '{train_start_date}' and '{test_end_date}'
                        and facilityid in ({facilities})
                """
    notes_df = pd.read_sql(notes_query, con=client_sql_engine)
    # Inner join (pandas default): notes without a matching patient row are dropped.
    notes_df = notes_df.merge(mpIds[['facilityid', 'patientid', 'masterpatientid', 'patientmrn']],
                    on=["patientid", "facilityid"])


In [None]:
# Filter the data here so we can use it to feed directly to GenAI model

# Parse the creation timestamps in place so they can be compared against dates.
notes_df['createddate'] = pd.to_datetime(notes_df['createddate'])

# Both bounds are exclusive and are compared as midnight timestamps.
# NOTE(review): `< '2023-10-31'` leaves out notes created during 2023-10-31 itself --
# confirm whether the whole of October was intended.
in_window = (notes_df['createddate'] > '2023-10-01') & (notes_df['createddate'] < '2023-10-31')

# Progress note types that are passed on to the model.
notes_type = ['eMAR-Medication Administration Note', 'Encounter', '* General NURSING Note', 'Infection Note', 'Skilled Nursing Note',
              'eMar - Shift Level Administration Note']
is_wanted_type = notes_df['progressnotetype'].isin(notes_type)

# Take an explicit copy: a new column is assigned to `filtered_df` later, and assigning
# to a boolean-mask slice of `notes_df` triggers pandas' SettingWithCopyWarning.
filtered_df = notes_df[in_window & is_wanted_type].copy()

## BedRock API

In [None]:
# Create the Bedrock client for the us-east-1 region. get_bedrock_client returns a
# pair; only the first item is kept (it is the client used for invoke_model below),
# the second one is discarded.
boto3_bedrock, _ = get_bedrock_client(region="us-east-1")

In [None]:
def generate_text(text,
                  max_tokens_to_sample=300,
                  temperature=0.5,
                  top_k=250,
                  top_p=0.5,
                  stop_sequences=None,
                  model_id='anthropic.claude-v2',
                  ):
    """
    Send a prompt to a Bedrock text-completion model and return its completion.

    The request is issued through the module-level ``boto3_bedrock`` client.
    If the request parameters change in the future, test the new payload
    against the model before changing this function.

    Args:
        text: Fully formatted prompt, sent as the ``prompt`` field.
        max_tokens_to_sample: Sent as ``max_tokens_to_sample``.
        temperature: Sent as ``temperature``.
        top_k: Sent as ``top_k``.
        top_p: Sent as ``top_p``.
        stop_sequences: List sent as ``stop_sequences``; ``None`` (the default)
            means an empty list.
        model_id: Bedrock model identifier passed as ``modelId``.

    Returns:
        The ``completion`` field of the JSON response body, or ``None`` when
        the call or the response parsing fails (the error is logged via eliot).
    """
    # Build a fresh list per call instead of sharing one mutable default object.
    if stop_sequences is None:
        stop_sequences = []

    try:
        body = json.dumps({
            "prompt": text,
            "max_tokens_to_sample": max_tokens_to_sample,
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
            "stop_sequences": stop_sequences,
        })

        response = boto3_bedrock.invoke_model(
            body=body,
            modelId=model_id,
            accept='application/json',
            contentType='application/json',
        )
        # The response body is a stream; read it fully and decode the JSON payload.
        response_body = json.loads(response.get('body').read())
        return response_body.get('completion')
    except Exception as e:
        # Deliberate best effort: one failed note must not abort a whole batch run.
        log_message(
            message_type='error',
            message=f'EXCEPTION: Error in connecting to Bedrock API. {e}'
        )
        return None

In [None]:
# ##############################################################################################################
# The prompt is the most important item here. If you do not tweak it for your use case you will not get good
# results. Update the prompt below as needed, but do not commit those changes: this prompt is only an example.
# What is a good prompt anyway?
#  A good prompt has the following details
#    - Assign a role to your model such as ML engineer, SNF Nurse etc
#    - Provide examples of what you want the model to find, do or take care of while doing the task
#    - Create some rules that your model can follow to get you the desired output
#
# Format: the Claude text-completion API expects the prompt to start with "\n\nHuman:" and to end with
# "\n\nAssistant:" (no quotes around the markers, no indentation before them, no trailing whitespace).
# The markers are therefore kept as separate string pieces so the indented body cannot break them.
# `{progress_notes}` is the single placeholder filled in with str.format.
# ##############################################################################################################

prompt = (
    "\n\nHuman:"
    """
            Suppose you are a nurse at a Skilled Nursing Facility(SNF) and you take care of many patients at a facility.
            Our main objective is to find words that can lead to indication of missing medication order. For example:
            
            1. "Epoetin Alfa Solution 10000 UNIT/ML Inject 10000 unit subcutaneously one time a day every Tue, Thu, Sat for anemia
            on order not available" - here keyword is `order not available`
            2. order on hold, per MD - here keyword is 'order on hold'
            So our main goal is to collect such keywords. These are just examples so please try to use your knowledge to find similar keywords
            Below is the progress note that you have to read and find any keywords that can we can use to catch such missing medications.
            <text>
                {progress_notes}
            </text>
            Your output should be a list of keywords or keywor example - ['order on hold'] or ['order on hold', `order not available`]
            If there are not keywords then output should be only and only []. Do not write any text apart from empty list when no keywords.
            Stricly obey the rules for giving the output. Do not write anything."""
    "\n\nAssistant:"
)

In [None]:
# Please update the following code block in order to define how you want to collect the results from model.
#
# One entry is appended to `results` per row of `filtered_df`, in row order:
#   - the raw completion returned by generate_text (None if the Bedrock call failed),
#   - None when the note text is missing (no request is sent for it),
#   - [] when building the prompt or calling the model raised unexpectedly.
results = []

# Only the note text is needed, so iterate the column directly instead of using iterrows().
for note in filtered_df['notetext']:
    if pd.isna(note):
        # A missing note would otherwise be formatted into the prompt as "nan"/"None".
        results.append(None)
        continue

    try:
        result = generate_text(prompt.format(progress_notes=note))
    except Exception as ex:
        # Log through eliot, consistent with generate_text, rather than printing.
        log_message(
            message_type='error',
            message=f'EXCEPTION: Error while generating keywords for a note. {ex}'
        )
        result = []
    results.append(result)

In [None]:
# Assigning the results as column in DF
# `results` was filled with exactly one entry per row of `filtered_df`, in iteration
# order, so the list lines up positionally with the rows.
filtered_df['keywords'] = results

In [None]:
# storing it back as CSV for post run analysis
# The file is written relative to the current working directory; index=False
# leaves the DataFrame index out of the output.
filtered_df.to_csv("filtered_notes_with_genai.csv", index=False)