In [2]:
import boto3
import os
from botocore.exceptions import ClientError
import json

In [None]:
os.environ["AWS_PROFILE"] = "profile_name"
os.environ["AWS_DEFAULT_REGION"] = "us-west-2"

In [None]:
sts = boto3.client("sts")
BEDROCK_ROLE_ARN = "role_arn"
sts.assume_role(
    RoleArn=BEDROCK_ROLE_ARN,
    RoleSessionName="session-name",
)

In [None]:
TABLE = "diseases-attributes"
dynamodb = boto3.client("dynamodb")

# grab index 114
try:
    response = dynamodb.get_item(
        TableName=TABLE,
        Key={"csdi_code": {"S": "114"}},
    )
    print(response["Item"])
except Exception as e:
    print(e)
    print("Error getting item from table")
    raise e

In [6]:
def get_csdi_objects(codes: list[str]) -> list[dict]:
    try:
        response = dynamodb.batch_get_item(
            RequestItems={
                TABLE: {
                    "Keys": [{"csdi_code": {"S": code}} for code in codes],
                },
            },
        )
        return response["Responses"][TABLE]
    except ClientError as e:
        print(e)
        print("Error getting items from table")
        raise e

In [None]:
get_csdi_objects(["114", "115", "116"])

In [None]:
# BEDROCK_MODEL = "amazon.titan-text-lite-v1"
BEDROCK_MODEL = "amazon.titan-text-express-v1"

DELIMITER = "***"

STRUCTURING_PROMPT = f"""
The following JSON objects have information about particular conditions, 
including associated disorders, medications, and observations/symptoms. 

For each JSON object representing a condition, describe a patient with 
the condition. Follow the general format:

"The patient has <disease-name>. They have <disorders> and have been 
observed to experience <observations / symptoms>. They are currently 
taking the medications <medications>."

Replace the <...> with the appropriate information from the JSON object.

Delimit each response with {DELIMITER} and nothing else. Don't return anything else; 
just the responses. Be sure to return one response per code.

"""
bedrock = boto3.client("bedrock-runtime")

DEBUG = False
MAX_ATTEMPTS = 3


def construct_semantic_context(code_objs: list[dict]) -> dict:
    try:
        full_prompt = STRUCTURING_PROMPT + "\n\n" + json.dumps(code_objs)
        if DEBUG:
            print("Prompting Bedrock with:")
            print(full_prompt)
        native_request = {
            "inputText": full_prompt,
            "textGenerationConfig": {
                "maxTokenCount": 512,
                "temperature": 0.1,
                "topP": 0.9,
            },
        }
        request = json.dumps(native_request)
        print("Making Bedrock API request")
        response = bedrock.invoke_model(
            modelId=BEDROCK_MODEL,
            body=request,
        )
        print("Bedrock response received")
        body = json.loads(response["body"].read())
        print(response)

        input_tokens = response["ResponseMetadata"]["HTTPHeaders"][
            "x-amzn-bedrock-input-token-count"
        ]
        output_tokens = response["ResponseMetadata"]["HTTPHeaders"][
            "x-amzn-bedrock-output-token-count"
        ]
        print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")

        code_docs = [
            i.strip()
            for i in body["results"][0]["outputText"].split(DELIMITER)
            if i.strip()
        ]
        return code_docs
    except ClientError as e:
        print(e)
        print("Error invoking bedrock endpoint")
        raise e


def construct_semantic_context_retryable(code_objs: list[dict]) -> dict:
    attempts = 0
    while attempts < MAX_ATTEMPTS:
        try:
            docs = construct_semantic_context(code_objs)
            if len(docs) == len(code_objs):
                return docs
            else:
                attempts += 1
                print(
                    f"Attempt {attempts} failed; {len(docs)} docs returned for {len(code_objs)} codes"
                )
        except ClientError as e:
            attempts += 1
            print(f"Attempt {attempts} failed because of an error {e}")
    print("Max attempts reached")
    raise Exception("Max attempts reached")

In [None]:
csdi_codes = ["114", "115", "116"]
code_docs = construct_semantic_context_retryable(get_csdi_objects(csdi_codes))
code_docs

In [77]:
code_docs

['The patient has [End stage renal disease in the context of End stage renal disease [disorder] ();Dependence on hemodialysis [finding] ();Continuous renal replacement therapy [procedure] ()] and has been observed to experience fatigue, anemia, edema, nausea, vomiting, decreased appetite, muscle cramps, itching, bone pain, and cognitive impairment. They are currently taking erythropoietin, calcitriol, phosphate binders, antihypertensive medications (e.g., ACE inhibitors, ARBs), diuretics, anticoagulants (e.g., heparin, warfarin), and protamine sulfate.',
 'The patient has [Severe allergic reaction to protamine sulfate in the context of Protamine sulfate [substance] ();Protamine allergy [disorder] ()] and has been observed to experience anaphylaxis, hypotension, bronchospasm, urticaria, angioedema, nausea, vomiting, diarrhea, and abdominal pain. They are currently taking epinephrine, antihistamines, and corticosteroids.',
 'The patient has [Severe allergic reaction after previous dose o

In [60]:
def embed_doc(doc: str) -> str:
    """
    Takes a document and embeds it using a Bedrock embedding model.
    """
    EMBED_MODEL = "amazon.titan-embed-text-v2:0"
    try:
        native_request = {
            "inputText": doc,
            "dimensions": 1024,
            "normalize": True,
            "embeddingTypes": ["binary"],
        }
        request = json.dumps(native_request)
        response = bedrock.invoke_model(
            modelId=EMBED_MODEL,
            body=request,
            accept="application/json",
            contentType="application/json",
        )
        vec = json.loads(response["body"].read())["embeddingsByType"]["binary"]
        return vec
    except ClientError as e:
        print(e)
        print("Error invoking bedrock endpoint")
        raise e

In [61]:
def embed_csdi_codes(codes: list[str]) -> list[int]:
    """
    Embeds the documents for the given CSDI codes.
    """
    csdi_objects = get_csdi_objects(codes)
    print("Number of objects:", len(csdi_objects))
    code_docs = construct_semantic_context(csdi_objects)
    print(code_docs)
    return [embed_doc(doc) for doc in code_docs]

In [None]:
codes = ["114", "115", "116"]
vec = embed_csdi_codes(codes)
vec

In [67]:
import numpy as np

embeds = np.array(vec)
embeds

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 1, 1, 0],
       [0, 1, 0, ..., 0, 1, 1],
       ...,
       [0, 1, 0, ..., 1, 1, 1],
       [1, 1, 0, ..., 1, 1, 1],
       [0, 1, 0, ..., 1, 1, 1]], shape=(8, 1024))