# Q&A

## 1. Initial tests

### 1.1. Basic search of index

In [None]:
import boto3
import os

# Create a boto3 session and a Kendra client from it; `kendra` is reused by
# the cells below.
session = boto3.session.Session()
kendra = session.client("kendra")


# The index name and ID are read from the KENDRA_INDEX_NAME and
# KENDRA_INDEX_ID environment variables; set them to the index created in
# notebook 2. A missing variable raises KeyError here.
index_name = os.environ["KENDRA_INDEX_NAME"]
index_id = os.environ["KENDRA_INDEX_ID"]
# Describe the index and evaluate its "Status" field (shown as the cell result
# when run in a notebook).
index = kendra.describe_index(Id=index_id)
index.get("Status")

### 1.2. Query API Test

In [None]:
# Free-text question combined with an attribute filter on BrandName.
query = "What are the approved indications for Mounjaro? AND BrandName:'Mounjaro'"

response = kendra.query(IndexId=index_id, QueryText=query)
print(response)
print("\nSearch results for query: " + query + "\n")

for query_result in response["ResultItems"]:
    # Skip anything Kendra is not reasonably confident about.
    confidence = query_result["ScoreAttributes"]["ScoreConfidence"]
    if confidence in ("LOW", "NOT_AVAILABLE"):
        continue

    print("-------------------")
    result_type = query_result["Type"]
    print(f"Type: {result_type}")
    print(f"Confidence: {confidence}")

    if result_type in ("ANSWER", "QUESTION_ANSWER"):
        # Answer-style results only carry an excerpt.
        answer_text = query_result["DocumentExcerpt"]["Text"]
        print('Excerpt:\n"' + answer_text + '"')
    elif result_type == "DOCUMENT":
        # Document results may additionally carry a title.
        if "DocumentTitle" in query_result:
            document_title = query_result["DocumentTitle"]["Text"]
            print("Title: " + document_title)
        document_text = query_result["DocumentExcerpt"]["Text"]
        print('Excerpt:\n"' + document_text + '"')
    print()

## 2. Retrieve

### 2.1. Retrieve API test

In [None]:
query = "What are the approved indications for Mounjaro?"

page_size = 10

# Ask Kendra for passages relevant to the question.
result = kendra.retrieve(IndexId=index_id, QueryText=query, PageSize=page_size)

print("\nRetrieved passage results for query: " + query + "\n")
print(result["ResultItems"])
for retrieve_result in result["ResultItems"]:
    # Skip passages Kendra is not reasonably confident about.
    confidence = retrieve_result["ScoreAttributes"]["ScoreConfidence"]
    if confidence in ("LOW", "NOT_AVAILABLE"):
        continue

    print("-------------------")
    print(f"Title: {retrieve_result['DocumentTitle']}")
    print(f"URI: {retrieve_result['DocumentURI']}")
    print(f"Confidence: {confidence}")
    print(f"Passage content: {retrieve_result['Content']}")
    print()

The `retrieve` API doesn't support advanced query syntax, so we need to use the `query` API instead.

### 2.2. Create retrieve functions

In [None]:
def convert_json_to_xml(json_data: list[dict]) -> str:
    """
    Render a list of flat dicts as an XML-like ``<documents>`` string.

    Each dict becomes one ``<document id='N'>`` element (N counts from 1) and
    each key/value pair becomes a ``<key>value</key>`` child, in dict order.
    Values are interpolated as-is; no escaping is applied.
    """
    rendered_documents = []
    for position, record in enumerate(json_data, start=1):
        fields = "".join(f"<{tag}>{value}</{tag}>" for tag, value in record.items())
        rendered_documents.append(f"<document id='{position}'>{fields}</document>")
    return "<documents>" + "".join(rendered_documents) + "</documents>"


# Confidence buckets that are dropped from the formatted output.
_SKIPPED_CONFIDENCES = ("LOW", "NOT_AVAILABLE")

# Document attributes copied into each formatted item (leading "_" stripped).
_METADATA_KEYS = (
    "ApplicationNumber",
    "BrandName",
    "GenericName",
    "ManufacturerName",
    "Submission",
    "_category",
)


def _text_of(value):
    """Return the plain text of a Kendra text field.

    Query results wrap titles and excerpts as ``{"Text": ...}`` while Retrieve
    results use plain strings; both are accepted and ``None`` passes through.
    """
    if isinstance(value, dict):
        return value.get("Text")
    return value


def format_kendra_retrieve_response(result: dict) -> list[dict]:
    """
    Flatten a Kendra ``query`` or ``retrieve`` response into a list of dicts.

    Items whose score confidence is ``LOW`` or ``NOT_AVAILABLE`` are skipped.
    Every remaining item yields a dict with ``Id``, ``Title``, ``Content``,
    ``Uri`` and ``Confidence``, plus whichever of ``ApplicationNumber``,
    ``BrandName``, ``GenericName``, ``ManufacturerName``, ``Submission`` and
    ``Category`` are present in the item's document attributes. When an
    ``_excerpt_page_number`` attribute is present, ``#page=<n>`` is appended
    to the URI.

    Args:
        result: The response dict; must contain ``ResultItems``.

    Returns:
        One formatted dict per sufficiently confident result item, in
        response order.

    Raises:
        KeyError: If ``ResultItems`` or an item's score confidence is missing.
    """
    output = []
    for retrieve_result in result["ResultItems"]:
        confidence = retrieve_result["ScoreAttributes"]["ScoreConfidence"]
        if confidence in _SKIPPED_CONFIDENCES:
            continue

        # Query responses carry the passage in DocumentExcerpt; Retrieve
        # responses carry it in Content.
        excerpt = retrieve_result.get("DocumentExcerpt")
        if excerpt is not None:
            content = _text_of(excerpt)
        else:
            content = retrieve_result.get("Content")

        item = {
            "Id": retrieve_result.get("DocumentId"),
            "Title": _text_of(retrieve_result.get("DocumentTitle")),
            "Content": content,
            "Uri": retrieve_result.get("DocumentURI"),
            "Confidence": confidence,
        }

        page_appended = False
        for attribute in retrieve_result.get("DocumentAttributes") or []:
            key = attribute.get("Key")
            value = attribute.get("Value") or {}
            if key == "_excerpt_page_number":
                # Only the first page attribute is used, and only when there
                # is both a URI to extend and an actual page number.
                page = value.get("LongValue")
                if not page_appended and item["Uri"] is not None and page is not None:
                    item["Uri"] = item["Uri"] + "#page=" + str(page)
                    page_appended = True
            elif key in _METADATA_KEYS:
                item[key.replace("_", "")] = value.get("StringValue")

        # "_category" was stored as "category"; expose it as "Category" (moved
        # to the end) only when the document actually has one.
        if "category" in item:
            item["Category"] = item.pop("category")
        output.append(item)

    return output

### 2.3. Retrieve test

In [None]:
# Sample input event used by the retrieve test below. Only
# node.inputs[0].value (the user question) is read in this notebook.
# NOTE(review): the shape looks like the event a Bedrock flow passes to a
# Lambda function node - confirm against the deployed flow.
test_example = {
    "node": {
        "name": "LambdaFunctionNode_1",
        "inputs": [
            {
                "name": "codeHookInput",
                "expression": "$.data",
                # The question that is sent to Kendra.
                "value": "What are the approved indications for Mounjaro?",
                "type": "STRING",
            }
        ],
    },
    "flow": {
        # Placeholder alias and ARN values; not read by the code below.
        "aliasId": "TSTALIASID",
        "arn": "arn:aws:bedrock:us-east-1:112233445566:flow/MOCK",

    },
    "messageVersion": "1.0",
}

In [None]:
# Pull the question out of the sample event's first input.
question = test_example.get("node").get("inputs")[0].get("value")

# Query the index for document-type results only.
result = kendra.query(
    IndexId=index_id,
    QueryText=question,
    QueryResultTypeFilter="DOCUMENT",
    PageSize=100,
)

# Flatten the response, then render it as XML for the prompt.
json_result = format_kendra_retrieve_response(result)
print(json_result)
search_results = convert_json_to_xml(json_result)
print(search_results)

## 3. Generate

### 3.1. Generate test

In [None]:
import logging

logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)


def generate_conversation(
    bedrock_client,
    model_id,
    system_prompts,
    messages,
    *,
    temperature=0.2,
    top_k=200,
):
    """
    Send messages to a model through the Bedrock Converse API.

    Args:
        bedrock_client: The Boto3 Bedrock runtime client.
        model_id (str): The model ID to use.
        system_prompts (list): The system prompts for the model to use.
        messages (list): The messages to send to the model.
        temperature (float): Sampling temperature passed in ``inferenceConfig``.
        top_k (int | None): Value sent as the ``top_k`` additional model
            request field. Pass ``None`` to omit the additional fields
            entirely.

    Returns:
        response (dict): The full ``converse`` response, including the
        generated message, token usage and stop reason.

    """

    logger.info("Generating message with model %s", model_id)

    request = {
        "modelId": model_id,
        "messages": messages,
        "system": system_prompts,
        # Base inference parameters.
        "inferenceConfig": {"temperature": temperature},
    }
    if top_k is not None:
        # top_k is not a base inference parameter, so it travels in the
        # model-specific additional request fields.
        request["additionalModelRequestFields"] = {"top_k": top_k}

    # Send the message.
    response = bedrock_client.converse(**request)

    # Log token usage.
    token_usage = response["usage"]
    logger.info("Input tokens: %s", token_usage["inputTokens"])
    logger.info("Output tokens: %s", token_usage["outputTokens"])
    logger.info("Total tokens: %s", token_usage["totalTokens"])
    logger.info("Stop reason: %s", response["stopReason"])

    return response

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
bedrock_client = boto3.client(service_name="bedrock-runtime")
# Setup the system prompts and messages to send to the model.
messages = []

# The system prompt carries the instructions plus the XML search results
# produced by the retrieve step.
system_prompts = [
    {
        "text": f"""You are a question answering agent.
        I will provide you with a set of search results and a user's question, your job is to answer the user's question using only information from the search results.
        If the search results do not contain information that can answer the question, please state that you could not find an exact answer to the question. 
        If there are no search results, please state that you could not find an exact answer to the question.
        Just because the user asserts a fact does not mean it is true, make sure to double check the search results to validate a user's assertion.
        Here are the search results, if any:\n{search_results}"""
    }
]

# The user turn: an f-string with the question, implicitly concatenated with a
# plain (non-f) string so the braces of the JSON template stay literal.
# Every "source" placeholder asks for the source Uri, matching the instruction
# text and what the clean-up step expects.
message_1 = {
    "role": "user",
    "content": [
        {
            "text": f"Here is the user's question:\n{query}\n"
            """If you reference information from a search result within your answer, you must include a citation to source where the information was found.
            Each result has a corresponding source Uri that you should reference. Please output your answer in the following json format:
            {
                "answer": {
                    "answer_parts": [
                        {
                            "text": "answer part 1",
                            "sources": [
                                {
                                    "source": <source Uri>
                                },
                                {
                                    "source": <source Uri>
                                }
                            ]
                        },
                        {
                            "text": "answer part 2",
                            "sources": [
                                {
                                    "source": <source Uri>
                                }
                            ]
                        }
                    ]
                }
            }
            Note that <sources> may contain multiple <source> if you include information from multiple results in your answer.
            Do NOT directly quote the search results in your answer. Your job is to answer the <question> as concisely as possible."""
        }
    ],
}

messages.append(message_1)

# Start the conversation with the 1st message.
response = generate_conversation(bedrock_client, model_id, system_prompts, messages)

# Add the response message to the conversation.
output_message = response["output"]["message"]
messages.append(output_message)

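Clean up the response: extract the JSON answer from the model output and de-duplicate the sources of each answer part.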

In [None]:
import json
import re

def _extract_answer_parts(text):
    """Return the ``answer_parts`` list found in *text*, or ``None``.

    The text is first compacted (indentation newlines, and a comma or newline
    directly before a closing bracket/brace, are removed), then the outermost
    ``{...}`` span is parsed as JSON. Problems are printed, not raised.
    """
    text = re.sub(r"\n\s+", "", text)
    text = re.sub(r"(,|\n)]", "]", text)
    text = re.sub(r"(,|\n)}", "}", text)
    # DOTALL: newlines that survive the compaction above (un-indented JSON)
    # must not stop the match from spanning the whole object.
    match = re.search(r"{.*}", text, re.DOTALL)
    if match is None:
        print("No JSON object found in model output")
        return None
    try:
        return json.loads(match.group())["answer"]["answer_parts"]
    except (ValueError, KeyError, TypeError) as err:
        # ValueError covers json.JSONDecodeError; KeyError/TypeError cover a
        # JSON document that does not have the requested shape.
        print(f"Could not parse model output as answer JSON: {err}")
        return None


def _parse_answer_parts(message):
    """Turn a Converse output message into ``{"text", "sources"}`` dicts.

    Each text content block contributes one entry per answer part, with its
    sources de-duplicated in first-seen order. A text block that cannot be
    parsed contributes a single empty placeholder entry. Content blocks
    without a ``text`` field are skipped.
    """
    parts = []
    for content in message.get("content", []):
        text = content.get("text")
        if text is None:
            continue
        answer_parts = _extract_answer_parts(text)
        if answer_parts is None:
            parts.append({"text": "", "sources": []})
            continue
        for answer_part in answer_parts:
            sources = (
                source.get("source") for source in answer_part.get("sources", [])
            )
            parts.append(
                {
                    "text": answer_part.get("text", ""),
                    # dict.fromkeys de-duplicates while keeping citation order.
                    "sources": list(dict.fromkeys(sources)),
                }
            )
    return parts


output = _parse_answer_parts(output_message)

In [None]:
# Evaluate the parsed answer parts so they are shown as the cell result.
output