# Prepare-Rewrite-Retrieve RAG flow

In this notebook we will explore the data retrieval and generation process of the **prepare-then-rewrite-then-retrieve-then-read** framework proposed by the authors of ["Meta Knowledge for Retrieval Augmented Large Language Models"](https://www.amazon.science/publications/meta-knowledge-for-retrieval-augmented-large-language-models) for creating more accurate and enriched RAG workflows.

## Pre-requisites

To run this notebook, the role executing it needs:

* Permissions to invoke Bedrock
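* Access to the Amazon Bedrock models configured under *Execution Variables* (by default Amazon Nova Lite, `us.amazon.nova-lite-v1:0`, and Amazon Titan Text Embeddings V2, `amazon.titan-embed-text-v2:0`)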
* Write permissions to the DynamoDB table created with the CDK stack in this PoC
* Write permissions to the OpenSearch Serverless host created with the CDK stack in this PoC
* Having executed the [DataIndexingWithInfra.ipynb](./DataIndexingWithInfra.ipynb) notebook

Additionally, we need the following Python packages:

In [None]:
!pip install -U boto3 langchain langchain-aws opensearch-py dotenv

In [2]:
import os
import re
import logging
import json
import secrets
import time
import boto3
import langchain_core

from dotenv import load_dotenv

from pydantic import BaseModel, Field
from typing import Literal

from enum import Enum
from PyPDF2 import PdfReader
from botocore.exceptions import ClientError
from langchain_aws import ChatBedrockConverse

from prompts.dataRetrieval.generate_query_augmentation_prompts import get_query_augmentation_prompt_selector, get_structured_questions_prompt_selector
from prompts.dataRetrieval.generate_qa_kb_prompts import get_kb_qa_prompt_selector
from structured_output.answers import Answer
from structured_output.questions import Questions

from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_aws.embeddings import BedrockEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

In [None]:
# Root logger for the notebook; no level or handlers are configured here.
logger = logging.getLogger()
# Keep LangChain's global debug mode switched off.
langchain_core.globals.set_debug(False)
# Load variables from a .env file, if one is found, into the process
# environment (get_opensearch_connection later reads AWS_REGION from it).
load_dotenv()

### Type definitions

In [4]:
# Customize according to the types of document to be processed by the application
class DocumentTypes(Enum):
    """Categories of documents the application can process.

    Each member's value is the human-readable label of the category.
    The enum is not referenced by the other cells shown in this notebook.
    """

    SYSTEM_ARCHITECTURE = "systems architecture"
    SECURITY = "information technology security"
    DATA_GOVERNANCE = "data governance"
    TECH_STRATEGY = "tech strategy"
    MANAGEMENT = "management"

class AnalysisPerspectives(Enum):
    """Perspectives from which a persona can analyse the knowledge base.

    The member values are the strings stored in ``AnalysisPersonas`` and
    passed on as the ``perspective`` argument of the helper functions.
    """

    # NOTE(review): this value reads like a persona name rather than a
    # perspective (compare the other members) - confirm it matches the value
    # used when the data was indexed before changing it.
    SECURITY = "software security engineer"
    DATA_GOVERNANCE = "data governance"
    RESILIENCY = "systems resiliency"
    SYS_OPS = "systems operations"

# Persona definition for generating and answering QA
# Maps a persona name to:
#   "description":  short free-text description of the persona's responsibility
#   "perspectives": list of AnalysisPerspectives values available to the persona;
#                   the first entry is the one selected as ANALYSIS_PERSPECTIVE
AnalysisPersonas = {
    "software security engineer": {
        "description": "It is responsible for ensuring that workloads have the necessary security controls in place",
        "perspectives": [AnalysisPerspectives.SECURITY.value, AnalysisPerspectives.DATA_GOVERNANCE.value]
    },
    "solutions architect": {
        "description": "It is responsible for designing scalable and cost-efficient software solutions",
        "perspectives": [AnalysisPerspectives.RESILIENCY.value, AnalysisPerspectives.DATA_GOVERNANCE.value, AnalysisPerspectives.SECURITY.value]
    },
    "software developer": {
        "description":"Implements the system functionalities",
        "perspectives": [AnalysisPerspectives.SYS_OPS.value, AnalysisPerspectives.RESILIENCY.value, AnalysisPerspectives.SECURITY.value]
    }
}

### Execution Variables

In [5]:
# Bedrock model ids: the chat model used for query augmentation and answering,
# and the embeddings model invoked by encode_text.
BEDROCK_MODEL_ID = "us.amazon.nova-lite-v1:0"
EMBEDDINGS_MODEL_ID="amazon.titan-embed-text-v2:0"

# NOTE: the <...> values below are placeholders, not valid Python. Replace each
# one with a quoted string before running this cell.
OPENSEARCH_HOST = <OPENSEARCH_HOST> #URL (without the protocol) of the OpenSearch Serverless Host
OPENSEARCH_PORT = 443 #Port of the OpenSearch Serverless Host
# NOTE(review): the name is misspelled ("OPERNSEARCH"), but the retrieval cell
# further down references it with this exact spelling.
OPERNSEARCH_INDEX_NAME = <OPENSEARCH_INDEX_NAME> #Name of the OpenSearch Serverless Index

METAKB_DYNAMODB_TABLE_NAME = <METAKB_DYNAMODB_TABLE_NAME> #Name of the DynamoDB table created with the CDK stack in this PoC

In [None]:
# Persona used for the whole run; must be one of the keys of AnalysisPersonas.
ANALYSIS_PERSONNA = "solutions architect"
# Use the first perspective listed for that persona.
ANALYSIS_PERSPECTIVE = AnalysisPersonas[ANALYSIS_PERSONNA]["perspectives"][0]

print(f"Using persona: {ANALYSIS_PERSONNA}")
print(f"Using perspective: {ANALYSIS_PERSPECTIVE}")

## Create clients for AWS services

In [7]:
# One shared boto3 session so that every AWS client/resource created in this
# cell resolves credentials and region in the same way.
botoSession = boto3.Session()

# DynamoDB table read by get_existing_summary.
meta_kb_table = botoSession.resource("dynamodb").Table(METAKB_DYNAMODB_TABLE_NAME)
# Bedrock runtime client used by encode_text to call the embeddings model;
# created from the shared session rather than boto3's implicit default one.
bedrock_runtime = botoSession.client("bedrock-runtime")

## Helper functions

In [8]:
def encode_text(
        text: str = None,  # the text to be encoded
        dimension: int = 1024,  # output vector size: 1024 (default), 512 or 256
):
    """Return the embedding vector of ``text`` computed by the embeddings model.

    The request is sent to ``EMBEDDINGS_MODEL_ID`` through the module-level
    ``bedrock_runtime`` client, asking for a normalized vector.

    Args:
        text: Text to embed. Must be a non-empty string.
        dimension: Requested size of the output vector, sent as ``dimensions``.

    Returns:
        The ``embedding`` field of the model's JSON response.

    Raises:
        ValueError: If ``text`` is ``None`` or empty.
    """
    # Fail early with a clear message instead of sending a null/empty input
    # to the service.
    if not text:
        raise ValueError("text must be a non-empty string")

    payload_body = {
        "inputText": text,
        "dimensions": dimension,
        "normalize": True
    }

    response = bedrock_runtime.invoke_model(
        body=json.dumps(payload_body),
        modelId=EMBEDDINGS_MODEL_ID,
        accept="application/json",
        contentType="application/json"
    )

    # The response body is a stream holding the JSON document.
    return json.loads(response["body"].read())["embedding"]

def qa_chatbot_answer(
    user_query,
    role,
    perspective,
    context
):
    """Answer ``user_query`` with the LLM, grounded on the supplied context.

    Args:
        user_query: Question to answer; passed to the prompt as ``question``.
        role: Persona on whose behalf the answer is written.
        perspective: Perspective the answer should take.
        context: Text handed to the prompt as ``context``.

    Returns:
        The output of the structured-output chain built on ``Answer``.
    """
    llm = ChatBedrockConverse(
        model=BEDROCK_MODEL_ID,
        temperature=0.4,
        max_tokens=1000,
    )

    # Pick the English QA prompt that matches the configured model.
    prompt = get_kb_qa_prompt_selector(lang="en").get_prompt(BEDROCK_MODEL_ID)

    chain = prompt | llm.with_structured_output(Answer)

    prompt_inputs = {
        "question": user_query,
        "role": role,
        "perspective": perspective,
        "context": context
    }
    return chain.invoke(prompt_inputs)

def get_opensearch_connection(
        host: str,
        port: int,
) -> OpenSearch:
    """Create an OpenSearch client that signs requests with AWS SigV4.

    The region is taken from the ``AWS_REGION`` environment variable when it
    is set; otherwise the region boto3 resolves for the session is used.
    Requests are signed for the ``aoss`` service.

    Args:
        host: OpenSearch host name, without the protocol.
        port: Port of the OpenSearch endpoint.

    Returns:
        An ``OpenSearch`` client using SSL, certificate verification and a
        30 second timeout.

    Raises:
        ValueError: If no AWS region can be resolved.
    """
    # Build the session once and reuse it for both the region and the
    # credentials, so the two always come from the same configuration.
    session = boto3.Session(region_name=os.getenv("AWS_REGION"))
    region = session.region_name
    if not region:
        raise ValueError(
            "No AWS region configured: set AWS_REGION or configure a default region"
        )

    auth = AWSV4SignerAuth(session.get_credentials(), region, "aoss")

    return OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
        timeout=30,
    )

def get_existing_summary(
        user,
        perspective
):
    """Return the stored summary for a user/perspective pair, if there is one.

    The item is looked up in ``meta_kb_table`` under the key
    ``"<user>-<perspective>"``.

    Args:
        user: Persona name forming the first part of the key.
        perspective: Perspective forming the second part of the key.

    Returns:
        The item's ``summary`` attribute, or ``""`` when no item exists.

    Raises:
        ClientError: If the DynamoDB request itself fails.
    """
    summary_key = f"{user}-{perspective}"

    print("Trying to get summary")
    print(f"summary key: {summary_key}")

    # Only the service call can raise ClientError, so keep the try body minimal.
    try:
        response = meta_kb_table.get_item(
            Key={
                "summary_key": summary_key,
            }
        )
    except ClientError as ex:
        # A failed request is not the same as a missing summary (that case
        # returns "" below), so report it as a read failure.
        print(f"Could not read summary for {summary_key}: {ex}")
        raise

    item = response.get("Item")
    if item is None:
        return ""
    return item["summary"]

def search_knowledge_base(
    index_name,
    embedding,
    oss_client,
    persona,
    perspective,
    k=3
):
    """Retrieve question/answer pairs from the knowledge base for an embedding.

    Runs a k-NN query on the ``embedding`` field of ``index_name`` and
    restricts the hits to the given persona and perspective.

    Args:
        index_name: Name of the index to search.
        embedding: Query vector.
        oss_client: Client exposing ``search(index=..., body=...)``.
        persona: Value the ``persona`` field must match.
        perspective: Value the ``perspective`` field must match.
        k: Used both as the number of neighbours and as the result size.

    Returns:
        A list of ``(question, answer)`` tuples, one per hit.
    """
    print(f"Looking for data for {persona} and {perspective}")

    nearest_neighbours = {
        "knn": {
            "embedding": {
                "vector": embedding,
                "k": k,
            }
        }
    }
    # NOTE(review): being a post_filter, this restriction is applied to the
    # hits selected by the k-NN query, so fewer than k pairs may come back -
    # confirm this is intended.
    persona_filter = {
        "bool": {
            "filter": [
                {"term": {"persona": persona}},
                {"term": {"perspective": perspective}}
            ]
        }
    }
    search_request = {
        "size": k,
        # The stored vectors are not needed in the response.
        "_source": {"exclude": ["embedding"]},
        "query": nearest_neighbours,
        "post_filter": persona_filter,
    }

    response = oss_client.search(index=index_name, body=search_request)

    print("The results")
    print(response)

    return [
        (hit["_source"]["question"], hit["_source"]["answer"])
        for hit in response["hits"]["hits"]
    ]

def augment_user_query(
        role,
        user_query,
        mk_summary,
):
    """Expand the user query into several queries using the meta-knowledge summary.

    Args:
        role: Persona on whose behalf the question is asked.
        user_query: The original question.
        mk_summary: Meta-knowledge summary text given to the prompt.

    Returns:
        The output of the structured-output chain built on ``Questions``.
    """
    llm = ChatBedrockConverse(
        model=BEDROCK_MODEL_ID,
        temperature=0.4,
        max_tokens=2000,
    )

    # Pick the English query-augmentation prompt that matches the configured model.
    prompt = get_query_augmentation_prompt_selector(lang="en").get_prompt(BEDROCK_MODEL_ID)

    chain = prompt | llm.with_structured_output(Questions)

    prompt_inputs = {
        "role": role,
        "mk_summary": mk_summary,
        "user_query": user_query
    }
    return chain.invoke(prompt_inputs)

## Question answering workflow with RAG

In [9]:
# The original user question; it is augmented first and answered at the end.
QUERY = "What is the purpose of the multi-agent compliance analysis project?"

### Query augmentation

In this step the original query is augmented using the meta-knowledge summary for the persona-perspective combination

In [None]:
# Stored summary for the selected persona/perspective ("" when none exists).
meta_kb_summary = get_existing_summary(ANALYSIS_PERSONNA, ANALYSIS_PERSPECTIVE)

In [11]:
# Ask the LLM to derive additional queries from the original one and the summary.
augmented_queries = augment_user_query(
    role=ANALYSIS_PERSONNA,
    user_query=QUERY,
    mk_summary=meta_kb_summary
)

In [None]:
# Show the inputs of the augmentation step next to the queries it produced.
print("Augmenting the query:")
print(QUERY)

print("\n\nExisting summary:")
print(meta_kb_summary)

print("\n\nResulting queries:")
for question in augmented_queries.questions:
    print(question)

### Question answering

We can now retrieve information from the knowledge base using the augmented queries rather than the original query. The retrieved Q&A pairs will then be passed to the LLM as context for answering the question.

In [13]:
# SigV4-authenticated client for the OpenSearch Serverless host configured above.
opensearch_client = get_opensearch_connection(OPENSEARCH_HOST, OPENSEARCH_PORT)

In [None]:
# qa_str will hold the final context text; qa_pairs collects every
# (question, answer) tuple retrieved for any of the augmented queries.
qa_str = ""
qa_pairs = []

for query in augmented_queries.questions:
    
    # Embed each augmented query and retrieve up to 3 matching Q&A pairs for it.
    embedding = encode_text(text=query)
    retrieved_qa_pairs = search_knowledge_base(
        oss_client=opensearch_client,
        index_name=OPERNSEARCH_INDEX_NAME,
        embedding=embedding,
        persona=ANALYSIS_PERSONNA,
        perspective=ANALYSIS_PERSPECTIVE,
        k=3
    )

    # NOTE(review): pairs are appended as returned, so a pair matched by more
    # than one query appears more than once - confirm whether that is intended.
    qa_pairs.extend(retrieved_qa_pairs)

Looking at the results, we can see that the retrieved information is both more comprehensive and more fine-grained, thanks to indexing Q&A pairs rather than chunks.

In [None]:
# Print every retrieved pair; index 0 is the question, index 1 the answer.
for qa_pair in qa_pairs:
    print(f"Question: {qa_pair[0]}")
    print(f"Answer: {qa_pair[1]}\n\n")

### Answering the original query with the context from augmented queries

We use the retrieved information from the augmented queries as context to answer the original question

In [16]:
# Concatenate the retrieved pairs into one context string. An explicit ""
# separator is used instead of qa_str.join(...): joining on the previous value
# of qa_str would insert the old context between every pair whenever this cell
# is run a second time.
qa_str = "".join(f"Question:{qa[0]}\nAnswer:{qa[1]}\n\n" for qa in qa_pairs)

# Answer the original (non-augmented) question using that context.
answer = qa_chatbot_answer(
    user_query=QUERY,
    role=ANALYSIS_PERSONNA,
    perspective=ANALYSIS_PERSPECTIVE,
    context=qa_str
)

In [None]:
# Show the original question followed by the text of the generated answer.
print(QUERY)
print(answer.answer)

In [None]:
# Display the complete structured answer object as the cell output.
answer