## Option 1 - SQL Query Based Self Managed RAG with Aurora Vector Database

Prerequisites before you run these scripts:
1. Deploy an Aurora PostgreSQL Cluster with RDS Data API enabled
2. Create the vector db schema, table & index using self-managed/1_build_vector_db_on_aurora.sql
3. Note the cluster ARN from the Aurora PostgreSQL Cluster
4. Note the secret Key ARN for the Aurora cluster database username/password.
5. Create a secret key for the database user app_user (used for RLS)
   

#### Install the required libraries (boto3, pypdf, langchain, langchain-community).

In [None]:
# Install the notebook's dependencies into the running kernel's environment.
# boto3, langchain and langchain-community are pinned to exact versions;
# pypdf is left unpinned.
%pip install -U boto3==1.34.84
%pip install pypdf
%pip install langchain==0.2.7
%pip install langchain-community==0.2.3


### Restart the Kernel

In [None]:
# Restart the kernel so the packages installed above are picked up.
# The restart is requested by rendering a <script> tag in the cell output.
# NOTE(review): this relies on a `Jupyter` JavaScript object being available
# in the notebook front-end - confirm it works in your environment, otherwise
# restart the kernel manually.
from IPython.core.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

### Imports and clients: bedrock-runtime and rds-data

In [None]:
import boto3
import json
import uuid
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

# AWS region shared by every client created in this notebook.
region_name = "us-west-2"

# Bedrock runtime client: used for embeddings and for LLM invocation.
bedrock_runtime = boto3.client("bedrock-runtime", region_name=region_name)

# RDS Data API client: used to run SQL against the Aurora cluster.
rdsData = boto3.client("rds-data", region_name=region_name)

# Aurora connection settings. Replace the placeholders before running.
db_name = "postgres"
cluster_arn = "<update aurora cluster arn>"

# Secret holding the credentials of the `postgres` user.
secret_arn = "<update postgres secret ARN>"

# Secret holding the credentials of `app_user` (the user used for RLS queries).
secret_arn_rls = "<update app_user secret ARN>"


### Function to generate vector embeddings

In [None]:
def generate_vector_embeddings(data):
    """Return the Titan embedding for *data*.

    Sends *data* as ``inputText`` to the ``amazon.titan-embed-text-v1`` model
    through the module-level ``bedrock_runtime`` client and returns the
    ``embedding`` entry of the JSON response (``None`` if the key is absent).
    """
    request_payload = json.dumps({"inputText": data})

    raw_response = bedrock_runtime.invoke_model(
        modelId="amazon.titan-embed-text-v1",
        contentType="application/json",
        accept="application/json",
        body=request_payload,
    )

    # The response body is a stream; read it once and decode the JSON.
    parsed = json.loads(raw_response["body"].read())
    return parsed.get("embedding")


### Function to insert vector embeddings into vector database

In [None]:
def insert_into_vector_db(embedding, chunk, metadata, tenantid):
    """Insert one chunk and its embedding into ``self_managed.kb``.

    A fresh UUID is generated for the row id. All values travel as string
    parameters through the RDS Data API and are cast inside the SQL statement;
    *metadata* is JSON-encoded and sent with a ``JSON`` type hint.

    Returns the raw ``execute_statement`` response.
    """
    insert_sql = "INSERT INTO self_managed.kb(id, embedding, chunks, metadata, tenantid) VALUES (:id::uuid,:embedding::vector,:chunks, :metadata, :tenantid::varchar(10))"

    sql_parameters = [
        {"name": "id", "value": {"stringValue": str(uuid.uuid4())}},
        {"name": "embedding", "value": {"stringValue": str(embedding)}},
        {"name": "chunks", "value": {"stringValue": chunk}},
        {
            "name": "metadata",
            "value": {"stringValue": json.dumps(metadata)},
            "typeHint": "JSON",
        },
        {"name": "tenantid", "value": {"stringValue": tenantid}},
    ]

    # Runs with the `postgres` user's secret.
    return rdsData.execute_statement(
        resourceArn=cluster_arn,
        secretArn=secret_arn,
        database=db_name,
        sql=insert_sql,
        parameters=sql_parameters,
    )



### Function to query the vector database

In [None]:
def query_vector_database(embedding):
    """Return the 5 rows of ``self_managed.kb`` closest to *embedding*.

    Rows are ordered with the ``<=>`` operator against the query vector. The
    statement runs with the `postgres` user's secret and applies no tenant
    filter of its own.

    Returns the raw ``execute_statement`` response.
    """
    return rdsData.execute_statement(
        resourceArn=cluster_arn,
        secretArn=secret_arn,
        database=db_name,
        sql="SELECT id,metadata,chunks FROM self_managed.kb ORDER BY embedding <=> :embedding::vector LIMIT 5; ",
        parameters=[
            {"name": "embedding", "value": {"stringValue": str(embedding)}},
        ],
    )


### Function to query the vector database using RLS

In [None]:
# Query the vector database as `app_user`, scoped to a single tenant.
# `<=>` is pgvector's cosine-distance operator.
def query_vector_database_using_rls(embedding, tenantid):
    """Return the 5 closest rows of ``self_managed.kb`` visible to *tenantid*.

    Inside one RDS Data API transaction, run with the ``app_user`` secret:
      1. set the ``self_managed.kb.tenantid`` session parameter to *tenantid*
         (presumably read by the table's row-level-security policy, which is
         defined outside this notebook - verify against the SQL setup script);
      2. run the similarity query.

    The transaction is committed on success and rolled back if either
    statement fails, after which the original exception is re-raised.

    Returns the raw ``execute_statement`` response of the SELECT.
    """
    paramSet = [{"name": "embedding", "value": {"stringValue": str(embedding)}}]

    # The tenant id is inlined into the SET statement as a double-quoted
    # value. Double any embedded quote so the value cannot terminate the
    # quoting and append SQL of its own.
    quoted_tenantid = str(tenantid).replace('"', '""')
    query = "SET self_managed.kb.tenantid =\"" + quoted_tenantid + "\""
    print(query)

    tr = rdsData.begin_transaction(
        resourceArn=cluster_arn,
        secretArn=secret_arn_rls,
        database=db_name,
    )
    transaction_id = tr["transactionId"]

    try:
        rdsData.execute_statement(
            resourceArn=cluster_arn,
            secretArn=secret_arn_rls,
            database=db_name,
            sql=query,
            transactionId=transaction_id,
        )

        response = rdsData.execute_statement(
            resourceArn=cluster_arn,
            secretArn=secret_arn_rls,
            database=db_name,
            sql='SELECT id,tenantid,metadata,chunks FROM self_managed.kb ORDER BY embedding <=> :embedding::vector LIMIT 5; ',
            parameters=paramSet,
            transactionId=transaction_id,
        )
    except Exception:
        # Do not leave the transaction open when a statement fails.
        rdsData.rollback_transaction(
            resourceArn=cluster_arn,
            secretArn=secret_arn_rls,
            transactionId=transaction_id,
        )
        raise

    rdsData.commit_transaction(
        resourceArn=cluster_arn,
        secretArn=secret_arn_rls,
        transactionId=transaction_id,
    )

    return response


### Function to Invoke Anthropic Claude LLM on Bedrock

In [None]:
def generate_message(bedrock_runtime, model_id, system_prompt, messages, max_tokens):
    """Invoke a Bedrock model with an Anthropic messages-style request.

    Args:
        bedrock_runtime: client object exposing ``invoke_model``.
        model_id: identifier of the model to invoke.
        system_prompt: text sent as the ``system`` field.
        messages: list sent as the ``messages`` field.
        max_tokens: value sent as the ``max_tokens`` field.

    Returns:
        The JSON-decoded response body.
    """
    request = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "system": system_prompt,
        "messages": messages,
    }
    payload = json.dumps(request)

    reply = bedrock_runtime.invoke_model(modelId=model_id, body=payload)

    # The body is a stream; read it fully before decoding.
    raw_body = reply.get("body").read()
    return json.loads(raw_body)

def invoke_llm_with_rag(messages):
    """Send *messages* to Claude 3 Sonnet on Bedrock and return the decoded reply.

    Uses the module-level ``bedrock_runtime`` client, an empty system prompt
    and a limit of 300 output tokens.
    """
    return generate_message(
        bedrock_runtime,
        'anthropic.claude-3-sonnet-20240229-v1:0',
        "",
        messages,
        300,
    )

### Function to generate vector embeddings and insert into vector db


In [None]:
def insert_tenant_document(file_name, tenantid):
    """Embed the PDF at *file_name* and store its chunks for *tenantid*.

    The document is loaded with ``PyPDFLoader`` and split into chunks of up to
    10000 characters with a 150-character overlap. Each chunk is embedded and
    inserted into the vector database, with *file_name* stored as the row's
    metadata.

    Returns a fixed success message.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=150)
    pages = PyPDFLoader(file_name).load()

    for piece in splitter.split_documents(pages):
        text = piece.page_content
        vector = generate_vector_embeddings(text)
        insert_into_vector_db(vector, text, file_name, tenantid)

    return "Embeddings inserted successfully!"

### Step 1: Generate the vector embeddings for Tenant1 and insert into Vector database. 

In [None]:
# Source PDF for Tenant1; the same path is stored as each row's metadata.
file_name = "../multi_tenant_survey_reports/Home_Survey_Tenant1.pdf"

# Splitter producing chunks of up to 10000 characters with a 150-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=150)

# Read the PDF and cut it into chunks.
loader = PyPDFLoader(file_name)
doc = loader.load()
chunks = text_splitter.split_documents(doc)

# Embed every chunk and store it under tenant id "Tenant1".
for chunk in chunks:
    embedding = generate_vector_embeddings(chunk.page_content)
    insert_response = insert_into_vector_db(
        embedding, chunk.page_content, file_name, "Tenant1"
    )

print("Embeddings inserted successfully!")

### Step 2: Review the vector embeddings stored in the Vector Database

In [None]:
# Fetch up to five stored rows (id, metadata, chunk text) for a quick visual check.
response = rdsData.execute_statement(
    resourceArn=cluster_arn,
    secretArn=secret_arn,
    database=db_name,
    sql="SELECT id,metadata,chunks FROM self_managed.kb LIMIT 5; ",
)

print(response)

### Step 3: Run a user query using the vector embedding. 
Review the results from the query 

In [None]:
# Define the user question, embed it, and look up the 5 closest rows in the
# vector database. `question` and `query_response` are reused by the cells
# below to build the augmented prompt.
question = "What is the condition of the roof in my survey report?"
embedding = generate_vector_embeddings(question)
query_response = query_vector_database(embedding)
print(query_response)

### Step 4: Augment the prompt with the context data from the vector database

In [None]:
def get_contexts(retrievalResults):
    """Flatten RDS Data API records into a list of their string values.

    Args:
        retrievalResults: the ``records`` list of an ``execute_statement``
            response - a list of rows, each row a list of field dicts.

    Returns:
        The ``stringValue`` of every field that has one, in row order then
        column order. Fields without a ``stringValue`` key (for example a
        NULL column, returned as ``{"isNull": True}``) are skipped rather
        than raising ``KeyError``.
    """
    return [
        field['stringValue']
        for record in (retrievalResults or ())
        for field in record
        if 'stringValue' in field
    ]

# Collect the string values of the retrieved rows.
contexts = get_contexts(query_response['records'])

# The f-string renders the prompt right here: `contexts` is interpolated as
# the printed form of the list, and `question` as-is.
prompt = f"""
Human: Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{contexts}
</context>
Question: {question}
Assistant:
"""

### Step 5: Invoke the LLM with the augmented prompt

In [None]:
# `prompt` was already rendered by its f-string, so it is sent as-is. Running
# str.format() over it again would fail or garble the text whenever a
# retrieved chunk contains "{" or "}".
messages = [{"role": 'user', "content": [{'type': 'text', 'text': prompt}]}]
llm_response = invoke_llm_with_rag(messages)
print(llm_response['content'][0]['text'])

### Step 6: Add more tenants and their documents
For each tenant generate the vector embeddings of the document and insert the embeddings into the vector database. 

In [None]:
# Ingest the survey reports of Tenant2 through Tenant5, one PDF per tenant.
for tenant_number in range(2, 6):
    insert_response = insert_tenant_document(
        f"../multi_tenant_survey_reports/Home_Survey_Tenant{tenant_number}.pdf",
        f"Tenant{tenant_number}",
    )
    print(insert_response)

### Step 7: Run the same user Query against the vector database
Review the query response data, particularly the source document recorded in `metadata`, which identifies the tenant. You will observe that the query fetched related data chunks from many tenants. The question now is how to implement tenant isolation so that a tenant-specific question retrieves data only from that tenant's document.

In [None]:
# Define the query data and convert it to vector embeddings to query from the vector database
question = "What is the condition of the roof in my survey report?"
embedding = generate_vector_embeddings(question)
query_response = query_vector_database(embedding)
print(query_response)

### Step 8 : Run the same user query now using RLS
Review the query_vector_database_using_rls() function that uses RDS Data API and implements row level security to restrict the query to fetch only tenant specific data from the vector db. 

In [None]:
# Define the query data and convert it to vector embeddings to query from the vector database
question = "What is the condition of the roof ?"
embedding = generate_vector_embeddings(question)
#print(embedding)
query_response = query_vector_database_using_rls(embedding, "Tenant3")
print(query_response)

### Step 9 - Augment the retrieved data into the prompt

In [None]:
# Collect the string values of the rows returned by the tenant-scoped query.
contexts = get_contexts(query_response['records'])

# The f-string renders the prompt right here: `contexts` is interpolated as
# the printed form of the list, and `question` as-is.
prompt = f"""
Human: Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{contexts}
</context>
Question: {question}
Assistant:
"""

### Step 10: Invoke the LLM with the augmented prompt
Now the response from the LLM will be based on the context data of the specific tenant. This helps implement tenant isolation when retrieving data from the vector database for generative AI use cases.

In [None]:
# `prompt` was already rendered by its f-string, so it is sent as-is. Running
# str.format() over it again would fail or garble the text whenever a
# retrieved chunk contains "{" or "}".
messages = [{"role": 'user', "content": [{'type': 'text', 'text': prompt}]}]
llm_response = invoke_llm_with_rag(messages)
print(llm_response['content'][0]['text'])