## RAG Demo:
 A demo that loads PDF documents from an S3 bucket into a vector store (Amazon OpenSearch Service) using Amazon Bedrock embeddings, then answers questions with an LLM through the RetrievalQA chain provided by LangChain.

### 1. RAG
- Load Document (PDF, docx, text) from S3 
- Store into Vector store 
- QnA using LLM with RetrievalQA chain provided by LangChain

Ref: https://python.langchain.com/docs/use_cases/question_answering/

In [None]:
# Force-reinstall the AWS SDK packages (boto3 / awscli / botocore at the given
# minimum versions) together with the SageMaker SDK; -qqq keeps pip quiet.
# NOTE(review): the version floors are presumably the first releases that ship
# the `bedrock-runtime` client used later in this notebook — confirm.
%pip -qqq install --no-build-isolation --force-reinstall \
    "boto3>=1.28.57" \
    "awscli>=1.29.57" \
    "botocore>=1.31.57" \
    "sagemaker"

In [None]:
%pip install --quiet langchain==0.0.309

In [None]:
# Global LangChain debug switch; set it to True to enable debug mode.
import langchain
langchain.debug=False

In [None]:
import os

# Region used for every AWS client created in this notebook.
AWS_REGION = 'us-east-1'
os.environ['AWS_DEFAULT_REGION'] = AWS_REGION

# Optional explicit credentials. Leave the values blank to keep whatever the
# environment already provides (execution role, profile, exported variables).
# Fill them in only for a short-lived session and never commit real keys.
AWS_CREDENTIAL_OVERRIDES = {
    'AWS_ACCESS_KEY_ID': '',
    'AWS_SECRET_ACCESS_KEY': '',
    'AWS_SESSION_TOKEN': '',
}

# Export only the values that were actually filled in, so blank placeholders
# never overwrite working credentials that are already set in the environment.
for _name, _value in AWS_CREDENTIAL_OVERRIDES.items():
    if _value:
        os.environ[_name] = _value

## 1. Init LLM model (Llama-2 70b chat on Amazon Sagemaker JumpStart )
In this demo, we use the Llama-2 70b chat model as the LLM foundation model, hosted on an Amazon SageMaker JumpStart endpoint.

In [None]:
# define Sagemaker endpoint model name here
# This name is used both as the endpoint name when deploying the JumpStart
# model and as the endpoint the LLM wrapper invokes.
SAGEMAKER_LLM_MODEL_NAME = 'jumpstart-dft-meta-textgeneration-llama-2-70b-f'
# Name (not ARN) of the IAM role; the deploy cell looks up its ARN via IAM.
SAGEMAKER_IAM_ROLE_NAME = 'AmazonSageMaker-ExecutionRole' # please set IAM role


In [None]:
# deploy Sagemaker JumpStart (Please make sure the EC2 service quota is set)
from sagemaker.jumpstart.model import JumpStartModel
import boto3
# Resolve the role's ARN from the role name in SAGEMAKER_IAM_ROLE_NAME.
iam = boto3.client('iam')
role = iam.get_role(RoleName=SAGEMAKER_IAM_ROLE_NAME)['Role']['Arn']

# Deploy the JumpStart Llama-2 70b chat model on one instance, under the
# endpoint name that init_llm_sm_endpoint() later invokes.
# NOTE(review): this presumably creates a billable endpoint — remember to
# delete it when the demo is finished.
model = JumpStartModel(model_id='meta-textgeneration-llama-2-70b-f',role=role)
predictor = model.deploy(
    initial_instance_count=1, # number of instances
    endpoint_name=SAGEMAKER_LLM_MODEL_NAME
)

In [None]:
import json
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint

def init_llm_sm_endpoint():
    """Build the LangChain LLM wrapper for the SageMaker-hosted Llama-2 chat model.

    The endpoint name comes from SAGEMAKER_LLM_MODEL_NAME and the region from
    AWS_REGION. Generation parameters are fixed to max_new_tokens=1000 and
    temperature=0.1, and every request carries the `accept_eula=true` custom
    attribute.

    Returns:
        SagemakerEndpoint: an LLM object that can be called with a prompt or
        passed to a LangChain chain.
    """

    class ContentHandler(LLMContentHandler):
        """Convert between LangChain prompts and the Llama-2 chat JSON format."""

        content_type = "application/json"
        accepts = "application/json"

        # LLAMA-2 chat payload: one dialog made of a fixed system turn followed
        # by the user turn that carries the prompt.
        # (A Flan-T5 endpoint would instead send
        # {"text_inputs": prompt, **model_kwargs} and read the answer from
        # response['generated_texts'][0].)
        def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
            system_turn = {
                "role": "system",
                "content": "You are QnA bot to answer the questions based on the context. If it is not in the context, just reply you don't know.",
            }
            user_turn = {"role": "user", "content": prompt}
            payload = {
                "inputs": [[system_turn, user_turn]],
                "parameters": dict(model_kwargs),
            }
            return json.dumps(payload).encode('utf-8')

        def transform_output(self, output: bytes) -> str:
            # `output` is read through .read(), i.e. it is consumed as a
            # stream-like response body despite the `bytes` annotation.
            raw_body = output.read()
            generations = json.loads(raw_body.decode("utf-8"))
            return generations[0]["generation"]["content"]

    return SagemakerEndpoint(
        endpoint_name=SAGEMAKER_LLM_MODEL_NAME,
        region_name=AWS_REGION,
        model_kwargs={"max_new_tokens": 1000, "temperature": 0.1},
        content_handler=ContentHandler(),
        endpoint_kwargs={"CustomAttributes": "accept_eula=true"},
    )



In [None]:
# unit test for LLM
# Send one prompt straight to the endpoint (no retrieval involved); as the last
# expression of the cell, the reply is displayed as the cell output.
llm = init_llm_sm_endpoint()
text = "What would be a good AWS new service name that allow customers to chat with their own data? Keep answer short. Just give me one answer in your reponse"
llm(text)

## 2. Embedding (Amazon Bedrock Embeddings)
We use Amazon Bedrock Titan Embeddings as the embedding foundation model.


In [None]:
from langchain.embeddings import BedrockEmbeddings
import boto3

def init_eb_bedrock():
    """Create the Bedrock embedding model used to vectorise documents and queries.

    Looks up the ARN of the IAM role named 'Bedrock-role', assumes that role
    through STS, and builds a `bedrock-runtime` client in AWS_REGION from the
    temporary credentials.

    Returns:
        BedrockEmbeddings: embeddings object bound to that client and to the
        "amazon.titan-embed-text-v1" model.
    """
    # please set IAM role
    role_arn = boto3.client('iam').get_role(RoleName='Bedrock-role')['Role']['Arn']

    # Temporary credentials for the assumed role.
    temp_creds = boto3.client('sts').assume_role(
        RoleArn=role_arn,
        RoleSessionName="AssumeRoleSession1",
    )['Credentials']

    runtime_client = boto3.client(
        'bedrock-runtime',
        region_name=AWS_REGION,
        aws_access_key_id=temp_creds["AccessKeyId"],
        aws_secret_access_key=temp_creds["SecretAccessKey"],
        aws_session_token=temp_creds["SessionToken"],
    )

    # Alternative model id: "amazon.titan-embed-g1-text-02"
    return BedrockEmbeddings(
        client=runtime_client,
        region_name=AWS_REGION,
        model_id="amazon.titan-embed-text-v1",
    )



In [None]:
# unit test for embedding
# Embed two short strings, then print the model id, how many vectors came back
# and the first 10 components of the first vector.
bedrock_embeddings = init_eb_bedrock()
print(bedrock_embeddings.model_id)
embedding_vectors = bedrock_embeddings.embed_documents(['hello', 'world'])
print("len(embedding_vectors): ", len(embedding_vectors))
print("sample vector:\n",embedding_vectors[0][0:10])


## 3. Vector Store (Amazon Opensearch service)
We use Amazon OpenSearch Service as the vector store and load the S3 bucket documents (AWS Well-Architected Framework whitepapers) into it.

In [None]:
# S3 location of the source documents: bucket name and key prefix.
# Replace the <your_name> placeholder with your own bucket name.
S3_BUCKET_NAME = '<your_name>-genai-data'
S3_BUCKET_KEY = 'wa_whitepapers'

# OpenSearch domain endpoint (replace the placeholder) and the index that
# holds the embedded document chunks.
OPENSEARCH_URL = "https://search-vectorstore-<your_openserch>.us-east-1.es.amazonaws.com"
OPENSEARCH_VECTOR_INDEX = "rag-demo-aws-wa-whitepapers"


# get secret for OPENSEARCH_http_auth {"username": "xxx", "password": "xxx"}
# The secret is stored in AWS Secrets Manager under this name, as a JSON
# string with "username" and "password" keys.
secret_name = "vectorstore/opensearch/secret" 

session = boto3.session.Session()
client = session.client(
    service_name='secretsmanager',
    region_name=AWS_REGION
)
get_secret_value_response = client.get_secret_value(
    SecretId=secret_name
)
# Parse the JSON secret and build the (username, password) tuple that is
# passed as http_auth to the OpenSearch vector store.
secret = json.loads(get_secret_value_response['SecretString'])
OPENSEARCH_http_auth=(secret['username'], secret['password']) 

### 3.1 Prepare sample PDFs (AWS Well-Architected whitepapers) and upload to S3 

In [None]:
! mkdir -p ./wa_whitepapers
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/framework/wellarchitected-framework.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/reliability-pillar/wellarchitected-reliability-pillar.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/operational-excellence-pillar/wellarchitected-operational-excellence-pillar.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/performance-efficiency-pillar/wellarchitected-performance-efficiency-pillar.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/cost-optimization-pillar/wellarchitected-cost-optimization-pillar.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/security-pillar/wellarchitected-security-pillar.pdf
! curl -O --output-dir ./wa_whitepapers https://docs.aws.amazon.com/pdfs/wellarchitected/latest/sustainability-pillar/wellarchitected-sustainability-pillar.pdf

! aws s3 cp  ./wa_whitepapers s3://<'<your_name>-genai-data'>/wa_whitepapers/ --recursive


### 3.2 Load content from S3 Directory. Split into chunks

In [None]:
from langchain.document_loaders import S3DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Loader for every object under the configured bucket and key prefix.
loader = S3DirectoryLoader(S3_BUCKET_NAME, prefix=S3_BUCKET_KEY)

text_splitter = RecursiveCharacterTextSplitter(
    # Chunks of up to 1000 units with 200 units of overlap; with
    # length_function=len the unit is characters.
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
    is_separator_regex = False,
)

# Load the documents and split them into chunks in one step.
all_splits = loader.load_and_split(text_splitter)
print(f"Original: Number of document splits = {len(all_splits)}")

In [None]:
# Show the first two chunks to sanity-check the loader and splitter output.
print("sample split:\n", all_splits[0:2])

### 3.3 Embedding and Store into Vector Store

In [None]:
## use AOS
# Embed every chunk with the Bedrock embedding model and bulk-index the
# vectors into the OpenSearch index named by OPENSEARCH_VECTOR_INDEX.

from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection


# NOTE(review): within this cell `service` and `region` are referenced only by
# the commented-out AWS4Auth snippet below.
service = 'es' # must set the service as 'aoss' for Amazon OpenSearch Serverless
region = AWS_REGION


## for AWSAuth login ##
# import boto3
# credentials = boto3.Session(aws_access_key_id='xxxxxx',aws_secret_access_key='xxxxx').get_credentials()
# awsauth = AWS4Auth('xxxxx', 'xxxxxx', region,service, session_token=credentials.token)

# Authentication uses the username/password tuple read from Secrets Manager.
vectorstore = OpenSearchVectorSearch.from_documents(
    all_splits,
    init_eb_bedrock(), # embedding model
    opensearch_url=OPENSEARCH_URL,
    http_auth=OPENSEARCH_http_auth,
    timeout = 600,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection,
    index_name=OPENSEARCH_VECTOR_INDEX,
    engine="faiss",
    bulk_size=10000
)




In [None]:
# unit test
# Connect to the already-populated index (nothing is re-indexed here) and run
# a plain similarity search to check that retrieval works.

from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection
vectorstore = OpenSearchVectorSearch(
    opensearch_url=OPENSEARCH_URL,
    index_name=OPENSEARCH_VECTOR_INDEX,
    embedding_function=init_eb_bedrock(),
    http_auth=OPENSEARCH_http_auth,
    timeout = 600,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection,
    engine="faiss"
)


question = "What is AWS Well Architected Framework?"
# question='Stop guessing your capacity needs in General design principle'
# Ask for up to 20 nearest chunks.
docs = vectorstore.similarity_search(question, k=20)
print(f"Vector search: Number of document related to the question = {len(docs)}")
# NOTE(review): indexing docs[1] and docs[3] assumes at least four hits came
# back; a smaller result would raise IndexError.
print('sample result:\n', docs[1])
print('sample result:\n', docs[3])


## 4. QnA the content using RetrievalQA chain
QA using a Retriever

In [None]:
from langchain.chains import RetrievalQA

def find_metadata_sources_from_documents(documents):
    """Return the distinct 'source' metadata values of the given documents.

    Documents whose `metadata` has no 'source' key are skipped. Duplicates are
    removed while the order of first appearance is kept.

    Args:
        documents: iterable of objects exposing a `metadata` mapping.

    Returns:
        list: unique source values, in first-seen order.
    """
    tagged_sources = (
        doc.metadata['source'] for doc in documents if 'source' in doc.metadata
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(tagged_sources))

# Wire the SageMaker LLM and the OpenSearch retriever into a RetrievalQA chain.
llm = init_llm_sm_endpoint()
# retreiver = vectorstore.as_retriever(search_kwargs={'k': 20})
# NOTE(review): 'score_threshold' is presumably only honoured with
# search_type="similarity_score_threshold"; with "similarity" it may be
# ignored — confirm against the LangChain retriever documentation.
retreiver = vectorstore.as_retriever(search_type="similarity", search_kwargs={'k': 6, 'score_threshold': 0.8})
# retreiver = vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
# return_source_documents=True makes the result carry 'source_documents',
# which the prints below use to list where the answer came from.
qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                    #    chain_type="stuff", #refine, map_reduce
                                       retriever=retreiver,
                                       return_source_documents=True)

#unit test
question = "What is AWS Well Architected Framework? summarize in 100 words"
result = qa_chain({"query": question})
print("answer:\n", result['result'])
print("\nsource:\n", find_metadata_sources_from_documents(result['source_documents']))



In [None]:
# Keep LangChain debug mode off for the following questions.
langchain.debug=False

In [None]:
# Question about a specific design principle, with a style instruction
# appended for the LLM.
question = "Stop guessing your capacity needs in General design principle? explain to a 5-year-old kid"

# Run the chain, then print the answer and the de-duplicated source list.
result = qa_chain({"query": question})
print("answer:\n", result['result'])
print("\nsource:\n", find_metadata_sources_from_documents(result['source_documents']))

In [None]:
# Question that are not related to the documents
question = "Why Siu mei is the best Dim sum in Hong Kong?"

result = qa_chain({"query": question})
print("answer:\n", result['result'])
print("\nsource:\n", find_metadata_sources_from_documents(result['source_documents']))




In [None]:
# Turn LangChain debug mode on for the next question.
langchain.debug=True

In [None]:
# Question with a "think step by step" instruction prepended to the query.
question = "Think step by step. What is the multi-AZ?"

# Run the chain, then print the answer and the de-duplicated source list.
result = qa_chain({"query": question})
print("answer:\n", result['result'])
print("\nsource:\n", find_metadata_sources_from_documents(result['source_documents']))