# RAG Based on HyDE
- Hybrid Search
- ReRanker
- HyDE (Hypothetical Document Embeddings)

## Setting
 - Auto Reload
 - path for utils

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
module_path = "../../.."
sys.path.append(os.path.abspath(module_path))

## 1. Bedrock Client 생성

In [3]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from utils import bedrock, print_ww
from utils.bedrock import bedrock_info

### ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----
- os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
- os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
- os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
- os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."

In [4]:
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)

aws_region = os.environ.get("AWS_DEFAULT_REGION", None)
print (colored("\n== FM lists ==", "green"))
pprint (bedrock_info.get_list_fm_models())

Create new client
  Using region: None
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-west-2.amazonaws.com)
[32m
== FM lists ==[0m
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Claude-V2-1': 'anthropic.claude-v2:1',
 'Claude-V3-5-Sonnet': 'anthropic.claude-3-5-sonnet-20240620-v1:0',
 'Claude-V3-Haiku': 'anthropic.claude-3-haiku-20240307-v1:0',
 'Claude-V3-Opus': 'anthropic.claude-3-sonnet-20240229-v1:0',
 'Claude-V3-Sonnet': 'anthropic.claude-3-sonnet-20240229-v1:0',
 'Cohere-Embeddings-En': 'cohere.embed-english-v3',
 'Cohere-Embeddings-Multilingual': 'cohere.embed-multilingual-v3',
 'Command': 'cohere.command-text-v14',
 'Command-Light': 'cohere.command-light-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text

## 2. Titan Embedding 및 LLM 인 Claude-v3 모델 로딩

### LLM 로딩 (Claude-v3)

In [5]:
from langchain_aws import ChatBedrock
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [6]:
llm_text = ChatBedrock(
    beta_use_converse_api=False,
    model_id=bedrock_info.get_model_id(model_name="Claude-V3-5-Sonnet"),
    client=boto3_bedrock,
    model_kwargs={
        "max_tokens": 1024,
        "stop_sequences": ["\n\nHuman"],
    },
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)
llm_text

ChatBedrock(callbacks=[<langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at 0x7f9e962a3fa0>], client=<botocore.client.BedrockRuntime object at 0x7f9e711baf80>, model_id='anthropic.claude-3-5-sonnet-20240620-v1:0', model_kwargs={'max_tokens': 1024, 'stop_sequences': ['\n\nHuman']}, streaming=True)

### Embedding 모델 선택

In [7]:
from utils.rag import KoSimCSERobertaContentHandler, SagemakerEndpointEmbeddingsJumpStart

In [8]:
def get_embedding_model(is_bedrock_embeddings, is_KoSimCSERobert, aws_region, endpont_name=None):
    
    if is_bedrock_embeddings:
        # We will be using the Titan Embeddings Model to generate our Embeddings.
        from langchain_aws import BedrockEmbeddings
        llm_emb = BedrockEmbeddings(
            client=boto3_bedrock,
            model_id=bedrock_info.get_model_id(
                model_name="Titan-Text-Embeddings-V2"
            )
        )
        print("Bedrock Embeddings Model Loaded")

    elif is_KoSimCSERobert:
        LLMEmbHandler = KoSimCSERobertaContentHandler()
        endpoint_name_emb = endpont_name
        llm_emb = SagemakerEndpointEmbeddingsJumpStart(
            endpoint_name=endpoint_name_emb,
            region_name=aws_region,
            content_handler=LLMEmbHandler,
        )        
        print("KoSimCSERobert Embeddings Model Loaded")
    else:
        llm_emb = None
        print("No Embedding Model Selected")
    
    return llm_emb

#### [중요] is_KoSimCSERobert == True 일시에 endpoint_name 을 꼭 넣어 주세요.

In [9]:
is_bedrock_embeddings = True
is_KoSimCSERobert = False
aws_region = os.environ.get("AWS_DEFAULT_REGION", None)

##############################
# Parameters for is_KoSimCSERobert
##############################
if is_KoSimCSERobert: endpont_name = "<endpoint-name>"
else: endpont_name = None
##############################

llm_emb = get_embedding_model(is_bedrock_embeddings, is_KoSimCSERobert, aws_region, endpont_name)   

Bedrock Embeddings Model Loaded


## 3. Depoly ReRanker model (if needed)

In [10]:
import json
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [11]:
depoly = False

In [12]:
if depoly:

    try:
        role = sagemaker.get_execution_role()
    except ValueError:
        iam = boto3.client('iam')
        role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

    # Hub Model configuration. https://huggingface.co/models
    hub = {
        'HF_MODEL_ID':'Dongjin-kr/ko-reranker',
        'HF_TASK':'text-classification'
    }

    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
        transformers_version='4.26.0',
        pytorch_version='1.13.1',
        py_version='py39',
        env=hub,
        role=role, 
    )

    # deploy model to SageMaker Inference
    predictor = huggingface_model.deploy(
        initial_instance_count=1, # number of instances
        instance_type='ml.g5.xlarge' # instance type
    )

    print(f'Accept: {predictor.accept}')
    print(f'ContentType: {predictor.content_type}')
    print(f'Endpoint: {predictor.endpoint}')

#### Save reranker endpoint to Parameter Store

In [13]:
import boto3
from utils.ssm import parameter_store

In [14]:
region=boto3.Session().region_name
pm = parameter_store(region)

In [15]:
pm.put_params(
    key="reranker_endpoint",
    value=f'{predictor.endpoint}',
    overwrite=True,
    enc=False
)

NameError: name 'predictor' is not defined

## 4. Invocation (prediction)

In [16]:
runtime_client = boto3.Session().client('sagemaker-runtime')
print (f'runtime_client: {runtime_client}')

runtime_client: <botocore.client.SageMakerRuntime object at 0x7f9debc29480>


In [17]:
endpoint_name = pm.get_params(
    key="reranker_endpoint",
    enc=False
)
deserializer = "application/json"

In [18]:
payload = json.dumps(
    {
        "inputs": [
            {"text": "I hate you", "text_pair": "I don't like you"},
            {"text": "He hates you", "text_pair": "He like you"}
        ]
    }
)

In [19]:
%%time
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept=deserializer,
    Body=payload
)
## deserialization
out = json.loads(response['Body'].read().decode()) ## for json
print (f'Response: {out}')

Response: [{'label': 'LABEL_0', 'score': 0.9987371563911438}, {'label': 'LABEL_0', 'score': 0.0028581616934388876}]
CPU times: user 10.1 ms, sys: 4.3 ms, total: 14.4 ms
Wall time: 84.8 ms


## 5. LangChainmOpenSearch VectorStore 정의
### 선수 조건
- 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 OpenSearch Index 가 생성이 되어 있어야 합니다.
#### [중요] 아래에 aws parameter store 에 아래 인증정보가 먼저 입력되어 있어야 합니다.
- 01_preprocess_docs/01_parameter_store_example.ipynb 참고

In [20]:
import boto3
from utils.ssm import parameter_store

In [21]:
region=boto3.Session().region_name
pm = parameter_store(region)

In [22]:
opensearch_domain_endpoint = pm.get_params(
    key="opensearch_domain_endpoint",
    enc=False
)

opensearch_user_id = pm.get_params(
    key="opensearch_user_id",
    enc=False
)

opensearch_user_password = pm.get_params(
    key="opensearch_user_password",
    enc=True
)

In [23]:
opensearch_domain_endpoint = opensearch_domain_endpoint
rag_user_name = opensearch_user_id
rag_user_password = opensearch_user_password

http_auth = (rag_user_name, rag_user_password) # Master username, Master password

### Index 이름 셋팅
- 이전 노트북 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 생성된 OpenSearch Index name 입력

In [24]:
index_name = opensearch_user_password = pm.get_params(
    key="opensearch_index_name",
    enc=True
)

print (f'index_name: {index_name}')

index_name: v01-genai-poc-parent-doc-retriever


### OpenSearch Client 생성

In [25]:
from utils.opensearch import opensearch_utils

In [26]:
os_client = opensearch_utils.create_aws_opensearch_client(
    aws_region,
    opensearch_domain_endpoint,
    http_auth
)

## 6. Retriever based on Hybrid Search + HyDE + ReRanker
- LangChain에서 제공하는 **BaseRetriever** 클래스를 상속받아 **Custom Retriever**를 정의 할 수 있습니다.
- Hybrid-Search에 대한 자세한 내용는 **"01_rag_hybrid_search.ipyno"** 에서 확인 가능합니다.
- [HyDE](https://medium.com/prompt-engineering/hyde-revolutionising-search-with-hypothetical-document-embeddings-3474df795af8)
![HyDE.png](../../../imgs/HyDE.png)

In [27]:
from utils.rag import OpenSearchHybridSearchRetriever

- 필터 설정 예시
- filter=[ <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your first keyword**"}}, <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your second keyword**"}},<BR>
]

In [28]:
opensearch_hybrid_retriever = OpenSearchHybridSearchRetriever(
    os_client=os_client,
    index_name=index_name,
    llm_text=llm_text, # llm for query augmentation in both rag_fusion and HyDE
    llm_emb=llm_emb,

    # option for lexical
    minimum_should_match=0,
    filter=[],

    # option for search
    fusion_algorithm="RRF", # ["RRF", "simple_weighted"], rank fusion 방식 정의
    ensemble_weights=[.51, .49], # [for lexical, for semantic], Lexical, Semantic search 결과에 대한 최종 반영 비율 정의
    reranker=True, # enable reranker with reranker model
    reranker_endpoint_name=endpoint_name, # endpoint name for reranking model
    hyde=True, # enable hyde
    hyde_query=["web_search"], # query type in hyde 
    

    # option for async search
    async_mode=False,

    # option for output
    k=5, # 최종 Document 수 정의
    verbose=True,
)

### Retrieval example
- default search

In [29]:
from utils.rag import show_context_used

In [30]:
query = "중지된 경우 이체"

In [31]:
%%time
search_hybrid_result = opensearch_hybrid_retriever.invoke(query)

print("\n==========  Results  ==========\n")
print(f'1. question: {query}')
print (f'2. # documents: {len(search_hybrid_result)}')
print("3. Documents: \n")

show_context_used(search_hybrid_result)



===== HyDE Answers =====
['중지된 경우 이체', '이체가 중지된 경우에는 다음과 같은 조치를 취할 수 있습니다:\n\n1. 은행에 문의: 이체가 중지된 정확한 이유를 확인합니다.\n\n2. 계좌 확인: 잔액 부족이나 한도 초과 등의 문제가 없는지 점검합니다.\n\n3. 수취인 정보 재확인: 계좌번호나 은행 정보가 정확한지 확인합니다.\n\n4. 보안 문제 해결: 보안상의 이유로 중지된 경우 필요한 인증 절차를 밟습니다.\n\n5. 재시도: 문제가 해결되면 이체를 다시 시도합니다.\n\n6. 대체 방법 사용: 필요한 경우 다른 이체 방법을 고려합니다.\n\n문제 해결이 어려울 경우 은행 고객센터에 직접 연락하여 상세한 안내를 받는 것이 좋습니다.']

[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2

##############################
async_mode
##############################
False
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
True
##############################
parent_document
##############################
False
##############################
complex_document
##############################
False
##############################
similar_docs_semantic
##########

- update parameters

In [32]:
opensearch_hybrid_retriever.update_search_params(
    k=5,
    minimum_should_match=30,
    filter=[
        #{"term": {"metadata.project": "KS"}},
    ],
    reranker=True,
    reranker_endpoint_name=endpoint_name,
    hyde=True, # enable hyde
    hyde_query=["web_search"], # query type in hyde 
    llm_text=llm_text, # llm for query augmentation in rag_fusion
    verbose=True
)

In [33]:
query = "중지된 경우 이체"
search_hybrid_result = opensearch_hybrid_retriever.invoke(query)

print("\n==========  Results  ==========\n")
print(f'1. question: {query}')
print(f'2. # documents: {len(search_hybrid_result)}')
print("3. Documents: \n")

show_context_used(search_hybrid_result)



===== HyDE Answers =====
['중지된 경우 이체', '중지된 이체의 경우, 다음과 같은 절차를 따르는 것이 일반적입니다:\n\n1. 은행에 연락하여 중지 사유를 확인합니다.\n2. 필요한 서류나 정보를 제출하여 중지 상태를 해제합니다.\n3. 이체 재개를 요청합니다.\n4. 은행의 승인을 받은 후 이체가 재개됩니다.\n\n중지 사유에 따라 추가적인 절차가 필요할 수 있으며, 일부 경우 새로운 이체 신청이 필요할 수도 있습니다. 정확한 절차는 해당 은행의 정책에 따라 다를 수 있으므로, 직접 은행에 문의하는 것이 가장 확실한 방법입니다.']

[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2

##############################
async_mode
##############################
False
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
True
##############################
parent_document
##############################
False
##############################
complex_document
##############################
False
##############################
similar_docs_semantic
##############################

Score: 0.00819672131147541
['. Go to the S

## 5. RAG using RetrievalQA powered by LangChain

In [34]:
from textwrap import dedent

### Prompting
- [TIP] Prompt의 instruction의 경우 한글보다 영어로 했을 때 더 좋은 결과를 얻을 수 있습니다.

In [60]:
system_prompt = dedent(
    """
    You are a master answer bot designed to answer user's questions.
    I'm going to give you contexts which consist of texts, tables and images.
    Read the contexts carefully, because I'm going to ask you a question about it.
    """
)

human_prompt = dedent(
    """
    Here is the contexts as texts: <contexts>{contexts}</contexts>

    First, find a few paragraphs or sentences from the contexts that are most relevant to answering the question.
    Then, answer the question as much as you can.

    Skip the preamble and go straight into the answer.
    Don't insert any XML tag such as <contexts> and </contexts> when answering.
    Answer in Korean.

    Here is the question: <question>{question}</question>

    If the question cannot be answered by the contexts, say "No relevant contexts".
    """
)



===== HyDE Answers =====
['vidio max size?', '비디오의 최대 크기는 플랫폼이나 서비스에 따라 다릅니다:\n\n• YouTube: 128GB 또는 12시간 (더 짧은 쪽 적용)\n• Instagram: 4GB 또는 60분\n• TikTok: 287.6MB 또는 10분\n• Twitter: 512MB 또는 2분 20초\n• Facebook: 4GB 또는 240분\n\n대부분의 경우 파일 크기 제한이 먼저 적용됩니다. 고화질 영상일수록 파일 크기가 커집니다. 업로드 시 각 플랫폼의 최신 제한사항을 확인하는 것이 좋습니다.']


### Update Search Params (Optional)

In [61]:
from utils.rag import rag_chain
from langchain.schema.output_parser import StrOutputParser

In [69]:
opensearch_hybrid_retriever.update_search_params(
    k=10,
    minimum_should_match=0,
    filter=[],
    reranker=True,
    reranker_endpoint_name=endpoint_name,
    hyde=True, # enable hyde
    hyde_query=["web_search"], # query type in hyde 
    llm_text=llm_text, # llm for query augmentation in rag_fusion
    async_mode=False,
    verbose=True,
    
    # option for complex documents consisting of text, table and image
    complex_doc=False, # False 이면, invocation시 text만 넣는다.
)

### Request

In [70]:
qa = rag_chain(
    llm_text=llm_text,
    retriever=opensearch_hybrid_retriever,
    system_prompt=system_prompt,
    human_prompt=human_prompt,
    return_context=True,
    verbose=False,
    #multi_turn=True
)

In [71]:
#query = "중지된 경우 이체"
#query = "초등학교 교육 과정"
query = "vidio max size?"

response, contexts = qa.invoke(
    query=query
)

show_context_used(contexts)



===== HyDE Answers =====
['vidio max size?', '비디오 최대 크기는 플랫폼과 사용 목적에 따라 다릅니다:\n\n- YouTube: 일반적으로 128GB 또는 12시간 제한\n- Instagram: 60초(피드), 15분(IGTV)\n- Facebook: 4GB 또는 240분\n- Twitter: 512MB 또는 2분 20초\n- WhatsApp: 16MB 또는 3분\n- Email 첨부: 일반적으로 25MB 미만\n\n고화질 비디오의 경우 파일 크기가 더 클 수 있습니다. 압축이나 분할 업로드로 크기 제한을 해결할 수 있습니다. 정확한 제한은 각 플랫폼의 최신 가이드라인을 확인하세요.']
##############################
async_mode
##############################
False
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
True
##############################
parent_document
##############################
False
##############################
complex_document
##############################
False
##############################
similar_docs_semantic
##############################

Score: 0.015749007936507936
['.The input value ranges from 1 to 52428800 (50MB). Maxim

In [68]:
print("##################################")
print("query: ", query)
print("##################################")

print (colored("\n\n### Answer ###", "blue"))
print_ww(response)

print (colored("\n\n### Contexts ###", "green"))
show_context_used(contexts)

##################################
query:  vidio max size?
##################################
[34m

### Answer ###[0m
관련 문맥을 찾았습니다:

"Video Select a video file for the screen saver. You can add only one video file in the MP4 or MKV
format. The video file must be less than 50 MB. To upload a video file, click Add and select a file.
To delete a video file, click next to the name of the uploaded video file."

답변:

Knox Configure에서 비디오 파일의 최대 크기는 50MB입니다. 스크린 세이버용 비디오 파일을 추가할 때 MP4 또는 MKV 형식의 파일만 허용되며, 파일 크기는 50MB
미만이어야 합니다.
[32m

### Contexts ###[0m

-----------------------------------------------
1. Chunk: 839 Characters
-----------------------------------------------
. You can add up to 10 image files in the PNG, JPG, JPEG, or GIF format (animated files are not
supported). Each image file must be less than 5 MB. To upload an image file, click Add and select a
file. To delete an image file, click next to the name of the uploaded image file. Note The device
control command must be tra