# RAG Based on parent document
- Hybrid Search
- ReRanker
- [Parent Document](https://medium.aiplanet.com/advanced-rag-providing-broader-context-to-llms-using-parentdocumentretriever-cc627762305a)
    

## Setting
 - Auto Reload
 - path for utils

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Put the directory three levels up on the import path so the project's
# `utils` package can be imported from this notebook.
module_path = "../../.."
sys.path.append(os.path.abspath(module_path))

## 1. Bedrock Client 생성

In [3]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from utils import bedrock, print_ww
from utils.bedrock import bedrock_info

### ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----
- os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
- os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
- os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
- os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."

In [4]:
# Build the Bedrock client; each setting is optional and is None when the
# corresponding environment variable is not set.
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL"),
    region=os.environ.get("AWS_DEFAULT_REGION"),
)

# Keep the region around for the cells that follow.
aws_region = os.environ.get("AWS_DEFAULT_REGION")

# Show the model-name -> model-id mapping known to the helper.
print(colored("\n== FM lists ==", "green"))
pprint(bedrock_info.get_list_fm_models(verbose=False))

Create new client
  Using region: None
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)
[32m
== FM lists ==[0m
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Claude-V2-1': 'anthropic.claude-v2:1',
 'Cohere-Embeddings-En': 'cohere.embed-english-v3',
 'Cohere-Embeddings-Multilingual': 'cohere.embed-multilingual-v3',
 'Command': 'cohere.command-text-v14',
 'Command-Light': 'cohere.command-light-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text-v1',
 'Titan-Text-G1': 'amazon.titan-text-express-v1',
 'Titan-Text-G1-Light': 'amazon.titan-text-lite-v1'}


## 2. Titan Embedding 및 LLM 인 Claude-v2 모델 로딩

### LLM 로딩 (Claude-v2)

In [5]:
from langchain.llms.bedrock import Bedrock
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [6]:
# Text-generation LLM (model name "Claude-V2-1") served through the Bedrock
# client created earlier in the notebook.
llm_text = Bedrock(
    model_id=bedrock_info.get_model_id(model_name="Claude-V2-1"),
    client=boto3_bedrock,
    # Generation parameters handed to the model via model_kwargs.
    model_kwargs={
        "max_tokens_to_sample": 4000,
        "temperature": 0.1,
        "top_k": 3,
        "top_p": 0.1,
        "stop_sequences": ["\n\nHuman:"]
    },
    # Streaming is enabled and a stdout callback handler is attached
    # (presumably so tokens are echoed as they arrive -- confirm in LangChain docs).
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()]
)
llm_text  # last expression of the cell: displays the configured object

Bedrock(client=<botocore.client.BedrockRuntime object at 0x7f4812f7bb50>, model_id='anthropic.claude-v2:1', model_kwargs={'max_tokens_to_sample': 4000, 'temperature': 0.1, 'top_k': 3, 'top_p': 0.1, 'stop_sequences': ['\n\nHuman:']}, streaming=True, callbacks=[<langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at 0x7f48364e67a0>])

### Embedding 모델 선택

In [7]:
from utils.rag import KoSimCSERobertaContentHandler, SagemakerEndpointEmbeddingsJumpStart

In [8]:
def get_embedding_model(is_bedrock_embeddings, is_KoSimCSERobert, aws_region, endpont_name=None):
    """Return the embedding model selected by the two flags.

    Args:
        is_bedrock_embeddings: When truthy, build a ``BedrockEmbeddings`` on the
            notebook-level ``boto3_bedrock`` client with the model id registered
            as "Titan-Embeddings-G1". Takes precedence over ``is_KoSimCSERobert``.
        is_KoSimCSERobert: When truthy (and the Bedrock flag is falsy), build a
            ``SagemakerEndpointEmbeddingsJumpStart`` that uses
            ``KoSimCSERobertaContentHandler``.
        aws_region: Region name passed to the SageMaker embeddings wrapper.
        endpont_name: SageMaker endpoint name; required for the KoSimCSERobert
            branch. The misspelled parameter name is kept so existing keyword
            callers keep working.

    Returns:
        The embeddings object, or ``None`` when neither flag is set.

    Raises:
        ValueError: If ``is_KoSimCSERobert`` is selected without an endpoint name.
    """
    if is_bedrock_embeddings:
        # Imported here so the dependency is only needed on this branch.
        from langchain.embeddings import BedrockEmbeddings

        llm_emb = BedrockEmbeddings(
            client=boto3_bedrock,
            model_id=bedrock_info.get_model_id(model_name="Titan-Embeddings-G1"),
        )
        print("Bedrock Embeddings Model Loaded")

    elif is_KoSimCSERobert:
        # Fail fast: this branch cannot work without an endpoint to call.
        if not endpont_name:
            raise ValueError(
                "endpont_name is required when is_KoSimCSERobert is True"
            )
        content_handler = KoSimCSERobertaContentHandler()
        llm_emb = SagemakerEndpointEmbeddingsJumpStart(
            endpoint_name=endpont_name,
            region_name=aws_region,
            content_handler=content_handler,
        )
        print("KoSimCSERobert Embeddings Model Loaded")

    else:
        llm_emb = None
        print("No Embedding Model Selected")

    return llm_emb

#### [중요] is_KoSimCSERobert == True 일시에 endpoint_name 을 꼭 넣어 주세요.

In [9]:
# Pick the embedding backend used by the rest of the notebook.
is_bedrock_embeddings = True
is_KoSimCSERobert = False
aws_region = os.environ.get("AWS_DEFAULT_REGION")

# The SageMaker endpoint name is only set for the KoSimCSERobert backend;
# replace the placeholder with a real endpoint name when enabling it.
endpont_name = "<endpoint-name>" if is_KoSimCSERobert else None

llm_emb = get_embedding_model(
    is_bedrock_embeddings,
    is_KoSimCSERobert,
    aws_region,
    endpont_name,
)

Bedrock Embeddings Model Loaded


## 3. Deploy ReRanker model (if needed)

In [10]:
import json
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/SageMaker/.xdg/config/sagemaker/config.yaml


In [11]:
# Set to True to run the SageMaker deployment cell that follows; that cell
# checks this exact (misspelled) name, so it is kept as-is.
depoly = False

In [12]:
if depoly:

    # Resolve the IAM role for the endpoint; when the SageMaker execution role
    # cannot be resolved (ValueError), look up a role by name through IAM.
    try:
        role = sagemaker.get_execution_role()
    except ValueError:
        iam = boto3.client('iam')
        role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

    # Hub Model configuration. https://huggingface.co/models
    hub = {
        'HF_MODEL_ID':'BAAI/bge-reranker-large',
        'HF_TASK':'text-classification'
    }

    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
        transformers_version='4.26.0',
        pytorch_version='1.13.1',
        py_version='py39',
        env=hub,
        role=role,
    )

    # deploy model to SageMaker Inference
    predictor = huggingface_model.deploy(
        initial_instance_count=1, # number of instances
        instance_type='ml.g5.xlarge' # instance type
    )

    print(f'Accept: {predictor.accept}')
    print(f'ContentType: {predictor.content_type}')
    # `endpoint_name` is the SageMaker SDK v2 attribute; `endpoint` is the
    # deprecated v1 spelling.
    print(f'Endpoint: {predictor.endpoint_name}')

## 4. Invocation (prediction)

In [13]:
# SageMaker runtime client, used later in the notebook to invoke the endpoint.
runtime_client = boto3.Session().client('sagemaker-runtime')
print (f'runtime_client: {runtime_client}')

runtime_client: <botocore.client.SageMakerRuntime object at 0x7f4706ccf040>


In [22]:
# Name of the endpoint invoked below and reused as the retriever's
# reranker endpoint; the commented line is an earlier endpoint.
#endpoint_name = "huggingface-pytorch-inference-2023-11-15-04-37-45-120" # ml.m5.2xlarge
endpoint_name = "huggingface-pytorch-inference-2024-01-02-06-37-47-976" # ml.g5.xlarge
# Passed as the `Accept` value of the invocation request.
deserializer = "application/json"

In [23]:
# English sample request: each entry is a (text, text_pair) sentence pair.
payload = json.dumps(
    {
        "inputs": [
            {"text": text, "text_pair": text_pair}
            for text, text_pair in (
                ("I hate you", "I don't like you"),
                ("He hates you", "He like you"),
            )
        ]
    }
)

In [24]:
# Korean sample request: each entry is a (text, text_pair) sentence pair.
payload = json.dumps(
    {
        "inputs": [
            {"text": text, "text_pair": text_pair}
            for text, text_pair in (
                ("나는 너를 사랑하지 않아", "나는 너를 좋아하지 않아"),
                ("그는 너를 싫어해", "그는 너를 좋아해"),
            )
        ]
    }
)

In [25]:
%%time
# Send the JSON payload to the endpoint and request a JSON response.
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Accept=deserializer,
    Body=payload
)
# Deserialization: read the response body, decode it to text, parse the JSON.
out = json.loads(response['Body'].read().decode()) # parsed JSON result
print (f'Response: {out}')

Response: [{'label': 'LABEL_0', 'score': 0.9993010759353638}, {'label': 'LABEL_0', 'score': 0.021486487239599228}]
CPU times: user 2.35 ms, sys: 0 ns, total: 2.35 ms
Wall time: 37.1 ms


## 5. LangChain OpenSearch VectorStore 정의
### 선수 조건
- 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 OpenSearch Index 가 생성이 되어 있어야 합니다.
#### [중요] AWS Parameter Store에 아래 인증정보가 먼저 입력되어 있어야 합니다.
- 01_preprocess_docs/01_parameter_store_example.ipynb 참고

In [26]:
from utils.proc_docs import get_parameter

In [27]:
# SSM client; the region is pinned here rather than read from the environment.
ssm = boto3.client("ssm", "us-east-1")

# Fetch the OpenSearch endpoint, user id and password, in that order.
opensearch_domain_endpoint, opensearch_user_id, opensearch_user_password = (
    get_parameter(boto3_client=ssm, parameter_name=parameter_name)
    for parameter_name in (
        'knox_opensearch_domain_endpoint',
        'knox_opensearch_userid',
        'knox_opensearch_password',
    )
)

In [28]:
# Credentials fetched in the previous cell. The original no-op self-assignment
# of `opensearch_domain_endpoint` is dropped; the variable keeps its value.
rag_user_name = opensearch_user_id
rag_user_password = opensearch_user_password

# Basic-auth tuple: (master username, master password).
http_auth = (rag_user_name, rag_user_password)

### Index 이름 셋팅
- 이전 노트북 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 생성된 OpenSearch Index name 입력
- parent document 용으로 생성된 index 사용할 것 

In [29]:
# OpenSearch index to query; earlier candidate indexes are kept commented out.
#index_name = "v16-genai-poc-knox-eval-parent-doc-retriever"
#index_name = "v15-genai-poc-knox-parent-doc-retriever"
index_name = "v18-genai-poc-knox-kor-parent-doc-retriever"

### OpenSearch Client 생성

In [30]:
from utils.opensearch import opensearch_utils

In [31]:
# OpenSearch client built from the region, domain endpoint and auth tuple.
# NOTE(review): aws_region comes from AWS_DEFAULT_REGION and is None when that
# variable is unset -- confirm the helper accepts None.
os_client = opensearch_utils.create_aws_opensearch_client(
    aws_region,
    opensearch_domain_endpoint,
    http_auth
)

# 아래수정

## 6. Retriever based on Hybrid Search + ParentDocument + ReRanker
- LangChain에서 제공하는 **BaseRetriever** 클래스를 상속받아 **Custom Retriever**를 정의 할 수 있습니다.
- Hybrid-Search에 대한 자세한 내용은 **"01_rag_hybrid_search.ipynb"** 에서 확인 가능합니다.
- [Parent Document](https://medium.aiplanet.com/advanced-rag-providing-broader-context-to-llms-using-parentdocumentretriever-cc627762305a)
![parent-document.png](../../../imgs/parent-document.png)

## Parent documents in Hybrid search
- Lexical search: parent documents only
- Semantic search: child documents are searched first, then the parent document of each matching child is fetched

In [32]:
from utils.rag import OpenSearchHybridSearchRetriever

- 필터 설정 예시
- filter=[ <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your first keyword**"}}, <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your second keyword**"}},<BR>
]

In [33]:
# Hybrid (semantic + lexical) retriever with reranking and parent-document
# lookup enabled.
opensearch_hybrid_retriever = OpenSearchHybridSearchRetriever(
    os_client=os_client,
    index_name=index_name,
    llm_text=llm_text, # llm for query augmentation in both rag_fusion and HyDE
    llm_emb=llm_emb, # Used in semantic search based on opensearch 

    # option for lexical
    minimum_should_match=0,
    filter=[],

    # option for search
    fusion_algorithm="RRF", # one of ["RRF", "simple_weighted"]; selects the rank-fusion method
    ensemble_weights=[.51, .49], # [semantic, lexical] weights given to each search's results in the final ranking
    reranker=True, # enable reranker with reranker model
    reranker_endpoint_name=endpoint_name, # endpoint name for reranking model
    parent_document = True, # enable parent document

    # option for async search
    async_mode=True,

    # option for output
    k=5, # number of documents in the final result
    verbose=True,
)

### Retrieval example
- default search

In [34]:
from utils.rag import show_context_used

In [35]:
# Sample English question for the first retrieval run.
query = "what is verify DM"

In [36]:
%%time
# Run retrieval for `query` with the retriever configured earlier.
search_hybrid_result = opensearch_hybrid_retriever.get_relevant_documents(query)

# Print the question and the number of documents returned.
print("\n==========  Results  ==========\n")
print(f'1. question: {query}')
print (f'2. # documents: {len(search_hybrid_result)}')
print("3. Documents: \n")

# Print the retrieved documents with the project helper.
show_context_used(search_hybrid_result)


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 1


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 1


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 3


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2

===== ParentDocument =====
filter: [{'term': {'metadata.family_tree': 'child'}}]
# child_docs: 5
# parent docs: 5
# duplicates: 0
##############################
async_mode
##############################
True
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
False
##############################
parent_document
##############################


- update parameters

In [37]:
# Reconfigure the existing retriever; this run switches parent-document lookup off.
opensearch_hybrid_retriever.update_search_params(
    k=5,
    minimum_should_match=0,
    #filter=[],
    filter=[
        # Example term filters, disabled here so the filter list stays empty.
        #{'term': {'metadata.project': 'KS'}},
        #{'term': {'metadata.family_tree': 'child'}},
    ],
    reranker=True,
    reranker_endpoint_name=endpoint_name,
    parent_document=False, # parent document disabled for this run
    verbose=True,
)

In [38]:
# Korean sample question ("What is Knox?"); the English alternative is kept
# commented out.
#query = "how to verify DM"
query = "녹스가 무엇인가요?"
search_hybrid_result = opensearch_hybrid_retriever.get_relevant_documents(query)

# Print the question and the number of documents returned.
print("\n==========  Results  ==========\n")
print(f'1. question: {query}')
print(f'2. # documents: {len(search_hybrid_result)}')
print("3. Documents: \n")

# Print the retrieved documents with the project helper.
show_context_used(search_hybrid_result)


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 1


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 1


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 3


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 4


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2

##############################
async_mode
##############################
True
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
False
##############################
parent_document
##############################
False
##############################
similar_docs_semantic
##############################
[(Document(page_content='녹스 가드를 사용하려면 어떤

## 7. RAG using RetrievalQA powered by LangChain

In [39]:
from utils.rag import prompt_repo
from utils.rag import run_RetrievalQA
from langchain.prompts import PromptTemplate

### Prompting
- [TIP] Prompt의 instruction의 경우 한글보다 영어로 했을 때 더 좋은 결과를 얻을 수 있습니다.

In [40]:
# QA prompt template taken from the project's prompt repository.
PROMPT = prompt_repo.get_qa(prompt_type="answer_only") # available types: ["answer_only", "answer_with_ref", "original"]

In [41]:
# Show the raw template text, including its {context} and {question} placeholders.
pprint (PROMPT.template)

('\n'
 '            \n'
 '\n'
 'Human:\n'
 '            You are a master answer bot designed to answer software '
 "developer's questions.\n"
 "            I'm going to give you a context. Read the context carefully, "
 "because I'm going to ask you a question about it.\n"
 '\n'
 '            Here is the context: <context>{context}</context>\n'
 '            \n'
 '            First, find a few paragraphs or sentences from the context that '
 'are most relevant to answering the question.\n'
 '            Then, answer the question as much as you can.\n'
 '\n'
 '            Skip the preamble and go straight into the answer.\n'
 "            Don't insert any XML tag such as <context> and </context> when "
 'answering.\n'
 '            \n'
 '            Here is the question: <question>{question}</question>\n'
 '\n'
 '            If the question cannot be answered by the context, say "No '
 'relevant context".\n'
 '            \n'
 '\n'
 'Assistant: Here is the answer. ')


### Update Search Params (Optional)

In [69]:
from langchain.chains import RetrievalQA

In [80]:
# Search settings used by the QA chain: six documents, child-document filter,
# reranker, HyDE and parent-document enabled, RAG-fusion disabled.
opensearch_hybrid_retriever.update_search_params(
    k=6,
    minimum_should_match=0,
    filter=[
        {'term': {'metadata.family_tree': 'child'}},
    ],
    ensemble_weights=[0.51, 0.49], # [semantic, lexical]

    reranker=True,
    reranker_endpoint_name=endpoint_name,

    rag_fusion=False,
    query_augmentation_size=3, # query_augmentation_size in rag_fusion

    hyde=True, # enable hyde
    hyde_query=["web_search"], # query type in hyde 

    parent_document=True, # enable parent document
    verbose=True
)

### Request

In [81]:
# RetrievalQA chain wiring the LLM, the hybrid retriever and the QA prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm_text,
    # "stuff" chain type (per LangChain docs: retrieved documents are placed
    # into a single prompt).
    chain_type="stuff",
    retriever=opensearch_hybrid_retriever,
    # Requested so the retrieved documents come back with the answer; a later
    # cell reads response['source_documents'].
    return_source_documents=True,
    chain_type_kwargs={
        "prompt": PROMPT,
        "verbose": False,
    },
    verbose=False
)

In [82]:
# Korean question ("What features does Knox have?"); the English alternative is
# kept commented out.
#query = "What’s knox?"
query = "knox의 기능에는 어떤 것들이 있나요?"
# Run the chain; the result is read below via the 'result' and
# 'source_documents' keys.
response = qa(query)

 Knox는 Samsung의 모바일 보안 플랫폼으로, 사용자 데이터와 애플리케이션을 보호하는 다양한 기능을 제공합니다. 

주요 기능으로는 다음이 있습니다:

- 보안 폴더: 개인 데이터와 앱을 안전하게 보관할 수 있는 기능
- 안전한 Wi-Fi: 공공 Wi-Fi 네트워크 사용 시 해킹 방지 기능 
- 실행 중 앱 보호: 앱 실행 중 메모리 해킹 등을 방지
- FRP 잠금 해제 보호: 부팅 시 보안을 강화하는 기능
- 보안 업데이트: 최신 보안 패치 자동 업데이트

이 외에도 Knox는 모바일 기기와 데이터를 전체적으로 보호하는 다양한 기능을 제공합니다.

===== HyDE Answers =====
['knox의 기능에는 어떤 것들이 있나요?', ' Knox는 Samsung의 모바일 보안 플랫폼으로, 사용자 데이터와 애플리케이션을 보호하는 다양한 기능을 제공합니다. \n\n주요 기능으로는 다음이 있습니다:\n\n- 보안 폴더: 개인 데이터와 앱을 안전하게 보관할 수 있는 기능\n- 안전한 Wi-Fi: 공공 Wi-Fi 네트워크 사용 시 해킹 방지 기능 \n- 실행 중 앱 보호: 앱 실행 중 메모리 해킹 등을 방지\n- FRP 잠금 해제 보호: 부팅 시 보안을 강화하는 기능\n- 보안 업데이트: 최신 보안 패치 자동 업데이트\n\n이 외에도 Knox는 모바일 기기와 데이터를 전체적으로 보호하는 다양한 기능을 제공합니다.']

[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 3


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 2


[Exeeds ReRanker token limit] Number of chunk_docs after split and chunking= 3


[Exeeds ReRanker token limit] Number of chunk_docs after split

In [68]:
# Banner with the question that was asked.
print("##################################")
print("query: ", query)
print("##################################")

# Answer text, printed with the project's `print_ww` helper.
print (colored("\n\n### Answer ###", "blue"))
print_ww(response['result'])


# Source documents returned alongside the answer.
print (colored("\n\n### Contexts ###", "green"))
show_context_used(response['source_documents'])

##################################
query:  knox의 기능에는 어떤 것들이 있나요?
##################################
[34m

### Answer ###[0m


Knox 플랫폼의 주요 기능에는 다음이 포함됩니다:

- 하드웨어 기반 보안: Knox Vault, 실시간 커널 보호(RKP), DualDAR 암호화 등을 통한 하드웨어 기반 보안 기능

- 관리 용이성: 감사 로그, 원격 제어, 주변 장치 프레임워크 등을 통한 편리한 관리 기능

- 세분화된 정책 시행: 방화벽 관리, 앱 격리, 데이터 공유 정책 등을 통한 세밀한 정책 적용

- 인증 강화: 2단계 인증, 보안 인증서 등록 에이전트 등을 통한 인증 강화
[32m

### Contexts ###[0m
-----------------------------------------------
1. Chunk: 1711 Characters
-----------------------------------------------
삼성 녹스 플랫폼.삼성 Knox는 모든 기업에 가장 인기 있는 소비자 디바이스의 방어급 보안을 제공합니다.Knox Platform은 오늘날 모바일 디바이스 시장에서 흔히 볼 수
있는 표준 기능을 뛰어넘는 동급 최고의 하드웨어 기반 보안, 정책 관리 및 규정 준수 기능을 제공합니다.Knox 플랫폼은 다양한 삼성 디바이스를 지원하는 강력한 모바일 보안 전략의
초석입니다.삼성 Knox를 사용하는 이유는 무엇일까요?Knox 플랫폼은 여러분과 기업이 많은 모바일 플랫폼에서 흔히 발생하는 보안 격차를 피할 수 있도록 도와줍니다.Knox는
Gartner가 발표한 2017년 12월 모바일 OS 및 디바이스 보안: 플랫폼 비교에서 28개 부문 중 25개 부문에서*강한* 등급을 받았으며 지난 3년 연속 높은 평가를
받았습니다.Knox 플랫폼의 보안 강화는 모바일 장치 운영의 모든 측면을 지원합니다.Knox 플랫폼은 혁신적인 Knox Va