# RAG Based on parent document
- Hybrid Search
- ReRanker
- [Parent Document](https://medium.aiplanet.com/advanced-rag-providing-broader-context-to-llms-using-parentdocumentretriever-cc627762305a)
    

## Setting
 - Auto Reload
 - path for utils

In [298]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [299]:
# Put the directory three levels up on sys.path so the `utils` package can be imported.
import os
import sys

module_path = "../../.."
sys.path.append(os.path.abspath(module_path))

## 1. Bedrock Client 생성

In [300]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from utils import bedrock, print_ww
from utils.bedrock import bedrock_info

### ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----
- os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
- os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
- os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
- os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."

In [301]:
# Build the Bedrock client from optional environment overrides; any variable
# that is not set is passed to the helper as None.
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE"),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL"),
    region=os.environ.get("AWS_DEFAULT_REGION"),
)

aws_region = os.environ.get("AWS_DEFAULT_REGION")

# Show the model-name -> model-id mapping known to the helper.
print(colored("\n== FM lists ==", "green"))
pprint(bedrock_info.get_list_fm_models(verbose=False))

Create new client
  Using region: None
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)
[32m
== FM lists ==[0m
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Claude-V2-1': 'anthropic.claude-v2:1',
 'Cohere-Embeddings-En': 'cohere.embed-english-v3',
 'Cohere-Embeddings-Multilingual': 'cohere.embed-multilingual-v3',
 'Command': 'cohere.command-text-v14',
 'Command-Light': 'cohere.command-light-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text-v1',
 'Titan-Text-G1': 'amazon.titan-text-express-v1',
 'Titan-Text-G1-Light': 'amazon.titan-text-lite-v1'}


## 2. Titan Embedding 및 LLM 인 Claude-v2 모델 로딩

### LLM 로딩 (Claude-v2)

In [302]:
from langchain.llms.bedrock import Bedrock
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [303]:
# Text-generation LLM (model name "Claude-V2-1") served through the Bedrock client.
# Streaming is enabled and a stdout callback handler is attached.
llm_text = Bedrock(
    client=boto3_bedrock,
    model_id=bedrock_info.get_model_id(model_name="Claude-V2-1"),
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    model_kwargs=dict(
        max_tokens_to_sample=4000,
        temperature=0.1,
        top_k=3,
        top_p=0.1,
        stop_sequences=["\n\nHuman:"],
    ),
)
llm_text

Bedrock(client=<botocore.client.BedrockRuntime object at 0x7fc6836a8490>, model_id='anthropic.claude-v2:1', model_kwargs={'max_tokens_to_sample': 4000, 'temperature': 0.1, 'top_k': 3, 'top_p': 0.1, 'stop_sequences': ['\n\nHuman:']}, streaming=True, callbacks=[<langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at 0x7fc68366d300>])

### Embedding 모델 선택

In [304]:
from utils.rag import KoSimCSERobertaContentHandler, SagemakerEndpointEmbeddingsJumpStart

In [305]:
def get_embedding_model(is_bedrock_embeddings, is_KoSimCSERobert, aws_region, endpont_name=None):
    """Build the embedding model selected by the two flags.

    ``is_bedrock_embeddings`` takes precedence: when it is truthy the Bedrock
    "Titan-Embeddings-G1" model is returned and ``is_KoSimCSERobert`` is not
    consulted.

    Args:
        is_bedrock_embeddings: Select the Bedrock embeddings model. Uses the
            notebook-level ``boto3_bedrock`` client and ``bedrock_info``.
        is_KoSimCSERobert: Select the KoSimCSERobert SageMaker endpoint.
        aws_region: Region handed to the SageMaker endpoint wrapper; not used
            for the Bedrock option.
        endpont_name: SageMaker endpoint name, required for the
            KoSimCSERobert option. The misspelled parameter name is kept so
            existing keyword callers continue to work.

    Returns:
        The embeddings object, or ``None`` when neither flag is set.

    Raises:
        ValueError: If KoSimCSERobert is selected without an endpoint name.
    """
    if is_bedrock_embeddings:
        # Imported lazily so the other options do not need this module.
        from langchain.embeddings import BedrockEmbeddings

        llm_emb = BedrockEmbeddings(
            client=boto3_bedrock,
            model_id=bedrock_info.get_model_id(
                model_name="Titan-Embeddings-G1"
            ),
        )
        print("Bedrock Embeddings Model Loaded")
        return llm_emb

    if is_KoSimCSERobert:
        # Fail here with a clear message instead of handing a missing
        # endpoint name to the SageMaker wrapper.
        if not endpont_name:
            raise ValueError(
                "endpont_name must be provided when is_KoSimCSERobert is True"
            )
        llm_emb = SagemakerEndpointEmbeddingsJumpStart(
            endpoint_name=endpont_name,
            region_name=aws_region,
            content_handler=KoSimCSERobertaContentHandler(),
        )
        print("KoSimCSERobert Embeddings Model Loaded")
        return llm_emb

    print("No Embedding Model Selected")
    return None

#### [중요] is_KoSimCSERobert == True 일시에 endpoint_name 을 꼭 넣어 주세요.

In [306]:
# Pick the embedding backend: Bedrock embeddings or the KoSimCSERobert endpoint.
is_bedrock_embeddings = True
is_KoSimCSERobert = False
aws_region = os.environ.get("AWS_DEFAULT_REGION")

# An endpoint name is only supplied for the KoSimCSERobert option.
endpont_name = "<endpoint-name>" if is_KoSimCSERobert else None

llm_emb = get_embedding_model(
    is_bedrock_embeddings,
    is_KoSimCSERobert,
    aws_region,
    endpont_name,
)

Bedrock Embeddings Model Loaded


## 3. Deploy ReRanker model (if needed)

In [307]:
import json
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

In [308]:
# Set to True to run the reranker deployment in the next cell.
# NOTE: the name is a misspelling of "deploy"; the deployment cell reads this exact name.
depoly = False

In [309]:
if depoly:
    # Deploy the BAAI/bge-reranker-large model to a SageMaker real-time endpoint.

    # Use the SageMaker execution role when it can be resolved; when
    # get_execution_role raises ValueError, look up the role named
    # 'sagemaker_execution_role' through IAM instead.
    try:
        role = sagemaker.get_execution_role()
    except ValueError:
        iam = boto3.client('iam')
        role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

    # Hub Model configuration. https://huggingface.co/models
    hub = {
        'HF_MODEL_ID':'BAAI/bge-reranker-large',
        'HF_TASK':'text-classification'
    }

    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
        transformers_version='4.26.0',
        pytorch_version='1.13.1',
        py_version='py39',
        env=hub,
        role=role,
    )

    # deploy model to SageMaker Inference
    predictor = huggingface_model.deploy(
        initial_instance_count=1, # number of instances
        instance_type='ml.g5.xlarge' # instance type
    )

    print(f'Accept: {predictor.accept}')
    print(f'ContentType: {predictor.content_type}')
    # `endpoint_name` replaces the deprecated `endpoint` attribute of SDK v2 predictors.
    print(f'Endpoint: {predictor.endpoint_name}')

## 4. Invocation (prediction)

In [310]:
# SageMaker runtime client, used below to call invoke_endpoint.
runtime_client = boto3.Session().client("sagemaker-runtime")
print(f"runtime_client: {runtime_client}")

runtime_client: <botocore.client.SageMakerRuntime object at 0x7fc68d5e7790>


In [311]:
# Name of the SageMaker endpoint to invoke; the commented-out line is an
# alternative endpoint (ml.m5.2xlarge instance).
#endpoint_name = "huggingface-pytorch-inference-2023-11-15-04-37-45-120" # ml.m5.2xlarge
endpoint_name = "huggingface-pytorch-inference-2023-11-15-07-53-21-605" # ml.g5.xlarge
# Despite its name this is a MIME type string; it is passed as `Accept` to invoke_endpoint.
deserializer = "application/json"

In [312]:
# English text / text_pair examples for the endpoint.
# NOTE: `payload` is reassigned by the following cells, so only the most
# recently executed assignment is actually sent.
payload = json.dumps(
    {
        "inputs": [
            {"text": "I hate you", "text_pair": "I don't like you"},
            {"text": "He hates you", "text_pair": "He like you"}
        ]
    }
)

In [313]:
# Korean text / text_pair examples:
#   "I don't love you" vs. "I don't like you"; "He hates you" vs. "He likes you".
# NOTE: `payload` is reassigned again by the next cell.
payload = json.dumps(
    {
        "inputs": [
            {"text": "나는 너를 사랑하지 않아", "text_pair": "나는 너를 좋아하지 않아"},
            {"text": "그는 너를 싫어해", "text_pair": "그는 너를 좋아해"}
        ]
    }
)

In [314]:
# Korean query/passage examples: the same question ("What is liberal arts?")
# paired with two different candidate passages.
payload = json.dumps(
    {
        "inputs": [
            {"text": "리버럴 아츠란 무엇인가?", "text_pair": "인문학. 1. 전문 또는 기술 과목이 아닌 예술, 인문학, 자연 과학 및 사회 과학을 포함하는 일반 지식을 제공하기 위한 대학의 학업 교육 과정."},
            {"text": "리버럴 아츠란 무엇인가?", "text_pair": "자유주의 교육: 개인의 역량을 강화하고 복잡성, 다양성 및 변화에 대처할 수 있도록 준비시키는 대학 학습 접근 방식.이 접근법은 광범위한 세계 (예: 과학, 문화, 사회) 에 대한 폭넓은 지식과 특정 관심 분야에서의 심층적인 성취를 강조합니다."}
        ]
    }
)

In [315]:
%%time
# Send the JSON payload to the endpoint, then parse the JSON reply body.
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType="application/json",
    Accept=deserializer,
)
out = json.loads(response["Body"].read().decode("utf-8"))
print(f"Response: {out}")

Response: [{'label': 'LABEL_0', 'score': 0.00045185565249994397}, {'label': 'LABEL_0', 'score': 9.195056190947071e-05}]
CPU times: user 27.9 ms, sys: 0 ns, total: 27.9 ms
Wall time: 105 ms


## 5. LangChain OpenSearch VectorStore 정의
### 선수 조건
- 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 OpenSearch Index 가 생성이 되어 있어야 합니다.
#### [중요] AWS Parameter Store에 아래 인증정보가 먼저 입력되어 있어야 합니다.
- 01_preprocess_docs/01_parameter_store_example.ipynb 참고

In [316]:
from utils.proc_docs import get_parameter

In [317]:
# Read the OpenSearch endpoint and credentials from the parameter store
# (SSM client pinned to us-east-1), in the order: endpoint, user id, password.
ssm = boto3.client("ssm", region_name="us-east-1")

opensearch_domain_endpoint, opensearch_user_id, opensearch_user_password = (
    get_parameter(boto3_clinet=ssm, parameter_name=name)
    for name in (
        "knox_opensearch_domain_endpoint",
        "knox_opensearch_userid",
        "knox_opensearch_password",
    )
)

In [318]:
# Credentials for the OpenSearch domain. The former self-assignment of
# opensearch_domain_endpoint was a no-op and has been removed.
rag_user_name = opensearch_user_id
rag_user_password = opensearch_user_password

http_auth = (rag_user_name, rag_user_password)  # (master username, master password)

### Index 이름 셋팅
- 이전 노트북 01_preprocess_docs/02_load_docs_opensearch.ipynb를 통해서 생성된 OpenSearch Index name 입력
- parent document 용으로 생성된 index 사용할 것 

In [319]:
# OpenSearch index to query; the commented-out lines are alternative index names.
#index_name = "v16-genai-poc-knox-eval-parent-doc-retriever"
index_name = "v15-genai-poc-knox-parent-doc-retriever"
#index_name = "v18-genai-poc-knox-kor-parent-doc-retriever"

### OpenSearch Client 생성

In [320]:
from utils.opensearch import opensearch_utils

In [321]:
# Create the OpenSearch client from the region, domain endpoint and basic-auth tuple.
# NOTE(review): aws_region comes from AWS_DEFAULT_REGION and is None when that
# variable is unset — confirm the helper accepts None.
os_client = opensearch_utils.create_aws_opensearch_client(
    aws_region,
    opensearch_domain_endpoint,
    http_auth
)

# 아래수정

## 6. Retriever based on Hybrid Search + ParentDocument + ReRanker
- LangChain에서 제공하는 **BaseRetriever** 클래스를 상속받아 **Custom Retriever**를 정의 할 수 있습니다.
- Hybrid-Search에 대한 자세한 내용은 **"01_rag_hybrid_search.ipynb"** 에서 확인 가능합니다.
- [Parent Document](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1)
![parent-document.png](../../../imgs/parent-document.png)

## Parent documents in Hybrid search
- Lexical search: parent documents only
- Semantic search: child documents first, getting parent documents corresponding to that child document

In [322]:
from utils.rag import OpenSearchHybridSearchRetriever

- 필터 설정 예시
- filter=[ <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your first keyword**"}}, <BR>
    　{"term": {"metadata.[**your_metadata_attribute_name**]": "**your second keyword**"}},<BR>
]

In [323]:
# Hybrid (lexical + semantic) retriever with the reranker and parent-document options enabled.
opensearch_hybrid_retriever = OpenSearchHybridSearchRetriever(
    os_client=os_client,
    index_name=index_name,
    llm_text=llm_text, # llm for query augmentation in both rag_fusion and HyDE
    llm_emb=llm_emb, # Used in semantic search based on opensearch

    # option for lexical
    minimum_should_match=0,
    filter=[],

    # option for search
    fusion_algorithm="RRF", # ["RRF", "simple_weighted"], selects the rank-fusion method
    ensemble_weights=[.5, .5], # [for lexical, for semantic], weight given to each search's results in the final ranking
    reranker=True, # enable reranker with reranker model
    reranker_endpoint_name=endpoint_name, # endpoint name for reranking model
    parent_document = True, # enable parent document

    # option for async search
    async_mode=True,

    # option for output
    k=5, # number of documents in the final result
    verbose=True,
)

### Retrieval example
- default search

In [324]:
from utils.rag import show_context_used

In [325]:
query = "what is verify DM"

In [326]:
%%time
# Run the hybrid retriever for `query`, then print a short summary and the documents.
search_hybrid_result = opensearch_hybrid_retriever.get_relevant_documents(query)

print(
    "\n==========  Results  ==========\n",
    f"1. question: {query}",
    f"2. # documents: {len(search_hybrid_result)}",
    "3. Documents: \n",
    sep="\n",
)

show_context_used(search_hybrid_result)

===== ParentDocument =====
filter: [{'term': {'metadata.family_tree': 'child'}}]
# child_docs: 5
# parent docs: 5
# duplicates: 0
##############################
async_mode
##############################
True
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
False
##############################
parent_document
##############################
True
##############################
similar_docs_semantic
##############################
[(Document(page_content='. The Admin package signature checksum is the Base64 encoded SHA-256 hash of the MDM APK signature, which is URL friendly. You can get this value from your MDM. Go here for information on the `EXTRA_PROVISIONING_DEVICE_ADMIN_SIGNATURE_CHECKSUM`. Alternatively, you can use utilities such as keytool on Linux to get this value. For additional information, go here.', metada

- update parameters

In [327]:
# Change the retriever's search options in place for the next query.
opensearch_hybrid_retriever.update_search_params(
    k=5,
    minimum_should_match=0,
    #filter=[],
    filter=[
        # Example term filters (disabled):
        #{'term': {'metadata.project': 'KS'}},
        #{'term': {'metadata.family_tree': 'child'}},
    ],
    reranker=True,
    reranker_endpoint_name=endpoint_name,
    rag_fusion=False,
    parent_document=False, # parent-document option turned off for this run
    verbose=True,
)

In [328]:
# Repeat the retrieval with the updated search parameters.
# NOTE(review): "vefify" looks like a typo for "verify"; the string is left as-is to keep behavior unchanged.
query = "how to vefify DM"
search_hybrid_result = opensearch_hybrid_retriever.get_relevant_documents(query)

print(
    "\n==========  Results  ==========\n",
    f"1. question: {query}",
    f"2. # documents: {len(search_hybrid_result)}",
    "3. Documents: \n",
    sep="\n",
)

show_context_used(search_hybrid_result)

##############################
async_mode
##############################
True
##############################
reranker
##############################
True
##############################
rag_fusion
##############################
False
##############################
HyDE
##############################
False
##############################
parent_document
##############################
False
##############################
similar_docs_semantic
##############################
[(Document(page_content='. 1. In the EMM groups tab, select the device groups you want to enroll. 2. Click Actions > Enroll devices in groups. The Select license pop-up shows the active licenses that have enough seats to accommodate all devices in your selected device group(s). If you want to see all licenses, click Clear Filters. 3. Select the license you want to use, and click Done. Assign a device group to a campaign Perform this procedure if you want to assign all devices in a device group to a campaign. 1. In the EM

## 7. RAG using RetrievalQA powered by LangChain

In [329]:
from utils.rag import prompt_repo
from utils.rag import run_RetrievalQA
from langchain.prompts import PromptTemplate

### Prompting
- [TIP] Prompt의 instruction의 경우 한글보다 영어로 했을 때 더 좋은 결과를 얻을 수 있습니다.

In [330]:
PROMPT = prompt_repo.get_qa(prompt_type="answer_only") # ["answer_only", "answer_with_ref", "original"]

In [331]:
pprint (PROMPT.template)

('\n'
 '            \n'
 '\n'
 'Human:\n'
 '            You are a master answer bot designed to answer software '
 "developer's questions.\n"
 "            I'm going to give you a context. Read the context carefully, "
 "because I'm going to ask you a question about it.\n"
 '\n'
 '            Here is the context: <context>{context}</context>\n'
 '            \n'
 '            First, find a few paragraphs or sentences from the context that '
 'are most relevant to answering the question.\n'
 '            Then, answer the question as much as you can.\n'
 '\n'
 '            Skip the preamble and go straight into the answer.\n'
 "            Don't insert any XML tag such as <context> and </context> when "
 'answering.\n'
 '            \n'
 '            Here is the question: <question>{question}</question>\n'
 '\n'
 '            If the question cannot be answered by the context, say "No '
 'relevant context".\n'
 '            \n'
 '\n'
 'Assistant: Here is the answer. ')


### Update Search Params (Optional)

In [332]:
from langchain.chains import RetrievalQA

In [334]:
# Search options used by the RetrievalQA chain built below.
opensearch_hybrid_retriever.update_search_params(
    k=6,
    minimum_should_match=0,
    filter=[
        # Example term filter (disabled):
        #{'term': {'metadata.family_tree': 'child'}},
    ],
    reranker=True,
    reranker_endpoint_name=endpoint_name,
    rag_fusion=False,
    #hyde=True,
    parent_document=False, # parent-document option turned off for this run
    verbose=False
)

### Request

In [335]:
# Question-answering chain over the hybrid retriever using the custom PROMPT;
# source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm_text,
    retriever=opensearch_hybrid_retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT, verbose=False),
    return_source_documents=True,
    verbose=False,
)

In [336]:
# Ask the chain a question. The commented-out line is a Korean-language alternative
# ("How do I set up Knox Mobile Enrollment for my organization?").
query = "How do I add an admin to Knox Mobile Enrollment?"
#query = "내 조직을 위해 Knox Mobile 등록을 설정하려면 어떻게 해야 하나요?"
response = qa(query)



To add an admin to Knox Mobile Enrollment:

1. Select Administrator & Roles from the left-hand navigation menu. Ensure the ADMINISTRATORS tab is selected.
2. Select INVITE ADMINISTRATOR from the upper, right-hand side of the screen. 
3. Provide the following details:
    - First name 
    - Last name
    - Email 
    - Role - Use the drop-down menu to assign this new administrator a role appropriate to their intended administrative function

4. Once the invitation is sent, the new admin will need to verify their Samsung Account details before accessing the Knox Mobile Enrollment console.

In [337]:
# Echo the question, then show the generated answer and its source documents.
print("##################################")
print("query: ", query)
print("##################################")

print(colored("\n\n### Answer ###", "blue"))
print_ww(response["result"])

print(colored("\n\n### Contexts ###", "green"))
show_context_used(response["source_documents"])

##################################
query:  How do I add an admin to Knox Mobile Enrollment?
##################################
[34m

### Answer ###[0m


To add an admin to Knox Mobile Enrollment:

1. Select Administrator & Roles from the left-hand navigation menu. Ensure the ADMINISTRATORS tab is
selected.
2. Select INVITE ADMINISTRATOR from the upper, right-hand side of the screen.
3. Provide the following details:
    - First name
    - Last name
    - Email
    - Role - Use the drop-down menu to assign this new administrator a role appropriate to their
intended administrative function

4. Once the invitation is sent, the new admin will need to verify their Samsung Account details
before accessing the Knox Mobile Enrollment console.
[32m

### Contexts ###[0m
-----------------------------------------------
1. Chunk: 1007 Characters
-----------------------------------------------
Add an admin. Knox Mobile Enrollment allows you to invite and manage admins, as well as assign
requisit

In [None]:
import pickle
def load_pickle(path='data/data.pickle'):
    """Load and return the object stored in a pickle file.

    Args:
        path: Location of the pickle file. Defaults to 'data/data.pickle',
            the path that was previously hard-coded, so existing
            zero-argument calls behave as before.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


