# Step 2. KB(Knowledge Base) 검색

### Setup

In [None]:
# After installing the packages, restart the kernel, then continue with the next cell.
# NOTE(review): only sqlalchemy is version-pinned here; the other packages resolve to
# whatever version is current at install time.

%pip install --upgrade pip
%pip install boto3 --force-reinstall --quiet
%pip install botocore --force-reinstall --quiet
%pip install sqlalchemy==2.0.0 --quiet
%pip install langchain --force-reinstall --quiet

### 사전에 생성한 KB 목록 확인

* 사용하려는 KB의 ID 를 확인합니다.

In [1]:
import boto3

def _print_knowledge_base(kb):
    """Print name, ID, description, status and update time of one KB summary."""
    print(f"- Name: {kb['name']}")
    print(f"  ID: {kb['knowledgeBaseId']}")
    print(f"  Description: {kb.get('description', 'N/A')}")
    print(f"  Status: {kb['status']}")
    print(f"  Last Updated: {kb['updatedAt']}")
    print("---")


def list_knowledge_bases(bedrock_agent=None):
    """Print every Bedrock knowledge base visible to the caller, following pagination.

    Args:
        bedrock_agent: Optional client exposing ``list_knowledge_bases``. When
            omitted, a default ``boto3.client('bedrock-agent')`` is created, which
            matches the original zero-argument behaviour.

    Returns:
        None. The listing is written to stdout. "No Knowledge Bases found." is
        printed when no page contains any summary.
    """
    if bedrock_agent is None:
        # Create the Bedrock agent client
        bedrock_agent = boto3.client('bedrock-agent')

    request = {'maxResults': 100}  # maximum number of results per page
    found_any = False

    while True:
        response = bedrock_agent.list_knowledge_bases(**request)

        # .get() with a default keeps a page without summaries from raising.
        for kb in response.get('knowledgeBaseSummaries', []):
            if not found_any:
                # Header is printed only once an actual entry exists, so an
                # empty result no longer shows "Found" followed by nothing.
                print("Found Knowledge Bases:")
                found_any = True
            _print_knowledge_base(kb)

        # Pagination: keep requesting while the service hands back a token.
        next_token = response.get('nextToken')
        if not next_token:
            break
        request['nextToken'] = next_token

    if not found_any:
        print("No Knowledge Bases found.")

# Print the knowledge base listing when this cell is executed as the main module.
if __name__ == "__main__":
    list_knowledge_bases()

Found Knowledge Bases:
- Name: 2-civil-eng-01
  ID: VIIQSV2GCL
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-23 03:06:15.586610+00:00
---
- Name: 240722-test-kb
  ID: PIWYW746HW
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-22 04:51:26.268276+00:00
---
- Name: 2-civil-eng-02
  ID: LO9DRKGBTX
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-23 03:08:42.294328+00:00
---
- Name: knowledge-base-quick-start-aafi3
  ID: KRQKUHNJIV
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-02-26 08:27:55.881890+00:00
---
- Name: knowledge-base-quick-start-en
  ID: DJLWVWNGBT
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-02-26 12:26:22.625358+00:00
---
- Name: 1-es-eng
  ID: 2S7W5D8WOO
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-23 02:55:14.020228+00:00
---


## 방법1: boto3 SDK 로 구현

In [2]:
# Initialize the Bedrock clients

import boto3
import pprint
from botocore.client import Config
import json
import time

pp = pprint.PrettyPrinter(indent=2)
session = boto3.session.Session()
# Region is taken from the default boto3 session.
region = session.region_name
# 120-second connect/read timeouts; retries max_attempts is set to 0.
bedrock_config = Config(connect_timeout=120, read_timeout=120, retries={'max_attempts': 0})
# NOTE(review): bedrock_config is passed only to the agent-runtime client below; the
# bedrock-runtime client is created without it — confirm that this is intended.
bedrock_client = boto3.client('bedrock-runtime', region_name = region)
bedrock_agent_client = boto3.client("bedrock-agent-runtime",
                              config=bedrock_config, region_name = region)
print(region)

us-east-1


In [3]:
# User query

query = """
Reference Site condition
"""

### KB 리트리버 설정 (Hybrid Search)
* 위에서 확인한 KB ID 로 아래 셀의 `kb_id` 값을 수정하세요.

In [4]:
# Replace this value with the ID of the KB to use, as shown in the listing above.
kb_id = "2S7W5D8WOO"  ## KB name : 1-es-eng

def retrieve(query, kbId, numberOfResults=10):
    """Run a hybrid-search retrieval against a knowledge base.

    Args:
        query: Text sent as the retrieval query.
        kbId: ID of the knowledge base to search.
        numberOfResults: Number of chunks the RAG search should fetch (default 10).

    Returns:
        Whatever ``bedrock_agent_client.retrieve`` returns, unchanged.
    """
    vector_search = {
        'numberOfResults': numberOfResults,
        'overrideSearchType': "HYBRID",  # optional
    }
    return bedrock_agent_client.retrieve(
        retrievalQuery={'text': query},
        knowledgeBaseId=kbId,
        retrievalConfiguration={'vectorSearchConfiguration': vector_search},
    )

def get_contexts(retrievalResults):
    """Extract the chunk text from each retrieval result.

    Args:
        retrievalResults: Iterable of result dicts, each read at
            ``['content']['text']``.

    Returns:
        A list with one text entry per result, in the same order.
    """
    return [item['content']['text'] for item in retrievalResults]

response = retrieve(query, kb_id, 20) # Request 20 chunks
retrievalResults = response['retrievalResults']
contexts = get_contexts(retrievalResults)

#pp.pprint(contexts)

In [5]:
# 프롬프트 템플릿

prompt = f"""
Human: You are an ITB(Invitation To Bid) advisor AI system, and provides answers to questions by using fact based. 
Use the following pieces of information to provide a detail answer to the question enclosed in <question> tags. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

<context>
{contexts}
</context>

<question>
{query}
</question>

The response should be specific and use statistics or numbers.

Assistant:"""

### Amazon Bedrock에서 Claude 3 모델 호출

In [6]:
%%time

# Payload with model parameters.
# `prompt` has already been fully interpolated by the f-string that defines it, so it
# is sent as-is. A second str.format() pass would treat the retrieved document text as
# a format template: it would raise on a stray '{' or '}' in any chunk, or silently
# re-substitute on a literal '{}'.
messages=[{ "role":'user', "content":[{'type':'text','text': prompt}]}]
sonnet_payload = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 4096,
    "messages": messages,
    "temperature": 0.0,
    "top_p": 0
        }  )

#modelId = "anthropic.claude-3-5-sonnet-20240620-v1:0"  # Claude 3.5 Sonnet
modelId = 'anthropic.claude-3-sonnet-20240229-v1:0'     # Claude 3 Sonnet
accept = 'application/json'
contentType = 'application/json'
response = bedrock_client.invoke_model(body=sonnet_payload, modelId=modelId, accept=accept, contentType=contentType)
# The response body is a stream; read it once and parse the JSON document.
response_body = json.loads(response.get('body').read())
# The generated text is read from the first content block of the response.
response_text = response_body.get('content')[0]['text']

#pp.pprint(response_text)

print(response_text)

According to the provided context, the Reference Site Conditions (RSC) are defined as:

- Ambient air temperature of 46°C
- Ambient relative air humidity of 40% 
- Ambient air pressure of 1,013 mbar
- Seawater temperature of 33°C
- Transmission System frequency of 50 Hz
CPU times: user 15 ms, sys: 314 μs, total: 15.4 ms
Wall time: 11 s


---

---

## 방법2 : LangChain 으로 구현

In [None]:
!pip install langchain_aws langchain-community -U

In [8]:
# Initialize the Bedrock clients (same statements as the client setup in the boto3 section)

import boto3
import pprint
from botocore.client import Config
import json
import time

pp = pprint.PrettyPrinter(indent=2)
session = boto3.session.Session()
# Region is taken from the default boto3 session.
region = session.region_name
# 120-second connect/read timeouts; retries max_attempts is set to 0.
bedrock_config = Config(connect_timeout=120, read_timeout=120, retries={'max_attempts': 0})
# NOTE(review): bedrock_config is passed only to the agent-runtime client below; the
# bedrock-runtime client is created without it — confirm that this is intended.
bedrock_client = boto3.client('bedrock-runtime', region_name = region)
bedrock_agent_client = boto3.client("bedrock-agent-runtime",
                              config=bedrock_config, region_name = region)
print(region)

us-east-1


In [9]:
# from langchain.llms.bedrock import Bedrock
import langchain
from langchain_aws import ChatBedrock
from langchain.retrievers.bedrock import AmazonKnowledgeBasesRetriever

# Chat model built on the existing bedrock-runtime client.
# NOTE(review): modelId is not assigned anywhere in this section; it is the variable set
# in the model-invocation cell of the boto3 section, so that cell must run first.
llm = ChatBedrock(model_id=modelId, 
                  client=bedrock_client)

In [10]:
# User query
query = """
Reference Site condition 에 대해서 알려주세요.
"""


kb_id = "2S7W5D8WOO"  ## KB name : 1-es-eng

# Retriever that searches the knowledge base for content related to the query
retriever = AmazonKnowledgeBasesRetriever(
        knowledge_base_id=kb_id,
        retrieval_config={"vectorSearchConfiguration":
                          {"numberOfResults": 20,  # request 20 chunks
                           'overrideSearchType': "HYBRID", # optional
                           }
                          },
        # endpoint_url=endpoint_url,
        # region_name=region,
        # credentials_profile_name="<profile_name>",
    )
# Retrievers are Runnables, so invoke() is the supported entry point. The older
# get_relevant_documents() is deprecated and emits a deprecation warning; with the
# unpinned installs above it can disappear in a newer release.
docs = retriever.invoke(query)

#pp.pprint(docs)

  warn_deprecated(


In [13]:
from langchain.prompts import PromptTemplate

# 프롬프트 템플릿
PROMPT_TEMPLATE = """
Human: You are an ITB(Invitation To Bid) advisor AI system, and provides answers to questions by using fact based. 
Use the following pieces of information to provide a detail answer to the question enclosed in <question> tags. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

<context>
{context}
</context>

<question>
{question}
</question>

The response should be specific and use statistics or numbers.

Assistant:"""
claude_prompt = PromptTemplate(template=PROMPT_TEMPLATE, 
                               input_variables=["context","question"])

In [15]:
%%time

from langchain.chains import RetrievalQA

# "stuff" chain over the KB retriever, using claude_prompt and returning the source
# documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": claude_prompt}
)

answer = qa.invoke(query)

# Print the answer
print("답변:")
print(answer['result'])
print()

# Print the source document information
print("\n출처:")
for i, doc in enumerate(answer['source_documents'], 1):
    # assumes every source document has an S3 location in its metadata — TODO confirm
    # for knowledge bases backed by non-S3 data sources
    print(f"{i}. {doc.metadata['location']['s3Location']['uri']}")
    
print()

답변:
계약서에 명시된 Reference Site Condition(RSC)는 다음과 같습니다:

- 대기 온도 46°C
- 상대 습도 40% 
- 대기압 1,013 mbar
- 해수 온도 33°C
- 해수 염분 40.5 g/kg
- 주파수 50 Hz

이는 플랜트의 설계 및 성능 평가의 기준이 되는 현장 조건입니다. 플랜트의 설계 용량, 연료 운전 한계, 전력 산출량 등이 이 RSC 조건에서 결정됩니다.


출처:
1. s3://240719-jesamkim-bucket/ES_Contracts/2_UHP/Schedules_Execution Version/Execution Version_Schedule 16A_Part 1_MFS/Part 1 Appendix C/Sch 16A Part 1 App C - Annex 6 Section 3.2_Design and Performance Data.pdf
2. s3://240719-jesamkim-bucket/ES_Contracts/1_UHP/Schedules_Execution Version/Execution Version_Schedule 16A_Part 3 EPC Contractor Proposal/Section 3/Section 3.2 Design and Peroformance Data/Section 3.2_Design and Performance Data.pdf
3. s3://240719-jesamkim-bucket/ES_Contracts/2_UHP/Schedules_Execution Version/Execution Version_Schedule 22A_Technical Limits/Schedule 22A - Annex 3 - Section 3.2_Design and Performance Data.pdf
4. s3://240719-jesamkim-bucket/ES_Contracts/UAE_F3/EPC/Fujairah F3 IPP - EPC Contract - Appendix C (Final).DOC
5. s3://240