# Step 2. KB(Knowledge Base) 쿼리

### Setup

In [None]:
# After installing the packages, restart the kernel, then continue with the next cell.

%pip install --upgrade pip
%pip install boto3 --force-reinstall --quiet
%pip install botocore --force-reinstall --quiet
%pip install sqlalchemy==2.0.0 --quiet
%pip install langchain --force-reinstall --quiet

### 사전에 생성한 KB 목록 확인

* 사용하려는 KB의 ID를 확인합니다.

In [1]:
import boto3

def _print_knowledge_base(kb):
    """Print one knowledge base summary (name, ID, description, status, update time)."""
    print(f"- Name: {kb['name']}")
    print(f"  ID: {kb['knowledgeBaseId']}")
    print(f"  Description: {kb.get('description', 'N/A')}")
    print(f"  Status: {kb['status']}")
    print(f"  Last Updated: {kb['updatedAt']}")
    print("---")


def list_knowledge_bases():
    """Print every Bedrock knowledge base visible to the default boto3 credentials.

    Creates a ``bedrock-agent`` client, requests knowledge bases 100 at a time,
    and follows ``nextToken`` until the listing is exhausted. Each summary is
    printed under a single "Found Knowledge Bases:" header. If no page contains
    any summary, "No Knowledge Bases found." is printed instead.

    Returns:
        None. The listing is written to stdout.
    """
    # Bedrock agent (control-plane) client
    bedrock_agent = boto3.client('bedrock-agent')

    # Request parameters; 'nextToken' is added for follow-up pages.
    request = {'maxResults': 100}
    found_any = False

    while True:
        response = bedrock_agent.list_knowledge_bases(**request)

        for kb in response.get('knowledgeBaseSummaries', []):
            # Print the header lazily so an empty listing does not claim
            # that knowledge bases were found.
            if not found_any:
                print("Found Knowledge Bases:")
                found_any = True
            _print_knowledge_base(kb)

        # Pagination: stop when the service returns no continuation token.
        next_token = response.get('nextToken')
        if not next_token:
            break
        request['nextToken'] = next_token

    if not found_any:
        print("No Knowledge Bases found.")

# Run the listing when this code executes as the top-level module.
if __name__ == "__main__":
    list_knowledge_bases()

Found Knowledge Bases:
- Name: ITB-Civil_Eng
  ID: RYYEVXXULD
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-19 02:34:45.555184+00:00
---
- Name: 240718-test
  ID: LEX6UDHS1Q
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-18 04:14:32.625010+00:00
---
- Name: knowledge-base-quick-start-aafi3
  ID: KRQKUHNJIV
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-02-26 08:27:55.881890+00:00
---
- Name: ITB-ES_Contracts
  ID: JXGUVQQIK6
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-07-19 00:42:45.537844+00:00
---
- Name: knowledge-base-quick-start-en
  ID: DJLWVWNGBT
  Description: N/A
  Status: ACTIVE
  Last Updated: 2024-02-26 12:26:22.625358+00:00
---


### Bedrock Client 초기화

In [2]:
import boto3
import pprint
from botocore.client import Config
import json

# Pretty-printer kept around for the optional debug dumps in later cells.
pp = pprint.PrettyPrinter(indent=2)

# Take the region from the default boto3 session.
session = boto3.session.Session()
region = session.region_name

# Client settings: 120-second connect/read timeouts, retry max_attempts set to 0.
bedrock_config = Config(
    connect_timeout=120,
    read_timeout=120,
    retries={'max_attempts': 0},
)

# Runtime client used for model invocation (created without bedrock_config).
bedrock_client = boto3.client('bedrock-runtime', region_name=region)

# Agent runtime client used for knowledge base retrieval.
bedrock_agent_client = boto3.client(
    "bedrock-agent-runtime",
    region_name=region,
    config=bedrock_config,
)
print(region)

us-east-1


In [4]:
# User query used for both the knowledge base retrieval and the model prompt.

query = """
Reference Site condition
"""

### KB 리트리버 설정 (Hybrid Search)
* 위 목록에서 사용할 KB의 ID를 확인한 뒤, 아래 `kb_id` 값을 해당 ID로 수정하세요.

In [5]:
# Set this to the ID of the knowledge base to query (see the list printed above).
kb_id = "JXGUVQQIK6"

# numberOfResults is the number of chunks the RAG search should return.
def retrieve(query, kbId, numberOfResults=10):
    """Run a hybrid search against a knowledge base and return the raw response.

    Args:
        query: Text to search for.
        kbId: ID of the knowledge base to query.
        numberOfResults: Number of chunks to request (default 10).

    Returns:
        Whatever ``bedrock_agent_client.retrieve`` returns for this request.
    """
    search_settings = {
        'numberOfResults': numberOfResults,
        'overrideSearchType': "HYBRID",  # optional
    }
    return bedrock_agent_client.retrieve(
        knowledgeBaseId=kbId,
        retrievalQuery={'text': query},
        retrievalConfiguration={'vectorSearchConfiguration': search_settings},
    )

# Pull the text of each retrieved chunk out of the retrieval results.
def get_contexts(retrievalResults):
    """Return the ``['content']['text']`` value of every result, in input order.

    Args:
        retrievalResults: Iterable of result dicts, each with a nested
            ``content`` -> ``text`` entry.

    Returns:
        A list of the text strings.
    """
    return [hit['content']['text'] for hit in retrievalResults]

# Retrieve up to 10 chunks for the query and keep only their text.
response = retrieve(query, kb_id, 10)
retrievalResults = response['retrievalResults']
contexts = get_contexts(retrievalResults)

# Uncomment to inspect the retrieved chunks.
#pp.pprint(contexts)

In [6]:
# 프롬프트 템플릿

prompt = f"""
Human: You are a ITB advisor AI system, and provides answers to questions by using fact based when possible. 
Use the following pieces of information to provide a detail answer to the question enclosed in <question> tags. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{contexts}
</context>

<question>
{query}
</question>

The response should be specific and use statistics or numbers when possible.

Assistant:"""

### Amazon Bedrock에서 Claude 3 모델 호출

In [7]:
# Payload with model parameters.
# `prompt` is an f-string that was already filled in with the contexts and the
# query, so it is sent as-is. Running str.format() over it again would treat the
# retrieved document text as a format string and raise (KeyError / IndexError /
# ValueError) as soon as that text contains a literal "{" or "}".
messages=[{ "role":'user', "content":[{'type':'text','text': prompt}]}]
sonnet_payload = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 4096,
    "messages": messages,
    "temperature": 0.0,
    "top_p": 0
        }  )

modelId = 'anthropic.claude-3-sonnet-20240229-v1:0' # change this to use a different version from the model provider
accept = 'application/json'
contentType = 'application/json'
# Invoke the model and decode the JSON body of the response.
response = bedrock_client.invoke_model(body=sonnet_payload, modelId=modelId, accept=accept, contentType=contentType)
response_body = json.loads(response.get('body').read())
# The generated text is read from the first entry of the 'content' list.
response_text = response_body.get('content')[0]['text']

#pp.pprint(response_text)

print(response_text)

According to the provided context, the Reference Site Conditions (RSC) for the plant are:

- Ambient air temperature (dry bulb): 50°C
- Relative humidity: 35%
- Atmospheric pressure: 1013 mbar
- Seawater temperature: 35°C
- Seawater Total Dissolved Solids (TDS): 45900 mg/l


---

---

## LangChain 으로 구현

In [12]:
!pip install langchain_aws langchain-community -U

Collecting langchain-community
  Downloading langchain_community-0.2.7-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.21.3-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m100.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (2

In [14]:
# from langchain.llms.bedrock import Bedrock
import langchain
from langchain_aws import ChatBedrock
from langchain.retrievers.bedrock import AmazonKnowledgeBasesRetriever

# Chat model wrapper built on the existing bedrock-runtime client and model ID.
llm = ChatBedrock(client=bedrock_client, model_id=modelId)

In [16]:
# User query (replaces the `query` defined earlier in the notebook).
query = """
Reference Site condition 에 대해서 알려주세요.
"""

# Retrieve the relevant passages from the knowledge base (hybrid search, 10 results).
retriever = AmazonKnowledgeBasesRetriever(
        knowledge_base_id=kb_id,
        retrieval_config={"vectorSearchConfiguration": 
                          {"numberOfResults": 10,
                           'overrideSearchType': "HYBRID", # optional
                           }
                          },
        # endpoint_url=endpoint_url,
        # region_name=region,
        # credentials_profile_name="<profile_name>",
    )
# Retrievers are Runnables: invoke() replaces the deprecated
# get_relevant_documents() and returns the same list of documents.
docs = retriever.invoke(query)

#pp.pprint(docs)

In [17]:
from langchain.prompts import PromptTemplate

# 프롬프트 템플릿
PROMPT_TEMPLATE = """
Human: You are a ITB advisor AI system, and provides answers to questions by using fact based when possible. 
Use the following pieces of information to provide a detail answer to the question enclosed in <question> tags. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context>

<question>
{question}
</question>

The response should be specific and use statistics or numbers when possible.

Assistant:"""
claude_prompt = PromptTemplate(template=PROMPT_TEMPLATE, 
                               input_variables=["context","question"])

In [20]:
from langchain.chains import RetrievalQA

# Build a "stuff"-type RetrievalQA chain over the KB retriever with the custom
# prompt, and ask it to return the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": claude_prompt}
)

answer = qa.invoke(query)

# Print the answer
print("답변:")
print(answer['result'])

# Print the source documents (the S3 URI stored in each document's metadata)
print("\n출처:")
for i, doc in enumerate(answer['source_documents'], 1):
    print(f"{i}. {doc.metadata['location']['s3Location']['uri']}")

답변:
문서에서 제시된 Reference Site Condition (RSC)은 다음과 같습니다:

- 대기 압력: 1013 mbar
- 주위 공기 온도 (건구 온도): 50°C  
- 주위 공기 온도 (습구 온도): 34°C
- 상대 습도: 35%
- 해수 온도: 35°C  
- 해수 총 용존 고형분 (TDS): 45,900 mg/l

또한 연료 데이터로는 다음이 제공되었습니다:

- 연료 종류: 천연가스
- 저위 발열량 (LHV): 46,244 kJ/Nm3
- 카타르 석유에 공급되는 압력 범위: 33 - 42 bar (a)

이것이 본 프로젝트의 설계 기준이 되는 현장 조건(Reference Site Condition)입니다.

출처:
1. s3://240719-jesamkim-bucket/ES_Contracts/2_UHP/Schedules_Execution Version/Execution Version_Schedule 22A_Technical Limits/Schedule 22A - Annex 3 - Section 3.2_Design and Performance Data.pdf
2. s3://240719-jesamkim-bucket/ES_Contracts/1_UHP/Schedules_Execution Version/Execution Version_Schedule 16A_Part 3 EPC Contractor Proposal/Section 3/Section 3.2 Design and Peroformance Data/Section 3.2_Design and Performance Data.pdf
3. s3://240719-jesamkim-bucket/ES_Contracts/2_UHP/Schedules_Execution Version/Execution Version_Schedule 16A_Part 1_MFS/Part 1 Appendix C/Sch 16A Part 1 App C - Annex 6 Section 3.2_Design and Performance Data