In [18]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import AmazonKendraRetriever

from langchain import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain

import os
import boto3

os.environ["AWS_DEFAULT_REGION"] = "us-east-1"  # E.g. "us-east-1"
# os.environ["AWS_PROFILE"] = "bedrock_claude"
os.environ["BEDROCK_ENDPOINT_URL"] = "https://bedrock-runtime.us-east-1.amazonaws.com"  # E.g. "https://..."

session = boto3.Session(
    profile_name=os.environ.get("AWS_PROFILE")
) # sets the profile name to use for AWS credentials

bedrock = session.client(
    service_name='bedrock-runtime', # creates a Bedrock client
    region_name=os.environ.get("AWS_DEFAULT_REGION"),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL")
) 

from langchain.llms.bedrock import Bedrock

# - create the Anthropic Model
llm = Bedrock(model_id="anthropic.claude-v2", client=bedrock, model_kwargs={'max_tokens_to_sample':1000, 'temperature': 0})

In [19]:
kendra_index_id = "<YOUR_KENDRA_INDEX>" # Example: 65702b79-bbae-4c93-b45b-9702f17fb994
kendra_index_id = "458952bb-3b13-4dc4-9321-e625b077bab9"

retriever = AmazonKendraRetriever(
    index_id=kendra_index_id,
    region_name=os.environ.get("AWS_DEFAULT_REGION", None),
    top_k=3,
    attribute_filter = {
        "EqualsTo": {      
            "Key": "_language_code",
            "Value": {
                "StringValue": "ko"
            }
        }
    }
)

In [25]:
prompt_template = """

Human: This is a friendly conversation between a human and an AI. 
The AI is talkative and provides specific details from its context but limits it to 240 tokens.
If the AI does not know the answer to a question, it truthfully says it 
does not know.

Assistant: OK, got it, I'll be a talkative truthful AI assistant.

Human: Here are a few documents in <documents> tags:
<documents>
{context}
</documents>
Based on the above documents, provide a detailed answer for, {question} 
Answer "시스템에 관련된 정보가 없습니다." if not present in the document. 

Assistant:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [26]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)


In [27]:
%%time
query = "근무시간 알려줘"
result = qa({"query": query})

CPU times: user 9.42 ms, sys: 3.77 ms, total: 13.2 ms
Wall time: 10.2 s


In [28]:
print(result['result'])
result['source_documents']

 문서에 따르면,

- 주중 근무 시간은 대부분 매장에서 오전 9시부터 오후 9시까지입니다. 

- 주말에도 영업하는 매장에서는 주말에도 근무가 있으며, 주말 근무는 로테이션 제도가 적용될 수 있습니다.

- 대형 매장의 경우 교대 근무 제도가 적용되어 오전/오후 교대 근무가 있습니다.

- 일부 직원은 시간제 근무를 통해 유연한 근무 시간을 가질 수 있습니다.

- 근무 스케줄은 매장 관리자나 인사팀에서 주간/월간으로 작성하며 미리 안내됩니다. 

- 휴가나 휴식 시간도 스케줄에 고려됩니다.

이상이 근무 시간에 대한 주요 내용입니다. 시스템에 대한 구체적인 언급은 문서에 없는 것 같습니다.


[Document(page_content='Document Title: 근무 시간과 스케줄.docx\nDocument Excerpt: \n1. 근무 시간: · 주중 근무: 대부분의 매장은 주중에 영업하며, 근무 시간은 보통 오전 9시부터 오후 9시까지입니다. 다만, 이 시간은 매장 및 지역에 따라 다를 수 있습니다. · 주말 근무: 주말에도 영업하는 매장의 경우 직원은 주말에 근무할 수 있으며, 주말 근무 로테이션 시스템이 적용될 수 있습니다. · 교대 근무: 대형 매장의 경우, 교대 근무 시스템을 운영하여 오전 근무와 오후 근무로 나누어 직원의 피로를 분산시킬 수 있습니다. · 시간제 근무: 일부 직원들은 시간제 근무를 통해 풀타임 또는 파트타임으로 근무할 수 있으며, 이에 따라 근무 시간이 유연하게 조절됩니다. 2. 스케줄: · 스케줄 작성: 매장 관리자 또는 인사팀은 직원들의 스케줄을 작성합니다. 이는 주간 또는 월간 스케줄로 작성될 수 있으며, 직원들에게 미리 통보됩니다.\n', metadata={'result_id': '44f4a173-5117-41b0-9803-88227d128577-3bbc5e16-09d7-4a8e-95bb-6a69c157dfde', 'document_id': 's3://kendra-for-workshop-d44bb6c0-767f-11ee-9d7b-0ea1c2170c63/근무 시간과 스케줄.docx', 'source': 'https://kendra-for-workshop-d44bb6c0-767f-11ee-9d7b-0ea1c2170c63.s3.amazonaws.com/%E1%84%80%E1%85%B3%E1%86%AB%E1%84%86%E1%85%AE%20%E1%84%89%E1%85%B5%E1%84%80%E1%85%A1%E1%86%AB%E1%84%80%E1%85%AA%20%E1%84%89%E1%85%B3%E1%84%8F%E1%85%A6%E1%84%8C%E1%85%AE%E1%86%AF.docx', '

In [None]:
%%writefile kendra_claude.py

from langchain.retrievers import AmazonKendraRetriever
from langchain.prompts import PromptTemplate
import sys
import os

import boto3
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock

def build_chain():

  session = boto3.Session(
      profile_name=os.environ.get("AWS_PROFILE")
  ) 
  boto3_bedrock = session.client(
    service_name='bedrock-runtime', 
    region_name=os.environ.get("AWS_DEFAULT_REGION"),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL")
  ) 
    
  region = os.environ["AWS_REGION"]
  kendra_index_id = "<YOUR_KENDRA_INDEX>" # Example: 65702b79-bbae-4c93-b45b-9702f17fb994
  kendra_index_id = "458952bb-3b13-4dc4-9321-e625b077bab9"

  # llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY, max_tokens_to_sample = 512)
  llm = Bedrock(model_id="anthropic.claude-v2", client=boto3_bedrock, model_kwargs={'max_tokens_to_sample':1000})
  
  retriever = AmazonKendraRetriever(
    index_id=kendra_index_id,
    region_name=os.environ.get("AWS_DEFAULT_REGION", None),
    top_k=3,
    attribute_filter = {
        "EqualsTo": {      
            "Key": "_language_code",
            "Value": {
                "StringValue": "ko"
            }
        }
    }
  )
  # prompt_template = """Human: Use the following pieces of context to provide a concise answer to the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
  prompt_template = """Human: Use the following pieces of context to provide a concise answer to the question at the end. If the answer is not in the context, just say "시스템에 관련 내용을 찾을 수 없습니다.", don't try to make up an answer.

  {context}

  Question: {question}
  Assistant:"""

  PROMPT = PromptTemplate(
      template=prompt_template, input_variables=["context", "question"]
  )


  qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
  )

  return qa

def run_chain(chain, prompt: str):
  return chain({"query": prompt})

In [None]:
%%writefile app.py

import streamlit as st
import sys

import kendra_claude as claude

USER_ICON = "images/user-icon.png"
AI_ICON = "images/ai-icon.png"
MAX_HISTORY_LENGTH = 5

if 'llm_chain' not in st.session_state:
    st.session_state['llm_app'] = claude
    st.session_state['llm_chain'] = claude.build_chain()

if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
    
if "chats" not in st.session_state:
    st.session_state.chats = [
        {
            'id': 0,
            'question': '',
            'answer': ''
        }
    ]

if "questions" not in st.session_state:
    st.session_state.questions = []

if "answers" not in st.session_state:
    st.session_state.answers = []

if "input" not in st.session_state:
    st.session_state.input = ""


st.markdown("""
        <style>
               .block-container {
                    padding-top: 32px;
                    padding-bottom: 32px;
                    padding-left: 0;
                    padding-right: 0;
                }
                .element-container img {
                    background-color: #000000;
                }

                .main-header {
                    font-size: 24px;
                }
        </style>
        """, unsafe_allow_html=True)

def write_logo():
    col1, col2, col3 = st.columns([5, 1, 5])
    with col2:
        st.image(AI_ICON, use_column_width='always') 


def write_top_bar():
    col1, col2, col3 = st.columns([1,10,2])
    with col1:
        st.image(AI_ICON, use_column_width='always')
    with col2:
        header = f"Amazon Bedrock이 제공하는 AI 서비스!"
        st.write(f"<h3 class='main-header'>{header}</h3>", unsafe_allow_html=True)
    with col3:
        clear = st.button("Clear Chat")
    return clear

clear = write_top_bar()

if clear:
    st.session_state.questions = []
    st.session_state.answers = []
    st.session_state.input = ""
    st.session_state["chat_history"] = []

def handle_input():
    input = st.session_state.input
    question_with_id = {
        'question': input,
        'id': len(st.session_state.questions)
    }
    st.session_state.questions.append(question_with_id)

    chat_history = st.session_state["chat_history"]
    if len(chat_history) == MAX_HISTORY_LENGTH:
        chat_history = chat_history[:-1]

    llm_chain = st.session_state['llm_chain']
    chain = st.session_state['llm_app']
    result = chain.run_chain(llm_chain, input)
    answer = result['result']
    chat_history.append((input, answer))
    
    document_list = []
    if 'source_documents' in result:
        for d in result['source_documents']:
            if not (d.metadata['source'] in document_list):
                document_list.append((d.metadata['source']))

    st.session_state.answers.append({
        'answer': result,
        'sources': document_list,
        'id': len(st.session_state.questions)
    })
    st.session_state.input = ""

def write_user_message(md):
    col1, col2 = st.columns([1,12])
    
    with col1:
        st.image(USER_ICON, use_column_width='always')
    with col2:
        st.warning(md['question'])


def render_result(result):
    answer, sources = st.tabs(['Answer', 'Sources'])
    with answer:
        render_answer(result['answer'])
    with sources:
        if 'source_documents' in result:
            render_sources(result['source_documents'])
        else:
            render_sources([])

def render_answer(answer):
    col1, col2 = st.columns([1,12])
    with col1:
        st.image(AI_ICON, use_column_width='always')
    with col2:
        st.info(answer['result'])

def render_sources(sources):
    col1, col2 = st.columns([1,12])
    with col2:
        with st.expander("Sources"):
            for s in sources:
                st.write(s)

    
#Each answer will have context of the question asked in order to associate the provided feedback with the respective question
def write_chat_message(md, q):
    chat = st.container()
    with chat:
        render_answer(md['answer'])
        render_sources(md['sources'])
    
        
with st.container():
  for (q, a) in zip(st.session_state.questions, st.session_state.answers):
    write_user_message(q)
    write_chat_message(a, q)

st.markdown('---')
input = st.text_input("질문을 해주세요!", key="input", on_change=handle_input)

In [None]:
%%writefile requirements.txt

boto3==1.28.64
streamlit==1.20.0
langchain

In [None]:
%%writefile setup.sh

pip install --no-cache-dir -r requirements.txt
sudo yum install -y iproute
sudo yum install -y jq
sudo yum install -y lsof

In [None]:
%%writefile run.sh

#!/bin/sh
CURRENTDATE=`date +"%Y-%m-%d %T"`
RED='\033[0;31m'
CYAN='\033[1;36m'
GREEN='\033[1;32m'
NC='\033[0m'
S3_PATH=$1

# Run the Streamlit app and save the output to "temp.txt"
streamlit run app.py > temp.txt & 

# Read the text file using cat
echo "Getting the URL to view your Streamlit app in the browser"

# Extract the last four digits of the port number from the Network URL
sleep 5
PORT=$(grep "Network URL" temp.txt | awk -F':' '{print $NF}' | awk '{print $1}' | tail -c 5)
echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Port Number ${PORT}" 



# Get Studio domain information
DOMAIN_ID=$(jq .DomainId /opt/ml/metadata/resource-metadata.json || exit 1)
RESOURCE_NAME=$(jq .ResourceName /opt/ml/metadata/resource-metadata.json || exit 1)
RESOURCE_ARN=$(jq .ResourceArn /opt/ml/metadata/resource-metadata.json || exit 1)

# Remove quotes from string
DOMAIN_ID=`sed -e 's/^"//' -e 's/"$//' <<< "$DOMAIN_ID"`
RESOURCE_NAME=`sed -e 's/^"//' -e 's/"$//' <<< "$RESOURCE_NAME"`
RESOURCE_ARN=`sed -e 's/^"//' -e 's/"$//' <<< "$RESOURCE_ARN"`
RESOURCE_ARN_ARRAY=($(echo "$RESOURCE_ARN" | tr ':' '\n'))

# Get Studio domain region
REGION=$(echo "${RESOURCE_ARN_ARRAY[3]}")

# Check if it's Collaborative Space
SPACE_NAME=$(jq .SpaceName /opt/ml/metadata/resource-metadata.json || exit 1)

# if it's not a collaborative space 
if [ -z "$SPACE_NAME" ] || [ $SPACE_NAME == "null" ] ;
then
    # If it's a user-profile access
    echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Domain Id ${DOMAIN_ID}"
    STUDIO_URL="https://${DOMAIN_ID}.studio.${REGION}.sagemaker.aws"
    
# It is a collaborative space
else

    SEM=true
    SPACE_ID=

    # Check if Space Id was previously configured
    if [ -f /tmp/space-metadata.json ]; then
        SAVED_SPACE_ID=$(jq .SpaceId /tmp/space-metadata.json || exit 1)
        SAVED_SPACE_ID=`sed -e 's/^"//' -e 's/"$//' <<< "$SAVED_SPACE_ID"`

        if [ -z "$SAVED_SPACE_ID" ] || [ $SAVED_SPACE_ID == "null" ]; then
            ASK_INPUT=true
        else
            ASK_INPUT=false
        fi
    else
        ASK_INPUT=true
    fi

    # If Space Id is not available, ask for it
    while [[ $SPACE_ID = "" ]] ; do
        # If Space Id already configured, skeep the ask
        if [ "$ASK_INPUT" = true ]; then
            echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Please insert the Space Id from your url. e.g. https://${GREEN}<SPACE_ID>${NC}.studio.${REGION}.sagemaker.aws/jupyter/default/lab"
            read SPACE_ID
            SEM=true
        else
            SPACE_ID=$SAVED_SPACE_ID
        fi

        if ! [ -z "$SPACE_ID" ] && ! [ $SPACE_ID == "null" ] ;
        then
            while $SEM; do
                echo "${SPACE_ID}"
                read -p "Should this be used as Space Id? (y/N) " yn
                case $yn in
                    [Yy]* )
                        echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Domain Id ${DOMAIN_ID}"
                        echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Space Id ${SPACE_ID}"

                        jq -n --arg space_id $SPACE_ID '{"SpaceId":$space_id}' > /tmp/space-metadata.json

                        STUDIO_URL="https://${SPACE_ID}.studio.${REGION}.sagemaker.aws"

                        SEM=false
                        ;;
                    [Nn]* ) 
                        SPACE_ID=
                        ASK_INPUT=true
                        SEM=false
                        ;;
                    * ) echo "Please answer yes or no.";;
                esac
            done
        fi
    done
fi

echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Studio Url ${STUDIO_URL}"


link="${STUDIO_URL}/jupyter/${RESOURCE_NAME}/proxy/${PORT}/"

echo -e "${CYAN}${CURRENTDATE}: [INFO]:${NC} Starting Streamlit App"
echo -e "${CYAN}${CURRENTDATE}: [INFO]: ${GREEN}${link}${NC}"

exit 0
fi