# RAG
* Container: `Data Science 3.0` (studio, python 3.10), `conda_python3` (notebook)

## 0. Install packages and Setup env

In [1]:
import sys

In [2]:
# Reload edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
sys.path.append('../utils') # make the modules in ../utils importable (original note: set the src folder path)

In [4]:
# Gates the install cell, which restarts the kernel after installing;
# set to False after the first run so a re-run does not restart again.
install_needed = True  # should only be True once

In [5]:
import sys
import IPython

# One-time bootstrap: upgrade pip and the libraries used by this notebook with the
# kernel's own interpreter (sys.executable), then restart the kernel.
# NOTE(review): versions are not pinned (-U pulls the latest release each time).
if install_needed:
    print("installing deps and restarting kernel")
    !{sys.executable} -m pip install -U pip
    !{sys.executable} -m pip install -U sagemaker
    !{sys.executable} -m pip install -U langchain
    !{sys.executable} -m pip install -U faiss-cpu
    !{sys.executable} -m pip install -U opensearch-py
    
    # Shut the kernel down with restart=True, as announced by the message printed above.
    IPython.Application.instance().kernel.do_shutdown(True)

installing deps and restarting kernel
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting pip
  Downloading pip-23.2-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m77.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.2
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.173.0.tar.gz (854 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m854.4/854.4 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename

## 1. SageMaker Endpoint Wrapper

### 1.1. SageMaker LLM_TEXT Wrapper

In [3]:
import json
import boto3
import numpy as np
from inference_utils import Prompter
from typing import Any, Dict, List, Optional
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint
from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler

In [4]:
# Prompt builder from inference_utils, selected by the "kullm" name.
# NOTE(review): presumably "kullm" picks a prompt template — confirm in inference_utils.
prompter = Prompter("kullm")

# Generation parameters. They are passed to SagemakerEndpoint as model_kwargs and
# forwarded by KullmContentHandler under the 'params' key of the request body.
# NOTE(review): with do_sample=False the sampling knobs (temperature/top_k/top_p)
# are presumably ignored by the endpoint — confirm against its inference code.
params = {
      'do_sample': False,
      'max_new_tokens': 128,
      'temperature': 1.0,
      'top_k': 0,
      'top_p': 0.9,
      'return_full_text': False,
      'repetition_penalty': 1.1,
      'presence_penalty': None,
      'eos_token_id': 2
}

class KullmContentHandler(LLMContentHandler):
    """Translate between LangChain prompts and the KULLM endpoint's JSON payloads.

    The incoming prompt must have the form ``<context>||SPEPERATOR||<question>``.
    The two halves are passed to the module-level ``prompter`` to build the
    final model prompt.
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Optional[Dict] = None) -> bytes:
        """Pre-process the input and return the encoded request body.

        Args:
            prompt: ``<context>||SPEPERATOR||<question>`` as produced by the
                chain's prompt template.
            model_kwargs: Generation parameters sent under ``'params'``;
                an empty dict is sent when omitted.

        Returns:
            UTF-8 encoded JSON: ``{"prompt": [<prompt>], "params": {...}}``.

        Raises:
            ValueError: If the separator does not occur exactly once in ``prompt``.
        """
        parts = prompt.split("||SPEPERATOR||")
        if len(parts) != 2:
            # Same exception type the bare tuple-unpack used to raise, but with a
            # message that names the actual problem.
            raise ValueError(
                "prompt must contain the '||SPEPERATOR||' marker exactly once "
                f"(found {len(parts) - 1} occurrence(s))"
            )
        context, question = parts
        prompt = prompter.generate_prompt(question, context)

        print ("prompt", prompt)
        payload = {
            'prompt': [prompt],
            # avoid a shared mutable default argument
            'params': {} if model_kwargs is None else model_kwargs
        }

        return json.dumps(payload).encode('utf-8')

    def transform_output(self, output: Any) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Response body exposing ``.read()`` that yields UTF-8 JSON.

        Returns:
            The ``generated_text`` field at ``[0][0]`` of the decoded response.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        generated_text = response_json[0][0]["generated_text"]

        return generated_text

In [5]:
# Region of the default boto3 session; reused for both SageMaker endpoints.
aws_region = boto3.Session().region_name
LLMTextContentHandler = KullmContentHandler()
# Name of the deployed text-generation endpoint.
endpoint_name_text = "Kullm-polyglot-12-8b-v2-2023-07-10-05-48-59"
# Must stay identical to the literal that KullmContentHandler.transform_input splits on.
seperator = "||SPEPERATOR||"

In [6]:
# LangChain LLM wrapper around the text-generation endpoint; `params` are handed to
# the content handler as model_kwargs on every call.
llm_text = SagemakerEndpoint(
    endpoint_name=endpoint_name_text,
    region_name=aws_region,
    model_kwargs=params,    
    content_handler=LLMTextContentHandler,
)

### 1.2. SageMaker LLM_EMB Wrapper

In [7]:
class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings):
    """SageMaker embeddings wrapper that sends texts to the endpoint in small batches."""

    def embed_documents(self, texts: List[str], chunk_size: int = 1) -> List[List[float]]:
        """Compute doc embeddings using a SageMaker Inference Endpoint.

        Args:
            texts: The list of texts to embed.
            chunk_size: How many input texts are grouped together per
                request. Values larger than ``len(texts)`` are capped.

        Returns:
            List of embeddings, one for each text (empty for empty input).

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            # 0 used to fail inside range() and negatives silently embedded nothing.
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
        if not texts:
            # Previously len(texts) == 0 became the range() step and raised ValueError.
            return []

        results = []
        _chunk_size = min(chunk_size, len(texts))

        print("text size: ", len(texts))
        print("_chunk_size: ", _chunk_size)

        for start in range(0, len(texts), _chunk_size):
            # One endpoint invocation per batch of texts.
            response = self._embedding_func(texts[start : start + _chunk_size])
            results.extend(response)
        return results

In [8]:
class KoSimCSERobertaContentHandler(EmbeddingsContentHandler):
    """Encode requests for, and decode responses from, the KoSimCSE-RoBERTa endpoint."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Optional[Dict] = None) -> bytes:
        """Build the request body.

        Args:
            prompt: Forwarded verbatim under the ``"inputs"`` key.
            model_kwargs: Extra top-level keys merged into the JSON body.

        Returns:
            UTF-8 encoded JSON request body.
        """
        # None instead of a shared mutable default
        input_str = json.dumps({"inputs": prompt, **(model_kwargs or {})})

        return input_str.encode("utf-8")

    def transform_output(self, output: Any) -> List[Any]:
        """Decode the endpoint response into a list of embeddings.

        Args:
            output: Response body exposing ``.read()`` that yields UTF-8 JSON.

        Returns:
            A list with one embedding per input text.

        Raises:
            ValueError: If the decoded response has neither 2 nor 4 dimensions.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        ndim = np.array(response_json).ndim

        if ndim == 4:
            # Original shape (1, 1, n, 768): keep the first row and wrap it in a batch axis
            emb = response_json[0][0][0]
            emb = np.expand_dims(emb, axis=0).tolist()
        elif ndim == 2:
            # Original shape (n, 1)
            # NOTE(review): this indexes two levels deeper than the 2 dims numpy reports,
            # so it presumably targets ragged responses packed as an object array —
            # confirm; recent numpy versions refuse to build ragged arrays implicitly.
            emb = [ele[0][0] for ele in response_json]
        else:
            # Returning None here only deferred the failure to the caller, which
            # extends/indexes the result; fail at the source with a clear message.
            raise ValueError(
                f"Unexpected embedding response with {ndim} dimension(s); expected 2 or 4"
            )
        return emb

In [9]:
# Content handler instance and name of the deployed embedding endpoint.
LLMEmbHandler = KoSimCSERobertaContentHandler()
endpoint_name_emb = "KoSimCSE-roberta-2023-07-10-05-26-01"

In [10]:
# Embedding client used both for indexing documents and for embedding queries.
llm_emb = SagemakerEndpointEmbeddingsJumpStart(
    endpoint_name=endpoint_name_emb,
    region_name=aws_region,
    content_handler=LLMEmbHandler,
)

**Now, we can build a QA application. <span style="color:red">LangChain makes it extremely simple with the following few lines of code</span>.**

## 2. Vector Store
FAISS Vector Store
- https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/faiss <BR>

OpenSearch
- https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/opensearch

### 2.1. Create OpenSearch domain
* Follow below
    - https://docs.aws.amazon.com/ko_kr/opensearch-service/latest/developerguide/gsgcreate-domain.html
* Add policy (using SDK)
    - AmazonOpenSearchServiceFullAccess

**step 1. opensearch console로 이동** <BR>
![nn](../../Images/s1.png)

**step 2. Navigator에서 Domain 이동 후 Create domain 선택** <BR>
![nn](../../Images/s2.png)

**step 3. domain config 셋팅** <BR>
 - Domain name
 - Domain creation Method: Easy create
 - Engine options: OpenSearch_2.7
 - Network: Public access
 - Master user: Create master user
    - Master username, Master password and Confirm master password 입력
 - 오른쪽 아래 주황색 create 선택

![nn](../../Images/s3.png)

**step 4. Domain endpoint 복사** <BR>
![nn](../../Images/s4_.png)

* create_domain: https://boto3.amazonaws.com/v1/documentation/api/1.18.51/reference/services/opensearch.html#OpenSearchService.Client.create_domain
* **Domain creation takes about 15 minutes.**

In [36]:
# HTTPS endpoint of the OpenSearch domain created in the steps above.
opensearch_domain_endpoint = "https://search-os-rag-7xoyqbyedqulapmnkt6bie43ne.us-east-1.es.amazonaws.com"

In [37]:
# (Master username, Master password) for the OpenSearch domain.
# The password is no longer stored in the notebook: it is read from the
# OPENSEARCH_PASSWORD environment variable, or prompted for when that is unset.
# NOTE(review): the password that used to be committed here should be rotated.
import os
import getpass

http_auth = (
    os.environ.get("OPENSEARCH_USERNAME", "os-rag"),
    os.environ.get("OPENSEARCH_PASSWORD") or getpass.getpass("OpenSearch master password: "),
)

### 2.2. load context files and build indexer
We are now ready to create scripts which will read data from the local directory, use langchain to create embeddings and then upload the embeddings into OpenSearch.

In [13]:
import json
import boto3
from langchain.document_loaders.csv_loader import CSVLoader

In [14]:
# Load the Q&A CSV; the "Source" column is used as each document's source.
# NOTE(review): the file name is spelled "quenstion" — presumably it matches the
# file on disk; verify before renaming anything.
loader = CSVLoader(
    file_path="../dataset/quenstion_answer_ko.csv",
    source_column="Source",
    encoding="utf-8"
)
context_documents = loader.load()

In [15]:
# Sanity check: number of loaded documents and one sample document (index 5).
len(context_documents), context_documents[5]

(8482,
 Document(page_content='Information: 이마트 창동점의 주차정보, 주차요금, 무료주차 정보는 다음과 같습니다. \n※ 임대매장(스타벅스, 식당 등)이용 무료주차    해당매장에서 무료주차 2시간 적용※ 계산대 출력영수증, 모바일영수증에     하단 바코드를 사용하여    신용카드 전용 사전무인정산기 정산후 출차    -케이엠파크:080-330-3600※ 전기차 충전소 위치: 옥외 주차장 8F     차지비,완속 3대,24:00~24:00, 고객센터 1600-4047 매장 전체 주차가능대수: 159대<sep>이마트 창동점\nSource: 이마트 창동점', metadata={'source': '이마트 창동점', 'row': 5}))

### 2.3. Insert data to OpenSearch

This script puts everything together, it divides the documents into chunks, then uses the langchain package to create embeddings (through `SagemakerEndpointEmbeddingsJumpStart`) and then ingests the data into OpenSearch using `OpenSearchVectorSearch`. 

To keep things simple, documents are split into chunks of at most 500 characters with an overlap of 200 characters (`RecursiveCharacterTextSplitter` measures length in characters by default). The langchain `OpenSearchVectorSearch` provides a wrapper over the `opensearch-py` package. It uses the `/_bulk` API endpoint for ingesting multiple records in a single PUT request.

In [17]:
import time
import pprint
import logging
import sagemaker
from langchain.vectorstores import OpenSearchVectorSearch
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [18]:
#pp = pprint.PrettyPrinter(indent=4)

In [19]:
# global constants
logger = logging.getLogger()
logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr)

role = sagemaker.get_execution_role()

2023-07-18 00:43:32,104,credentials,MainProcess,INFO,Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


* For OpenSearch

In [21]:
# OpenSearch index that receives the embedded chunks and is queried later on.
index_name = "genai-rag-index"

In [22]:
%%time
logger.info('Loading documents ...')
docs = loader.load()

# Add custom metadata to every document: an ingestion timestamp and the name of
# the embedding endpoint used to vectorize it.
for doc in docs:
    doc.metadata['timestamp'] = time.time()
    doc.metadata['embeddings_model'] = endpoint_name_emb

# Split into chunks of at most 500 units with 200 units of overlap
# (units are whatever the splitter's length function counts; characters by default).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# by default langchain would create a k-NN index and the embeddings would be ingested as a k-NN vector type
# Embeddings come from llm_emb, whose embed_documents sends `chunk_size` texts per
# endpoint request (default 1), so this cell issues one request per chunk.
# NOTE(review): bulk_size and timeout are presumably the bulk-ingest limit and the
# client timeout in seconds — confirm against the langchain / opensearch-py docs.
docsearch = OpenSearchVectorSearch.from_documents(
    index_name=index_name,
    documents=documents,
    embedding=llm_emb,
    opensearch_url=opensearch_domain_endpoint,
    http_auth=http_auth,
    bulk_size=10000,
    timeout=60
)

2023-07-18 00:43:51,675,<timed exec>,MainProcess,INFO,Loading documents ...


text size:  8524
_chunk_size:  1



KeyboardInterrupt



## 5. QnA

In [30]:
from functools import lru_cache
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

### 5.1. Query and Response

In [38]:
import copy
import functools
import concurrent.futures
from itertools import chain as ch
from operator import itemgetter
from langchain.schema import Document
from opensearchpy import OpenSearch, RequestsHttpConnection

In [39]:
# The template only concatenates context and question around the separator; the real
# prompt is built in KullmContentHandler.transform_input, which splits on the same marker.
prompt_template = ''.join(["{context}", seperator, "{question}"])
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# "stuff" chain over the text LLM; verbose=True prints the formatted prompt on each run.
chain = load_qa_chain(llm=llm_text, chain_type="stuff", prompt=PROMPT, verbose=True)

In [40]:
# Handle onto the existing index for querying (no documents are ingested here).
# NOTE(review): engine/space_type are set to faiss/l2 here, while the ingestion call
# passed neither and the search helper passes space_type="cosinesimil" — confirm
# which settings actually take effect.
vectro_db = OpenSearchVectorSearch(
    index_name=index_name,
    opensearch_url=opensearch_domain_endpoint,
    embedding_function=llm_emb,
    http_auth=http_auth,
    engine="faiss",
    space_type="l2"
)

In [51]:
def get_client(server_url: str) -> OpenSearch:
    """Return an OpenSearch client for ``server_url``.

    Authentication uses the notebook-level ``http_auth`` (username, password) pair.
    """
    return OpenSearch(server_url, http_auth=http_auth)

# Low-level client used for the keyword (lexical) search.
os_client = get_client(opensearch_domain_endpoint)

In [52]:
os_client

<OpenSearch([{'host': 'search-os-rag-7xoyqbyedqulapmnkt6bie43ne.us-east-1.es.amazonaws.com', 'port': 443, 'use_ssl': True}])>

In [53]:
def get_similiar_docs(query, k=10, fetch_k=300, score=False, store="", search_type="approximate_search"):
    """Vector search against ``vectro_db``, filtered to documents mentioning ``store``.

    Args:
        query: User question. When ``store`` is given it is prefixed as
            ``"<store>, <query>"`` before embedding.
        k: Number of results to request.
        fetch_k: Currently unused; kept so existing callers keep working.
        score: If True, return ``(document, score)`` pairs with scores divided
            by the best score; if False, return the documents only.
        store: Store name. ``"이마트"`` is stripped from it and the rest is used
            as a ``*...*`` wildcard filter on the ``text`` field.
        search_type: approximate_search, script_scoring or painless_scripting.

    Returns:
        A deep copy of the search results: ``(doc, normalized_score)`` tuples
        when ``score`` is True, otherwise plain documents.
    """
    def _store_filter(pattern):
        # Fresh dict for each keyword argument so they never share state.
        return {"bool": {"filter": {"wildcard": {"text": pattern}}}}

    if store:
        # Only prefix when there is a store; an empty one used to yield ", <query>".
        query = f'{store}, {query}'
    store = "*" + store.replace("이마트", "").strip() + "*"
    print (query, search_type, store)

    search = vectro_db.similarity_search_with_score if score else vectro_db.similarity_search
    similar_docs = search(
        query,
        k=k,
        search_type=search_type,  # approximate_search, script_scoring, painless_scripting
        space_type="cosinesimil",  # "l2", "l1", "linf", "cosinesimil", "innerproduct", "hammingbit"
        pre_filter=_store_filter(store),  # original note: only for script_scoring
        boolean_filter=_store_filter(store),  # original note: term, wildcard, Exists; only for approximate_search
    )
    similar_docs_copy = copy.deepcopy(similar_docs)

    if not score:
        # Plain documents carry no score; normalizing them used to raise TypeError.
        return similar_docs_copy

    if similar_docs_copy:
        max_score = max(similar_docs_copy, key=itemgetter(1))[1]
        if max_score:
            # Skip normalization when the best score is 0 to avoid dividing by zero.
            similar_docs_copy = [(doc, doc_score / max_score) for doc, doc_score in similar_docs_copy]

    return similar_docs_copy


def get_similiar_docs_with_keywords(query, index_name, k=10):
    """Keyword (``match``) search on the ``text`` field via ``os_client``.

    Each hit's ``_score`` is divided by the response's ``max_score``. The text
    after the last ``"Source: "`` marker, taken from the final line of the hit,
    is stored as the document's ``source`` metadata.

    Returns:
        List of ``(Document, normalized_score)`` tuples; empty when nothing matches.
    """
    response = os_client.search(
        index=index_name,
        body={
            "size": k,
            "query": {"match": {"text": query}},
            "_source": ["text"],
        },
    )

    hit_list = response["hits"]["hits"]
    if not hit_list:
        return []

    # Rescale the scores in place so the response object stays self-consistent.
    top_score = response["hits"]["max_score"]
    for hit in hit_list:
        hit["_score"] = hit["_score"] / top_score
    response["hits"]["max_score"] = hit_list[0]["_score"]

    scored_docs = []
    for hit in hit_list:
        text = hit["_source"]["text"]
        origin = text.rsplit("\n", 2)[-1].split("Source: ")[-1]
        scored_docs.append(
            (Document(page_content=text, metadata={'source': origin}), hit["_score"])
        )
    return scored_docs

def interpolate_results(semantic, keyword, k):
    """Merge semantic and keyword hits into one ranked list of at most ``k`` documents.

    Documents found by both searches (same ``page_content``) get a score of 2,
    which puts them above any normalized single-source score. Every other
    document keeps its own score. Each ``page_content`` appears once, and the
    merged list is sorted by score, highest first, with ties keeping their
    semantic-then-keyword order.

    Previously the boosted scores were computed but never used for ordering,
    and duplicates were kept, so the result was simply the first ``k`` entries
    of ``semantic + keyword``.

    Args:
        semantic: ``(document, score)`` pairs from the vector search.
        keyword: ``(document, score)`` pairs from the keyword search.
        k: Maximum number of documents to return.

    Returns:
        List of documents, best first.
    """
    semantic_set = {doc.page_content for doc, _ in semantic}
    keyword_set = {doc.page_content for doc, _ in keyword}
    common = semantic_set & keyword_set

    merged = []
    seen = set()
    for doc, score in ch(semantic, keyword):
        content = doc.page_content
        if content in seen:
            # keep only the first occurrence of a document
            continue
        seen.add(content)
        merged.append((doc, 2 if content in common else score))

    # list.sort is stable, also with reverse=True, so ties keep their input order
    merged.sort(key=itemgetter(1), reverse=True)

    return [doc for doc, _ in merged[:k]]
        
def get_answer(query, store="", k=10):
    """Answer ``query`` with hybrid retrieval followed by the QA chain.

    Runs the vector search (filtered by ``store``) and the keyword search on
    the notebook-level ``index_name``, merges both result lists with
    ``interpolate_results``, and passes the merged documents and the question
    to ``chain``.

    Returns:
        Whatever ``chain.run`` returns for the merged documents and the question.
    """
    semantic_hits = get_similiar_docs(
        query, k=k, score=True, store=store, search_type="approximate_search"
    )
    keyword_hits = get_similiar_docs_with_keywords(query, index_name=index_name, k=k)
    context_docs = interpolate_results(semantic_hits, keyword_hits, k=k)

    return chain.run(input_documents=context_docs, question=query)

In [54]:
%%time
question = "이마트 동탄점 무료주차 하려면 얼마나 사야해?"
response = get_answer(question, store="이마트 동탄점", k=5)

2023-07-18 00:48:42,500,base,MainProcess,INFO,POST https://search-os-rag-7xoyqbyedqulapmnkt6bie43ne.us-east-1.es.amazonaws.com:443/genai-rag-index/_search [status:200 request:0.051s]


이마트 동탄점, 이마트 동탄점 무료주차 하려면 얼마나 사야해? approximate_search *동탄점*
이마트 동탄점 무료주차 하려면 얼마나 사야해?


2023-07-18 00:48:42,587,base,MainProcess,INFO,POST https://search-os-rag-7xoyqbyedqulapmnkt6bie43ne.us-east-1.es.amazonaws.com:443/genai-rag-index/_search [status:200 request:0.084s]




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mInformation: 이마트 동탄점의 주차정보, 주차요금, 무료주차 정보는 다음과 같습니다. 유료주차 이용안내(사전/무인정산기)시행일자 : 2019년 7월 1일(월요일)[일반요금]1. 30분 회차 - 무료 ※30분 초과 시 회차시간 포함 요금징수2. 기본 10분 - 1,000원3. 운영시간 - 10:00 ~ 23:00[고객할인]1. 1만원이상 - 2시간2. 3만원 이상 - 3시간3. 5만원 이상 - 4시간※최대 4시간을 초과 할 수 없습니다.(영수증 중복적용 가능)<sep>이마트 동탄점
Source: 이마트 동탄점

Information: 이마트 동탄점의 주차정보, 주차요금, 무료주차 정보는 다음과 같습니다. 
[유의사항]1. 이마트 계산대에서 발행된 당일 영수증만 사용가능 합니다.2. 그 외 영수증은 구매하신 매장에서 주차할인을 적용 받으시길 바랍니다.*사전 정산 시 구매하신 영수증을 모두 적용해 주세요!*사전 정산 후 30분 이내 출차하셔야 됩니다.[사전무인 정산기 위치안내]2층 - EV 앞 1대 / 문화센터 앞 1대4층 - 수선실 앞 2대 / EV 앞 1대5층 - EV 앞 1대6층 - 무빙워크 상행 1대 / EV 앞 1대7층 - EV 앞 1대정기권 문의 1588-5783 매장 전체 주차가능대수: 828대<sep>이마트 동탄점
Source: 이마트 동탄점

Information: 네, 이마트 동탄점에는 더페이스샵가 있고, 전화번호는 031-613-3672, 운영시간은 10:00~22:00 입니다.<sep>이마트 동탄점
Source: 이마트 동탄점

Information: 네, 이마트 동탄점에는 전기차 충전소(차지비/완속/2대)가 있고, 전화번호는 1600-4047-, 운영시간은 10:00~21:00 입니다.<sep>이

In [55]:
# Show the question next to the generated answer.
print (f'question: {question}')
print (f'response: {response}')

question: 이마트 동탄점 무료주차 하려면 얼마나 사야해?
response: 이마트 동탄점 무료주차를 위해서는 최소 30분 회차 또는 10분 회차를 구매해야 합니다. 고객 할인 혜택으로 최대 4시간까지 무료주차가 가능합니다. 사전 무인 정산기는 2층, 4층, 5층, 6층, 7층에 위치하고 있으며, 정기권 문의는 1588-5782로 가능합니다.


## 6. Cleanup

### 6.1. delete opensearch domain

In [41]:
# boto3 client for the OpenSearch service API (domain management, not search).
client = boto3.client('opensearch')

In [42]:
# Delete the OpenSearch domain created for this notebook.
# NOTE(review): `opnsearch_config` is not defined in any visible cell, so this raises
# NameError unless it is defined elsewhere — confirm where the domain name comes from.
# The result rebinds `response`, the name used earlier for the generated answer.
response = client.delete_domain(
    DomainName=opnsearch_config["domain"]
)