# 스키마 정의 후 Vector Store 생성 

이 노트북은 오픈 서치 인덱스를 정의하고, 전체 문서를 청킹하여 인덱스를 생성을 하는 노트북 입니다. 이후에 렉시컬, 시멘틱, 하이브리드 검색을 통하여 인덱스의 동작을 확인 합니다. 
이후에 전체 문서의 일부를 검증 인덱스로 생성하여, 검증용으로 사용을 합니다.

---

## [중요] 사전 실행 노트북
이 노트북은 아래 두개의 셋업 노트북이 먼저 실행이 되어야 합니다.
- (1) Setup 노트북
    - 경로는 aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/00_setup/setup.ipynb 와 같습니다.
    -  [Setup Notebook](https://github.com/aws-samples/aws-ai-ml-workshop-kr/blob/master/genai/aws-gen-ai-kr/00_setup/setup.ipynb)
- (2) Amazon OpenSearch 설치 노트북    
    - 경로는 aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/00_setup/setup_opensearch.ipynb 와 같습니다.
    - [Setup OpenSearch](https://github.com/aws-samples/aws-ai-ml-workshop-kr/blob/master/genai/aws-gen-ai-kr/00_setup/setup_opensearch.ipynb)

# 1. Bedrock Client 생성

In [3]:
%load_ext autoreload
%autoreload 2

import sys, os

def add_python_path(module_path):
    if os.path.abspath(module_path) not in sys.path:
        sys.path.append(os.path.abspath(module_path))
        print(f"python path: {os.path.abspath(module_path)} is added")
    else:
        print(f"python path: {os.path.abspath(module_path)} already exists")
    print("sys.path: ", sys.path)

module_path = ".."
add_python_path(module_path)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
python path: /home/sagemaker-user/aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/20_applications/02_qa_chatbot already exists
sys.path:  ['/home/sagemaker-user/aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/20_applications/02_qa_chatbot/01_preprocess_docs', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '', '/opt/conda/lib/python3.10/site-packages', '/home/sagemaker-user/aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/20_applications/02_qa_chatbot']


In [4]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from local_utils import bedrock, print_ww
from local_utils.bedrock import bedrock_info

# ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----

# os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
# os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
# os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
# os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."


boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)

print(colored("\n== FM lists ==", "green"))
pprint(bedrock_info.get_list_fm_models())

Create new client
  Using region: us-east-1
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)
[32m
== FM lists ==[0m
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Claude-V2-1': 'anthropic.claude-v2:1',
 'Cohere-Embeddings-En': 'cohere.embed-english-v3',
 'Cohere-Embeddings-Multilingual': 'cohere.embed-multilingual-v3',
 'Command': 'cohere.command-text-v14',
 'Command-Light': 'cohere.command-light-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text-v1',
 'Titan-Text-G1': 'amazon.titan-text-express-v1',
 'Titan-Text-G1-Light': 'amazon.titan-text-lite-v1'}


# 2. Embedding 모델 로딩

## Embedding Model 선택

In [5]:
Use_Titan_Embedding = True
Use_Cohere_English_Embedding = False

## Embedding Model 로딩

In [6]:
# We will be using the Titan Embeddings Model to generate our Embeddings.
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock

if Use_Titan_Embedding:
    llm_emb = BedrockEmbeddings(client=boto3_bedrock, model_id = "amazon.titan-embed-text-v1")
    dimension = 1536
elif Use_Cohere_English_Embedding:
    llm_emb = BedrockEmbeddings(client=boto3_bedrock, model_id = "cohere.embed-english-v3")    
    dimension = 1024
else:
    lim_emb = None

llm_emb

BedrockEmbeddings(client=<botocore.client.BedrockRuntime object at 0x7fe1500a6620>, region_name=None, credentials_profile_name=None, model_id='amazon.titan-embed-text-v1', model_kwargs=None, endpoint_url=None, normalize=False)

# 3. Load all Json files

In [7]:
from local_utils.proc_docs import get_load_json, show_doc_json

In [8]:
import glob

# Specify the directory and file pattern for .txt files
folder_path = 'data/poc/preprocessed_json/all_processed_data.json'

# List all .txt files in the specified folder
json_files = glob.glob(folder_path)
# json_files = ['data/poc/customer_EFOTA.json']

# Load each item per json file and append to a list
doc_json_list = []
for file_path in json_files:
    doc_json = get_load_json(file_path)
    doc_json_list.append(doc_json)

print("all json files: ", len(doc_json_list))    
# Flatten the list of lists into a single list
all_docs = []
for item in doc_json_list:
        all_docs.extend(item)
        
print("all items: ", len(all_docs))

.[]
all json files:  1
all items:  1732


# 4. Chunking JSON Doc 

## Chunk Size and Chunk Overlap Size 결정

In [9]:
chunk_size = 1024
chunk_overlap = 256

## Chunking

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, SpacyTextSplitter


text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = chunk_size,
    chunk_overlap  = chunk_overlap,
    separators=["\n\n", "\n", ".", " ", ""],
    length_function = len,
)

chunk_docs = text_splitter.split_documents(all_docs)
print(f"Number of chunk_docs after split and chunking= {len(chunk_docs)}")

Number of chunk_docs after split and chunking= 6825


In [11]:
chunk_docs[0:1]

[Document(page_content='How-to videos. Contains videos on how to use Knox E-FOTA. This section contains videos on how to use Knox E-FOTA. Getting started with Knox E-FOTA This video walks you through the Knox E-FOTA console and demonstrates how you can register a reseller, approve a device, create a campaign, assign a campaign, and monitor device status. Creating a campaign on Knox E-FOTA The following video provides in-depth information on how to create and apply a Knox E-FOTA campaign to your Samsung devices. Connecting Knox E-FOTA to VMware Workspace ONE The following video describes the simple steps of connecting Knox E-FOTA with VMware Workspace ONE, while adding device groups from Workspace ONE.', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'How-to videos', 'url': 'https://docs.samsungknox.com/admin/efota-one/how-to-videos', 'project': 'EFOTA', 'last_updated': '2023-09-27'})]

# 5. Index 생성

## Index 이름 결정

In [12]:
index_name = "genai-poc-knox-cohere-en-1024c-256o-v20"



## Index 스키마 정의

In [13]:
index_body = {
    'settings': {
        'index': {
            'knn': True,
            'knn.space_type': 'cosinesimil'  # Example space type
        }
    },
    'mappings': {
        'properties': {
            'metadata': {
                'properties': {
                               'source' : {'type': 'keyword'},
                               'last_updated': {'type': 'date'},
                               'project': {'type': 'keyword'},
                               'seq_num': {'type': 'long'},
                               'title': {'type': 'text'},  # For full-text search
                               'url': {'type': 'text'},  # For full-text search
                            }
            },            
            'text': {
                'type': 'text'
            },
            'vector_field': {
                'type': 'knn_vector',
                'dimension': f"{dimension}"  # Replace with your vector dimension
            }
        }
    }
}


# 5. LangChain OpenSearch VectorStore 생성 
## 선수 조건


## 오픈 서치 도메인 및 인증 정보 세팅

- [langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch](https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch.html)

#### [중요] 아래에 aws parameter store 에 아래 인증정보가 먼저 입력되어 있어야 합니다.

In [15]:
from local_utils.proc_docs import get_parameter

In [16]:
import boto3
ssm = boto3.client('ssm', 'us-east-1')

opensearch_domain_endpoint = get_parameter(
    boto3_clinet = ssm,
    parameter_name = 'knox_opensearch_domain_endpoint',
)

opensearch_user_id = get_parameter(
    boto3_clinet = ssm,
    parameter_name = 'knox_opensearch_userid',
)

opensearch_user_password = get_parameter(
    boto3_clinet = ssm,
    parameter_name = 'knox_opensearch_password',
)


In [17]:
opensearch_domain_endpoint = opensearch_domain_endpoint
rag_user_name = opensearch_user_id
rag_user_password = opensearch_user_password

http_auth = (rag_user_name, rag_user_password) # Master username, Master password

## OpenSearch Client 생성

In [19]:
from local_utils.opensearch import opensearch_utils

In [20]:
aws_region = os.environ.get("AWS_DEFAULT_REGION", None)

os_client = opensearch_utils.create_aws_opensearch_client(
    aws_region,
    opensearch_domain_endpoint,
    http_auth
)

## 오픈 서치 인덱스 생성 
- 오픈 서치에 해당 인덱스가 존재하면, 삭제 합니다. 

In [22]:
from local_utils.opensearch import opensearch_utils

In [23]:

index_exists = opensearch_utils.check_if_index_exists(
    os_client,
    index_name
)

if index_exists:
    opensearch_utils.delete_index(
        os_client,
        index_name
    )

opensearch_utils.create_index(os_client, index_name, index_body)
index_info = os_client.indices.get(index=index_name)
print("Index is created")
pprint(index_info)

index_name=genai-poc-knox-cohere-en-1024c-256o-v20, exists=False

Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'genai-poc-knox-cohere-en-1024c-256o-v20'}
Index is created
{'genai-poc-knox-cohere-en-1024c-256o-v20': {'aliases': {},
                                             'mappings': {'properties': {'metadata': {'properties': {'last_updated': {'type': 'date'},
                                                                                                     'project': {'type': 'keyword'},
                                                                                                     'seq_num': {'type': 'long'},
                                                                                                     'source': {'type': 'keyword'},
                                                                                                     'title': {'type': 'text'},
                                                                               

## 랭체인 인덱스 연결 오브젝트 생성

- [langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch](https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch.html)

In [24]:
from langchain.vectorstores import OpenSearchVectorSearch

In [25]:
vector_db = OpenSearchVectorSearch(
    index_name=index_name,
    opensearch_url=opensearch_domain_endpoint,
    embedding_function=llm_emb,
    http_auth=http_auth, # http_auth
    is_aoss =False,
    engine="faiss",
    space_type="l2",
    bulk_size=100000,
    timeout=60    
)
vector_db

<langchain_community.vectorstores.opensearch_vector_search.OpenSearchVectorSearch at 0x7fe11daff220>

## OpenSearch 에 문서 삽입

In [26]:
%%time

vector_db.add_documents(documents = chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000)


CPU times: user 15 s, sys: 771 ms, total: 15.8 s
Wall time: 11min 43s


['fb62d2a5-26b0-41b2-af48-cf2c58b55b66',
 '6754a94f-fd39-46ad-8b54-73459be2787e',
 '0bcd6e74-1d35-48d1-9eb2-33304925c81a',
 '0e9f3dfa-8d0a-4b57-bb58-25c888806a25',
 '118ddf98-a1de-4a5c-b5d9-d5f62120f13d',
 'a7fa9c90-3532-467a-9a34-ab71a4a69cdb',
 'b3095ba2-118d-4f8e-a99a-f2e1e17ee015',
 '3f713262-2f67-4d68-a08b-45d6fa9f0f3a',
 'd3376d9e-a450-4403-80ff-7f6ab21fb5ea',
 '631072f0-e289-4947-a3ad-da9eb5730506',
 'a27e8d53-30c1-4e0e-bba9-411c620bd675',
 '73743e78-8dc7-4546-84c2-fc42e8d4228e',
 '5421c198-3a41-4660-b547-10101b14d129',
 'bc92053c-a985-4016-bedf-a0f434d8773e',
 'e48194e8-d06a-4a34-93d3-34870c97268b',
 'cc16561a-f9ca-4f03-a243-693548a46abb',
 'd2d46f9d-fafa-45f0-a87d-977306a992ff',
 '62167737-603f-4f94-bf93-cff8e31676a4',
 '658f66e3-52df-4e80-9145-bfbf8c887df6',
 'b8aaa89a-62fd-49a4-bb7f-ab1cdc9b2eba',
 '73e231b3-b4ad-4d1b-ae89-40c9a5f40b23',
 '1a16305a-6245-43ef-a415-a0f8a551947b',
 '62c3582d-0865-4005-a087-752e58036536',
 'f1e520f9-bb74-40c1-8d44-05407d1d1237',
 '2cbb0fba-b630-

# 6. 검색 테스트

## Lexical 검색

In [30]:
query = "how to add image"
 #query = "how to add image"
query = opensearch_utils.get_query(
    query=query
)

print("query: ", query)
response = opensearch_utils.search_document(os_client, query, index_name)
opensearch_utils.parse_keyword_response(response, show_size=3)

query:  {'query': {'bool': {'must': [{'match': {'text': {'query': 'how to add image', 'minimum_should_match': '0%', 'operator': 'or'}}}], 'filter': []}}}
# of searched docs:  10
# of display: 3
---------------------
_id in index:  16d8da0d-a568-4c1e-a8ff-e52942140379
10.45657
. Values Allow users to use Parallels desktop &mdash; Enables Parallels Desktop. When set, you must accept the end-user license agreement. Do not allow users to use Parallels desktop &mdash; Disables Parallels Desktop. Chrome OS 99 and higher Parallels Desktop Windows image The policy set for configuring the Windows OS image that the device user downloads on their Chromebooks in order to use Parallels Desktop. Chrome OS 99 and higher &gt; URL Specifies the address for the Windows image. Values Enter the URL. Chrome OS 99 and higher &gt; SHA-256 hash Specifies the SHA-256 hash of the Windows image. Values Enter the hash. Chrome OS 99 and higher Required disk space Specifies the free disk space required for Parallel

## 시멘틱 검색

In [31]:
vector_db.similarity_search("How to add image")

[Document(page_content='. You can add up to 10 image files in the PNG, JPG, JPEG, or GIF format (animated files are not supported). Each image file must be less than 5 MB. To upload an image file, click Add and select a file. To delete an image file, click next to the name of the uploaded image file. Note The device control command must be transferred to the device to apply an image file to it. &gt;&gt;&gt; Video Select a video file for the screen saver. You can add only one video file in the MP4 or MKV format. The video file must be less than 50 MB. To upload a video file, click Add and select a file. To delete a video file, click next to the name of the uploaded video file. Note The device control command must be transferred to the device to apply a video to it. &gt; Session timeout Allows the use of the session timeout feature for the Kiosk Browser', metadata={'source': 'all_processed_data.json', 'seq_num': 893, 'title': 'Android Enterprise policies', 'url': 'https://docs.samsungkno

## 하이브리드 검색

In [32]:
from langchain.chains.question_answering import load_qa_chain
from local_utils.rag import get_semantic_similar_docs, get_lexical_similar_docs, get_ensemble_results

In [33]:
import copy
from langchain.schema import Document
from langchain import PromptTemplate
from operator import itemgetter

In [34]:
from local_utils.proc_docs import search_hybrid

In [35]:
%%time


filter01 = "[]"
filter02 = "[]"

# query = "how to add image"
query = "How to add image"

search_hybrid_result = search_hybrid(
    query=query,
    vector_db=vector_db,
    k=3,
    index_name= index_name,
    os_client=os_client,
    filter=[
        {"term": {"metadata.type": filter01}},
        {"term": {"metadata.source": filter02}},
    ],
    Semantic_Search = False,    
    Lexical_Search = False,    
    Hybrid_Search = True,     
    minimum_should_match = 75,   
    fusion_algorithm="RRF", # ["RRF", "simple_weighted"]
    ensemble_weights=[.5, .5], # 시멘트 서치에 가중치 0.5 , 키워드 서치 가중치 0.5 부여.
    verbose=True
)



Query: 
 How to add image
##############################
similar_docs_semantic
##############################

Score: 1.0
['. You can add up to 10 image files in the PNG, JPG, JPEG, or GIF format (animated files are not supported). Each image file must be less than 5 MB. To upload an image file, click Add and select a file. To delete an image file, click next to the name of the uploaded image file. Note The device control command must be transferred to the device to apply an image file to it. &gt;&gt;&gt; Video Select a video file for the screen saver. You can add only one video file in the MP4 or MKV format. The video file must be less than 50 MB. To upload a video file, click Add and select a file. To delete a video file, click next to the name of the uploaded video file. Note The device control command must be transferred to the device to apply a video to it. &gt; Session timeout Allows the use of the session timeout feature for the Kiosk Browser']
{'source': 'all_processed_data.jso

# 7. 검증 인덱스 생성

## Index 이름 결정

In [36]:
eval_index_name = "genai-poc-knox-cohere-en-eval-1024c-256o-v21"

## Sampling

In [37]:
import random
def get_sampling_doc(seed, ratio, docs):

    random.seed(seed)
    
    eval_docs = docs[:int(len(docs)*ratio)]
    
    return eval_docs
    
eval_docs = get_sampling_doc(seed=200, ratio=0.02, docs= all_docs)
print("eval docs: ", len(eval_docs))
eval_docs[0:2]
    
    

eval docs:  34


[Document(page_content='How-to videos. Contains videos on how to use Knox E-FOTA. This section contains videos on how to use Knox E-FOTA. Getting started with Knox E-FOTA This video walks you through the Knox E-FOTA console and demonstrates how you can register a reseller, approve a device, create a campaign, assign a campaign, and monitor device status. Creating a campaign on Knox E-FOTA The following video provides in-depth information on how to create and apply a Knox E-FOTA campaign to your Samsung devices. Connecting Knox E-FOTA to VMware Workspace ONE The following video describes the simple steps of connecting Knox E-FOTA with VMware Workspace ONE, while adding device groups from Workspace ONE.', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'How-to videos', 'url': 'https://docs.samsungknox.com/admin/efota-one/how-to-videos', 'project': 'EFOTA', 'last_updated': '2023-09-27'}),
 Document(page_content='Knox E-FOTA. Knox E-FOTA enables enterprise IT admins t

In [38]:
chunk_docs = text_splitter.split_documents(eval_docs)
print(f"Number of chunk_docs after split and chunking= {len(chunk_docs)}")

Number of chunk_docs after split and chunking= 129


## 오픈 서치 인덱스 유무에 따라 삭제
오픈 서치에 해당 인덱스가 존재하면, 삭제 합니다. 

In [39]:
index_exists = opensearch_utils.check_if_index_exists(
    os_client,
    eval_index_name
)

if index_exists:
    opensearch_utils.delete_index(
        os_client,
        eval_index_name
    )
    
opensearch_utils.create_index(os_client, eval_index_name, index_body)
index_info = os_client.indices.get(index=eval_index_name)
print("Index is created")
pprint(index_info)    

index_name=genai-poc-knox-cohere-en-eval-1024c-256o-v21, exists=False

Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'genai-poc-knox-cohere-en-eval-1024c-256o-v21'}
Index is created
{'genai-poc-knox-cohere-en-eval-1024c-256o-v21': {'aliases': {},
                                                  'mappings': {'properties': {'metadata': {'properties': {'last_updated': {'type': 'date'},
                                                                                                          'project': {'type': 'keyword'},
                                                                                                          'seq_num': {'type': 'long'},
                                                                                                          'source': {'type': 'keyword'},
                                                                                                          'title': {'type': 'text'},
                                       

## 검증 인덱스 생성

In [40]:
eval_vector_db = OpenSearchVectorSearch(
    index_name= eval_index_name,
    opensearch_url=opensearch_domain_endpoint,
    embedding_function=llm_emb,
    http_auth=http_auth, # http_auth
    is_aoss =False,
    engine="faiss",
    space_type="l2",
    bulk_size=100000,
    timeout=60    
)
vector_db

<langchain_community.vectorstores.opensearch_vector_search.OpenSearchVectorSearch at 0x7fe11daff220>

In [41]:
%%time

eval_vector_db.add_documents(documents = chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000)


CPU times: user 292 ms, sys: 21.3 ms, total: 313 ms
Wall time: 13.1 s


['4ba4a4cd-d962-4bb8-a11f-31d9a8b393f4',
 '3c362f52-4dd9-4fdf-a1f0-0c63263ec692',
 '8827fdc1-a315-4631-8a98-d2cf49557f18',
 '79e199d0-0bd2-4e5d-a623-c0d9e19ce2db',
 '9e806319-a908-4986-a20a-78120d7d7323',
 '11990f92-8a01-45ea-b96f-e5f797ef30a5',
 'eca6c5af-06d0-4036-87b4-025ccda2d9c5',
 '712e26ec-e6c2-4c02-a3b9-63b819dbf4c1',
 'cff7c6a4-51fe-47b0-ba1d-6c3fabe899c1',
 '00aef10d-2174-406f-a2f2-4b16123b0307',
 'edf904b1-e5c8-4cd5-a46d-930fda497b66',
 'c6b1358c-5f02-49c3-ae2d-d3592b8be1fe',
 '1587c68a-fb84-446f-ad04-906480b8bcf1',
 'aeb08985-23bf-436b-9117-88b58e76104a',
 '0f7e763a-09a2-4ccf-b0ed-8316b5f9032a',
 'bea07acc-8735-4035-b536-05037f139559',
 'a8cc66ee-f37d-44da-b105-b1269953ad55',
 '51c5406b-23c5-4ea0-a1b4-2fcc754ecfbd',
 '05d7158f-40dc-4734-a14c-6429a4be2cc8',
 '442ba596-e479-48e4-990e-ff66937c55c7',
 '6ec9e01c-389c-4602-b629-d24496cab6e1',
 '7cdf688e-f107-4c4c-b01d-143618ca980e',
 'ab242803-6c6b-4820-9190-5260244f71c4',
 '1481b419-0f54-4e42-8d4c-46ad5dbba2de',
 'c74b046f-942d-

In [42]:
%%time


filter01 = "[]"
filter02 = "[]"

query = "how to add image"


search_hybrid_result = search_hybrid(
    query=query,
    vector_db= eval_vector_db,
    k=3,
    index_name= eval_index_name,
    os_client=os_client,
    filter=[
        {"term": {"metadata.type": filter01}},
        {"term": {"metadata.source": filter02}},
    ],
    Semantic_Search = False,    
    Lexical_Search = False,    
    Hybrid_Search = True,     
    minimum_should_match = 75,   
    fusion_algorithm="RRF", # ["RRF", "simple_weighted"]
    ensemble_weights=[.5, .5], # 시멘트 서치에 가중치 0.5 , 키워드 서치 가중치 0.5 부여.
    verbose=True
)



Query: 
 how to add image
##############################
similar_docs_semantic
##############################

Score: 1.0
['. Create a custom role You can create custom roles and choose their permissions. To create a custom role: 1. Go to Administrators & Roles > Roles. 2. Click Create Role. 3. Select a service for this role to manage. 4. Enter a name and description for the new role. 5. Choose which permissions to grant to the role: Campaigns , Devices and Uploads , Licenses , Resellers , EMM , Activity Log , Administration and Roles , Support , Privacy policy , Knox Cloud API 6. Click Save. The new role is added to the Roles list.']
{'source': 'all_processed_data.json', 'seq_num': 26, 'title': 'Manage roles', 'url': 'https://docs.samsungknox.com/admin/efota-one/features/manage-roles', 'project': 'EFOTA', 'last_updated': '2023-07-26'}
--------------------------------------------------

Score: 0.9946105780310577
['. Follow this procedure to approve uploads from resellers you have not r

# A. Reference

- [Building a RAG AI with OpenSearch Serverless and LangChain](https://caylent.com/blog/building-a-rag-with-open-search-serverless-and-lang-chain)