# Parent-Child Chunk 를 OpenSearch 에 저장

---

---

# 1. Bedrock Client 생성

In [2]:
! pip list | grep langchain # 0.0.312
! pip list | grep opensearch # 2.3.2
! pip list | grep pypdf

langchain                                0.0.338
opensearch-py                            2.3.2
pypdf                                    3.17.1


In [3]:
%load_ext autoreload
%autoreload 2

import sys, os
# module_path = "../../../utils"
# sys.path.append(os.path.abspath(module_path))
# print(os.path.abspath(module_path))

module_path = "./utils"
sys.path.append(os.path.abspath(module_path))
print(os.path.abspath(module_path))

/root/aws-ai-ml-workshop-kr/genai/aws-gen-ai-kr/20_applications/02_qa_chatbot/01_preprocess_docs/utils


In [4]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from utils import bedrock, print_ww
from utils.bedrock import bedrock_info

# ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----

# os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
# os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
# os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
# os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."


boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)

print(colored("\n== FM lists ==", "green"))
pprint(bedrock_info.get_list_fm_models())

Create new client
  Using region: us-east-1
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)

== FM lists ==
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Command': 'cohere.command-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text-v1',
 'Titan-Text-G1': 'TBD'}


# 2. Embedding 모델 로딩

## Embedding Model 선택

In [5]:
Use_Titan_Embedding = True
Use_Cohere_English_Embedding = False

## Embedding Model 로딩

In [6]:
# We will be using the Titan Embeddings Model to generate our Embeddings.
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock

if Use_Titan_Embedding:
    llm_emb = BedrockEmbeddings(client=boto3_bedrock, model_id = "amazon.titan-embed-text-v1")
    dimension = 1536
elif Use_Cohere_English_Embedding:
    llm_emb = BedrockEmbeddings(client=boto3_bedrock, model_id = "cohere.embed-english-v3")    
    dimension = 1024
else:
    lim_emb = None

llm_emb

BedrockEmbeddings(client=<botocore.client.BedrockRuntime object at 0x7ff19ba8d570>, region_name=None, credentials_profile_name=None, model_id='amazon.titan-embed-text-v1', model_kwargs=None, endpoint_url=None)

# 3. Load all Json files

In [7]:
from utils.proc_docs import get_load_json, show_doc_json

In [8]:
import glob

# Specify the directory and file pattern for .txt files
folder_path = 'data/poc/preprocessed_json/all_processed_data.json'

# List all .txt files in the specified folder
json_files = glob.glob(folder_path)
# json_files = ['data/poc/customer_EFOTA.json']

# Load each item per json file and append to a list
doc_json_list = []
for file_path in json_files:
    doc_json = get_load_json(file_path)
    doc_json_list.append(doc_json)

print("all json files: ", len(doc_json_list))    
# Flatten the list of lists into a single list
all_docs = []
for item in doc_json_list:
        all_docs.extend(item)
        
print("all items: ", len(all_docs))

all json files:  1
all items:  1732


# 4. Index 생성

## Index 이름 결정

In [9]:
index_name = "v15-genai-poc-knox-parent-doc-retriever"

## Index 스키마 정의

In [10]:
index_body = {
    'settings': {
        'index': {
            'knn': True,
            'knn.space_type': 'cosinesimil'  # Example space type
        }
    },
    'mappings': {
        'properties': {
            'metadata': {
                'properties': {
                               'source' : {'type': 'keyword'},
                               'family_tree' : {'type': 'keyword'},                                        
                               'parent_id' : {'type': 'keyword'},                    
                               'last_updated': {'type': 'date'},
                               'project': {'type': 'keyword'},
                               'seq_num': {'type': 'long'},
                               'title': {'type': 'text'},  # For full-text search
                               'url': {'type': 'text'},  # For full-text search
                            }
            },            
            'text': {
                'type': 'text'
            },
            'vector_field': {
                'type': 'knn_vector',
                'dimension': f"{dimension}"  # Replace with your vector dimension
            }
        }
    }
}


# 5. LangChain OpenSearch VectorStore 생성 
## 선수 조건


## 오픈 서치 도메인 및 인증 정보 세팅

- [langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch](https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch.html)

#### [중요] 아래에 aws parameter store 에 아래 인증정보가 먼저 입력되어 있어야 합니다.

In [11]:
from utils.proc_docs import get_parameter

In [12]:
import boto3
ssm = boto3.client('ssm', 'us-east-1')

opensearch_domain_endpoint = get_parameter(
    boto3_client = ssm,
    parameter_name = 'knox_opensearch_domain_endpoint',
)

opensearch_user_id = get_parameter(
    boto3_client = ssm,
    parameter_name = 'knox_opensearch_userid',
)

opensearch_user_password = get_parameter(
    boto3_client = ssm,
    parameter_name = 'knox_opensearch_password',
)


In [13]:
opensearch_domain_endpoint = opensearch_domain_endpoint
rag_user_name = opensearch_user_id
rag_user_password = opensearch_user_password

http_auth = (rag_user_name, rag_user_password) # Master username, Master password

## OpenSearch Client 생성

In [14]:
from utils.opensearch import opensearch_utils

In [15]:
aws_region = os.environ.get("AWS_DEFAULT_REGION", None)

os_client = opensearch_utils.create_aws_opensearch_client(
    aws_region,
    opensearch_domain_endpoint,
    http_auth
)

## 오픈 서치 인덱스 생성 
- 오픈 서치에 해당 인덱스가 존재하면, 삭제 합니다. 

In [16]:
from utils.opensearch import opensearch_utils

In [17]:

index_exists = opensearch_utils.check_if_index_exists(
    os_client,
    index_name
)

if index_exists:
    opensearch_utils.delete_index(
        os_client,
        index_name
    )

opensearch_utils.create_index(os_client, index_name, index_body)
index_info = os_client.indices.get(index=index_name)
print("Index is created")
pprint(index_info)

index_name=v15-genai-poc-knox-parent-doc-retriever, exists=True

Deleting index:
{'acknowledged': True}

Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'v15-genai-poc-knox-parent-doc-retriever'}
Index is created
{'v15-genai-poc-knox-parent-doc-retriever': {'aliases': {},
                                             'mappings': {'properties': {'metadata': {'properties': {'family_tree': {'type': 'keyword'},
                                                                                                     'last_updated': {'type': 'date'},
                                                                                                     'parent_id': {'type': 'keyword'},
                                                                                                     'project': {'type': 'keyword'},
                                                                                                     'seq_num': {'type': 'long'},
                            

## 랭체인 인덱스 연결 오브젝트 생성

- [langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch](https://api.python.langchain.com/en/latest/vectorstores/langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch.html)

In [18]:
from langchain.vectorstores import OpenSearchVectorSearch

In [19]:
vector_db = OpenSearchVectorSearch(
    index_name=index_name,
    opensearch_url=opensearch_domain_endpoint,
    embedding_function=llm_emb,
    http_auth=http_auth, # http_auth
    is_aoss =False,
    engine="faiss",
    space_type="l2",
    bulk_size=100000,
    timeout=60    
)
vector_db

<langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch at 0x7ff18fec0b20>

# 6. Chunking JSON Doc 

## Chunk Size and Chunk Overlap Size 결정

In [20]:
parent_chunk_size = 4096
parent_chunk_overlap = 0

child_chunk_size = 1024
child_chunk_overlap = 256

opensearch_parent_key_name = "parent_id"
opensearch_family_tree_key_name = "family_tree"


In [21]:
from utils.proc_docs import create_parent_chunk, create_child_chunk

## Parent Chunking

create_parent_chunk() 아래와 같은 작업을 합니다.
- all_docs 에 있는 문서를 parent_chunk_size 만큼으로 청킹 합니다.
- Parent Chunk 에 두개의 메타 데이타를 생성 합니다.
    - family_tree: parent
    - parent_id : None

In [22]:
parent_chunk_docs = create_parent_chunk(all_docs, opensearch_parent_key_name, 
                                        opensearch_family_tree_key_name,parent_chunk_size, parent_chunk_overlap)
print(f"Number of parent_chunk_docs= {len(parent_chunk_docs)}")


Number of parent_chunk_docs= 2325


In [23]:
parent_chunk_docs[0:1]

[Document(page_content='AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'AR demo barcodes', 'url': 'https://docs.samsungknox.com/admin/knox-capture/ar-demo-barcodes', 'project': 'KCAP', 'last_updated': '2023-10-16', 'family_tree': 'parent', 'parent_id': None})]

OpenSearch 에 parent chunk 삽입

In [24]:
%%time

parent_ids = vector_db.add_documents(
                        documents = parent_chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000
                    )



CPU times: user 6.51 s, sys: 374 ms, total: 6.89 s
Wall time: 4min 47s


In [25]:
total_count_docs = opensearch_utils.get_count(os_client, index_name)
print("total count docs: ", total_count_docs)


total count docs:  {'count': 2325, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}}


삽입된 Parent Chunk 의 첫번째를 확인 합니다. family_tree, parent_id 의 값을 확인 하세요.

In [26]:
def show_opensearch_doc_info(response):
    print("opensearch document id:" , response["_id"])
    print("family_tree:" , response["_source"]["metadata"]["family_tree"])
    print("parent document id:" , response["_source"]["metadata"]["parent_id"])
    print("parent document text: \n" , response["_source"]["text"])

response = opensearch_utils.get_document(os_client, doc_id = parent_ids[0], index_name = index_name)
show_opensearch_doc_info(response)    

opensearch document id: f1713860-f037-41f3-af8c-8d99a4242b7e
family_tree: parent
parent document id: None
parent document text: 
 AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6


## Child Chunking

### Child Chunk 생성

아래의 create_child_chunk() 는 다음과 같은 작업을 합니다.
- parent_chunk_docs 각각에 대해서 Child Chunk 를 생성 합니다. 
- Child Chunk 에 두개의 메타 데이타를 생성 합니다.
    - family_tree: child
    - parent_id : parent 에 대한 OpenSearch document id

In [27]:
# child_chunk_docs = create_child_chunk(parent_chunk_docs[0:1], parent_ids)
child_chunk_docs = create_child_chunk(child_chunk_size, child_chunk_overlap, parent_chunk_docs, parent_ids, 
                                      opensearch_parent_key_name, opensearch_family_tree_key_name)
print(f"Number of child_chunk_docs= {len(child_chunk_docs)}")


Number of child_chunk_docs= 6884


### 생성된 Child 와 이에 대한 Parent 정보 확인

Child Chunk 한개에 대한 정보를 확인 합니다.

In [28]:
child_chunk_docs[0:1]

[Document(page_content='AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'AR demo barcodes', 'url': 'https://docs.samsungknox.com/admin/knox-capture/ar-demo-barcodes', 'project': 'KCAP', 'last_updated': '2023-10-16', 'family_tree': 'child', 'parent_id': 'f1713860-f037-41f3-af8c-8d99a4242b7e'})]

Child 에 대한 Parent 정보 확인

In [29]:
parent_id = child_chunk_docs[0].metadata["parent_id"]
print("child's parent_id: ", parent_id)
print("\n###### Search parent in OpenSearch")
response = opensearch_utils.get_document(os_client, doc_id = parent_id, index_name = index_name)
show_opensearch_doc_info(response)    


child's parent_id:  f1713860-f037-41f3-af8c-8d99a4242b7e

###### Search parent in OpenSearch
opensearch document id: f1713860-f037-41f3-af8c-8d99a4242b7e
family_tree: parent
parent document id: None
parent document text: 
 AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6


### OpenSearch 에 Child 삽입

In [30]:
%%time

child_ids = vector_db.add_documents(
                        documents = child_chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000
                    )

print("length of child_ids: ", len(child_ids))

length of child_ids:  6884
CPU times: user 18.9 s, sys: 878 ms, total: 19.8 s
Wall time: 11min 26s


In [31]:
total_count_docs = opensearch_utils.get_count(os_client, index_name)
print("total count docs: ", total_count_docs)


total count docs:  {'count': 9209, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}}


In [32]:
response = opensearch_utils.get_document(os_client, doc_id = child_ids[0], index_name = index_name)
show_opensearch_doc_info(response)    

opensearch document id: 921b9610-ad45-47a0-a5c7-0140f3553506
family_tree: child
parent document id: f1713860-f037-41f3-af8c-8d99a4242b7e
parent document text: 
 AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6


# 7. 검색 테스트

## Lexical 검색

In [33]:
q = "'how to add image"
query ={'query': 
        {'bool': {'must': 
                  [{'match': 
                    {'text': 
                     {'query': "{q}", 'minimum_should_match': '0%', 'operator': 'or'}}}], 
                  'filter': {
                    "term": {
                      "metadata.family_tree": "child"
                    }                      
                  }
                 }
        }
       }
pprint(query)

{'query': {'bool': {'filter': {'term': {'metadata.family_tree': 'child'}},
                    'must': [{'match': {'text': {'minimum_should_match': '0%',
                                                 'operator': 'or',
                                                 'query': '{q}'}}}]}}}


In [34]:
# query = "how to add image"
# query = opensearch_utils.get_query(
#     query=query
# )

response = opensearch_utils.search_document(os_client, query, index_name)
opensearch_utils.parse_keyword_response(response, show_size=3)

# of searched docs:  10
# of display: 3
---------------------
_id in index:  53c5f947-3513-460b-915f-2be171011a44
8.998719
. Q - When I renew an expired license, does the license come into effect and enroll the relevant devices immediately? A - For iOS and Windows devices, they are enrolled immediately. For Android devices, they can be enrolled according to the schedule set on the system or by sending a device command. Q - When a license expires, does it expire in the order the devices were enrolled? A - When a license being used on various devices expires, all the devices become unable to use Knox Manage simultaneously. Q - If I want to allocate the renewed licenses only to new devices (not already existing registered devices), what should I do? A - After increasing the number of licenses, you should first unenroll the existing registered devices, and then enroll the new devices. The renewed licenses will then be allocated to the new devices
{'source': 'all_processed_data.json', 'seq_

## 시맨틱 검색

In [35]:
vector_db.similarity_search(q, k=2)

[Document(page_content='. You can add up to 10 image files in the PNG, JPG, JPEG, or GIF format (animated files are not supported). Each image file must be less than 5 MB. To upload an image file, click Add and select a file. To delete an image file, click next to the name of the uploaded image file. Note The device control command must be transferred to the device to apply an image file to it. &gt;&gt;&gt; Video Select a video file for the screen saver. You can add only one video file in the MP4 or MKV format. The video file must be less than 50 MB. To upload a video file, click Add and select a file. To delete a video file, click next to the name of the uploaded video file. Note The device control command must be transferred to the device to apply a video to it. &gt; Session timeout Allows the use of the session timeout feature for the Kiosk Browser', metadata={'source': 'all_processed_data.json', 'seq_num': 911, 'title': 'Android Enterprise policies', 'url': 'https://docs.samsungkno

# 8. 검증 인덱스 생성

## Index 이름 결정

In [36]:
eval_index_name = "v16-genai-poc-knox-eval-parent-doc-retriever"

## Sampling

In [37]:
import random
def get_sampling_doc(seed, ratio, docs):

    random.seed(seed)
    
    eval_docs = docs[:int(len(docs)*ratio)]
    
    return eval_docs
    
eval_docs = get_sampling_doc(seed=200, ratio=0.05, docs= all_docs)
print("eval docs: ", len(eval_docs))
eval_docs[0:2]
    
    

eval docs:  86


[Document(page_content='AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'AR demo barcodes', 'url': 'https://docs.samsungknox.com/admin/knox-capture/ar-demo-barcodes', 'project': 'KCAP', 'last_updated': '2023-10-16'}),
 Document(page_content="Videos. This section contains product and how-to videos related to Knox Capture. Getting started with Samsung Knox Capture In this video, we'll show you how to use Samsung Knox Capture to transform your mobile devices into powerful barcode scanners that can read, process, and output barcode data into other applications. Samsung Knox Capture: Enterprise-grade mobile scanning solution in Galaxy device This product intro video shows you how easy it is to transform rugged Samsung devices like the Galaxy XCover Pro into enterprise-grade barcode scanners.", metadata={'source': 'all_processe

## 오픈 서치 인덱스 유무에 따라 삭제
오픈 서치에 해당 인덱스가 존재하면, 삭제 합니다. 

In [38]:
index_exists = opensearch_utils.check_if_index_exists(
    os_client,
    eval_index_name
)

if index_exists:
    opensearch_utils.delete_index(
        os_client,
        eval_index_name
    )
    
opensearch_utils.create_index(os_client, eval_index_name, index_body)
index_info = os_client.indices.get(index=eval_index_name)
print("Index is created")
pprint(index_info)    

index_name=v16-genai-poc-knox-eval-parent-doc-retriever, exists=True

Deleting index:
{'acknowledged': True}

Creating index:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'v16-genai-poc-knox-eval-parent-doc-retriever'}
Index is created
{'v16-genai-poc-knox-eval-parent-doc-retriever': {'aliases': {},
                                                  'mappings': {'properties': {'metadata': {'properties': {'family_tree': {'type': 'keyword'},
                                                                                                          'last_updated': {'type': 'date'},
                                                                                                          'parent_id': {'type': 'keyword'},
                                                                                                          'project': {'type': 'keyword'},
                                                                                                          'seq_num': {'type

## 검증 인덱스 연결 오브젝트 생성

In [40]:
eval_vector_db = OpenSearchVectorSearch(
    index_name= eval_index_name,
    opensearch_url=opensearch_domain_endpoint,
    embedding_function=llm_emb,
    http_auth=http_auth, # http_auth
    is_aoss =False,
    engine="faiss",
    space_type="l2",
    bulk_size=100000,
    timeout=60    
)
eval_vector_db

<langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch at 0x7ff18f3030d0>

## Parent Chunking

In [41]:
parent_chunk_docs = create_parent_chunk(eval_docs, opensearch_parent_key_name, 
                                        opensearch_family_tree_key_name,parent_chunk_size, 
                                        parent_chunk_overlap)
print(f"Number of parent_chunk_docs= {len(parent_chunk_docs)}")


Number of parent_chunk_docs= 98


In [42]:
parent_chunk_docs[0:1]

[Document(page_content='AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'AR demo barcodes', 'url': 'https://docs.samsungknox.com/admin/knox-capture/ar-demo-barcodes', 'project': 'KCAP', 'last_updated': '2023-10-16', 'family_tree': 'parent', 'parent_id': None})]

In [43]:
%%time

parent_ids = eval_vector_db.add_documents(
                        documents = parent_chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000
                    )



CPU times: user 279 ms, sys: 6.84 ms, total: 286 ms
Wall time: 11.3 s


In [44]:
total_count_docs = opensearch_utils.get_count(os_client, eval_index_name)
print("total count docs: ", total_count_docs)


total count docs:  {'count': 98, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}}


In [45]:
response = opensearch_utils.get_document(os_client, doc_id = parent_ids[0], index_name = eval_index_name)
show_opensearch_doc_info(response)    

opensearch document id: e582072d-bfd0-42d3-b96a-756b011b464f
family_tree: parent
parent document id: None
parent document text: 
 AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6


## Child Chunking

In [46]:
# child_chunk_docs = create_child_chunk(parent_chunk_docs[0:1], parent_ids)
child_chunk_docs = create_child_chunk(child_chunk_size, child_chunk_overlap, parent_chunk_docs, 
                                      parent_ids, 
                                      opensearch_parent_key_name, opensearch_family_tree_key_name)
print(f"Number of child_chunk_docs= {len(child_chunk_docs)}")


Number of child_chunk_docs= 244


In [47]:
child_chunk_docs[0:1]

[Document(page_content='AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6', metadata={'source': 'all_processed_data.json', 'seq_num': 1, 'title': 'AR demo barcodes', 'url': 'https://docs.samsungknox.com/admin/knox-capture/ar-demo-barcodes', 'project': 'KCAP', 'last_updated': '2023-10-16', 'family_tree': 'child', 'parent_id': 'e582072d-bfd0-42d3-b96a-756b011b464f'})]

In [48]:
parent_id = child_chunk_docs[0].metadata["parent_id"]
print("child's parent_id: ", parent_id)
print("\n###### Search parent in OpenSearch")
response = opensearch_utils.get_document(os_client, doc_id = parent_id, index_name = eval_index_name)
show_opensearch_doc_info(response)    


child's parent_id:  e582072d-bfd0-42d3-b96a-756b011b464f

###### Search parent in OpenSearch
opensearch document id: e582072d-bfd0-42d3-b96a-756b011b464f
family_tree: parent
parent document id: None
parent document text: 
 AR demo barcodes. Use the following barcodes to test the Knox Capture AR features. Barcode 1 Barcode 2 Barcode 3 Barcode 4 Barcode 5 Barcode 6


In [49]:
%%time

child_ids = eval_vector_db.add_documents(
                        documents = child_chunk_docs, 
                        vector_field = "vector_field",
                        bulk_size = 1000000
                    )

print("length of child_ids: ", len(child_ids))

length of child_ids:  244
CPU times: user 685 ms, sys: 12.9 ms, total: 698 ms
Wall time: 23.6 s


In [50]:
total_count_docs = opensearch_utils.get_count(os_client, eval_index_name)
print("total count docs: ", total_count_docs)


total count docs:  {'count': 342, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}}


In [55]:
response = opensearch_utils.get_document(os_client, doc_id = child_ids[5], index_name = eval_index_name)
show_opensearch_doc_info(response)    

opensearch document id: 90148840-bf50-467e-be82-7076abece051
family_tree: child
parent document id: 2dfcb471-aea7-42eb-9dc6-75fa1ad22653
parent document text: 
 . #Admin mode (Default) When Knox Capture is downloaded from Google Play and installed on a compatible Android device - and a valid license key is activated - the app operates in its default Admin mode. This mode allows the device user - typically an enterprise IT admin - to utilize the full functionality of the app to primarily do the following: 1. Create scanning profiles that define which apps can launch the barcode scanner, which barcode types can be scanned (QR, Code 128, EAN8, etc.), and how scanned data is formatted (append Tab or Enter key, insert special characters before or after data, etc.). 2. Test scanning profiles to ensure that appropriate business apps can launch the scanner and receive data back as keystrokes. 3. Export profile settings as a configuration file, which can then be used by EMM/UEM administrators t

In [56]:
parent_id = response["_source"]["metadata"]["parent_id"]
print("child's parent_id: ", parent_id)
print("\n###### Search parent in OpenSearch")
response = opensearch_utils.get_document(os_client, doc_id = parent_id, index_name = eval_index_name)
show_opensearch_doc_info(response)    


child's parent_id:  2dfcb471-aea7-42eb-9dc6-75fa1ad22653

###### Search parent in OpenSearch
opensearch document id: 2dfcb471-aea7-42eb-9dc6-75fa1ad22653
family_tree: parent
parent document id: None
parent document text: 
 Overview. Get started with the basics by learning what Knox Capture is and how it works. What is Knox Capture? To help explain what Knox Capture does, it may be helpful to start with an understanding of what a *wedge* scanner is, and how it relates to the Knox Capture solution. In traditional hardware-based barcode scanning terminology, a wedge scanner is a physical device that attaches between a keyboard and a computer. The keyboard attaches to the scanner, and the scanner attaches to the computer. From the computer's perspective, the scanner behaves just like an additional keyboard. When the computer runs a software program (like an inventory spreadsheet app), and barcodes are read using the wedge scanner, data is sent from the scanner to the program in the form of

## 검색 테스트

In [52]:
q = "'how to use barcode"
query ={'query': 
        {'bool': {'must': 
                  [{'match': 
                    {'text': 
                     {'query': f"{q}", 'minimum_should_match': '0%', 'operator': 'or'}}}], 
                  'filter': {
                    "term": {
                      "metadata.family_tree": "child"
                    }                      
                  }
                 }
        }
       }
pprint(query)

{'query': {'bool': {'filter': {'term': {'metadata.family_tree': 'child'}},
                    'must': [{'match': {'text': {'minimum_should_match': '0%',
                                                 'operator': 'or',
                                                 'query': "'how to use "
                                                          'barcode'}}}]}}}


In [53]:
response = opensearch_utils.search_document(os_client, query, eval_index_name)
opensearch_utils.parse_keyword_response(response, show_size=3)

# of searched docs:  10
# of display: 3
---------------------
_id in index:  709c077e-5440-4ef4-afc8-d96e4209f659
4.949938
. Test mode - A feature that lets you test a scanning profile's apps, barcode types, and keystroke ouput rules before exporting the configuration for deployment. Deploy configuration to EMM/UEM - Learn about the various scanning profile export options, and how to use the exported configuration file in an EMM/UEM to deploy Knox Capture to end-users. Set camera scan trigger - A feature that allows IT admins to specify whether a user can trigger a camera scan using a hardware button on the device, an on-screen floating action button, or both. Learn about the different button options, and how to configure the triggers from both the admin and end-user's point of view. Connect a hardware scanner - Certain enterprises may wish to use their existing hardware wedge scanners alongside Knox Capture. Learn how to connect Bluetooth and USB scanners to a device running Knox Capt

In [54]:
eval_vector_db.similarity_search(q, k=2)

[Document(page_content='. Batch &mdash; Lets the user scan multiple barcodes in a single camera frame using the scan button. Aim and scan &mdash; Requires the user to aim the camera at a set of barcodes and tap the Scan button to capture the barcodes, one at a time. Tap to select &mdash; Lets a user aim the camera at multiple barcodes, freeze the scanning overlay, tap to select barcodes, and tap the Scan button to capture data from the selected barcodes. Scan session limit This defines the number of barcodes a user must scan before the camera preview window closes. A counter in the top-left corner of the preview window gets updated after each scan. Once the user has scanned the set number of barcodes, the camera preview window closes and the user returns to the business app. Users can exit the camera preview window at any time by tapping Done, regardless of the number of barcodes captured. You can set any integer value. By default, this value is set to 0 to indicate infinite scans', me