## 사전 작업
- OpenSearch 클러스터 구성
- ML Connector 구성
- Bedrock 임베딩 모델 integration
- Ingestion pipeline 생성
- 참고 : https://www.notion.so/yongho1037/OpenSearch-15bc3cd13fbf8017a128dc4845fbe2d5?pvs=4

In [13]:
import json
!pip install pyyaml opensearch-py

Collecting opensearch-py
  Obtaining dependency information for opensearch-py from https://files.pythonhosted.org/packages/23/35/a957c6fb88ff6874996be688448b889475cf0ea978446cd5a30e764e0561/opensearch_py-2.8.0-py3-none-any.whl.metadata
  Downloading opensearch_py-2.8.0-py3-none-any.whl.metadata (6.9 kB)
Collecting Events (from opensearch-py)
  Obtaining dependency information for Events from https://files.pythonhosted.org/packages/25/ed/e47dec0626edd468c84c04d97769e7ab4ea6457b7f54dcb3f72b17fcd876/Events-0.5-py3-none-any.whl.metadata
  Downloading Events-0.5-py3-none-any.whl.metadata (3.9 kB)
Downloading opensearch_py-2.8.0-py3-none-any.whl (353 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 353.5/353.5 kB 8.4 MB/s eta 0:00:00
Downloading Events-0.5-py3-none-any.whl (6.8 kB)
Installing collected packages: Events, opensearch-py
Successfully installed Events-0.5 opensearch-py-2.8.0

[notice]

In [39]:
from opensearchpy import OpenSearch

# Configure the OpenSearch client.
# NOTE(review): the domain endpoint and the admin password are hard-coded
# below. They are left untouched so the notebook keeps running, but they
# should be loaded from environment variables or a secrets manager, and the
# password should be rotated since it has been committed in plain text.
client = OpenSearch(
    hosts=[{'host': 'search-es-demo-iyxa4jcjjehtfbsf3e4d7kjcm4.ap-northeast-2.es.amazonaws.com', 'port': 443}],
    http_auth=('admin', 'GenaiSearch1!'),  # supply credentials when the domain requires them
    use_ssl=True,
    verify_certs=True
)

index_name = "genai_search"

# Drop any previous copy of the index so it can be recreated from scratch.
# The delete is guarded by an existence check: deleting an index that does
# not exist raises NotFoundError, which would abort the first run of this
# cell (or any run after the index was removed manually).
if client.indices.exists(index=index_name):
    response = client.indices.delete(index=index_name)
    print(f"인덱스 '{index_name}' 삭제 결과:", response)

index_mapping = {
    "settings": {
        "index": {
            # k-NN must be enabled for the knn_vector fields declared below.
            "knn": True,
            "number_of_shards": 1,
            "number_of_replicas": 2
        },
        # Documents indexed without an explicit pipeline are routed through
        # this ingest pipeline. Presumably it is the one created in the
        # prerequisites and fills name_v / description_v -- confirm.
        "default_pipeline": "nlp-ingest-pipeline"
    },
    "mappings": {
        "properties": {
            "current_stock": {
                "type": "integer"
            },
            "name": {
                "type": "keyword"
            },
            "category": {
                "type": "keyword"
            },
            "style": {
                "type": "keyword"
            },
            "description": {
                "type": "keyword"
            },
            "price": {
                "type": "integer"
            },
            "image": {
                "type": "keyword"
            },
            "gender_affinity": {
                "type": "keyword"
            },
            "where_visible": {
                "type": "keyword"
            },
            # Vector fields. NOTE(review): the dimension has to match the
            # output size of the embedding model behind the pipeline;
            # 1536 is assumed to be correct for that model -- confirm.
            "name_v": {
                "type": "knn_vector",
                "dimension": 1536,
                "method": {
                    "name": "hnsw",
                    "space_type": "l2",
                    "engine": "faiss"
                }
            },
            "description_v": {
                "type": "knn_vector",
                "dimension": 1536,
                "method": {
                    "name": "hnsw",
                    "space_type": "l2",
                    "engine": "faiss"
                }
            }
        }
    }
}
response = client.indices.create(index=index_name, body=index_mapping)
print(f"인덱스 '{index_name}' 생성 결과:", response)

인덱스 'genai_search' 삭제 결과: {'acknowledged': True}
인덱스 'genai_search' 생성 결과: {'acknowledged': True, 'shards_acknowledged': True, 'index': 'genai_search'}


In [40]:
import yaml
import json

# Load the product catalogue from the YAML file into Python objects.
with open('products.yaml', 'r', encoding='utf-8') as file:
    arr = yaml.safe_load(file)

url_prefix = "https://raw.githubusercontent.com/YonghoChoi/genai-search/refs/heads/main/assets"
batch_size = 1000  # maximum number of documents per bulk request

# Each buffered entry is one "action line + document line" pair in the
# newline-delimited JSON format the bulk API expects. A list is used instead
# of repeated string concatenation so building a batch stays linear.
bulk_lines = []
for data in arr or []:  # an empty YAML file loads as None
    data['image_url'] = f"{url_prefix}/{data['category']}/{data['image']}"
    # The product id becomes the document _id and is removed from the body.
    action = {
        "index": {
            "_index": index_name,
            "_id": data.pop("id")
        }
    }
    bulk_lines.append(f"{json.dumps(action)}\n{json.dumps(data)}\n")

    if len(bulk_lines) >= batch_size:
        # Run the bulk indexing request for a full batch, then start over.
        resp = client.bulk(body="".join(bulk_lines))
        print(resp)
        bulk_lines = []

# Flush the final, partially filled batch. Without this step the trailing
# documents (or every document, when there are fewer than batch_size)
# would never be sent to OpenSearch.
if bulk_lines:
    resp = client.bulk(body="".join(bulk_lines))
    print(resp)
    bulk_lines = []

In [49]:
# Ask OpenSearch how many documents the index holds and report the number.
response = client.count(
    index=index_name,
)
doc_count = response['count']
count_message = f"인덱스 '{index_name}'의 문서 수: {doc_count}"
print(count_message)

인덱스 'genai_search'의 문서 수: 934


In [50]:
# Run a neural query against the description_v vector field and print the
# raw response. The request body is assembled piece by piece for readability.

# Return only the human-readable fields; the embedding vectors are excluded.
source_filter = {
    "includes": [
        "name",
        "description",
        "image_url"
    ],
    "excludes": [
        "name_v",
        "description_v"
    ]
}

# Neural clause: query text, the model id to use, and k.
neural_clause = {
    "description_v": {
        "query_text": "winter clothes",
        "model_id": "pontz5MBfwuFohZNlfdt",
        "k": 10
    }
}

search_body = {
    "size": 10,
    "_source": source_filter,
    "query": {
        "neural": neural_clause
    }
}

resp = client.search(body=search_body, index=index_name)
print(resp)

{'took': 1392, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 30, 'relation': 'eq'}, 'max_score': 0.0021572823, 'hits': [{'_index': 'genai_search', '_id': '371e0335-6290-446e-90b7-502efd718b4f', '_score': 0.0021572823, '_source': {'image_url': 'https://raw.githubusercontent.com/YonghoChoi/genai-search/refs/heads/main/assets/apparel/371e0335-6290-446e-90b7-502efd718b4f.jpg', 'name': 'Stylish Winter Jacket for Women', 'description': 'The Trendy Winter Jacket keeps you warm and dry during winter adventures. Its water-resistant shell and insulating fleece lining protect you from the cold, while the stylish design and handy pockets add fashion and function. The perfect coat for women exploring the outdoors in comfort and style.'}}, {'_index': 'genai_search', '_id': '1772332f-facc-4f13-b229-2c25f7c360ca', '_score': 0.0020501765, '_source': {'image_url': 'https://raw.githubusercontent.com/YonghoChoi/genai-search/refs/heads/