# OpenSearch Provisioning

Knowledge Base로 사용할 OpenSearch를 프로비저닝합니다.

## 1. OpenSearch 도메인 생성

SageMaker JupyterLab에서 코드를 통해 OpenSearch Domain을 생성하는 경우, SageMaker Notebook IAM role에 OpenSearchFullAccess와 같은 권한이 필요합니다.

In [None]:
import json
import time
import uuid

import boto3
import botocore
# Deployment profile. True creates a 1-AZ domain without standby; False creates
# a 3-AZ domain with standby. True is recommended for workshops to avoid
# excessive cost and resource usage.
DEV = True
# OpenSearch engine version (e.g. 2.7 / 2.9 / 2.11 / 2.13).
VERSION = "2.11"

# Master user credentials for the domain.
# NOTE(review): hard-coded credential kept for the workshop; replace it before
# using this notebook outside a throwaway environment.
opensearch_user_id = 'raguser'
opensearch_user_password = 'Passw0rd1!'

# Domain name with a random 8-hex-character suffix.
rand_str = uuid.uuid4().hex[:8]
opensearch_domain_name = f'rag-hol-{rand_str}'

# Region and account of the current credentials, and the OpenSearch client
# used by the rest of the notebook.
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]
opensearch = boto3.client('opensearch', region_name=region)

# Production profile: three data nodes and three dedicated master nodes,
# spread over three Availability Zones with standby.
cluster_config_prod = {
    'InstanceCount': 3,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': True,
    # Multi-AZ with Standby needs three Availability Zones; request them
    # explicitly instead of relying on the service default AZ count.
    'ZoneAwarenessConfig': {'AvailabilityZoneCount': 3},
    'DedicatedMasterEnabled': True,
    'MultiAZWithStandbyEnabled': True,
    'DedicatedMasterType': 'r6g.large.search',
    'DedicatedMasterCount': 3,
}

# Development/workshop profile: a single data node in one Availability Zone,
# no dedicated masters and no standby.
cluster_config_dev = {
    'InstanceCount': 1,
    'InstanceType': 'r6g.large.search',
    'ZoneAwarenessEnabled': False,
    'DedicatedMasterEnabled': False,
    # Stated explicitly so the "without standby" intent does not depend on
    # the API default.
    'MultiAZWithStandbyEnabled': False,
}


# EBS storage settings sent to create_domain as EBSOptions: EBS enabled,
# gp3 volume type, volume size 100.
ebs_options = dict(EBSEnabled=True, VolumeType='gp3', VolumeSize=100)

# Advanced security settings sent to create_domain: enabled, backed by the
# internal user database, with the master user taken from
# opensearch_user_id / opensearch_user_password.
advanced_security_options = dict(
    Enabled=True,
    InternalUserDatabaseEnabled=True,
    MasterUserOptions=dict(
        MasterUserName=opensearch_user_id,
        MasterUserPassword=opensearch_user_password,
    ),
)

# Resource-based access policy for the domain, serialized to JSON. It allows
# every AWS principal to perform any es:* action on the domain's
# sub-resources. Built with json.dumps so quoting and escaping are handled by
# the library instead of hand-written backslash escapes inside an f-string.
ap = json.dumps({
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"AWS": "*"},
            "Action": "es:*",
            "Resource": (
                f"arn:aws:es:{region}:{account_id}"
                f":domain/{opensearch_domain_name}/*"
            ),
        },
    ],
})

# Choose the cluster layout that matches the DEV switch.
cluster_config = cluster_config_dev if DEV else cluster_config_prod

# Assemble the create_domain request: the selected cluster layout, the access
# policy, EBS and security options, with node-to-node encryption, encryption
# at rest and HTTPS enforcement all switched on.
domain_request = dict(
    DomainName=opensearch_domain_name,
    EngineVersion=f'OpenSearch_{VERSION}',
    ClusterConfig=cluster_config,
    AccessPolicies=ap,
    EBSOptions=ebs_options,
    AdvancedSecurityOptions=advanced_security_options,
    NodeToNodeEncryptionOptions={'Enabled': True},
    EncryptionAtRestOptions={'Enabled': True},
    DomainEndpointOptions={'EnforceHTTPS': True},
)
response = opensearch.create_domain(**domain_request)

## 2. OpenSearch 도메인 생성 완료 대기 (20분 소요)

In [None]:
%%time
def wait_for_domain_creation(domain_name, poll_interval=60, timeout=None,
                             client=None):
    """Block until the OpenSearch domain reports an endpoint.

    Polls ``describe_domain`` until ``DomainStatus`` contains an ``Endpoint``
    key, then prints the endpoint.

    Args:
        domain_name: Name of the domain to wait for.
        poll_interval: Seconds to sleep between polls (default 60).
        timeout: Maximum number of seconds to wait, or None (default) to
            wait indefinitely.
        client: Object exposing ``describe_domain``; defaults to the
            notebook-level ``opensearch`` client.

    Raises:
        TimeoutError: If ``timeout`` elapses before an endpoint appears.
        botocore.exceptions.ClientError: For any service error other than
            ResourceNotFoundException, which is reported with a message.
    """
    client = opensearch if client is None else client
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        # Only the service call sits inside the try, so unrelated errors are
        # never mistaken for a missing domain.
        try:
            status = client.describe_domain(DomainName=domain_name)['DomainStatus']
        except botocore.exceptions.ClientError as error:
            if error.response['Error']['Code'] == 'ResourceNotFoundException':
                print('Domain not found.')
                return
            raise

        if 'Endpoint' in status:
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Domain '{domain_name}' had no endpoint after {timeout} seconds."
            )
        print('Creating domain...')
        time.sleep(poll_interval)

    # The loop only exits once the endpoint key is present.
    print('Domain endpoint ready to receive data: ' + status['Endpoint'])

# Wait for the OpenSearch domain to be created (takes about 20 minutes).
wait_for_domain_creation(domain_name=opensearch_domain_name)

## 3. OpenSearch 도메인 Endpoint 확인

In [None]:
# Look up the domain again and build its HTTPS endpoint URL.
response = opensearch.describe_domain(DomainName=opensearch_domain_name)
opensearch_domain_endpoint = "https://" + response['DomainStatus']['Endpoint']

# Show the OpenSearch domain endpoint.
print(opensearch_domain_endpoint)

## 4. 필요한 변수 저장

In [None]:
# Show the OpenSearch connection details; the %store line below saves these
# variables so the next notebook can reuse them.
print(
    f"OpenSearch User Id: {opensearch_user_id}",
    f"OpenSearch User Password: {opensearch_user_password}",
    f"OpenSearch Domain Name: {opensearch_domain_name}",
    f"OpenSearch Domain Endpoint: {opensearch_domain_endpoint}",
    sep="\n",
)
print()
%store opensearch_user_id opensearch_user_password opensearch_domain_name opensearch_domain_endpoint

## 5. (Option) 도메인 삭제

In [None]:
# import time
# import boto3
# import botocore

# region = boto3.Session().region_name
# opensearch = boto3.client('opensearch', region)


# # 삭제할 OpenSearch 도메인 이름을 콘솔에서 확인하고 입력합니다.
# domain_name = "rag-hol-aeb17f2f"


# try:
#     # OpenSearch 도메인 삭제
#     response = opensearch.delete_domain(
#         DomainName=domain_name
#     )
#     print(f"Deleting domain '{domain_name}'...")

#     # 도메인이 완전히 삭제될 때까지 기다립니다.
#     while True:
#         try:
#             describe_response = opensearch.describe_domain(DomainName=domain_name)
#             status = describe_response['DomainStatus']['Processing']
#             if not status:
#                 print(f"Domain '{domain_name}' has been deleted.")
#                 break
#             else:
#                 print(f"Waiting for domain '{domain_name}' to be deleted...")
#                 time.sleep(60)
#         except botocore.exceptions.ClientError as e:
#             if e.response['Error']['Code'] == 'ResourceNotFoundException':
#                 print(f"Domain '{domain_name}' has been deleted.")
#                 break
#             else:
#                 raise e

# except botocore.exceptions.ClientError as e:
#     print(f"Error: {e.response['Error']['Message']}")
    