# OpenSearch Service에 Model(Remote Connector) 배포하기

OpenSearch에 연결합니다.

In [1]:
from opensearchpy import OpenSearch
import json

# OpenSearch connection settings.
host = 'localhost'
port = 9200
# (user, password) pair; the password is the admin password chosen during
# the initial cluster setup.
auth = ('admin', 'TestUser2@')

# Client used to talk to the cluster. TLS is enabled, but certificate
# verification and the accompanying warning are switched off.
aos_client = OpenSearch(
    http_auth=auth,
    hosts=[dict(host=host, port=port)],
    ssl_show_warn=False,
    verify_certs=False,
    use_ssl=True,
)

# 임베딩 원격 모델 배포하기

모델 커넥터 생성 요청에 사용할 Base URL을 설정합니다.

In [2]:
# Base URL of the ML Commons plugin REST API. Derived from the `host` and
# `port` used for the OpenSearch client so the two cannot drift apart.
base_url = f"https://{host}:{port}/_plugins/_ml"

원격 모델 커넥터를 생성합니다.

In [3]:
import os
from dotenv import dotenv_values
import requests
# Load key/value pairs (including the OpenAI API key) from the local
# `.env_api` file.
env_vars = dotenv_values('.env_api')

# Fail fast when the key is absent or empty. Otherwise a connector with a
# null credential would be submitted and the problem would only surface
# later, when the model is asked to predict.
openai_api_key = env_vars.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError(
        "OPENAI_API_KEY is missing or empty in .env_api; "
        "cannot create the OpenAI connector."
    )

# Create the OpenAI Embeddings (text) connector.
# The `${parameters.*}` and `${credential.*}` placeholders are left as
# literal text here; they are template variables inside the connector
# definition, not Python formatting.
connector_payload = {
    "name": "OpenAI Connector: embedding",
    "description": "The connector to OpenAI embedding model",
    "version": 1,
    "protocol": "http",
    "parameters": {
        "model": "text-embedding-3-small",
        "endpoint": "api.openai.com"
    },
    "credential": {
        "openAI_key": openai_api_key
    },
    "actions": [
        {
            "action_type": "predict",
            "method": "POST",
            "url": "https://${parameters.endpoint}/v1/embeddings",
            "headers": {
                "Authorization": "Bearer ${credential.openAI_key}",
                "Content-Type": "application/json"
            },
            "request_body": "{ \"input\": ${parameters.input}, \"model\": \"${parameters.model}\" }",
            "pre_process_function": "connector.pre_process.openai.embedding",
            "post_process_function": "connector.post_process.openai.embedding"
        }
    ]
}

# Send the connector-creation request. Certificate verification is
# disabled, matching the OpenSearch client configuration.
response = requests.post(
    base_url + "/connectors/_create",
    auth=auth,
    json=connector_payload,
    verify=False
)

# Print the raw response body (expected to contain the new connector_id).
print(response.text)

{"connector_id":"oKEhfpEBhL3bNK7sh4DV"}


In [4]:
# Parse the connector-creation response and keep the new connector's ID
# for the model registration step.
obj = json.loads(response.text)
connector_id = obj["connector_id"]
# Bare expression: the notebook displays the value as the cell output.
connector_id

'oKEhfpEBhL3bNK7sh4DV'

원격 모델을 위한 모델 그룹을 생성합니다.

In [5]:
# Request body describing the model group that will hold the remote model.
register_model_group_payload = dict(
    name="OpenAI Models for text-embedding",
    description="This is a model group for OpenAI model",
)

# Register the model group through the ML Commons API.
response = requests.post(
    f"{base_url}/model_groups/_register",
    json=register_model_group_payload,
    auth=auth,
    verify=False,
)

# Show the raw response body.
print(response.text)

{"model_group_id":"oaEhfpEBhL3bNK7skIBb","status":"CREATED"}


In [6]:
# Extract the model group ID from the registration response.
obj = json.loads(response.text)
try:
    model_group_id = obj["model_group_id"]
except KeyError:
    # No "model_group_id" key: the request returned an error payload instead.
    # Fall back to pulling the ID out of the error reason: the text after the
    # first ": " with its final character dropped.
    # NOTE(review): presumably this targets the "name already in use by a
    # model group with ID: <id>." message - confirm against the server's
    # actual wording.
    model_group_id = obj['error']['reason'].split(': ')[1][:-1]
print(model_group_id)

oaEhfpEBhL3bNK7skIBb


앞서 생성한 커넥터와 모델 그룹을 사용하여 모델을 등록합니다.

In [7]:
# Request body that ties the remote model to the connector and model group
# created in the earlier cells.
register_model_payload = dict(
    name="OpenAI text embedding model",
    function_name="remote",
    model_group_id=model_group_id,
    description="This is OpenAI text-embedding-3-small - Text model",
    connector_id=connector_id,
)

# Register the model through the ML Commons API.
response = requests.post(
    f"{base_url}/models/_register",
    json=register_model_payload,
    auth=auth,
    verify=False,
)

# Show the raw response body.
print(response.text)

{"task_id":"oqEhfpEBhL3bNK7smICy","status":"CREATED","model_id":"o6EhfpEBhL3bNK7smYBX"}


In [8]:
# Parse the registration response and keep the model ID; the deploy and
# predict calls below address the model by this ID.
obj = json.loads(response.text)
model_id = obj["model_id"]
print(model_id)

o6EhfpEBhL3bNK7smYBX


앞서 등록한 모델을 배포합니다.

In [9]:
# The deploy request is sent with an empty JSON object as its body.
deploy_model_payload = dict()

# Deploy the registered model, addressed by its ID.
response = requests.post(
    f"{base_url}/models/{model_id}/_deploy",
    json=deploy_model_payload,
    auth=auth,
    verify=False,
)

# Show the raw response body.
print(response.text)

{"task_id":"pKEhfpEBhL3bNK7snoC9","task_type":"DEPLOY_MODEL","status":"COMPLETED"}


모델이 잘 배포되었는지 테스트해봅니다.

In [10]:
# Single sample sentence to embed; "input" is a list of texts.
predict_model_payload = {
    "parameters": {
        "input": [
            "안녕하세요. 우리는 롯데헬스케어입니다. 비타민C는 건강에 좋습니다",
        ],
    },
}

# Call the deployed model's predict endpoint.
response = requests.post(
    f"{base_url}/models/{model_id}/_predict",
    json=predict_model_payload,
    auth=auth,
    verify=False,
)
# Bare expression: the notebook displays the parsed JSON as the cell output.
response.json()

{'inference_results': [{'output': [{'name': 'sentence_embedding',
     'data_type': 'FLOAT32',
     'shape': [1536],
     'data': [0.008632458,
      0.01143533,
      -0.060651045,
      0.028418012,
      0.014841598,
      0.013323376,
      -0.04324988,
      0.049712054,
      -0.04348345,
      -0.042120945,
      -0.045585606,
      0.011678635,
      3.423376e-05,
      -0.011756493,
      0.027775686,
      -0.01799483,
      0.037507884,
      0.033050537,
      -0.008374554,
      0.01374186,
      0.052281357,
      -0.023960667,
      0.049128126,
      -0.017761257,
      0.0040558926,
      -0.0022067754,
      0.023882808,
      -0.032018926,
      0.017878044,
      -0.029060336,
      0.0012724847,
      -0.020048324,
      0.0010614176,
      0.010666487,
      -0.0236687,
      0.0019951002,
      0.034724474,
      -0.011172561,
      0.019260015,
      -0.014354989,
      -0.0066519557,
      -0.0010486441,
      0.025731925,
      -0.011396402,
      0.021352438,

In [11]:
# Display the length of the embedding vector returned for the first (and
# only) input text.
len(response.json()["inference_results"][0]["output"][0]["data"])

1536

이후 테스트에 사용하기 위해 모델 아이디를 저장합니다.

In [12]:
# IPython %store magic: save model_id so later notebooks can reuse it.
%store model_id

Stored 'model_id' (str)


In [13]:
# Bare expression: the notebook displays the stored model ID.
model_id

'o6EhfpEBhL3bNK7smYBX'