# Configuration

In [1]:
# Load the autoreload extension and set mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Install ipywidgets into the kernel's environment.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
%pip install ipywidgets

Note: you may need to restart the kernel to use updated packages.


## 1. AWS Configuration

In [3]:
import boto3
from sagemaker import get_execution_role
# Execution role of the current SageMaker environment.
# NOTE(review): `role` is not referenced by any other visible cell — confirm it is still needed.
role = get_execution_role()
# Default boto3 session; later cells read its region from here.
session = boto3.Session()

In [None]:
# `os` is imported here so this cell does not depend on a later cell having
# been executed first (the notebook must survive Restart Kernel -> Run All).
import os

# Export the session's region so helper modules and new boto3 clients use it.
# os.environ only accepts str values, so fail with a clear message instead of
# an opaque TypeError when no region is configured.
if not session.region_name:
    raise RuntimeError(
        "No AWS region is configured for the boto3 session; "
        "set AWS_DEFAULT_REGION or configure a default region first."
    )
os.environ["AWS_DEFAULT_REGION"] = session.region_name  # e.g. us-west-2

# The previous code assigned the boto3 Session object itself, which os.environ
# rejects (values must be str). Export the profile *name* instead, and only
# when that profile actually exists in the local AWS configuration: when
# running on an execution role there is typically no named profile, and
# pointing AWS_PROFILE at a missing profile breaks later boto3 sessions.
if session.profile_name in session.available_profiles:
    os.environ["AWS_PROFILE"] = session.profile_name

## 2. Bedrock Configuration

In [4]:
import os
import requests
from pprint import pprint
from utils.bedrock import BedrockClient

In [6]:
# Print the results
print("\n== All FM lists ==")
# Ask the project's Bedrock helper for the available foundation models and
# pretty-print them (the captured output below shows a list of model IDs).
all_models = BedrockClient.get_list_fm_models()
pprint(all_models)


== All FM lists ==


['amazon.titan-tg1-large',
 'amazon.titan-embed-g1-text-02',
 'amazon.titan-text-lite-v1:0:4k',
 'amazon.titan-text-lite-v1',
 'amazon.titan-text-express-v1:0:8k',
 'amazon.titan-text-express-v1',
 'amazon.nova-pro-v1:0',
 'amazon.nova-lite-v1:0',
 'amazon.nova-micro-v1:0',
 'amazon.titan-embed-text-v1:2:8k',
 'amazon.titan-embed-text-v1',
 'amazon.titan-embed-text-v2:0',
 'amazon.titan-embed-image-v1:0',
 'amazon.titan-embed-image-v1',
 'amazon.titan-image-generator-v1:0',
 'amazon.titan-image-generator-v1',
 'amazon.titan-image-generator-v2:0',
 'amazon.rerank-v1:0',
 'stability.stable-diffusion-xl-v1:0',
 'stability.stable-diffusion-xl-v1',
 'stability.sd3-large-v1:0',
 'stability.sd3-5-large-v1:0',
 'stability.stable-image-core-v1:0',
 'stability.stable-image-core-v1:1',
 'stability.stable-image-ultra-v1:0',
 'stability.stable-image-ultra-v1:1',
 'anthropic.claude-3-5-sonnet-20241022-v2:0:18k',
 'anthropic.claude-3-5-sonnet-20241022-v2:0:51k',
 'anthropic.claude-3-5-sonnet-20241022

In [11]:
# NOTE(review): this whole cell is commented-out code. It looks like an earlier
# inline version of the model listing that is now done through
# BedrockClient.get_list_fm_models() — confirm and delete it if it is no longer
# needed. It also references `Config` and `colored`, neither of which is
# imported in the visible cells.
# # Function that creates the Bedrock client
# def get_bedrock_client(region=None):
#     session = boto3.Session()
    
#     config = Config(
#         retries={
#             'max_attempts': 10,
#             'mode': 'standard'
#         }
#     )
    
#     return session.client(
#         service_name='bedrock',
#         region_name=region,
#         config=config
#     )

# # Create the Bedrock client
# boto3_bedrock = get_bedrock_client(
#     region=boto3.Session().region_name
# )

# # Function that fetches the list of Bedrock models
# def get_list_fm_models(verbose=False):
#     response = boto3_bedrock.list_foundation_models()
#     models = response['modelSummaries']
    
#     if verbose:
#         return models
#     else:
#         return [model['modelId'] for model in models]

# # Print the results
# print(colored("\n== All FM lists ==", "green"))
# all_models = get_list_fm_models(verbose=False)
# pprint(all_models)

[32m
== All FM lists ==[0m
['amazon.titan-tg1-large',
 'amazon.titan-embed-g1-text-02',
 'amazon.titan-text-lite-v1:0:4k',
 'amazon.titan-text-lite-v1',
 'amazon.titan-text-express-v1:0:8k',
 'amazon.titan-text-express-v1',
 'amazon.nova-pro-v1:0',
 'amazon.nova-lite-v1:0',
 'amazon.nova-micro-v1:0',
 'amazon.titan-embed-text-v1:2:8k',
 'amazon.titan-embed-text-v1',
 'amazon.titan-embed-text-v2:0',
 'amazon.titan-embed-image-v1:0',
 'amazon.titan-embed-image-v1',
 'amazon.titan-image-generator-v1:0',
 'amazon.titan-image-generator-v1',
 'amazon.titan-image-generator-v2:0',
 'amazon.rerank-v1:0',
 'stability.stable-diffusion-xl-v1:0',
 'stability.stable-diffusion-xl-v1',
 'stability.sd3-large-v1:0',
 'stability.sd3-5-large-v1:0',
 'stability.stable-image-core-v1:0',
 'stability.stable-image-core-v1:1',
 'stability.stable-image-ultra-v1:0',
 'stability.stable-image-ultra-v1:1',
 'anthropic.claude-3-5-sonnet-20241022-v2:0:18k',
 'anthropic.claude-3-5-sonnet-20241022-v2:0:51k',
 'anthrop

In [3]:
# Bedrock settings exposed through environment variables.
# Each one is set to an empty string in this cell.
os.environ.update(
    {
        "BEDROCK_MODEL_ID": "",
        "BEDROCK_RETRIES": "",
        "EMBED_MODEL_ID": "",
    }
)

Collecting termcolor
  Downloading termcolor-2.5.0-py3-none-any.whl.metadata (6.1 kB)
Downloading termcolor-2.5.0-py3-none-any.whl (7.8 kB)
Installing collected packages: termcolor
Successfully installed termcolor-2.5.0
Note: you may need to restart the kernel to use updated packages.


## 3. OpenSearch Configuration

### 3-1. Index Configuration

In [10]:
from utils.ssm import parameter_store

# Build the project's parameter-store helper for the session's current region.
region = boto3.Session().region_name
pm = parameter_store(region)

In [11]:
# Name of the index used by the rest of this notebook; it is also saved
# through the parameter-store helper under the key "opensearch_index_name".
index_name = "default_doc_index"

pm.put_params(key="opensearch_index_name", value=index_name, overwrite=True, enc=False)

Parameter stored successfully.


### 3-2. Define Index Schema

In [None]:
# Size of the vectors stored in `vector_field`. It must match the output size
# of the embedding model selected via EMBED_MODEL_ID (for example 1024 for
# amazon.titan-embed-text-v2:0 with default settings, 1536 for
# amazon.titan-embed-text-v1) — adjust it when changing the model.
# `dimension` was previously never defined in this notebook, so the cell raised
# NameError on a fresh kernel. A value already set earlier in the session is
# kept; otherwise 1024 is used. It is converted to int because the knn_vector
# mapping documents `dimension` as an integer, not a string.
dimension = int(globals().get("dimension", 1024))

# Index definition: analysis settings for the `text` field, k-NN index
# settings, and the field mappings for metadata, text and the embedding vector.
index_body = {
    'settings': {
        'analysis': {
            'analyzer': {
                # Custom analyzer applied to `text` at both index and search time.
                'my_analyzer': {
                    'char_filter': ['html_strip'],
                    'tokenizer': 'nori',
                    'filter': [
                        #'nori_number',
                        #'lowercase',
                        #'trim',
                        'my_nori_part_of_speech'
                    ],
                    'type': 'custom'
                }
            },
            'tokenizer': {
                'nori': {
                    'decompound_mode': 'mixed',
                    'discard_punctuation': 'true',
                    'type': 'nori_tokenizer'
                }
            },
            "filter": {
                "my_nori_part_of_speech": {
                    "type": "nori_part_of_speech",
                    # Part-of-speech tags whose tokens are dropped.
                    # (The duplicated "XSV" entry has been removed.)
                    "stoptags": [
                        "J", "XSV", "E", "IC", "MAJ", "NNB",
                        "SP", "SSC", "SSO",
                        "SC", "SE", "XSN",
                        "UNA", "NA", "VCP", "VSV",
                        "VX"
                    ]
                }
            }
        },
        'index': {
            'knn': True,
            'knn.space_type': 'cosinesimil'  # Example space type
        }
    },
    'mappings': {
        'properties': {
            'metadata': {
                'properties': {
                    'source': {'type': 'keyword'},
                    'page_number': {'type': 'long'},
                    'category': {'type': 'text'},
                    'file_directory': {'type': 'text'},
                    'last_modified': {'type': 'text'},
                    'type': {'type': 'keyword'},
                    'image_base64': {'type': 'text'},
                    'origin_image': {'type': 'text'},
                    'origin_table': {'type': 'text'},
                }
            },
            'text': {
                'analyzer': 'my_analyzer',
                'search_analyzer': 'my_analyzer',
                'type': 'text'
            },
            'vector_field': {
                'type': 'knn_vector',
                'dimension': dimension  # must equal the embedding model's output size
            }
        }
    }
}


In [None]:
### 3-3. Get OpenSearch Domain Information

In [15]:
# Imported locally so the cell is self-contained.
import ast
import json

# Domain endpoint, read through the parameter-store helper.
opensearch_domain_endpoint = pm.get_params(
    key="opensearch_domain_endpoint",
    enc=False
)

secrets_manager = boto3.client('secretsmanager')

# Fetch the secret that holds the OpenSearch user name and password.
response = secrets_manager.get_secret_value(
    SecretId='opensearch_user_password'
)

secrets_string = response.get('SecretString')
if secrets_string is None:
    # Without this check the parsing step below would fail with an unrelated TypeError.
    raise ValueError(
        "Secret 'opensearch_user_password' has no SecretString value."
    )

# SECURITY: this used to be eval(secrets_string), which executes whatever code
# is stored in the secret. Parse it as data instead: JSON first, then fall back
# to a Python-literal parse for secrets that were saved as a dict repr.
# Neither parser can execute code.
try:
    secrets_dict = json.loads(secrets_string)
except json.JSONDecodeError:
    secrets_dict = ast.literal_eval(secrets_string)

opensearch_user_id = secrets_dict['es.net.http.auth.user']
opensearch_user_password = secrets_dict['pwkey']

# (user, password) pair handed to the OpenSearch client factory in a later cell.
http_auth = (opensearch_user_id, opensearch_user_password)

# The helper returns a pair that is unpacked into the prefix and the domain name.
result = pm.parse_opensearch_endpoint(opensearch_domain_endpoint)
prefix, domain_name = result

In [None]:
# Export the OpenSearch connection details as environment variables.
# OPENSEARCH_DOCUMENT_NAME is set to an empty string in this cell.
# NOTE(review): the password is placed in the process environment — confirm
# that is acceptable for this deployment.
os.environ.update(
    {
        "OPENSEARCH_PREFIX": prefix,
        "OPENSEARCH_DOMAIN_NAME": domain_name,
        "OPENSEARCH_DOCUMENT_NAME": "",
        "OPENSEARCH_USER": opensearch_user_id,
        "OPENSEARCH_PASSWORD": opensearch_user_password,
    }
)

### 3-4. Create OpenSearch Client

In [16]:
from utils.opensearch import opensearch_utils

# Region exported earlier in the notebook; None when the variable is not set.
aws_region = os.environ.get("AWS_DEFAULT_REGION")

# Build the client from the region, the domain endpoint and the basic-auth pair.
os_client = opensearch_utils.create_aws_opensearch_client(aws_region, opensearch_domain_endpoint, http_auth)

### 3-5. Create Index 

In [None]:
# Recreate the index from scratch: an existing index with the same name is
# deleted first, so any documents already stored in it are lost.
index_exists = opensearch_utils.check_if_index_exists(os_client, index_name)
if index_exists:
    opensearch_utils.delete_index(os_client, index_name)

opensearch_utils.create_index(os_client, index_name, index_body)

# Read the index definition back and show it as confirmation.
index_info = os_client.indices.get(index=index_name)
print("Index is created")
pprint(index_info)

## 4. RAG Application Configuration

In [None]:
# RAG application settings exposed through environment variables.
# Every value except RATE_LIMIT_DELAY is set to an empty string in this cell.
os.environ.update(
    {
        # Reranker Configuration
        "RERANKER_AWS_REGION": "",
        "RERANKER_AWS_PROFILE": "",
        "RERANKER_MODEL_ID": "",
        # Rank Fusion Configuration
        "RERANK_TOP_K": "",
        "HYBRID_SCORE_FILTER": "",
        "FINAL_RERANKED_RESULTS": "",
        "KNN_WEIGHT": "",
        # Application Configuration
        # Delay between API requests, in seconds (default: 60)
        "RATE_LIMIT_DELAY": "60",
    }
)