In [1]:
import os
import requests
from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
from dotenv import load_dotenv

load_dotenv()

# トークンを取得する関数
def get_token():
    auth_url = os.getenv('AICORE_AUTH_URL')
    client_id = os.getenv('AICORE_CLIENT_ID')
    client_secret = os.getenv('AICORE_CLIENT_SECRET')

    token_url = f"{auth_url}/oauth/token"
    data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret
    }

    response = requests.post(token_url, data=data)
    response.raise_for_status()

    token = response.json().get('access_token')
    if not token:
        raise ValueError("トークンが取得できませんでした。")

    return token

# トークンを取得
token = get_token()
print(f'Token: {token}')

# プロキシクライアントの取得
proxy_client = get_proxy_client('gen-ai-hub', token=token)

# デプロイメント一覧を取得
deployments_list = proxy_client.deployments
print("model_name, deployment_id, config_name")
for deployment in deployments_list:
    print(f"{deployment.model_name}, {deployment.deployment_id}, {deployment.config_name}")

Token: eyJhbGciOiJSUzI1NiIsImprdSI6Imh0dHBzOi8vZXUxLmF1dGhlbnRpY2F0aW9uLmV1MTAuaGFuYS5vbmRlbWFuZC5jb20vdG9rZW5fa2V5cyIsImtpZCI6Im15LWp3dC1rZXktMSIsInR5cCI6IkpXVCIsImppZCI6ICJ4RW5TOU1rQko5V1F6N2JPQlRxWHJDM3pOd1B2aFhPa0cyZWZzT2FTMEJBPSJ9.eyJqdGkiOiIxNjkxY2U3ZjcyODM0Y2EyOThjYjQ2OTM0MmE2OTlmNSIsImV4dF9hdHRyIjp7ImVuaGFuY2VyIjoiWFNVQUEiLCJzdWJhY2NvdW50aWQiOiIxMDgwNmFkZC0zZWZkLTQ5NWMtODYxYi1iNzU5ZTQ4YjhiMTYiLCJ6ZG4iOiJldTEiLCJzZXJ2aWNlaW5zdGFuY2VpZCI6IjFmZGUwNWYwLWU0YWItNGJlMC1iMWU0LWJkZjA4ZjQ5OTY3OSJ9LCJzdWIiOiJzYi0xZmRlMDVmMC1lNGFiLTRiZTAtYjFlNC1iZGYwOGY0OTk2NzkhYjQ3OTB8YWljb3JlIWI1NDAiLCJhdXRob3JpdGllcyI6WyJhaWNvcmUhYjU0MC5kb2NrZXJyZWdpc3RyeXNlY3JldC5jcmVkZW50aWFscy51cGRhdGUiLCJhaWNvcmUhYjU0MC5tbGZjb25uZWN0aW9uLmNyZWRlbnRpYWxzLmNyZWF0ZSIsImFpY29yZSFiNTQwLmRvY2tlcnJlZ2lzdHJ5c2VjcmV0LmNyZWRlbnRpYWxzLmRlbGV0ZSIsImFpY29yZSFiNTQwLnJlcG9zaXRvcmllcy5yZWFkIiwiYWljb3JlIWI1NDAuZG9ja2VycmVnaXN0cnlzZWNyZXQuY3JlZGVudGlhbHMucmVhZCIsImFpY29yZSFiNTQwLnNjZW5hcmlvcy5leGVjdXRpb25zY2hlZHVsZXMuY3JlYXRlIiwiYWlj

In [None]:
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings

# 取得したデプロイメントIDを使用
deployment_id = "d8b1e5eb7341b1f3"

# embedding_modelの設定
embedding_model = OpenAIEmbeddings(deployment_id=deployment_id, proxy_client=proxy_client)

In [None]:
import os
import json
from hdbcli import dbapi
from langchain_community.document_loaders import JSONLoader
from langchain.vectorstores import HanaDB
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings

# 環境変数の読み込み
load_dotenv()

# 埋め込みモデルの設定
embedding_model = OpenAIEmbeddings(deployment_id=os.getenv('EMBEDDING_DEPLOYMENT_ID'))

def get_embedding(input_text):
    return embedding_model.embed_query(input_text)

# データベースの接続と初期化
def connect_database():
    try:
        connection = dbapi.connect(
            address=os.getenv("HANA_DB_ADDRESS"),
            port=int(os.getenv("HANA_DB_PORT")),  # ポートは整数に変換
            user=os.getenv("HANA_DB_USER"),
            password=os.getenv("HANA_DB_PASSWORD"),
            autocommit=True,
            sslValidateCertificate=False,
        )
        db = HanaDB(
            connection=connection,
            embedding=embedding_model,
            table_name="FILE_EMBEDDINGS",
        )
        db.delete(filter={})  # データベースの初期化
        return connection, db
    except Exception as e:
        print(f"データベース接続エラー: {e}")
        raise

# JSONファイルを処理してDBに格納
def process_json_files(json_folder, db):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # チャンクサイズとオーバーラップを設定

    for file_name in os.listdir(json_folder):
        file_path = os.path.join(json_folder, file_name)
        # JSONファイルのメタデータを取得
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
            metadata = data.get('metadata', {})
        if os.path.isdir(file_path):
            continue  # ディレクトリはスキップ
        loader = JSONLoader(file_path, jq_schema='.content', text_content=False)
        documents = loader.load()
        for doc in documents:
            if doc.page_content:  # 空でないことを確認
                chunks = text_splitter.split_text(doc.page_content)
                for i, chunk in enumerate(chunks):
                    chunk_doc = doc.copy()  # 元の文書オブジェクトをコピー
                    chunk_doc.page_content = chunk
                    embedding = get_embedding(chunk)
                    chunk_doc.metadata['embedding'] = embedding
                    chunk_doc.metadata['source_filename'] = metadata['filename']
                    # chunk_doc.metadata['chunk_id'] = f"{metadata['filename']}_chunk_{i+1}"
                    db.add_documents([chunk_doc])
                print(f"Processed and added JSON file: {file_path}")
            else:
                print(f"Skipping empty document: {file_path}")

# メインフロー
def main():
    json_folder = "../data/all_JSONs"
    connection, db = connect_database()
    process_json_files(json_folder, db)
    connection.close()

# 実行部分
if __name__ == "__main__":
    main()
