In [1]:
import json
from ipywidgets import widgets
import os
import requests
import llm_commons.proxy.base

# Set the module-level `proxy_version` attribute of llm_commons.proxy.base.
# NOTE(review): presumably this selects the SAP AI Core backend for
# llm_commons -- confirm against the library documentation.
llm_commons.proxy.base.proxy_version = 'aicore'

In [3]:
# Load the service key (a JSON document) into `sk`. Later cells read its
# 'url', 'clientid', 'clientsecret' and 'serviceurls' entries.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default locale encoding (e.g. cp1252 on Windows).
# The file contains a client secret: keep it out of version control.
with open('irpa-r570-genaixl-ka-sk.txt', encoding='utf-8') as f:
    sk = json.load(f)

In [4]:
# Editable text box pre-filled with the resource group ID. Later cells read
# the current content through `resource_group.value`.
resource_group = widgets.Text(
    description='',
    placeholder='Resource group of deployments',
    value='47d7157c-af7b-4416-816c-5c0871523092',  # default resource group
    disabled=False,
)

# A bare expression as the last statement of the cell renders the widget.
resource_group

Text(value='47d7157c-af7b-4416-816c-5c0871523092', placeholder='Resource group of deployments')

In [7]:
# Publish the service-key credentials and endpoints as AICORE_LLM_* environment
# variables. The resource group is taken from the widget's current value.
os.environ.update({
    'AICORE_LLM_AUTH_URL': sk['url'] + "/oauth/token",
    'AICORE_LLM_CLIENT_ID': sk['clientid'],
    'AICORE_LLM_CLIENT_SECRET': sk['clientsecret'],
    'AICORE_LLM_API_BASE': sk["serviceurls"]["AI_API_URL"] + "/v2",
    'AICORE_LLM_RESOURCE_GROUP': resource_group.value,
    'LLM_COMMONS_PROXY': 'aicore',
})

# Mirror the same settings onto the llm_commons.proxy module attributes,
# reading each value back from the environment variable set above.
for _proxy_attr, _env_name in (
    ('resource_group', 'AICORE_LLM_RESOURCE_GROUP'),
    ('api_base', 'AICORE_LLM_API_BASE'),
    ('auth_url', 'AICORE_LLM_AUTH_URL'),
    ('client_id', 'AICORE_LLM_CLIENT_ID'),
    ('client_secret', 'AICORE_LLM_CLIENT_SECRET'),
):
    setattr(llm_commons.proxy, _proxy_attr, os.environ[_env_name])

In [8]:
# Fetch an OAuth access token using the client-credentials grant.
#
# AICORE_LLM_AUTH_URL is set by the configuration cell with "/oauth/token"
# already appended, so appending it unconditionally here would produce
# ".../oauth/token/oauth/token". The path is therefore only added when the
# configured URL does not already end with it.
token_url = os.environ["AICORE_LLM_AUTH_URL"].rstrip("/")
if not token_url.endswith("/oauth/token"):
    token_url += "/oauth/token"

response = requests.post(
    token_url,
    data={"grant_type": "client_credentials"},
    auth=(os.environ['AICORE_LLM_CLIENT_ID'], os.environ['AICORE_LLM_CLIENT_SECRET']),
    timeout=30,  # seconds (requests timeouts are not milliseconds)
)
# Fail with a clear HTTP error on bad credentials / wrong URL instead of an
# opaque KeyError or JSON decode error on the next line.
response.raise_for_status()
auth_token = response.json()["access_token"]

In [9]:
# Deployment URL used by the embeddings request in the next cell.
# Copy it from AI Launchpad or from the wiki page shared by the use-case team.
deployment_url= "https://api.ai.prod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d026d09d4fbbad63"

In [10]:
# Request body for the embeddings call: the model name plus the list of
# texts to embed.
test_input = {
    "model" : "text-embedding-ada-002",
    "input" : ["test embedding model"]
}

# Embeddings route exposed by the deployment (endpoint implemented in the
# serving engine).
endpoint = f"{deployment_url}/embeddings?api-version=2023-05-15"
print(endpoint)
headers = {"Authorization": f"Bearer {auth_token}",
           'ai-resource-group': os.environ['AICORE_LLM_RESOURCE_GROUP'],
           "Content-Type": "application/json"}
# An explicit timeout keeps the cell from hanging indefinitely when the
# deployment is unreachable; requests applies no timeout by default.
response = requests.post(endpoint, headers=headers, json=test_input, timeout=60)

# Show the HTTP status first, then the raw response body.
print('Inference result:', response)
print('Inference result:', response.text)

https://api.ai.prod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d026d09d4fbbad63/embeddings?api-version=2023-05-15
Inference result: <Response [200]>
Inference result: {"data":[{"embedding":[-0.029155683,-0.002209082,-0.010131669,-0.01715452,0.008283183,0.015250019,0.006046093,-0.009004373,-0.000119578384,-0.026901089,-0.008101135,0.02929572,-0.0055209547,-0.012001162,-0.016230276,0.0071558855,0.012967417,0.009774576,0.013891661,-0.010341725,-0.000914616,0.0026274424,0.005867546,-0.006630747,0.0011640567,-0.00089623616,0.023246126,-0.029995903,-0.010677814,-0.006424193,-0.005111347,-0.0050343266,-0.031368267,-0.031340256,0.006158123,-0.0057065035,0.010775839,-0.012974419,0.03475716,-0.007688026,0.010719825,0.018302822,0.0053949216,-0.01688845,0.003317124,0.008010111,0.0036479612,-0.011420009,-0.009739567,-0.0016944464,0.0050868406,0.00892035,-0.0047682566,0.0031438284,-0.012799373,0.022685979,0.0015351545,-0.005555964,0.0017102007,-0.013569576,-0.008913348,0.00441816

Chat completion with API

In [11]:
# Deployment URL used by the chat-completion request in the next cell.
# Copy it from AI Launchpad or from the wiki page shared by the use-case team.
# Note: this rebinds `deployment_url`, replacing the value used for embeddings.
deployment_url= "https://api.ai.prod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d13fd219506b28f1"

In [12]:
# Request body for the chat-completion call: the model name plus a single
# user message.
test_input = {
    "model" : "gpt-4",
    "messages" : [{ "content": "who can benefit from OpenAI format?","role": "user"}]
}


# Chat-completions route exposed by the deployment (endpoint implemented in
# the serving engine).
endpoint = f"{deployment_url}/chat/completions?api-version=2023-05-15"
print(endpoint)
# The resource group is read from the widget, so it reflects the widget's
# current value at the time this cell runs.
headers = {"Authorization": f"Bearer {auth_token}",
           'ai-resource-group': resource_group.value,
           "Content-Type": "application/json"}
# An explicit timeout keeps the cell from hanging indefinitely when the
# deployment is unreachable; requests applies no timeout by default.
# Chat completions can be slow, hence the generous limit.
response = requests.post(endpoint, headers=headers, json=test_input, timeout=120)

# Show the HTTP status first, then the raw response body.
print('Inference result:', response)
print('Inference result:', response.text)

https://api.ai.prod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d13fd219506b28f1/chat/completions?api-version=2023-05-15
Inference result: <Response [200]>
Inference result: {"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Many entities can benefit from the OpenAI format. They include:\n\n1. Researchers: The tool can aid in studying advanced AI algorithms and testing innovative ideas.\n\n2. AI developers: Using OpenAI, developers can help in building and applying advanced technology for their software.\n\n3. Organizations: Businesses can leverage OpenAI to automate tasks, enhance decision-making, improve products and services, and speed up operations.\n\n4. Society: OpenAI can address challenging societal issues like healthcare, education, the environment, and more.\n\n5. Governments: They can use AI to enhance public services, ensure security, and support decision-making processes.\n\n6. Consumers: By being incorporated in various products and ser