# Configure AI Core

Let's first connect to the AI Core client using credentials stored in `.env`. This file should contain the following credentials:
```
AICORE_AUTH_URL = "FROM AI CORE SERVICE KEY"
AICORE_CLIENT_ID = "FROM AI CORE SERVICE KEY"
AICORE_CLIENT_SECRET = "FROM AI CORE SERVICE KEY"
AICORE_RESOURCE_GROUP = "serving-framework-benchmarking"
AICORE_BASE_URL = "FROM AI CORE SERVICE KEY"
DOCKER_PAT = "DOCKER PERSONAL ACCESS TOKEN"
GH_PAT = "GITHUB PERSONAL ACCESS TOKEN"
```

In [2]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [3]:
import os
# Load Library
from ai_core_sdk.ai_core_v2_client import AICoreV2Client

# Create Connection
ai_core_client = AICoreV2Client(
    base_url = os.environ["AICORE_BASE_URL"],
    auth_url=  os.environ["AICORE_AUTH_URL"], 
    client_id = os.environ["AICORE_CLIENT_ID"],
    client_secret = os.environ["AICORE_CLIENT_SECRET"],
    resource_group=os.environ['AICORE_RESOURCE_GROUP']
)

Now, let's store Docker credentials as a secret in AI Core.

In [6]:
response = ai_core_client.docker_registry_secrets.create(
    name = "docker-secret-aman",
    data = {
        ".dockerconfigjson": f'{{\"auths\":{{\"docker.io/amanichopra\":{{\"username\":\"amanichopra\",\"password\":\"{os.environ["DOCKER_PAT"]}\"}}}}}}'
    }
)
print(response.__dict__)

{'message': 'Secret exists'}


Now, let's onboard a GitHub repo onto AI Core.

In [None]:
response = ai_core_client.repositories.create(
    name = "llm-serving-benchmarking-gh-repo",
    url = "https://github.com/amanichopra/llm-serving-framework-benchmarking.git",
    username = "amanichopra",
    password = os.environ['GH_PAT']
)
print(response.__dict__)

# Serving via Ollama

Let's create an AI Core application for to serve via Ollama.

In [10]:
def create_ai_core_application(ai_core_client, app_name, path_to_serving_template):
    response = ai_core_client.applications.create(
    application_name = app_name,
    repository_url = "https://github.com/amanichopra/llm-serving-framework-benchmarking.git",
    path = path_to_serving_template,
    revision = "HEAD"
    )
    return response

In [None]:
response = create_ai_core_application(ai_core_client, 'ollama', './ollama/')
print(response.__dict__)

{'id': 'ollama', 'message': 'Application has been successfully created.'}


List all applications to ensure the new app has been created.

In [20]:
response = ai_core_client.applications.query()

for app in response.resources:
    print(app.__dict__)

{'path': 'pipelines', 'revision': 'HEAD', 'repository_url': 'https://github.tools.sap/D070429/ai_core_information_extraction', 'application_name': 'ai-core-information-extraction'}
{'path': 'LearningScenarios', 'revision': 'HEAD', 'repository_url': 'https://github.tools.sap/I589259/aicore-pipelines', 'application_name': 'andreas-learning-scenarios-app'}
{'path': 'LearningScenarios', 'revision': 'HEAD', 'repository_url': 'https://github.tools.sap/D070429/aicore-pipelines', 'application_name': 'christians-learning-scenarios-app'}
{'path': 'LearningScenarios', 'revision': 'HEAD', 'repository_url': 'https://github.tools.sap/I522484/aicore-pipelines', 'application_name': 'christoph-learning'}
{'path': 'templates', 'revision': 'HEAD', 'repository_url': 'https://github.com/fyx99/CI-CD-with-AI-Core#main', 'application_name': 'cicd-felix-application'}
{'path': 'templates', 'revision': 'HEAD', 'repository_url': 'https://github.com/fyx99/Cloud-Connector-with-AI-Core', 'application_name': 'cloud-c

Ensure the application is synced to the GitHub workflow template.

In [42]:
print(ai_core_client.applications.refresh(application_name='ollama').message)

A refresh of the application has been scheduled.


In [43]:
# Get application status
response = ai_core_client.applications.get_status(application_name='ollama')
print(response.message)

for sync_status in response.sync_ressources_status:
    print(sync_status.__dict__)

successfully synced (all tasks run)
{'name': 'ollama-serving-template', 'kind': 'ServingTemplate', 'status': 'Synced', 'message': 'servingtemplate.ai.sap.com/ollama-serving-template configured'}


Ensure you can see the executable and scenario created in AI Core, as defined in the template.

In [None]:
# List available executables
response = ai_core_client.executable.query(
    scenario_id = "ollama"
)

for executable in response.resources:
    print(executable.__dict__)


{'id': 'custom-serving-template', 'scenario_id': 'custom-serve', 'version_id': '0.0.1', 'name': 'custom-serve', 'description': 'Serve GPT-OSS-20B with custom server', 'deployable': True, 'parameters': [<ai_api_client_sdk.models.parameter.Parameter object at 0x104de1be0>, <ai_api_client_sdk.models.parameter.Parameter object at 0x103fb6350>], 'input_artifacts': [], 'output_artifacts': None, 'labels': None, 'created_at': datetime.datetime(2025, 8, 18, 22, 38, 16, tzinfo=datetime.timezone.utc), 'modified_at': datetime.datetime(2025, 8, 18, 22, 38, 16, tzinfo=datetime.timezone.utc)}


Create a configuration for the deployment

In [4]:
def create_configuration(ai_core_client, config_name, scenario_id, executable_id):
    response = ai_core_client.configuration.create(
        name = config_name,
        scenario_id = scenario_id,
        executable_id = executable_id              
    )    
    return response

In [None]:
# Create configuration
response = create_configuration(ai_core_client, 'ollama-serve', 'ollama', 'ollama-serving-template')
configuration = response.__dict__
configuration

{'id': '9407d8fe-55a1-4b54-aae2-e43bb2894deb',
 'message': 'Configuration created'}

Create a deployment.

In [14]:
response = ai_core_client.deployment.create(
    configuration_id=configuration['id'],
)
deployment = response.__dict__
deployment

{'id': 'dfbad28c79915674',
 'message': 'Deployment scheduled.',
 'deployment_url': '',
 'status': <Status.UNKNOWN: 'UNKNOWN'>,
 'ttl': None}

In [15]:
response = ai_core_client.deployment.get(deployment_id=deployment['id'])
response.__dict__

{'id': 'dfbad28c79915674',
 'configuration_id': '9407d8fe-55a1-4b54-aae2-e43bb2894deb',
 'configuration_name': 'ollama-serve',
 'scenario_id': 'ollama',
 'status': <Status.UNKNOWN: 'UNKNOWN'>,
 'target_status': <TargetStatus.RUNNING: 'RUNNING'>,
 'created_at': datetime.datetime(2025, 8, 12, 15, 2, 16, tzinfo=datetime.timezone.utc),
 'modified_at': datetime.datetime(2025, 8, 12, 15, 2, 16, tzinfo=datetime.timezone.utc),
 'status_message': None,
 'status_details': None,
 'submission_time': None,
 'start_time': None,
 'completion_time': None,
 'deployment_url': '',
 'last_operation': None,
 'latest_running_configuration_id': None,
 'details': None,
 'ttl': None}

Pull gpt-oss. Can also do this via API client like Postman.

In [17]:
import requests

token = ai_core_client.rest_client.get_token()
headers = {
        "Authorization": token,
        'ai-resource-group': os.environ['AICORE_RESOURCE_GROUP'],
        "Content-Type": "application/json"}

model = "gpt-oss:20b"
deployment_obj = ai_core_client.deployment.get(deployment_id=deployment['id'])
inference_base_url = f"{deployment_obj.deployment_url}"

# pull the model from ollama model repository
endpoint = f"{inference_base_url}/v1/api/pull"

# let's pull the mistral model from ollama
json_data = { "name": model}

response = requests.post(endpoint, headers=headers, json=json_data)
response.__dict__

{'_content': b'{"status":"pulling manifest"}\n{"status":"pulling b112e727c6f1","digest":"sha256:b112e727c6f18875636c56a779790a590d705aec9e1c0eb5a97d51fc2a778583","total":13780154624,"completed":13780154624}\n{"status":"pulling fa6710a93d78","digest":"sha256:fa6710a93d78da62641e192361344be7a8c0a1c3737f139cf89f20ce1626b99c","total":7240,"completed":7240}\n{"status":"pulling f60356777647","digest":"sha256:f60356777647e927149cbd4c0ec1314a90caba9400ad205ddc4ce47ed001c2d6","total":11353,"completed":11353}\n{"status":"pulling d8ba2f9a17b3","digest":"sha256:d8ba2f9a17b3bbdeb5690efaa409b3fcb0b56296a777c7a69c78aa33bbddf182","total":18,"completed":18}\n{"status":"pulling 55c108d8e936","digest":"sha256:55c108d8e93662a22dcbed5acaa0374c7d740c6aa4e8b7eee7ae77ed7dc72a25","total":489,"completed":489}\n{"status":"verifying sha256 digest"}\n{"status":"writing manifest"}\n{"status":"success"}\n',
 '_content_consumed': True,
 '_next': None,
 'status_code': 200,
 'headers': {'content-type': 'application/x-n

# Serving YOLO for Pose Estimation 

Create AI core application for serving 

In [11]:
response = create_ai_core_application(ai_core_client, 'pose-estimation-server', './pose-estimation/')
print(response.__dict__)

{'id': 'pose-estimation-server', 'message': 'Application has been successfully created.'}


In [23]:
print(ai_core_client.applications.refresh(application_name='pose-estimation-server').message)

A refresh of the application has been scheduled.


In [24]:
# Get application status
response = ai_core_client.applications.get_status(application_name='pose-estimation-server')
print(response.message)

for sync_status in response.sync_ressources_status:
    print(sync_status.__dict__)

successfully synced (all tasks run)
{'name': 'pose-estimation-serving-template', 'kind': 'ServingTemplate', 'status': 'Synced', 'message': 'servingtemplate.ai.sap.com/pose-estimation-serving-template created'}


Create model configuration.

In [59]:
response = create_configuration(ai_core_client, 'yolo', 'yolo', 'pose-estimation-serving-template')
configuration = response.__dict__
configuration

{'id': '37be03df-d27e-4d63-b426-f71e65c640f3',
 'message': 'Configuration created'}

In [60]:
response = ai_core_client.deployment.create(
    configuration_id=configuration['id'],
)
deployment = response.__dict__
deployment

{'id': 'df0b6311b41d4db0',
 'message': 'Deployment scheduled.',
 'deployment_url': '',
 'status': <Status.UNKNOWN: 'UNKNOWN'>,
 'ttl': None}

In [63]:
response = ai_core_client.deployment.get(deployment_id=deployment['id'])
response.__dict__

{'id': 'df0b6311b41d4db0',
 'configuration_id': '37be03df-d27e-4d63-b426-f71e65c640f3',
 'configuration_name': 'yolo',
 'scenario_id': 'yolo',
 'status': <Status.RUNNING: 'RUNNING'>,
 'target_status': <TargetStatus.RUNNING: 'RUNNING'>,
 'created_at': datetime.datetime(2025, 8, 26, 2, 19, 31, tzinfo=datetime.timezone.utc),
 'modified_at': datetime.datetime(2025, 8, 26, 2, 48, 34, tzinfo=datetime.timezone.utc),
 'status_message': None,
 'status_details': None,
 'submission_time': datetime.datetime(2025, 8, 26, 2, 21, 35, tzinfo=datetime.timezone.utc),
 'start_time': datetime.datetime(2025, 8, 26, 2, 30, 51, tzinfo=datetime.timezone.utc),
 'completion_time': None,
 'deployment_url': 'https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/df0b6311b41d4db0',
 'last_operation': <Operation.CREATE: 'CREATE'>,
 'latest_running_configuration_id': '37be03df-d27e-4d63-b426-f71e65c640f3',
 'details': {'resources': {'backend_details': {'predictor': {'resource_plan

Prepare image for sending requests.

In [4]:
import base64

with open("pose_estimation_yolo/sample_images/cami.jpg", "rb") as f:
    img_bytes = f.read()

img_b64 = base64.b64encode(img_bytes).decode("utf-8")
img_b64

'/9j/4QDoRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAVgEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAACQAAAAAQAAAJAAAAABAAiQAAAHAAAABDAyMjGRAQAHAAAABAECAwCShgAHAAAAEgAAAMygAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAABkCgAwAEAAAAAQAAB1KkBgADAAAAAQAAAAAAAAAAQVNDSUkAAABTY3JlZW5zaG90AAD/4g/wSUNDX1BST0ZJTEUAAQEAAA/gYXBwbAIQAABtbnRyUkdCIFhZWiAH6QAIABUAFgAQADdhY3NwQVBQTAAAAABBUFBMAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWFwcGwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABFkZXNjAAABUAAAAGJkc2NtAAABtAAABLxjcHJ0AAAGcAAAACN3dHB0AAAGlAAAABRyWFlaAAAGqAAAABRnWFlaAAAGvAAAABRiWFlaAAAG0AAAABRyVFJDAAAG5AAACAxhYXJnAAAO8AAAACB2Y2d0AAAPEAAAADBuZGluAAAPQAAAAD5tbW9kAAAPgAAAACh2Y2dwAAAPqAAAADhiVFJDAAAG5AAACAxnVFJDAAAG5AAACAxhYWJnAAAO8AAAACBhYWdnAAAO8AAAACBkZXNjAAAAAAAAAAhEaXNwbGF5AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAbWx1YwAAAAAAAAAnAAAADGhySFIAAAAUAAAB5GtvS1IAAAAMAAAB+G5iTk8AAAASAAACBGlkAAAAAAA