# Use SAP AI Core to Train a Pose Estimation Neural Network


## Step 1

Create a connection with AI Core. Use the `aicore_creds.json` provided to you and place it in the project working directory.

In [1]:
import itertools
import json
import os

# Read the SAP AI Core service key from disk.
with open('aicore_creds.json') as creds_file:
    ac_creds = json.load(creds_file)

# Publish the connection settings as environment variables in a single step.
os.environ.update({
    "AICORE_AUTH_URL": f"{ac_creds['url']}/oauth/token",
    "AICORE_BASE_URL": ac_creds['serviceurls']['AI_API_URL'],
    "AICORE_CLIENT_ID": ac_creds['clientid'],
    "AICORE_CLIENT_SECRET": ac_creds['clientsecret'],
    "AICORE_RESOURCE_GROUP": "default",
})

In [2]:
# Load Library
from ai_core_sdk.ai_core_v2_client import AICoreV2Client

# Build the SAP AI Core client from the AICORE_* environment variables
ai_core_client = AICoreV2Client(
    # the SAP AI Core API is currently at version 2, hence the "/v2" suffix
    base_url=f'{os.environ["AICORE_BASE_URL"]}/v2',
    auth_url=os.environ["AICORE_AUTH_URL"],
    client_id=os.environ["AICORE_CLIENT_ID"],
    client_secret=os.environ["AICORE_CLIENT_SECRET"],
)
# this cell is not expected to print anything

## Step 2

Create a Docker account and generate a personal access token. Refer to [this](https://developers.sap.com/tutorials/ai-core-code.html#cf7b33ab-c455-47ee-a812-33a1ff587cf0) tutorial for steps. Also install Docker Desktop [here](https://www.docker.com/products/docker-desktop/).

## Step 3

Write the training code. You can find it in `train_movenet.py`, `model_utils.py`, and `preprocessing_utils.py`.

## Step 4

Define a `Dockerfile` and ensure it successfully builds using the make target `deploy-container-service-local`. Then connect Docker to your account via `docker login docker.io`. Now upload the image to the cloud using the make target `push-docker-image`.

## Step 5

Store your Docker credentials in SAP AI Core as a Docker registry secret.

In [3]:
# Read the Docker credentials from a local JSON file
with open('docker_creds.json') as docker_file:
    docker_creds = json.load(docker_file)

In [4]:
# Build the .dockerconfigjson payload with json.dumps instead of a hand-escaped
# f-string, so the token is still encoded as valid JSON if it contains quotes or
# backslashes. Compact separators keep the output identical to the previous
# hand-written string for ordinary tokens.
# NOTE(review): the "auths" key is an image reference with a tag; SAP's examples
# use the registry URL here — confirm this is the intended key.
docker_config = json.dumps(
    {
        "auths": {
            "docker.io/amanichopra/aicore-train:tagname": {
                "username": "amanichopra",
                "password": docker_creds["pat"],
            }
        }
    },
    separators=(",", ":"),
)

# Store the Docker credentials in SAP AI Core under the name "docker-secret"
response = ai_core_client.docker_registry_secrets.create(
    name="docker-secret",
    data={".dockerconfigjson": docker_config},
)

print(response.__dict__)

{'message': 'secret has been created'}


## Step 6

Onboard your GitHub repository to AI Core.

In [5]:
# Read the GitHub credentials from a local JSON file
with open('gh_creds.json') as github_file:
    gh_creds = json.load(github_file)

In [6]:
# Register the GitHub repository with SAP AI Core, authenticating with the personal access token
ai_core_client.repositories.create(
    name="gh-repo",
    url="https://github.com/amanichopra/sap-aicore-train.git",
    username="amanichopra",
    password=gh_creds['pat'],
)

<ai_core_sdk.models.base_models.Message at 0x113a55a30>

In [7]:
# Print the URL and on-boarding status of every repository known to SAP AI Core
response = ai_core_client.repositories.query()

for repo in response.resources:
    print('URL:', repo.url)
    print('Status:', repo.status)

URL: https://github.com/amanichopra/sap-aicore-train.git
Status: RepositoryStatus.COMPLETED


## Step 7

Create an application to sync the training workflow with AI Core.

In [20]:
# Create the application that points at the HEAD revision of the repository root
ai_core_client.applications.create(
    application_name="training-app",
    repository_url="https://github.com/amanichopra/sap-aicore-train.git",
    path="./",
    revision="HEAD",
)

<ai_api_client_sdk.models.base_models.BasicResponse at 0x114012fc0>

In [21]:
# Print the attributes of every application registered in SAP AI Core
response = ai_core_client.applications.query()

for application in response.resources:
    print(vars(application))

{'path': '.', 'revision': 'HEAD', 'repository_url': 'https://github.com/amanichopra/sap-aicore-train.git', 'application_name': 'training-app'}


## Step 8

Store S3 object store secrets in AI Core.

In [10]:
# Read the S3 credentials and bucket details from a local JSON file
with open('s3_creds.json') as s3_file:
    s3_creds = json.load(s3_file)

In [11]:
# Object store secret "s3-secret", scoped to the "data" path prefix
response = ai_core_client.object_store_secrets.create(
    name="s3-secret",  # how this secret is referenced inside SAP AI Core
    path_prefix="data",  # the secret's access is limited to this path
    type="S3",
    data={  # AWS credentials
        "AWS_ACCESS_KEY_ID": s3_creds['access_key_id'],
        "AWS_SECRET_ACCESS_KEY": s3_creds['secret_access_key'],
    },
    bucket="hcp-c1a2d095-b523-400a-bf19-94eda5e8d109",  # replace with your bucket
    region="eu-central-1",  # replace with your region
    endpoint="s3-eu-central-1.amazonaws.com",  # replace with your endpoint
    # the secret is only usable within this resource group; change it when
    # creating a secret for another resource group
    resource_group="default",
)
print(vars(response))

{'message': 'secret has been created'}


In [12]:
# Object store secret "default", scoped to the "model" path prefix
response = ai_core_client.object_store_secrets.create(
    name="default",  # how this secret is referenced inside SAP AI Core
    path_prefix="model",  # the secret's access is limited to this path
    type="S3",
    data={  # AWS credentials
        "AWS_ACCESS_KEY_ID": s3_creds['access_key_id'],
        "AWS_SECRET_ACCESS_KEY": s3_creds['secret_access_key'],
    },
    bucket=s3_creds['bucket'],  # taken from s3_creds.json; adjust that file for your bucket
    region=s3_creds['region'],  # taken from s3_creds.json
    endpoint=s3_creds['host'],  # taken from s3_creds.json
    # the secret is only usable within this resource group; change it when
    # creating a secret for another resource group
    resource_group="default",
)
print(vars(response))

{'message': 'secret has been created'}


## Step 9

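Register the dataset. The `scenario_id` must refer to a scenario that already exists in AI Core; if this call fails with an "invalid Scenario ID" error, make sure the workflow template (Steps 10 and 11) has synced successfully, then re-run this cell.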

In [16]:
# Register the training data as a dataset artifact
from ai_api_client_sdk.models.artifact import Artifact

response = ai_core_client.artifact.create(
    name="pose_training_data",  # custom identifier; does not have to be unique
    kind=Artifact.Kind.DATASET,
    url="ai://s3-secret/",  # presumably resolved through the "s3-secret" object store secret — confirm
    scenario_id="train-demo",
    description="Pose embedding data",
    resource_group="default",  # needed so the object store secret is only used within this resource group
)

# keep the response attributes as a dict; later cells read artifact['id']
artifact = vars(response)
artifact

AIAPIInvalidRequestException: Failed to post /artifacts: Invalid Request, Could not create Artifact due to invalid Scenario ID train-demo. Please check the Scenario ID. 
 Status Code: 400, Request ID:c9bbe234-0e4c-4375-8f26-bea88db9717f

## Step 10

Define your workflow using [Argo](https://argo-workflows.readthedocs.io/en/latest/workflow-templates/) workflow templates. This is defined at `train_pipeline.yaml`.


## Step 11

Check the sync status of your workflow. You should see a successful sync in the response.

In [None]:
# Ask SAP AI Core to refresh the application and show the returned message
refresh_result = ai_core_client.applications.refresh(application_name='training-app')
print(refresh_result.message)

# Fetch the application status and print each entry of its sync status list
response = ai_core_client.applications.get_status(application_name='training-app')
print(response.message)

for resource_status in response.sync_ressources_status:
    print(vars(resource_status))

## Step 12

Ensure you can see the executable defined in the workflow template for this scenario.

In [17]:
# Print the attributes of every executable registered for the "train-demo" scenario
response = ai_core_client.executable.query(
    scenario_id="train-demo",
    resource_group='default',
)

for entry in response.resources:
    print(vars(entry))


## Step 13

Create a configuration for the training workflow.

In [18]:
# Create the training configuration
from ai_api_client_sdk.models.base_models import KeyValue
from ai_api_client_sdk.models.input_artifact_binding import InputArtifactBinding

# Parameter values for this configuration, all passed as strings
hyperparameters = {
    'DL_BATCH_SIZE': '32',
    'ACTIVATION': 'tanh',
    'DROPOUT': '0.95024310853758',
    'LR': '0.01',
    'OPTIMIZER': 'adam',
    'EPOCHS': '10',
}

response = ai_core_client.configuration.create(
    name="pose-detection-training-configuration",
    scenario_id="train-demo",
    executable_id="aicore-train-pipeline",
    resource_group="default",
    parameter_bindings=[KeyValue(key, value) for key, value in hyperparameters.items()],
    # binds the registered dataset artifact to the 'pose_dataset' input
    input_artifact_bindings=[InputArtifactBinding('pose_dataset', artifact['id'])],
)

# keep the response attributes as a dict; later cells read configuration['id']
configuration = vars(response)
configuration


NameError: name 'artifact' is not defined

## Step 14

Create an execution of the training workflow using the configuration created above.

In [None]:
# Start an execution of the configuration created earlier
response = ai_core_client.execution.create(
    configuration_id=configuration['id'],
    resource_group='default',
)

# keep the response attributes as a dict; later cells read execution['id']
execution = vars(response)
execution

In [None]:
# Fetch the current state of the execution from SAP AI Core.
# Re-run this cell about every 30 seconds to follow its progress.
response = ai_core_client.execution.get(
    execution_id=execution['id'],  # replace to inspect a different execution
    resource_group='default',
)

vars(response)


In [None]:
# Retrieve the execution's logs and print each log entry's attributes
response = ai_core_client.execution.query_logs(
    execution_id=execution['id'],
    resource_group='default',
)

for entry in response.data.result:
    print(vars(entry))


## Step 15

Run multiple executions for hyperparameter tuning.

In [51]:
def run_execution(bs, act, dropout, lr, opt, epochs=500):
    """Create a training configuration for one hyperparameter set and start an execution of it.

    Relies on the notebook-level ``ai_core_client`` and ``artifact`` objects and on the
    ``KeyValue`` / ``InputArtifactBinding`` imports made in earlier cells.

    Args:
        bs: Batch size, bound to the ``DL_BATCH_SIZE`` parameter.
        act: Activation name, bound to ``ACTIVATION``.
        dropout: Dropout value, bound to ``DROPOUT``.
        lr: Learning rate, bound to ``LR``.
        opt: Optimizer name, bound to ``OPTIMIZER``.
        epochs: Number of epochs, bound to ``EPOCHS``. Defaults to 500.

    Returns:
        The ``id`` of the execution that was created.
    """
    hyperparameters = {
        'DL_BATCH_SIZE': bs,
        'ACTIVATION': act,
        'DROPOUT': dropout,
        'LR': lr,
        'OPTIMIZER': opt,
        'EPOCHS': epochs,
    }

    config = vars(ai_core_client.configuration.create(
        name="pose-detection-training-configuration",
        scenario_id="train-demo",
        executable_id="aicore-train-pipeline",
        resource_group="default",
        # every value goes through str() so all bindings are strings,
        # including OPTIMIZER, which was previously passed through as-is
        parameter_bindings=[KeyValue(key, str(value)) for key, value in hyperparameters.items()],
        input_artifact_bindings=[InputArtifactBinding('pose_dataset', artifact['id'])],
    ))

    execution = vars(ai_core_client.execution.create(
        configuration_id=config['id'],
        resource_group='default',
    ))

    return execution['id']

In [None]:
# Candidate values for each hyperparameter
batch_sizes = [16, 32, 64, 128, 256]
lrs = [0.01, 0.005, 0.001, 0.0005, 0.0001]
dropouts = [0, 0.25, 0.5, 0.95, 1]
opts = ['adam', 'sgd']
acts = ['sigmoid', 'relu', 'tanh']

# Full grid: 5 * 5 * 5 * 2 * 3 = 750 combinations, each started as its own execution.
# NOTE(review): the grid includes dropout values 0 and 1 — confirm the training code accepts both.
# itertools.product iterates with the rightmost list varying fastest, i.e. in the
# same order as nesting the loops batch size -> lr -> dropout -> optimizer -> activation.
ex_ids = []
for bs, lr, dropout, opt, act in itertools.product(batch_sizes, lrs, dropouts, opts, acts):
    # appended one at a time so the ids of executions already started are kept
    # if a later call raises
    ex_ids.append(run_execution(bs, act, dropout, lr, opt))