# Deploy a model to a secure AKS

# 1. Connect to Azure Machine Learning Workspace

## 1.1. Import the required libraries

In [None]:
# Install the azure.ai.ml package into the notebook kernel; --extra-index-url
# adds a second package index that pip searches in addition to its default one.
%pip install azure.ai.ml --extra-index-url  https://azuremlsdktestpypi.azureedge.net/sdk-cli-v2

In [43]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Model, Environment, KubernetesOnlineEndpoint, KubernetesOnlineDeployment, CodeConfiguration
from azure.identity import DefaultAzureCredential
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities._deployment.resource_requirements_settings import ResourceRequirementsSettings
from azure.ai.ml.entities._deployment.container_resource_settings import ResourceSettings

## 1.2. Configure workspace details and get a handle to the workspace

In [44]:
# Enter details of your AML workspace.
# Replace each "<...>" placeholder with your own value before running the
# following cells: these three names are passed to MLClient to select the
# workspace.
subscription_id: str = "<SUBSCRIPTION_ID>"
resource_group: str = "<RESOURCE_GROUP>"
workspace: str = "<AML_WORKSPACE_NAME>"

In [45]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build the workspace client: authenticate with DefaultAzureCredential and
# target the subscription / resource group / workspace entered earlier.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# 2. Create a model

In [None]:
# Wrap the local pickled model file in a Model asset, then register that
# asset in the workspace.
model_path = "./model/sklearn_regression_model.pkl"
model = Model(path=model_path)

ml_client.models.create_or_update(model)

# 3. Create an inference environment for the model

## Environment

[Note about no-code-deployment](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models-online-endpoints?tabs=cli): Azure Machine Learning performs dynamic installation of packages when deploying MLflow models with no-code deployment. As a consequence, deploying MLflow models to online endpoints with no-code deployment in a private network without egress connectivity is not supported at the moment.

This means we have to provide an environment and a scoring script for secured infrastructure.

All curated environments are hosted on mcr.microsoft.com, and most customers do not want to allow public network traffic from/to their inference endpoint.

This means you have to create a custom environment that will be saved in your private container registry. There are two ways to do this:
- Define a custom environment within AML and let the image-builder cluster build it and save it in ACR for you. The cluster will require access to the sources you are using.
- Use your own platform (for example, a side VM) to build and push your Docker image to the ACR, and then register it in AML.

### Build a custom environment within AML

Network prerequisite: the image-builder cluster (within the training subnet) must be able to reach mcr.microsoft.com to retrieve the curated image used as the base image for the custom one, which is then saved into the private ACR. (This is already implemented as part of the Terraform code.)

The following command will :
- Define the environment 
- Register the environment
- Build it on the image-builder cluster (because we defined this cluster as the default during installation)
- Save the environment in Azure Container Registry

In [None]:
# Custom inference environment: the curated minimal inference image is the
# base, and the packages listed in environment/conda.yml are layered on top.
base_image = "mcr.microsoft.com/azureml/curated/minimal-ubuntu18.04-py37-cpu-inference:51"
env = Environment(
    name="my_custom_inference_env",
    image=base_image,
    conda_file="environment/conda.yml",
)
ml_client.environments.create_or_update(env)

# 4. Setup a Kubernetes online endpoint

This takes around 2mins.

In [None]:
import datetime

# Suffix the name with month/day/hour/minute/microseconds so that each run
# produces a fresh endpoint name.
online_endpoint_name = f"k8s-endpoint-{datetime.datetime.now():%m%d%H%M%f}"

# Describe the online endpoint (nothing is created yet); replace
# <COMPUTE_NAME> with the compute target to use.
endpoint = KubernetesOnlineEndpoint(
    compute="<COMPUTE_NAME>",
    auth_mode="key",
    name=online_endpoint_name,
)

In [None]:
import datetime

# Suffix the name with month/day/hour/minute/microseconds so that each run
# produces a fresh endpoint name.
online_endpoint_name = f"k8s-endpoint-{datetime.datetime.now():%m%d%H%M%f}"

# Describe the online endpoint on the "aks-inference" compute, using
# key-based authentication.
endpoint = KubernetesOnlineEndpoint(
    compute="aks-inference",
    auth_mode="key",
    name=online_endpoint_name,
)

# Submit the endpoint and block until the operation finishes.
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

# 5. Create the online deployment

This takes around 2mins.

In [None]:
# Scoring entry point: score.py inside the local "src" folder.
scoring_code = CodeConfiguration(code="src", scoring_script="score.py")

# CPU / memory requested for the deployment's container.
resource_requests = ResourceSettings(cpu="100m", memory="0.5Gi")

# "blue" deployment: one instance serving the model inside the custom
# environment, attached to the endpoint created earlier.
deployment = KubernetesOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    code_configuration=scoring_code,
    instance_count=1,
    resources=ResourceRequirementsSettings(requests=resource_requests),
)

# Submit the deployment and block until the operation finishes.
deployment_poller = ml_client.begin_create_or_update(deployment)
deployment_poller.result()

# 6. Test the endpoint with sample data

In [None]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server-certificate verification for HTTPS clients.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment
    variable is unset or empty, and the ``ssl`` module exposes
    ``_create_unverified_context``, that factory is installed as the
    module's default HTTPS context factory. In every other case nothing is
    changed.
    """
    if not allowed:
        return
    # A non-empty PYTHONHTTPSVERIFY means the user has set it explicitly; leave ssl untouched.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        # bypass the server certificate verification on client side
        ssl._create_default_https_context = unverified_factory

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {}

body = json.dumps(data).encode("utf-8")

# Use the name of the endpoint created in this notebook so that the URL and
# the key fetched below always refer to the same endpoint.
# NOTE(review): the host and the "/aml" path prefix are specific to the
# environment this notebook was written for - adjust them to your ingress.
url = f'http://10.42.0.91/aml/api/v1/endpoint/{online_endpoint_name}/score'

# Fetch the endpoint key from the workspace at run time rather than pasting
# it here: a key stored in the notebook ends up in source control.
api_key = ml_client.online_endpoints.get_keys(name=online_endpoint_name).primary_key
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + api_key,
    'azureml-model-deployment': 'blue',
}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


# Troubleshooting utilities

In [None]:
# Retrieve logs for the "blue" deployment of the endpoint created earlier,
# requesting 50 lines; the notebook displays the returned value.
ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=online_endpoint_name, lines=50
)