![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/NotebookVM/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.png)

# Deploying the Best Performing Model to Azure Kubernetes Service (AKS)
This notebook shows the steps for deploying a registered model as a web service: defining the scoring environment and entry script, provisioning an AKS cluster (one time action), and deploying the service to it. 
We then test the service and delete the service and the model.

In [1]:
import os
import shutil

from azureml.core import Workspace
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.model import Model
from azureml.core.webservice import Webservice, AksWebservice

In [2]:
import azureml.core

# Show which Azure ML SDK version this kernel has installed.
sdk_version = azureml.core.VERSION
print(sdk_version)

1.19.0


# Get workspace
Load existing workspace from the config file info.

In [3]:
from azureml.core.workspace import Workspace

# Build the workspace handle from the saved config file.
ws = Workspace.from_config()

# Print the identifying details of the workspace, one per line.
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

quick-starts-ws-134480
aml-quickstarts-134480
southcentralus
81cefad3-d2c9-4f77-a466-99a7f541c7bb


# Create the Environment
Create an environment that the model will be deployed with

In [4]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 

# Build the scoring environment from the conda specification file.
conda_spec_path = "./envs/conda_dependencies.yml"
scoring_env = Environment.from_conda_specification(name="scoring-env", file_path=conda_spec_path)

# Set the Docker base image that the environment is built on.
scoring_env.docker.base_image = (
    "mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200821.v1"
)

# Write the Entry Script
Write the script that will be used to predict on your model

In [5]:
%%writefile utils.py

from tqdm import tqdm
import numpy as np
import pandas as pd
import time

def reduce_mem(df):
    """Downcast numeric columns of ``df`` in place and return the same frame.

    Columns whose dtype is one of int16/32/64 or float16/32/64 are cast to
    the first candidate dtype whose (exclusive) limits contain the column's
    min and max. Columns whose min or max is null are left untouched. A
    summary of the memory change and elapsed time is printed.

    Note: the float16 downcast keeps the value range but not full precision.
    """
    started_at = time.time()
    numeric_names = ["int16", "int32", "int64", "float16", "float32", "float64"]
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    mem_before = df.memory_usage().sum() / 1024 ** 2

    for name in df.columns:
        dtype = df[name].dtypes
        if dtype not in numeric_names:
            continue

        lowest = df[name].min()
        highest = df[name].max()
        if pd.isnull(lowest) or pd.isnull(highest):
            continue

        if str(dtype)[:3] == "int":
            # Integers: take the first type that strictly contains the range;
            # if none does, the column keeps its current dtype.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if lowest > limits.min and highest < limits.max:
                    df[name] = df[name].astype(candidate)
                    break
        else:
            # Floats: same search, with float64 as the fallback.
            chosen = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if lowest > limits.min and highest < limits.max:
                    chosen = candidate
                    break
            df[name] = df[name].astype(chosen)

    mem_after = df.memory_usage().sum() / 1024 ** 2
    elapsed_minutes = (time.time() - started_at) / 60
    reduction_pct = 100 * (mem_before - mem_after) / mem_before
    print(
        "-- Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction),time spend:{:2.2f} min".format(
            mem_after,
            reduction_pct,
            elapsed_minutes,
        )
    )
    return df


def unique_count(df, features):
    """Integer-encode categorical columns and add a frequency column for each.

    For every name in ``features`` the column is replaced, in place, by a
    code assigned in order of first appearance, and a ``<name>_count`` column
    holding how often each code occurs in ``df`` is added. The frame is then
    passed through ``reduce_mem`` and returned.

    Missing values stay missing: they get no code and no count.
    """
    for f in tqdm(features):
        print(f)
        # Build the code table from non-null values only. Pairing unique()
        # (which includes NaN) with range(nunique()) (which excludes it) made
        # zip() drop the last distinct value whenever the column had NaN.
        map_dict = {value: code for code, value in enumerate(df[f].dropna().unique())}
        df[f] = df[f].map(map_dict)
        df[f + "_count"] = df[f].map(df[f].value_counts())
    df = reduce_mem(df)
    return df

Writing utils.py


In [6]:
%%writefile score.py
import os
import pickle
import json
import numpy
import joblib
from utils import reduce_mem, unique_count


def preprocess_data(df):
    """Prepare an incoming DataFrame for the model.

    Encodes the categorical columns with ``unique_count`` (which also adds a
    ``<col>_count`` column per feature and downcasts numeric dtypes), then
    drops the columns listed in ``drop_fea``. ``df`` is modified in place and
    also returned.

    NOTE(review): ``unique_count`` derives codes and counts from the rows of
    this frame only, so the encoded values depend on the contents of the
    request batch. Confirm this matches how the training features were built.
    """

    # categorical feature unique count
    categorical_cols = [
        "slot_id",
        "adv_id",
        "adv_prim_id",
        "creat_type_cd",
        "inter_type_cd",
        "age",
        "city",
        "uid",
        "dev_id",
        "task_id",
    ]
    # unique_count already returns the frame after reduce_mem, so a second
    # reduce_mem pass here would only repeat the scan and the log line.
    df = unique_count(df, categorical_cols)

    # drop the columns that are not passed to the model
    drop_fea = ["pt_d", "communication_onlinerate", "uid"]
    df.drop(columns=drop_fea, inplace=True)

    return df


def init():
    """Load the serialized model into the module-level ``model`` variable.

    The file ``model.joblib`` is read from the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable.
    """
    global model
    model_dir = os.getenv("AZUREML_MODEL_DIR")
    # deserialize the model file back into a model object
    model = joblib.load(os.path.join(model_dir, "model.joblib"))


# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score one request and return the positive-class probabilities.

    ``raw_data`` is a JSON string holding the rows to score, either wrapped
    as ``{"data": <rows>}`` or sent as the bare rows (for example a list of
    ``{column: value}`` records). The rows are loaded into a DataFrame,
    preprocessed, and passed to ``model.predict_proba``.

    Returns a list with the second column of ``predict_proba`` per row. On
    any failure the exception message is returned as a string.
    """
    # Imported locally so the function carries its own dependency; pandas is
    # already required by utils.py, which this script imports.
    import pandas as pd

    try:
        payload = json.loads(raw_data)
        # Accept both the {"data": ...} envelope and a bare list of records.
        records = payload["data"] if isinstance(payload, dict) else payload
        # preprocess_data indexes columns by name, so it needs a DataFrame
        # rather than the raw parsed JSON.
        data = pd.DataFrame(records)
        data = preprocess_data(data)
        result = model.predict_proba(data)[:, 1]
        # you can return any data type as long as it is JSON-serializable
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error


Writing score.py


Move the generated scripts into the source folder referenced by the inference config.

In [7]:
script_folder = "predict"

# Create the source folder if needed; exist_ok keeps the cell safe to re-run.
os.makedirs(script_folder, exist_ok=True)

# The %%writefile cells put the scripts in the working directory. Move each
# one into the source folder, overwriting any older copy so that a re-run
# deploys the freshly written script instead of a stale one.
for script_name in ("score.py", "utils.py"):
    target_path = os.path.join(script_folder, script_name)
    if os.path.isfile(script_name):
        os.replace(script_name, target_path)
    elif not os.path.isfile(target_path):
        # Neither a fresh nor a previously moved copy exists: say so instead
        # of silently continuing towards a deployment without the script.
        print("Warning: {} was not found; re-run the %%writefile cell.".format(script_name))

# Create the InferenceConfig
Create the inference config that will be used when deploying the model

In [8]:
from azureml.core.model import InferenceConfig

# Bundle the script folder, its entry script and the environment into one config.
inf_config = InferenceConfig(
    source_directory=script_folder,
    entry_script='score.py',
    environment=scoring_env,
)

# Provision the AKS Cluster
This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it.

In [8]:
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AKS cluster to reuse or create
aks_name = 'ctr-scoring'

# Look the cluster up by name; create it only when the lookup fails.
try:
    aks_target = ComputeTarget(workspace=ws, name=aks_name)
except ComputeTargetException:
    # Default provisioning settings (parameters can be passed to customize)
    aks_target = ComputeTarget.create(
        workspace=ws,
        name=aks_name,
        provisioning_configuration=AksCompute.provisioning_configuration(),
    )
else:
    print('Found existing cluster, use it.')

# Wait only when the cluster has not already reached the "Succeeded" status.
cluster_ready = aks_target.get_status() == "Succeeded"
if not cluster_ready:
    aks_target.wait_for_completion(show_output=True)

Found existing cluster, use it.


In [10]:
%%time
# Wait for provisioning to finish, then report the final state either way.
try:
    aks_target.wait_for_completion(show_output=True)
except Exception as wait_error:
    # Report the failure instead of hiding it; a bare `except:` would also
    # swallow KeyboardInterrupt.
    print("Waiting for the AKS cluster failed: {}".format(wait_error))
finally:
    print(aks_target.provisioning_state)
    print(aks_target.provisioning_errors)

Succeeded
None
CPU times: user 9.2 ms, sys: 293 µs, total: 9.49 ms
Wall time: 93 ms


# Deploy web service to AKS

In [11]:
# Start from the SDK's default web service configuration.
aks_config = AksWebservice.deploy_configuration()

# Alternative: enable token auth and disable key auth on the web service.
# aks_config = AksWebservice.deploy_configuration(token_auth_enabled=True, auth_enabled=False)


Get the registered model from the workspace.

In [12]:
# Look up the registered model by name.
model = Model(workspace=ws, name='click-through-rate-predictions-HDrive')

In [13]:
%%time
aks_service_name = 'ctr-prediction-service-1'

# Collect the deployment arguments, then submit the deployment to the cluster.
deployment_args = {
    "workspace": ws,
    "name": aks_service_name,
    "models": [model],
    "inference_config": inf_config,
    "deployment_config": aks_config,
    "deployment_target": aks_target,
}
aks_service = Model.deploy(**deployment_args)

# Block until the deployment finishes, then show the container logs and state.
aks_service.wait_for_deployment(show_output=True)
service_logs = aks_service.get_logs()
print(service_logs)
print(aks_service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running....
Succeeded
AKS service creation operation finished, operation "Succeeded"
2021-01-12T04:04:07,965221386+00:00 - iot-server/run 
2021-01-12T04:04:07,967980688+00:00 - nginx/run 
2021-01-12T04:04:07,968031190+00:00 - gunicorn/run 
/usr/sbin/nginx: /azureml-envs/azureml_024d70955107dc728a9f6a15687a2651/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_024d70955107dc728a9f6a15687a2651/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_024d70955107dc728a9f6a15687a2651/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_024d70955107dc728a9f6a15687a2651/lib/libssl.so.1.0.0: no version in

# Test the web service with an HTTP request
We test the web service by posting sample rows to its scoring URI.
The service's API key is fetched with `get_keys()` and sent as a Bearer token so that the call is authenticated.

In [14]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Turn off server certificate verification for HTTPS on the client side.

    Does nothing unless ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY``
    environment variable is unset or empty, and the ``ssl`` module provides
    ``_create_unverified_context``. When it acts, it replaces the ``ssl``
    module's default HTTPS context factory, which is a module-wide setting.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory


# Needed when the scoring service uses a self-signed certificate.
allowSelfSignedHttps(True)

# Two sample rows used to test the deployed service; each dict is one row
# keyed by feature name. The list is serialized as the request body below.
data = [
    {
        "uid": 1391930,
        "task_id": 3481,
        "adv_id": 3984,
        "creat_type_cd": 6,
        "adv_prim_id": 150,
        "dev_id": 17,
        "inter_type_cd": 5,
        "slot_id": 18,
        "spread_app_id": 11,
        "tags": 39,
        "app_first_class": 4,
        "app_second_class": 17,
        "age": 7,
        "city": 161,
        "city_rank": 3,
        "device_name": 65,
        "device_size": 141,
        "career": 9,
        "gender": 2,
        "net_type": 2,
        "residence": 18,
        "his_app_size": 6,
        "his_on_shelf_time": 3,
        "app_score": 2,
        "emui_dev": 14,
        "list_time": 10,
        "device_price": 2,
        "up_life_duration": 20,
        "up_membership_grade": -1,
        "membership_life_duration": -1,
        "consume_purchase": 2,
        "communication_onlinerate": "0^1^2^3^4^5^6^7^8^9^10^11^12^13^14^15^16^17^18^19^20^21^22^23",
        "communication_avgonline_30d": 13,
        "indu_name": 36,
        "pt_d": 1,
        "uid_prev_1_day_ctr": 0.0,
        "uid_prev_2_day_ctr": 0.0,
        "uid_prev_3_day_ctr": 0.0,
        "uid_prev_4_day_ctr": 0.0,
        "uid_prev_5_day_ctr": 0.0,
        "uid_prev_6_day_ctr": 0.0,
        "task_id_prev_1_day_ctr": 0.0,
        "task_id_prev_2_day_ctr": 0.0,
        "task_id_prev_3_day_ctr": 0.0,
        "task_id_prev_4_day_ctr": 0.0,
        "task_id_prev_5_day_ctr": 0.0,
        "task_id_prev_6_day_ctr": 0.0,
        "adv_id_prev_1_day_ctr": 0.0,
        "adv_id_prev_2_day_ctr": 0.0,
        "adv_id_prev_3_day_ctr": 0.0,
        "adv_id_prev_4_day_ctr": 0.0,
        "adv_id_prev_5_day_ctr": 0.0,
        "adv_id_prev_6_day_ctr": 0.0,
        "adv_prim_id_prev_1_day_ctr": 0.0,
        "adv_prim_id_prev_2_day_ctr": 0.0,
        "adv_prim_id_prev_3_day_ctr": 0.0,
        "adv_prim_id_prev_4_day_ctr": 0.0,
        "adv_prim_id_prev_5_day_ctr": 0.0,
        "adv_prim_id_prev_6_day_ctr": 0.0,
        "spread_app_id_prev_1_day_ctr": 0.0,
        "spread_app_id_prev_2_day_ctr": 0.0,
        "spread_app_id_prev_3_day_ctr": 0.0,
        "spread_app_id_prev_4_day_ctr": 0.0,
        "spread_app_id_prev_5_day_ctr": 0.0,
        "spread_app_id_prev_6_day_ctr": 0.0,
    },
    {
        "uid": 2220385,
        "task_id": 3401,
        "adv_id": 1766,
        "creat_type_cd": 7,
        "adv_prim_id": 156,
        "dev_id": 56,
        "inter_type_cd": 5,
        "slot_id": 16,
        "spread_app_id": 58,
        "tags": 37,
        "app_first_class": 4,
        "app_second_class": 21,
        "age": 7,
        "city": 103,
        "city_rank": 4,
        "device_name": 38,
        "device_size": 162,
        "career": 9,
        "gender": 2,
        "net_type": 2,
        "residence": 39,
        "his_app_size": 14,
        "his_on_shelf_time": 3,
        "app_score": 2,
        "emui_dev": 20,
        "list_time": 4,
        "device_price": 4,
        "up_life_duration": 20,
        "up_membership_grade": 1,
        "membership_life_duration": -1,
        "consume_purchase": 2,
        "communication_onlinerate": "7^8^9^10^11^12^13^14^15^16^17^18^19^20^21^22^23",
        "communication_avgonline_30d": 11,
        "indu_name": 17,
        "pt_d": 1,
        "uid_prev_1_day_ctr": 0.0,
        "uid_prev_2_day_ctr": 0.0,
        "uid_prev_3_day_ctr": 0.0,
        "uid_prev_4_day_ctr": 0.0,
        "uid_prev_5_day_ctr": 0.0,
        "uid_prev_6_day_ctr": 0.0,
        "task_id_prev_1_day_ctr": 0.0,
        "task_id_prev_2_day_ctr": 0.0,
        "task_id_prev_3_day_ctr": 0.0,
        "task_id_prev_4_day_ctr": 0.0,
        "task_id_prev_5_day_ctr": 0.0,
        "task_id_prev_6_day_ctr": 0.0,
        "adv_id_prev_1_day_ctr": 0.0,
        "adv_id_prev_2_day_ctr": 0.0,
        "adv_id_prev_3_day_ctr": 0.0,
        "adv_id_prev_4_day_ctr": 0.0,
        "adv_id_prev_5_day_ctr": 0.0,
        "adv_id_prev_6_day_ctr": 0.0,
        "adv_prim_id_prev_1_day_ctr": 0.0,
        "adv_prim_id_prev_2_day_ctr": 0.0,
        "adv_prim_id_prev_3_day_ctr": 0.0,
        "adv_prim_id_prev_4_day_ctr": 0.0,
        "adv_prim_id_prev_5_day_ctr": 0.0,
        "adv_prim_id_prev_6_day_ctr": 0.0,
        "spread_app_id_prev_1_day_ctr": 0.0,
        "spread_app_id_prev_2_day_ctr": 0.0,
        "spread_app_id_prev_3_day_ctr": 0.0,
        "spread_app_id_prev_4_day_ctr": 0.0,
        "spread_app_id_prev_5_day_ctr": 0.0,
        "spread_app_id_prev_6_day_ctr": 0.0
    }
]

# Serialize the sample rows as the JSON request body.
body = str.encode(json.dumps(data))

# Look the deployed service up by name to get its scoring URI and an API key.
deployed_webservice = Webservice(ws, 'ctr-prediction-service-1')
url = deployed_webservice.scoring_uri
api_key = deployed_webservice.get_keys()[0]  # first key returned by get_keys()

headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The with-block closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(json.loads(error.read().decode("utf8", 'ignore')))


b'[0.441306768, 0.405676435]'


# Clean up
Delete the service and the registered model.

In [15]:
%%time
# Remove the web service first, then the registered model.
for resource in (aks_service, model):
    resource.delete()

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs
