[AZURE ML PRODUCTION DEPLOYMENT - FOQA DECISION TREE MODEL](https://c3.ndc.nasa.gov/dashlink/resources/1018/)

# 1. Packages installation - AML

In [38]:
# Print the path of the `python` executable that the shell finds first on PATH.
!which python

/anaconda/envs/azureml_py38/bin//python


In [13]:
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall scikit-learn
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall -U imbalanced-learn
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azureml
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azure-ai-ml
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azureml-inference-server-http
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azure-ai-formrecognizer

Collecting scikit-learn
  Using cached scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
Collecting numpy>=1.17.3
  Using cached numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting scipy>=1.3.2
  Using cached scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
Collecting joblib>=1.1.1
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
[31mERROR: pandas-ml 0.6.1 requires enum34, which is not installed.[0m
[31mERROR: fbprophet 0.7.1 requires cmdstanpy==0.9.5, which is not installed.[0m
[31mERROR: tensorflow 2.2.1 has requirement h5py<2.11.0,>=2.10.0, but you'll have h5py 3.8.0 which is incompatible.[0m
[31mERROR: tensorflow 2.2.1 has requirement numpy<1.19.0,>=1.16.0, but you'll have numpy 1.24.3 which is incompatible.[0m
[31mERROR: tensorflow-gpu 2.2.1 has requirement h5py<2.11.0,>=2.

# 2. Init Azure Config

In [10]:
import os

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential


# Workspace coordinates. Each value can be overridden with an environment
# variable so the notebook can be pointed at another workspace without editing
# code; when the variable is unset the original value is used.
subscription_id = os.environ.get("FOQA_SUBSCRIPTION_ID", 'e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216')
resource_group = os.environ.get("FOQA_RESOURCE_GROUP", 'foqa-resource-2')
workspace_name = os.environ.get("FOQA_WORKSPACE_NAME", 'foqa-ws-2')

# Get a handle to the workspace
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

Explore Azure Dataset

In [None]:
# Disabled exploration snippet: the code below is wrapped in a string literal,
# so nothing in it is executed when this cell runs.
# NOTE(review): inside the snippet, np.load() receives a bare file name taken
# from os.listdir(mount_point) rather than a path joined with the mount point;
# that would need fixing before the snippet is re-enabled.
'''
from azureml.core import Workspace, Dataset

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='foqa-data-asset')
dataset.download(target_path='.', overwrite=True)
# Download mounts the file as local file
# TODO: How to work directly without mounting

import numpy as np
import os
with dataset.mount() as mount_context:
    # print(os.listdir(mount_context.mount_point)[0])
    full_data = np.load(os.listdir(mount_context.mount_point)[0])

data = full_data['data']
label = full_data['label']
print("Data:",data.shape)
print("Label:",label.shape)'''

# 3. Create training script

In [12]:
import os

# Folder that receives the training script (and, later, the scoring script).
train_src_dir = "./src"

# Create the folder; an already existing folder is accepted as is.
os.makedirs(name=train_src_dir, exist_ok=True)

In [13]:
%%writefile {train_src_dir}/main.py
import numpy as np
import xgboost

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix,ConfusionMatrixDisplay

from imblearn.over_sampling import ADASYN

import os
import argparse
import mlflow
import mlflow.sklearn

def main():
    """Train, evaluate and register the FOQA XGBoost classifier.

    Command-line arguments:
        --data: path to a NumPy ``.npz`` archive containing the arrays
            ``data`` and ``label``.
        --train_test_ratio: fraction of the samples held out for testing
            (default 0.20).
        --registered_model_name: name under which the model is registered in
            MLflow; also used as the MLflow artifact path and as the parent
            folder of the locally saved copy.
        --seed: seed for the split, the oversampler and the classifier
            (default 42), so that repeated runs are comparable.

    The test set is split off BEFORE oversampling. ADASYN synthesises new
    samples from neighbours of existing ones, so resampling the full data set
    first would leak information from test samples into the training set and
    inflate the reported scores.
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="path to input data")
    parser.add_argument("--train_test_ratio", type=float, required=False, default=0.20)
    parser.add_argument("--registered_model_name", type=str, help="model name")
    parser.add_argument("--seed", type=int, required=False, default=42, help="random seed")
    args = parser.parse_args()

    # The context manager closes the MLflow run even when a step below raises.
    with mlflow.start_run():
        # enable autologging
        mlflow.sklearn.autolog()

        # Load data; the archive handle is closed as soon as both arrays are read.
        with np.load(args.data) as full_data:
            data = full_data['data']
            label = full_data['label']

        # Reduce the timestep dimension (axis 1) by averaging over it.
        data = np.average(data, axis=1)

        # Split first; stratify so every class is represented in the test set
        # (the multi-class ROC AUC below needs all classes present in y_test).
        X_train, X_test, y_train, y_test = train_test_split(
            data,
            label,
            test_size=args.train_test_ratio,
            random_state=args.seed,
            stratify=label,
        )

        # Oversample the training portion only, using ADASYN.
        oversample = ADASYN(random_state=args.seed)
        X_train, y_train = oversample.fit_resample(X_train, y_train)

        # Train the model
        model = xgboost.XGBClassifier(random_state=args.seed)
        model.fit(X_train, y_train)

        # Measuring performance - accuracy score
        preds = model.predict(X_test)
        accuracy = accuracy_score(y_test, preds)

        # Measuring performance - ROC AUC score (one-vs-rest over class probabilities)
        probs = model.predict_proba(X_test)
        roc_auc = roc_auc_score(y_test, probs, multi_class='ovr')

        print("Accuracy: %.2f%% \nROC AUC Score: %.2f%%" % (accuracy * 100.0,roc_auc * 100))

        # Registering the model to the workspace
        print("Registering the model via MLFlow")

        mlflow.sklearn.log_model(
            sk_model=model,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=model,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )

if __name__ == "__main__":
    main()

    

Overwriting ./src/main.py


# 4. Create custom environment

In [14]:
%%writefile ./conda.yaml
# Conda specification written to ./conda.yaml by this cell.
# NOTE(review): no package version is pinned below, so each environment build
# may resolve different versions — consider pinning for reproducibility.
name: foqa-env
channels:
  - conda-forge
dependencies:
  # NOTE(review): the notebook itself runs in a Python 3.8 environment
  # (azureml_py38) — confirm that 3.7 is intended here.
  - python=3.7
  - scikit-learn
  - pandas
  - numpy
  - matplotlib
  - xgboost
  - imbalanced-learn  
  - pip
  # Packages installed with pip inside the conda environment.
  - pip:
    - azureml
    - azure-ai-ml
    - azureml-mlflow
    - azureml-inference-server-http

Overwriting ./conda.yaml


In [15]:
from azure.ai.ml.entities import Environment

# Custom environment definition: a base Docker image combined with the conda
# specification stored in ./conda.yaml.
env = Environment(
    name="foqa-env",
    description="Environment for FOQA",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="./conda.yaml",
)

# Create or update the environment in the workspace; the returned object is
# shown as the cell output.
ml_client.environments.create_or_update(env)

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'name': 'foqa-env', 'description': 'Environment for FOQA', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216/resourceGroups/foqa-resource-2/providers/Microsoft.MachineLearningServices/workspaces/foqa-ws-2/environments/foqa-env/versions/3', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/foqa-compute/code/Users/duc.tran', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f5574109d30>, 'serialize': <msrest.serialization.Serializer object at 0x7f5574109e80>, 'version': '3', 'latest_version': None, 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.7', 'scikit-learn', 'pandas', 'numpy', 'matplotlib', 'xgboost', 'imbalanced-learn', 'pip', {'pip': ['azureml', 'azure-ai-ml', 'azureml-mlflow', 'azureml-inference-server-http']}], 'name': 'foqa

# 5. Create compute cluster

In [None]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the compute target when it already exists in the workspace.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    # NOTE(review): every failure of the lookup ends up here, not only
    # "cluster not found" — e.g. an authentication error would also trigger a
    # creation attempt. Consider narrowing the exception type.
    print("Creating a new cpu compute target...")

    # Let's create the Azure Machine Learning compute object with the intended parameters
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        # Azure Machine Learning Compute is the on-demand VM service
        type="amlcompute",
        # VM Family
        size="STANDARD_DS3_V2",
        # Minimum number of nodes kept running when no job is running
        min_instances=0,
        # Maximum number of nodes in the cluster
        max_instances=1,
        # How many seconds a node keeps running after the job has terminated
        idle_time_before_scale_down=180,
        # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
        tier="Dedicated",
    )
    print(
        f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}"
    )
    # begin_create_or_update only starts provisioning and returns a poller.
    # .result() waits for provisioning to finish and returns the created
    # compute, so `cpu_cluster` holds a compute object in both branches and
    # later cells cannot run against a cluster that does not exist yet.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

# 6. Submit the model training as a job

In [None]:
# NOTE:
# 1. How to get the "path" argument of the data Input:
#     1.1:
#         all_data_assets = ml_client.data.list()
#         data_asset_path = ml_client.data.get(name="foqa-data-asset", version="1")
#         Copy the path
#     1.2:
#         AML -> Data -> Datastore -> <datastore-name> -> Browse
#             -> <Click on triple dot of target file> -> Copy URI
#
# 2. List all available values for the "environment" argument:
#     Portal -> cloud shell ->
#     az ml environment list --resource-group foqa-resource-2 --workspace-name foqa-ws-2

from azure.ai.ml import command
from azure.ai.ml import Input


registered_model_name = "foqa_model"

# Datastore URI of the training archive, assembled from the workspace settings
# defined in the configuration cell so the two cannot drift apart.
training_data_uri = (
    f"azureml://subscriptions/{subscription_id}"
    f"/resourcegroups/{resource_group}"
    f"/workspaces/{workspace_name}"
    "/datastores/foqa_datastore/paths/DASHlink_full_fourclass_raw_comp.npz"
)

job = command(
    # Values substituted into the ${{inputs.*}} placeholders of `command`.
    inputs=dict(
        data=Input(
            type="uri_file",
            path=training_data_uri,
        ),
        train_test_ratio=0.2,
        registered_model_name=registered_model_name,
    ),
    code="./src/",  # location of source code
    command="python main.py --data ${{inputs.data}} --train_test_ratio ${{inputs.train_test_ratio}} --registered_model_name ${{inputs.registered_model_name}}",
    environment="foqa-env@latest",
    compute="cpu-cluster",
    display_name="foqa-prediction",
)

# Submit the job; this returns once the job is created, not when it finishes.
returned_job = ml_client.create_or_update(job)

# 7. Register Model

In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

job_name = returned_job.name

# The job was only submitted above; its artifacts do not exist until it has
# finished. Streaming the logs blocks until the job completes.
ml_client.jobs.stream(job_name)

# The training script logs the MLflow model with
# artifact_path=<registered_model_name>, so the artifacts live under that
# folder of the job outputs (not under "model/").
run_model = Model(
    path=f"azureml://jobs/{job_name}/outputs/artifacts/paths/{registered_model_name}/",
    # NOTE(review): this registers a separate asset named "foqa-model"; the
    # online deployment cell fetches "foqa_model", which the training script
    # registers itself — confirm which one is intended.
    name="foqa-model",
    description="XGBoost Model for FOQA",
    type=AssetTypes.MLFLOW_MODEL,
)
ml_client.models.create_or_update(run_model)

# 8. Deploy the model 

## 8.1 Create scoring script

In [33]:
%%writefile ./src/score.py
import os
import logging
import json
import numpy as np
import joblib


def init():
    """
    This function is called when the container is initialized/started, typically after create/update of the deployment.
    It loads the serialized model into the module-level ``model`` variable so that run() can reuse it.

    Raises:
        RuntimeError: if the AZUREML_MODEL_DIR environment variable is not set.
        FileNotFoundError: if ``foqa_model/model.pkl`` does not exist below AZUREML_MODEL_DIR.
    """
    global model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    model_root = os.getenv("AZUREML_MODEL_DIR")
    if not model_root:
        # Without this check os.path.join(None, ...) fails with an opaque TypeError.
        raise RuntimeError(
            "AZUREML_MODEL_DIR is not set; the scoring script must run inside a deployment."
        )

    # <model-name>/model.pkl
    model_path = os.path.join(model_root, "foqa_model/model.pkl")
    if not os.path.isfile(model_path):
        # Report what the model folder actually contains to ease debugging of
        # a mismatch between the registered artifact layout and this path.
        found = os.listdir(model_root) if os.path.isdir(model_root) else "<missing directory>"
        raise FileNotFoundError(
            f"Model file not found at {model_path!r}; contents of {model_root!r}: {found}"
        )

    # deserialize the model file back into a sklearn model
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    model = joblib.load(model_path)
    logging.info("Init complete")


def run(raw_data):
    """
    Score one request; called for every invocation of the endpoint.

    ``raw_data`` is a JSON document whose "data" entry holds the samples.
    The samples are averaged over axis 1, passed to the predict() method of
    the model loaded by init(), and the predictions are returned as a plain
    Python list.
    """
    logging.debug("model 1: request received")

    payload = json.loads(raw_data)
    samples = np.array(payload["data"])

    # Same axis-1 averaging that the training script applies before fitting.
    features = np.average(samples, axis=1)
    predictions = model.predict(features)

    logging.info("Request processed")
    return predictions.tolist()

Overwriting ./src/score.py


## 8.2 Create an Endpoint

In [17]:
import uuid

from azure.ai.ml.entities import ManagedOnlineEndpoint

# Unique endpoint name: fixed prefix + the first 8 characters of a random UUID.
unique_suffix = uuid.uuid4().hex[:8]
foqa_endpoint_name = "foqa-endpoint-" + unique_suffix

# Definition of the online endpoint (key-based authentication).
endpoint = ManagedOnlineEndpoint(
    auth_mode="key",
    description="Endpoint for FOQA",
    name=foqa_endpoint_name,
)

# Start the creation, then wait for it and keep the resulting endpoint object.
creation_poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
endpoint = creation_poller.result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

Endpoint foqa-endpoint-4dd696a7 provisioning state: Succeeded


## 8.3 Local verification

### 8.3.1 Install the packages locally

In [12]:
# Local Deployment packages installation
# Disabled: the install commands are wrapped in a string literal, so running
# this cell installs nothing. They target the interpreter at
# /usr/local/bin/python3.10; remove the quotes to run them.
'''
!/usr/local/bin/python3.10 -m pip install azureml
!/usr/local/bin/python3.10 -m pip install azure-ai-ml
!/usr/local/bin/python3.10 -m pip install azureml-inference-server-http
!/usr/local/bin/python3.10 -m pip install azure-identity
'''


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To u

### 8.3.2 Deploy locally 

In [3]:
from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration, Environment, Model

# Building blocks of the local deployment, created one by one for readability.
local_model = Model(path="./model/model.pkl")  # Need to download model artifacts
local_code = CodeConfiguration(code="./src", scoring_script="score.py")
local_env = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="./conda.yaml",
    name="local-env",
    description="local Environment for FOQA",
)

deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="foqa-endpoint-defeffd5",
    model=local_model,
    code_configuration=local_code,
    environment=local_env,
    instance_type="Standard_DS3_v2",
    instance_count=1,
)

# local=True requests a local deployment instead of one in Azure;
# vscode_debug=True is passed through to enable VS Code debugging.
blue_deployment = ml_client.online_deployments.begin_create_or_update(
    deployment, local=True, vscode_debug=True
)

Creating local deployment (foqa-endpoint-defeffd5 / blue) .
Building Docker image from Dockerfile
Step 1/7 : FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
. ---> a79caa0393e2
Step 2/7 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> a08424b17768
Step 3/7 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> c9c0d9e8c938
Step 4/7 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> 37661838fb24
Step 5/7 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Running in 168f38a86c1f
Retrieving notices: ...working... done
Collecting package metadata (repodata.json): ...working... ...................done
Solving environment: ...working... .............................................................................................................done
[91m

  current version: 23.1.0
  latest version: 23.3.1

Please update conda by running

    $ conda update -n base -c conda-forge conda

Or to minimize the number of packages updated during conda update use

   

### 8.3.3 Debug local endpoint

In [10]:
# Look up the LOCAL endpoint and keep it in its own variable.
# The global `endpoint` holds the cloud endpoint created in section 8.2 and is
# used again by the online-deployment and traffic cells; rebinding it here
# would make those cells target this local endpoint when the notebook is run
# from top to bottom.
endpoint_name = "foqa-endpoint-defeffd5"
local_endpoint = ml_client.online_endpoints.get(name=endpoint_name, local=True)
print(local_endpoint)

auth_mode: key
location: local
mirror_traffic: {}
name: foqa-endpoint-defeffd5
properties: {}
provisioning_state: Succeeded
scoring_uri: http://localhost:5001/score
tags: {}
traffic: {}



### 8.3.4 Send test data & receive prediction

#### 8.3.4.1 Create request.json

In [None]:
# Finish the following code

# %%writefile ./sample-request.json
# TODO: Load data to X_test & modify the test_json
# import json
# test_json = np.ndarray.tolist(X_test[:10])
# json_object = json.dumps(test_json)

# # Writing to sample-request.json
# with open("sample-request.json", "w") as outfile:
#     outfile.write('{"data":')
#     outfile.write(json_object)
#     outfile.write("}")

#### 8.3.4.2 Invoking the local endpoint with sample-request.json

In [5]:
# JSON payload to send and the local endpoint that receives it.
request_file_path = "./sample-request.json"
endpoint_name = "foqa-endpoint-defeffd5"

# local=True sends the request to the local endpoint; the response is shown
# as the cell output.
ml_client.online_endpoints.invoke(
    local=True,
    request_file=request_file_path,
    endpoint_name=endpoint_name,
)

'[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]'


## 8.4 ONLINE DEPLOYMENT

### 8.4.1 Configure Deployment

In [34]:
# Instance types (VM SKUs) available for managed online endpoints:
# https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2

from azure.ai.ml.entities import CodeConfiguration, ManagedOnlineDeployment

# Deploy to the endpoint object currently bound to `endpoint`.
foqa_endpoint_name = endpoint.name

# Registered model to serve: version 1 of "foqa_model".
foqa_model = ml_client.models.get(version='1', name='foqa_model')

# Scoring code: ./src/score.py
scoring_code = CodeConfiguration(scoring_script="score.py", code="./src")

blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=foqa_endpoint_name,
    model=foqa_model,
    environment="foqa-env@latest",
    code_configuration=scoring_code,
    # Alternative that was tried: instance_type="Standard_DS3_v3"
    instance_type="Standard_F4s_v2",
    instance_count=3,
)

### 8.4.2 Deploy to Azure

In [35]:
# Start the deployment, then block until Azure reports the outcome.
deployment_poller = ml_client.online_deployments.begin_create_or_update(blue_deployment)
result = deployment_poller.result()

Check: endpoint foqa-endpoint-4dd696a7 exists
[32mUploading src (0.0 MBs): 100%|██████████| 3898/3898 [00:00<00:00, 93337.12it/s]
[39m

data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored


.......................................................................

### 8.4.3 Direct traffic to endpoint

In [36]:
# Route 100% of the endpoint's traffic to the "blue" deployment.
endpoint.traffic = dict(blue=100)

# Push the updated endpoint and wait for the operation; the resulting
# endpoint object is shown as the cell output.
traffic_update = ml_client.begin_create_or_update(endpoint)
traffic_update.result()

Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://foqa-endpoint-4dd696a7.eastus2.inference.ml.azure.com/score', 'openapi_uri': 'https://foqa-endpoint-4dd696a7.eastus2.inference.ml.azure.com/swagger.json', 'name': 'foqa-endpoint-4dd696a7', 'description': 'Endpoint for FOQA', 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216/resourcegroups/foqa-resource-2/providers/microsoft.machinelearningservices/workspaces/foqa-ws-2/onlineendpoints/foqa-endpoint-4dd696a7', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/oe:7f2f69c2-5aa1-4ade-9e51-d64ace308061:ba660499-4389-4aad-8819-f28e1892de19?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216/resourceGroups/foqa-resource-2

### 8.4.4 Test Deployment

In [None]:
# Folder holding the request payload used to test the online deployment.
deploy_dir = "./deploy"

# Create the folder; an already existing folder is accepted as is.
os.makedirs(name=deploy_dir, exist_ok=True)

In [None]:
# Finish the following code

# %%writefile {deploy_dir}/sample-request.json
# TODO: Load data to X_test & modify the test_json
# import json
# test_json = np.ndarray.tolist(X_test[:10])
# json_object = json.dumps(test_json)

# # Writing to sample-request.json
# with open(f"{deploy_dir}/sample-request.json", "w") as outfile:  # the test cell reads ./deploy/sample-request.json
#     outfile.write('{"data":')
#     outfile.write(json_object)
#     outfile.write("}")

In [37]:
# Send the sample request to the "blue" deployment of the endpoint; the
# response is shown as the cell output.
ml_client.online_endpoints.invoke(
    deployment_name="blue",
    endpoint_name=foqa_endpoint_name,
    request_file="./deploy/sample-request.json",
)

'[0, 0, 0, 1, 1, 0, 1, 0, 0, 0]'