[AZURE ML PRODUCTION DEPLOYMENT - FOQA DECISION TREE MODEL](https://c3.ndc.nasa.gov/dashlink/resources/1018/)

# 1. Packages installation - AML

In [38]:
# Show the path of the `python` executable that is found first on the shell PATH.
!which python

/anaconda/envs/azureml_py38/bin//python


In [None]:
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall scikit-learn
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall -U imbalanced-learn
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azureml
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azure-ai-ml
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azureml-inference-server-http
!/anaconda/envs/azureml_py38/bin/python -m pip install --upgrade --force-reinstall azure-ai-formrecognizer

# 2. Init Azure Config

In [10]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Workspace coordinates: replace each placeholder with the real value before running.
subscription_id = "<insert-subscription-id>"
resource_group = "<insert-resource-group>"
workspace_name = "<insert-workspace-name>"

# Handle to the workspace; every later cell talks to Azure ML through this client.
ml_client = MLClient(
    workspace_name=workspace_name,
    resource_group_name=resource_group,
    subscription_id=subscription_id,
    credential=DefaultAzureCredential(),
)

## Explore Azure Dataset (optional — the cell below is disabled)

In [None]:
# Disabled exploration snippet: the code below is wrapped in a string literal,
# so none of it is executed when this cell runs.
# It sketches fetching the 'foqa-data-asset' dataset through the azureml.core API,
# loading it with NumPy and printing the shapes of its 'data' and 'label' arrays.
# NOTE(review): if this is re-enabled, os.listdir() returns bare file names, so the
# name passed to np.load probably has to be joined with mount_context.mount_point,
# and the arrays are read after the `with` block has exited — confirm both.
'''
from azureml.core import Workspace, Dataset

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='foqa-data-asset')
dataset.download(target_path='.', overwrite=True)
# Download mounts the file as local file
# TODO: How to work directly without mounting

import numpy as np
import os
with dataset.mount() as mount_context:
    # print(os.listdir(mount_context.mount_point)[0])
    full_data = np.load(os.listdir(mount_context.mount_point)[0])

data = full_data['data']
label = full_data['label']
print("Data:",data.shape)
print("Label:",label.shape)'''

# 3. Create training script

In [12]:
import os

# Folder that holds the training script; it is uploaded as the job's code directory.
train_src_dir = "./src"

# Create the folder only when it is not there yet.
if not os.path.isdir(train_src_dir):
    os.makedirs(train_src_dir)

In [13]:
%%writefile {train_src_dir}/main.py
import numpy as np

def main():
    """Main function of the script."""
    pass

if __name__ == "__main__":
    main()    

Overwriting ./src/main.py


# 4. Create custom environment

In [14]:
%%writefile ./conda.yaml
# Conda specification used as the conda_file of the "foqa-env" Azure ML environment.
name: foqa-env
channels:
  - conda-forge
dependencies:
  # NOTE(review): Python 3.7 differs from the azureml_py38 environment this notebook
  # installs its own packages into — confirm the version mismatch is intended.
  - python=3.7
  - scikit-learn
  - pandas
  - numpy
  - matplotlib
  - xgboost
  - imbalanced-learn  
  - pip
  # The packages below are installed with pip instead of conda.
  - pip:
    # NOTE(review): the PyPI distribution `azureml` appears to be the legacy Studio
    # (classic) client rather than the Azure ML SDK — confirm it is the intended package.
    - azureml
    - azure-ai-ml
    - azureml-mlflow
    - azureml-inference-server-http

Overwriting ./conda.yaml


In [15]:
from azure.ai.ml.entities import Environment

# Custom environment definition: a base Docker image plus the ./conda.yaml spec.
env = Environment(
    name="foqa-env",
    description="Environment for FOQA",
    conda_file="./conda.yaml",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
)

# Register the environment in the workspace; the returned entity is the cell output.
ml_client.environments.create_or_update(env)

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'name': 'foqa-env', 'description': 'Environment for FOQA', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/e3fb51e5-d8bd-4bf8-9685-bda3d5d2e216/resourceGroups/foqa-resource-2/providers/Microsoft.MachineLearningServices/workspaces/foqa-ws-2/environments/foqa-env/versions/3', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/foqa-compute/code/Users/duc.tran', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f5574109d30>, 'serialize': <msrest.serialization.Serializer object at 0x7f5574109e80>, 'version': '3', 'latest_version': None, 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.7', 'scikit-learn', 'pandas', 'numpy', 'matplotlib', 'xgboost', 'imbalanced-learn', 'pip', {'pip': ['azureml', 'azure-ai-ml', 'azureml-mlflow', 'azureml-inference-server-http']}], 'name': 'foqa

# 5. Create compute cluster

In [None]:
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError

# Name assigned to the compute cluster
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the compute target if it already exists in the workspace.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except ResourceNotFoundError:
    # Only a missing cluster leads to creation. Other failures (authentication,
    # network, permissions) propagate instead of being mistaken for "not found".
    print("Creating a new cpu compute target...")

    # Create the Azure Machine Learning compute object with the intended parameters.
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        # Azure Machine Learning Compute is the on-demand VM service
        type="amlcompute",
        # VM family
        size="STANDARD_DS3_V2",
        # Minimum number of nodes kept running when no job is running
        min_instances=0,
        # Maximum number of nodes in the cluster
        max_instances=1,
        # Seconds a node stays up after its job has terminated
        idle_time_before_scale_down=180,
        # Dedicated or LowPriority. The latter is cheaper but jobs may be terminated
        tier="Dedicated",
    )
    print(
        f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}"
    )
    # begin_create_or_update only starts the operation and returns a poller.
    # .result() waits for provisioning and returns the created compute, so
    # cpu_cluster is a compute object in both branches.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

# 6. Submit the model training as a job

In [None]:
# NOTE:
# 1. How to get the "path" argument of the job input
#     1.1:
#         all_data_assets = ml_client.data.list()
#         data_asset_path = ml_client.data.get(name="foqa-data-asset", version="1")
#         Copy the path
#     1.2:
#         AML -> Data -> Datastore -> <datastore-name> -> Browse
#         -> <click the triple dot of the target file> -> Copy URI
#
# 2. List all values available for the "environment" argument:
#     Portal -> cloud shell -> az ml environment list --resource-group foqa-resource --workspace-name foqa-ws

from azure.ai.ml import command
from azure.ai.ml import Input


registered_model_name = "foqa_model"

job = command(
    inputs=dict(
        data=Input(
            type="uri_file",
            # Workspace-relative datastore URI: it is resolved against the workspace
            # that ml_client is bound to, so no subscription, resource group or
            # workspace name is hard-coded here.
            path="azureml://datastores/foqa_datastore/paths/DASHlink_full_fourclass_raw_comp.npz",
        ),
        train_test_ratio=0.2,
        registered_model_name=registered_model_name,
    ),
    code="./src/",  # location of source code
    # Each ${{inputs.*}} placeholder is replaced with the matching entry of `inputs`.
    command="python main.py --data ${{inputs.data}} --train_test_ratio ${{inputs.train_test_ratio}} --registered_model_name ${{inputs.registered_model_name}}",
    environment="foqa-env@latest",
    # Reuse the cluster name defined in the compute-cluster cell ("cpu-cluster").
    compute=cpu_compute_target,
    display_name="foqa-prediction",
)

# Submit the job; the returned object carries the generated job name.
returned_job = ml_client.create_or_update(job)

# 7. Register Model

In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

job_name = returned_job.name

# The job was only submitted, not awaited, so its output artifacts may not exist yet.
# jobs.stream() tails the job log and blocks until the job has finished.
ml_client.jobs.stream(job_name)

# Model entity pointing at the "model/" folder of the job's output artifacts.
run_model = Model(
    path=f"azureml://jobs/{job_name}/outputs/artifacts/paths/model/",
    name="foqa-model",
    description="Model for FOQA",
    type=AssetTypes.MLFLOW_MODEL,
)
# NOTE(review): this name ("foqa-model") differs from the registered_model_name
# ("foqa_model") passed to the training job — confirm which one is intended.
ml_client.models.create_or_update(run_model)