# Chapter 6 Model Deployment

In [44]:
import azure.ai.ml
print(azure.ai.ml._version.VERSION)

0.1.0b6


In [45]:
#import required libraries
import pandas as pd
import matplotlib.pyplot as plt
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.entities import Environment, BuildContext

In [46]:
subscription_id = '5da07161-3770-4a4b-aa43-418cbbb627cf'
resource_group = 'aml-workspace-rg'
workspace = 'aml-workspace'

## Connecting to your workspace

Once you connect to your workspace, you will create a new cpu target which you will provide an environment to.

- Configure your credential.  We are using `DefaultAzureCredential`.  It will request a token using multiple identities, stopping once a token is found

In [47]:
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    # This will open a browser page for
    credential = InteractiveBrowserCredential()

In [48]:
#connect to the workspace
try:
    ml_client = MLClient.from_config(credential=credential)
except Exception as ex:
    # NOTE: Update following workspace information if not correctly configure before
    client_config = {
        "subscription_id": subscription_id,
        "resource_group": resource_group,
        "workspace_name": workspace,
    }

    if client_config["subscription_id"].startswith("<"):
        print(
            "please update your <SUBSCRIPTION_ID> <RESOURCE_GROUP> <AML_WORKSPACE_NAME> in notebook cell"
        )
        raise ex
    else:  # write and reload from config file
        import json, os

        config_path = "../.azureml/config.json"
        os.makedirs(os.path.dirname(config_path), exist_ok=True)
        with open(config_path, "w") as fo:
            fo.write(json.dumps(client_config))
        ml_client = MLClient.from_config(credential=credential, path=config_path)
print(ml_client)

Found the config file in: /mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/.azureml/config.json


MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f062601aeb0>,
         subscription_id=5da07161-3770-4a4b-aa43-418cbbb627cf,
         resource_group_name=aml-workspace-rg,
         workspace_name=aml-workspace)


## Setup enviroment

### Creating environment from docker image with a conda YAML

Azure ML allows you to leverage curated environments, as well as to build your own environment from:

    - existing docker image
    - base docker image with a conda yml file to customize
    - a docker build content
    
We will proceed with creating an environment from a docker build plus a conda yml file.

In [49]:
import os
script_folder = os.path.join(os.getcwd(), "conda-yamls")
print(script_folder)
os.makedirs(script_folder, exist_ok=True)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6/conda-yamls


### Create job environment in a yml file

In [50]:
%%writefile $script_folder/job_env.yml
name: job_env
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.8.5
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
- pip:
  - azureml-defaults
  - pyarrow
  - azureml-mlflow==1.43.0.post1
  - azure-ai-ml
  - mltable

Overwriting /mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6/conda-yamls/job_env.yml


### Getting the most current and up-to-date base image

Default images are always changing.  
Note the base image is defined in the property `image` below.  These images are defined at [https://hub.docker.com/_/microsoft-azureml](https://hub.docker.com/_/microsoft-azureml)

The current image we have selected for this notebook is `mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04`, but based on image availability, that will change in the future.  In additon, note the python version specified in your conda environment file is `python=3.8.5`, as this will evolve over time as well.  Currently `MLTable` is not available in python 3.10, but as that changes, we encourage you to update python version as well.

In [51]:
env_docker_conda = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="conda-yamls/job_env.yml",
    name="job_base_env",
    description="Environment created from a Docker image plus Conda environment.",
)
ml_client.environments.create_or_update(env_docker_conda)

Environment({'is_anonymous': False, 'auto_increment_version': False, 'name': 'job_base_env', 'description': 'Environment created from a Docker image plus Conda environment.', 'tags': {}, 'properties': {}, 'id': '/subscriptions/5da07161-3770-4a4b-aa43-418cbbb627cf/resourceGroups/aml-workspace-rg/providers/Microsoft.MachineLearningServices/workspaces/aml-workspace/environments/job_base_env/versions/2022-08-27-21-31-56-1184941', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6', 'creation_context': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.SystemData object at 0x7f0625fe4310>, 'serialize': <msrest.serialization.Serializer object at 0x7f0636f2a8e0>, 'version': '2022-08-27-21-31-56-1184941', 'latest_version': None, 'conda_file': {'dependencies': ['python=3.8.5', 'scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip', {'pip': ['azureml-defaul

## Create Compute 

In [52]:
from azure.ai.ml.entities import AmlCompute

# specify aml compute name.
cpu_compute_target = "cpu-cluster"

try:
    ml_client.compute.get(cpu_compute_target)
except Exception:
    print("Creating a new cpu compute target...")
    compute = AmlCompute(
        name=cpu_compute_target, size="STANDARD_D2_V2", min_instances=0, max_instances=4, idle_time_before_scale_down = 1800
    )
    ml_client.compute.begin_create_or_update(compute)

## Creating code to generate Basic Model

We will first create a model using the job command

In [53]:
script_folder = os.path.join(os.getcwd(), "src")
print(script_folder)
os.makedirs(script_folder, exist_ok=True)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6/src


## Create main.py file for running in your command

In [54]:
%%writefile ./src/main.py
import os
import mlflow
import argparse
from mlflow.tracking import MlflowClient
import pandas as pd
import matplotlib.pyplot as plt
import mlflow
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score,roc_curve

import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score, precision_score, recall_score


# define functions
def main(args):
    # enable auto logging
    current_run = mlflow.start_run()
    mlflow.sklearn.autolog()

    # read in data
    df = pd.read_csv(args.titanic_csv)
    model = model_train('Survived', df, args.randomstate)
    

def model_train(LABEL, df, randomstate):
    print('df.columns = ')
    print(df.columns)
    y_raw           = df[LABEL]
    columns_to_keep = ['Embarked', 'Loc', 'Sex','Pclass', 'Age', 'Fare', 'GroupSize']
    X_raw           = df[columns_to_keep]
    
    X_raw['Embarked'] = X_raw['Embarked'].astype(object)
    X_raw['Loc'] = X_raw['Loc'].astype(object)
    X_raw['Loc'] = X_raw['Sex'].astype(object)
    X_raw['Pclass'] = X_raw['Pclass'].astype(float)
    X_raw['Age'] = X_raw['Age'].astype(float)
    X_raw['Fare'] = X_raw['Fare'].astype(float)
    X_raw['GroupSize'] = X_raw['GroupSize'].astype(float)
    


    print(X_raw.columns)
     # Train test split
    X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, test_size=0.2, random_state=randomstate)
    
    #use Logistic Regression estimator from scikit learn
    lg = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
    preprocessor = buildpreprocessorpipeline(X_train)
    
    #estimator instance
    clf = Pipeline(steps=[('preprocessor', preprocessor),
                               ('regressor', lg)], verbose=True)

    model = clf.fit(X_train, y_train)
    
    print('type of X_test = ' + str(type(X_test)))
          
    y_pred = model.predict(X_test)
    
    print('*****X_test************')
    print(X_test)
    
    metrics = mlflow.sklearn.eval_and_log_metrics(model, X_test, y_test, prefix="test_")
    
    #get the active run.
    run = mlflow.active_run()
    print("Active run_id: {}".format(run.info.run_id))
    MlflowClient().log_metric(run.info.run_id, "metric", 0.22)

    
    return model

    mlflow.end_run()


def buildpreprocessorpipeline(X_raw):

    categorical_features = X_raw.select_dtypes(include=['object', 'bool']).columns
    numeric_features = X_raw.select_dtypes(include=['float','int64']).columns

    #categorical_features = ['Sex', 'Embarked', 'Loc']
    categorical_transformer = Pipeline(steps=[('onehotencoder', 
                                               OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore'))])


    #numeric_features = ['Pclass', 'Age', 'Fare', 'GroupSize']    
    numeric_transformer1 = Pipeline(steps=[('scaler1', SimpleImputer(missing_values=np.nan, strategy = 'mean'))])
    

    preprocessor = ColumnTransformer(
        transformers=[
            ('numeric1', numeric_transformer1, numeric_features),
            ('categorical', categorical_transformer, categorical_features)], remainder='drop')
    
    return preprocessor



def parse_args():
    # setup arg parser
    parser = argparse.ArgumentParser()

    # add arguments
    parser.add_argument("--titanic-csv", type=str)
    parser.add_argument("--randomstate", type=int, default=42)

    # parse args
    args = parser.parse_args()
    print(args)
    # return args
    return args


# run script
if __name__ == "__main__":
    # parse args
    args = parse_args()

    # run main function
    main(args)

Overwriting ./src/main.py


## Configure Command

- `display_name` display name for the job
- `description`  the description of the experiment
- `code` path where the code is located
- `command` command to run
- `inputs`  dictionary of name value pairs using `${{inputs.<input_name>}}`
    
    - To use files or folder - using the `Input` class
        
        - `type` defaults to a `uri_folder` but this can be set to `uri_file` or `uri_folder`
        - `path` is the path to the file or folder.  These can be local or remote leveraging **https, http, wasb`
        
            - To use an Azure ML dataset, this would be an Input `Input(type='uri_folder', path='my_dataset:1')`
            
            - `mode` is how the data should be delivered to the compute which include `ro_mount`(default), `rw_mount` and `download`

- `environment`: environment to be used by compute when running command
- `compute`: can be `local`, or a specificed compute name
- `distribution`: distribution to leverage for distributed training scenerios including:
        
    - `Pytorch`
    - `TensorFlow`
    - `MPI`
            

In [55]:
# create the command
from azure.ai.ml import command
from azure.ai.ml import Input

myjob = command(
    code="./src",  # local path where the code is stored
    command="python main.py --titanic ${{inputs.titanic}} --randomstate ${{inputs.randomstate}}",
    inputs={
        "titanic": Input(
            type="uri_file",
            path="azureml:titanic_prepped:1",
        ),
        "randomstate": 0,
    },
    environment="job_base_env@latest",
    compute="cpu-cluster",
    display_name="sklearn-titanic",
    # description,
    # experiment_name
)

In [56]:
script_folder = os.path.join(os.getcwd(), "job")
print(script_folder)
os.makedirs(script_folder, exist_ok=True)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6/job


## Run Command with SDK

In [57]:
# submit the command
returned_job = ml_client.create_or_update(myjob)

### Register the Model 

Using the Python SDK V2 - we can register the Model for use.  

Parameters for model registration include:

- `path` - A remote uri or local path pointing at the model
- `name` - A string value
- `description` - A description for the model
- `type` - valid values include: 
    - "custom_model"
    - "mlflow_model" 
    - "triton_model".  
    
* Instead of typing out the `type`, you can use the AssetTypes in the namespace azure.ai.ml.constants as we have done below




In [58]:
runid = returned_job.name
print('runid:' + runid)
experiment = returned_job.experiment_name
print("experiment:" + experiment)

runid:coral_gas_1hz6njncjl
experiment:Chapter6


In [60]:
import mlflow
import time

exp = mlflow.get_experiment_by_name(experiment)
last_run = mlflow.search_runs(exp.experiment_id, output_format="list")[-1]

if last_run.info.run_id != runid:
    print('run ids were not the same - waiting for run id to update')
    time.sleep(5)
    exp = mlflow.get_experiment_by_name(experiment)
    last_run = mlflow.search_runs(exp.experiment_id, output_format="list")[-1]

while last_run.info.status == 'SCHEDULED':
  print('run is being scheduled')
  time.sleep(5)
  last_run = mlflow.search_runs(exp.experiment_id, output_format="list")[-1]

while last_run.info.status == 'RUNNING':
  print('job is being run')
  time.sleep(10)
  last_run = mlflow.search_runs(exp.experiment_id, output_format="list")[-1]

print("run_id:{}".format(last_run.info.run_id))
print('----------')
print("run_id:{}".format(last_run.info.status))

run_id:coral_gas_1hz6njncjl
----------
run_id:FINISHED


## Register the Model

In [64]:
#https://github.com/Azure/azureml-examples/blob/march-sdk-preview/sdk/assets/model/model.ipynb
from azure.ml._constants import ModelType

ModuleNotFoundError: No module named 'azure.ml'

In [73]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# run_model = Model(
#     path=f"runs:/{last_run.info.run_id}/model",
#     name="chapter6_titanic_model",
#     description="Model created from run.",
#     type=AssetTypes.MLFLOW_MODEL 
# )
job_name = runid

run_model = Model(
    path="azureml://jobs/" + last_run.info.run_id  + "/outputs/artifacts/paths/model/",
    #Users could also use, "azureml://jobs/XXXXXXXXXXXXXXXXXXXXXXXXX/outputs/artifacts/paths/model/" as a shorthand to the same location
    name="run-model-example",
    description="Model created from run.",
    type=AssetTypes.MLFLOW_MODEL
)

ml_client.models.create_or_update(run_model) 

Model({'job_name': 'coral_gas_1hz6njncjl', 'is_anonymous': False, 'auto_increment_version': False, 'name': 'run-model-example', 'description': 'Model created from run.', 'tags': {}, 'properties': {}, 'id': '/subscriptions/5da07161-3770-4a4b-aa43-418cbbb627cf/resourceGroups/aml-workspace-rg/providers/Microsoft.MachineLearningServices/workspaces/aml-workspace/models/run-model-example/versions/1', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6', 'creation_context': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.SystemData object at 0x7f0625405700>, 'serialize': <msrest.serialization.Serializer object at 0x7f062546fa60>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/5da07161-3770-4a4b-aa43-418cbbb627cf/resourceGroups/aml-workspace-rg/workspaces/aml-workspace/datastores/workspaceartifactstore/paths/ExperimentRun/dcid.cora

In [74]:
run_model

Model({'job_name': None, 'is_anonymous': False, 'auto_increment_version': True, 'name': 'run-model-example', 'description': 'Model created from run.', 'tags': {}, 'properties': {}, 'id': None, 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/memasanz1/code/Users/memasanz/ML-Engineering-with-Azure-Machine-Learning-Service/Chapter6', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f062546fbe0>, 'version': None, 'latest_version': None, 'path': 'azureml://jobs/coral_gas_1hz6njncjl/outputs/artifacts/paths/model/', 'utc_time_created': None, 'flavors': None, 'arm_type': 'model_version', 'type': 'mlflow_model'})

In [75]:
run_model.path

'azureml://jobs/coral_gas_1hz6njncjl/outputs/artifacts/paths/model/'

In [None]:
#'path': 'azureml://subscriptions/5da07161-3770-4a4b-aa43-418cbbb627cf/resourceGroups/aml-workspace-rg/workspaces/aml-workspace/datastores/workspaceartifactstore/paths/ExperimentRun/dcid.shy_brake_7ts0t3rj8w/model