# Part 3 - Model Training with Azure ML and Iguazio

## 2. Azure AutoML

### Initialize MLRun Project

In [2]:
import mlrun
from mlrun import get_or_create_project, code_to_function, build_function, run_function

In [3]:
# Load the "azure-fs-demo" MLRun project (or create it if it does not exist yet),
# using the current directory as the project context.
project = get_or_create_project(
    name="azure-fs-demo",
    context="./",
)

> 2022-02-18 01:26:10,012 [info] loaded project azure-fs-demo from MLRun DB


### Code to Function

In [4]:
# Wrap the local module azureml_utils.py as an MLRun function of kind "job".
# NOTE(review): the image name presumably refers to the image produced by the
# "Build Docker Image" step of this notebook - confirm.
azure_automl = code_to_function(
    filename="azureml_utils.py",
    name="azure",
    kind="job",
    image=".mlrun/func-azure-fs-demo-azure:latest",
)

# Persist the function definition; the cell displays the returned DB URI.
azure_automl.save()

'db://azure-fs-demo/azure'

### Build Docker Image

In [None]:
# Build the Docker image for the "azure" function on top of the MLRun base image,
# installing the packages listed in requirements.txt.
# NOTE(review): skip_deployed=False appears to request a build even when an image
# already exists (see the MLRun build_function docs), so this cell does not skip
# by itself - do not re-run it if the image is already built. Confirm.
# NOTE(review): the run log in this notebook reports server 0.9.1 vs. client 0.8.0;
# consider aligning the base image tag with the server version - confirm.
build_function(
    function="azure",
    base_image="mlrun/mlrun:0.8.0",
    requirements="requirements.txt",
    with_mlrun=False,
    skip_deployed=False,
)

### Set Project Secrets

In [11]:
# Azure credentials and workspace identifiers to store as project secrets.
# The "XXXXXXXX" values are placeholders: substitute the real values before
# running this cell and never commit them to source control.
azure_secrets = {
    "AZURE_TENANT_ID": "XXXXXXXX",
    "AZURE_SERVICE_PRINCIPAL_ID": "XXXXXXXX",
    "AZURE_SERVICE_PRINCIPAL_PASSWORD": "XXXXXXXX",
    "AZURE_SUBSCRIPTION_ID": "XXXXXXXX",
    "AZURE_RESOURCE_GROUP": "XXXXXXXX",
    "AZURE_WORKSPACE_NAME": "XXXXXXXX",
    "AZURE_STORAGE_CONNECTION_STRING": "XXXXXXXX",
}

# Register the secrets on the project using the Kubernetes secret provider.
mlrun.get_run_db().create_project_secrets(
    project.name,
    provider=mlrun.api.schemas.SecretProviderName.kubernetes,
    secrets=azure_secrets,
)

In [7]:
# Names of the Kubernetes-backed project secrets the training run needs.
azure_secret_keys = [
    "AZURE_TENANT_ID",
    "AZURE_SERVICE_PRINCIPAL_ID",
    "AZURE_SERVICE_PRINCIPAL_PASSWORD",
    "AZURE_SUBSCRIPTION_ID",
    "AZURE_RESOURCE_GROUP",
    "AZURE_WORKSPACE_NAME",
    "AZURE_STORAGE_CONNECTION_STRING",
]

# Task template carrying the secret references; it is passed as `base_task`
# when the training job is launched.
secrets_spec = mlrun.new_task().with_secrets(
    kind="kubernetes",
    source=azure_secret_keys,
)

### AutoML Settings

In [8]:
# AutoML configuration, forwarded to the training job via params["automl_settings"].
automl_settings = {
    "task": "classification",
    "enable_early_stopping": False,
    "allowed_models": ["LogisticRegression", "SGD", "SVM"],
    "iterations": 5,
    "n_cross_validations": 5,
    "primary_metric": "accuracy",
    "featurization": "off",
    "model_explainability": False,
    "enable_voting_ensemble": False,
    "enable_stack_ensemble": False,
}

# Job input: the heart-disease feature vector stored in the azure-fs-demo project.
inputs = {
    "dataset": "store://feature-vectors/azure-fs-demo/heart_disease_vec:latest",
}

# Job parameters for the training handler.
# NOTE(review): the dataset/model names below say "iris" although the input is
# the heart_disease_vec feature vector - looks like a leftover from another
# example; confirm before renaming, since other notebooks may reference them.
params = {
    "experiment_name": "azure-iguazio-blog",
    "cpu_cluster_name": "azureml-cpu",
    "dataset_name": "iris",
    "dataset_description": "iris training data",
    "register_model_name": "iris-model",
    "label_column_name": "target",
    "save_n_models": 3,
    "automl_settings": automl_settings,
}

### Run AutoML Training Job

In [10]:
# Launch the "train" handler of the "azure" function as an MLRun job, using the
# secrets task template as the base task. The returned run object is displayed
# as the cell output.
run_function(
    function="azure",
    handler="train",
    base_task=secrets_spec,
    inputs=inputs,
    params=params,
)

> 2022-02-18 01:29:48,376 [info] starting run azure-train uid=464630c52a3142b18a9ff35178af38ef DB=http://mlrun-api:8080
> 2022-02-18 01:29:48,702 [info] Job is running in the background, pod: azure-train-l58pd
> 2022-02-18 01:29:51,994 [info] Server and client versions are not the same: {'parsed_server_version': VersionInfo(major=0, minor=9, patch=1, prerelease=None, build=None), 'parsed_client_version': VersionInfo(major=0, minor=8, patch=0, prerelease=None, build=None)}
> 2022-02-18 01:29:53,986 [info] Loading AzureML Workspace
> 2022-02-18 01:29:56,278 [info] Initializing AzureML experiment azure-iguazio-blog
> 2022-02-18 01:29:56,658 [info] Initializing AzureML compute target azureml-cpu
> 2022-02-18 01:30:04,727 [info] Found existing cluster, will use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
> 2022-02-18 01:30:04,891 [info] Connecting to AzureML experiment default datastore
> 2022-02-18 01:30:05,064 [info] Retri

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
azure-fs-demo,...78af38ef,0,Feb 18 01:29:53,completed,azure-train,workflow=Nonev3io_user=nickkind=jobowner=nickhost=azure-train-l58pd,dataset,"experiment_name=azure-iguazio-blogcpu_cluster_name=azureml-cpudataset_name=irisdataset_description=iris training dataregister_model_name=iris-modellabel_column_name=targetsave_n_models=3automl_settings={'task': 'classification', 'enable_early_stopping': False, 'allowed_models': ['LogisticRegression', 'SGD', 'SVM'], 'iterations': 5, 'n_cross_validations': 5, 'primary_metric': 'accuracy', 'featurization': 'off', 'model_explainability': False, 'enable_voting_ensemble': False, 'enable_stack_ensemble': False}",dataset_blob_path=az://azureml-blobstore-27f8977b-4946-4ca0-bdc5-5a685d2fe8d7/iris.parquetbest_iteration=1precision_score_macro=1.0matthews_correlation=1.0recall_score_micro=1.0auc_micro=1.0precision_score_micro=1.0f1_score_micro=1.0accuracy=1.0weighted_accuracy=1.0average_precision_score_macro=1.0f1_score_macro=1.0precision_score_weighted=1.0auc_weighted=1.0recall_score_weighted=1.0average_precision_score_weighted=1.0norm_macro_recall=1.0balanced_accuracy=1.0f1_score_weighted=1.0auc_macro=1.0recall_score_macro=1.0log_loss=0.027781935797568047average_precision_score_micro=1.0,modelmodel_0model_1model_2





> 2022-02-18 01:39:23,610 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f6bf7f9a9d0>