In [3]:
import configparser

from azure.ai.ml import automl, MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import AmlCompute, Data
from azure.identity import DefaultAzureCredential
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import Input
from azure.ai.ml.automl import ClassificationModels

In [4]:
# Load the Azure connection settings from the local INI file.
CONFIG_FILE = 'config.ini'
config = configparser.ConfigParser()

# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the settings file is missing.
# Fail here with a clear message instead of a later "No section: 'Azure'".
if not config.read(CONFIG_FILE):
    raise FileNotFoundError(f"Could not read settings file: {CONFIG_FILE}")

# All settings are read from the [Azure] section.
subscription_id = config.get('Azure', 'subscription_id')
resource_group = config.get('Azure', 'resource_group')
workspace_name = config.get('Azure', 'workspace')
datastore_name = config.get('Azure', 'datastore_name')
clean_data_path = config.get('Azure', 'clean_data_path')
print(subscription_id, resource_group, workspace_name, datastore_name, clean_data_path)

f3aa5221-5b34-4091-bcec-acf7b816f5b6 GrpTrabajo1 GrpTrabajo1 workspaceblobstore UI/clean_data.csv


In [5]:
# Credential object handed to every Azure ML call made through ml_client.
credential = DefaultAzureCredential()

# Build the client once, from the values read out of config.ini.
# The previous MLClient.from_config(credential) call was removed: its result
# was overwritten on the very next line, so it only added a dependency on a
# separate config.json file.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

Found the config file in: /config.json


In [6]:
# Fetch the workspace so its location can be shown next to the configured ids.
workspace = ml_client.workspaces.get(name=workspace_name)

# Summary dictionary; leaving it as the last expression displays it as the cell output.
output = {
    "Workspace": workspace_name,
    "Subscription ID": subscription_id,
    "Resource Group": resource_group,
    "Location": workspace.location,
}
output

{'Workspace': 'GrpTrabajo1',
 'Subscription ID': 'f3aa5221-5b34-4091-bcec-acf7b816f5b6',
 'Resource Group': 'GrpTrabajo1',
 'Location': 'eastus'}

In [7]:
# Local CSV to register and the version label of the data asset.
my_path = "./data/train.csv"
v1 = "initial"

# Definition of the URI_FILE data asset pointing at the local training file.
my_data = Data(
    name="Mobile-Data",
    version=v1,
    description="Mobile Data",
    path=my_path,
    type=AssetTypes.URI_FILE,
)

try:
    # Reuse the asset when this name/version is already registered.
    data_asset = ml_client.data.get(name=my_data.name, version=my_data.version)
    print(
        f"Data asset already exists. Name: {my_data.name}, version: {my_data.version}"
    )
except ResourceNotFoundError:
    # Only a missing asset triggers creation. Other failures (authentication,
    # network, interrupts) now propagate instead of being swallowed by a bare except.
    data_asset = ml_client.data.create_or_update(my_data)
    print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

Data asset already exists. Name: Mobile-Data, version: initial


In [8]:
# Re-read the registered asset to obtain the path stored in the workspace.
data_asset = ml_client.data.get(version=v1, name="Mobile-Data")
print("Data asset URI: {}".format(data_asset.path))

Data asset URI: azureml://subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpTrabajo1/workspaces/GrpTrabajo1/datastores/workspaceblobstore/paths/LocalUpload/f55f422ab7ad21f1efb148ac7a59ac7d/train.csv


In [40]:
import pandas as pd

# Load the registered CSV straight from the asset path and preview the first rows.
# NOTE(review): the path printed above is an azureml:// URI; reading it with
# pandas presumably needs the azureml-fsspec package that is installed in a
# later cell - confirm the cell order on a fresh kernel.
df = pd.read_csv(filepath_or_buffer=data_asset.path)
df.head(5)

Unnamed: 0,battery_power,px_height,px_width,ram,price_range
0,1703,286,1235,1046,0
1,1445,1273,1345,1441,1
2,1087,295,589,690,0
3,671,852,1182,2504,2
4,1472,710,1052,2677,2


In [11]:
!pip install -U azureml-fsspec

Requirement already up-to-date: azureml-fsspec in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.2.0)


In [None]:
!pip install mltable

In [None]:
import mltable
from mltable import MLTableHeaders, MLTableFileEncoding, DataType
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

# Version label and local folder used for the MLTable asset.
tbl_v = "1"
mltable_folder = "./MobileDataT"

# Describe the registered CSV as an MLTable: comma-delimited, one header row,
# column types inferred, UTF-8 encoded, no extra path column.
paths = [{"file": data_asset.path}]
tbl = mltable.from_delimited_files(
    paths=paths,
    delimiter=",",
    encoding=MLTableFileEncoding.utf8,
    header=MLTableHeaders.all_files_same_headers,
    infer_column_types=True,
    include_path_column=False,
)
print(tbl.show())

# Write the MLTable definition locally, then register that folder as a data asset.
tbl.save(mltable_folder)
my_data = Data(
    name="MobileDataT",
    version=tbl_v,
    description="Mobile Data MlTable",
    type=AssetTypes.MLTABLE,
    path=mltable_folder,
)
ml_client.data.create_or_update(my_data)

# CREAMOS EL MLTABLE DATA INPUT

In [12]:
# Resolve the registered MLTable asset and wrap its path as the AutoML training input.
mltable_asset = ml_client.data.get(name='MobileDataT', version='1')
dirT = mltable_asset.path
my_training_data_input = Input(path=dirT, type=AssetTypes.MLTABLE)

# CREAMOS EL COMPUTE-CLUSTER PARA EL TRABAJO

In [59]:
# Name of the compute cluster that runs the AutoML job.
cpu_compute_target = "cluster-trabajo1"

try:
    # Reuse the cluster when it already exists in the workspace.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
except ResourceNotFoundError:
    # Only a missing cluster triggers creation. Catching every Exception would
    # also try to create a cluster after authentication or network failures.
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        size="STANDARD_DS12_V2",
        min_instances=0,
        max_instances=6,
        idle_time_before_scale_down=120,
        tier="Dedicated",
    )

    # Block until provisioning has finished so later cells can use the cluster.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

# CREAMOS LA CONFIGURACION DEL AUTOML CLASSIF JOB

In [55]:
# Experiment name under which the jobs in this notebook are submitted.
exp_name = "experimento-trabajo1"

In [60]:
# AutoML classification job: predicts "price_range" from the MLTable input,
# optimising accuracy with 5-fold cross-validation on the CPU cluster.
classification_job = automl.classification(
    experiment_name=exp_name,
    compute=cpu_compute_target,
    training_data=my_training_data_input,
    target_column_name="price_range",
    primary_metric="accuracy",
    n_cross_validations=5,
    enable_model_explainability=True,
    tags={"my_custom_tag": "My custom value"},
)

# Limits for the whole job and for each individual trial.
classification_job.set_limits(
    max_trials=5,
    timeout_minutes=60,
    trial_timeout_minutes=20,
    enable_early_termination=True,
)

# Training settings: logistic regression is excluded and ONNX-compatible models are requested.
classification_job.set_training(
    enable_onnx_compatible_models=True,
    blocked_training_algorithms=[ClassificationModels.LOGISTIC_REGRESSION],
)

# CORREMOS EL COMANDO USANDO MLCLIENT

In [61]:
# Submit the configured AutoML job; the returned object is printed in full below.
returned_job = ml_client.jobs.create_or_update(classification_job)
print(f"Created job: {returned_job}")

Created job: compute: azureml:cluster-trabajo1
creation_context:
  created_at: '2023-10-29T20:37:38.475862+00:00'
  created_by: Boris Bellido
  created_by_type: User
display_name: maroon_boot_jb0dh2jnb6
experiment_name: experimento-trabajo1
id: azureml:/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourceGroups/GrpTrabajo1/providers/Microsoft.MachineLearningServices/workspaces/GrpTrabajo1/jobs/maroon_boot_jb0dh2jnb6
limits:
  enable_early_termination: true
  max_concurrent_trials: 1
  max_cores_per_trial: -1
  max_nodes: 1
  max_trials: 5
  timeout_minutes: 60
  trial_timeout_minutes: 20
log_verbosity: info
n_cross_validations: 5
name: maroon_boot_jb0dh2jnb6
outputs: {}
primary_metric: accuracy
properties: {}
resources:
  instance_count: 1
  shm_size: 2g
services:
  Studio:
    endpoint: https://ml.azure.com/runs/maroon_boot_jb0dh2jnb6?wsid=/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpTrabajo1/workspaces/GrpTrabajo1&tid=4b70a299-063f-4165-a45c-b28e92ad4d

[Handler.handle()] Failed to read or parse request from socket: Expecting value: line 1 column 1 (char 0)
[Handler.handle()] Failed to read or parse request from socket: 'utf-8' codec can't decode byte 0xe4 in position 4: invalid continuation byte
[Handler.handle()] Failed to handle request due to exception class=JSONDecodeError, message=Expecting value: line 1 column 1 (char 0)
[Handler.handle()] Failed to handle request due to exception class=UnicodeDecodeError, message='utf-8' codec can't decode byte 0xe4 in position 4: invalid continuation byte
[Handler.handle()] Failed to read or parse request from socket: 'utf-8' codec can't decode byte 0xe4 in position 4: invalid continuation byte
[Handler.handle()] Failed to handle request due to exception class=UnicodeDecodeError, message='utf-8' codec can't decode byte 0xe4 in position 4: invalid continuation byte
[Handler.handle()] Failed to read or parse request from socket: 'utf-8' codec can't decode byte 0x80 in position 13: invalid start

# ENCONTRAMOS AL MEJOR MODELO

In [13]:
import mlflow

# Look the workspace up through the client and keep its MLflow tracking URI.
current_workspace = ml_client.workspaces.get(name=ml_client.workspace_name)
MLFLOW_TRACKING_URI = current_workspace.mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourceGroups/GrpTrabajo1/providers/Microsoft.MachineLearningServices/workspaces/GrpTrabajo1


In [14]:
# Point MLflow at the workspace tracking URI, then read it back to confirm.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

active_tracking_uri = mlflow.get_tracking_uri()
print("\nCurrent tracking uri: {}".format(active_tracking_uri))


Current tracking uri: azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourceGroups/GrpTrabajo1/providers/Microsoft.MachineLearningServices/workspaces/GrpTrabajo1


In [15]:
from mlflow.artifacts import download_artifacts
from mlflow.tracking.client import MlflowClient

# Client used below to look up the AutoML runs.
# NOTE(review): created without arguments, so it presumably picks up the
# tracking URI set in the previous cell - confirm.
mlflow_client = MlflowClient()

**Ubicamos al automl parent job**

In [16]:
# The name of the job submitted earlier is used as the MLflow run id of the parent run.
# To inspect a specific earlier job instead, assign its name/ID directly, e.g.:
# job_name = "b4e95546-0aa1-448e-9ad6-002e3207b4fc"
job_name = returned_job.name

mlflow_parent_run = mlflow_client.get_run(job_name)

# Show the run itself and then its tags.
print("Parent Run: ")
print(mlflow_parent_run)
parent_run_tags = mlflow_parent_run.data.tags
print(parent_run_tags)

Parent Run: 
<Run: data=<RunData: metrics={'AUC_macro': 0.9914267999587644,
 'AUC_micro': 0.9920944010416667,
 'AUC_weighted': 0.991363427122233,
 'accuracy': 0.92125,
 'average_precision_score_macro': 0.9742055700057497,
 'average_precision_score_micro': 0.9770671504489699,
 'average_precision_score_weighted': 0.9740868701197549,
 'balanced_accuracy': 0.9229265668679002,
 'f1_score_macro': 0.9217469706919792,
 'f1_score_micro': 0.92125,
 'f1_score_weighted': 0.9214907633789032,
 'log_loss': 0.28477476979941424,
 'matthews_correlation': 0.8951160112608332,
 'norm_macro_recall': 0.8972354224905335,
 'precision_score_macro': 0.9215807695058326,
 'precision_score_micro': 0.92125,
 'precision_score_weighted': 0.922712428513892,
 'recall_score_macro': 0.9229265668679002,
 'recall_score_micro': 0.92125,
 'recall_score_weighted': 0.92125,
 'weighted_accuracy': 0.9196608356192824}, params={}, tags={'automl_best_child_run_id': 'maroon_boot_jb0dh2jnb6_3',
 'fit_time_000': '0.3878024;0.3851692;0.

# ENCONTRAMOS LA MEJOR CORRIDA

In [17]:
# The parent run stores the id of its best child run in the
# "automl_best_child_run_id" tag.
parent_tags = mlflow_parent_run.data.tags
best_child_run_id = parent_tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)

# Fetch that child run and show it.
best_run = mlflow_client.get_run(best_child_run_id)
print("Best child run: ")
print(best_run)

Found best child run id:  maroon_boot_jb0dh2jnb6_3
Best child run: 
<Run: data=<RunData: metrics={'AUC_macro': 0.9914267999587644,
 'AUC_micro': 0.9920944010416667,
 'AUC_weighted': 0.991363427122233,
 'accuracy': 0.92125,
 'average_precision_score_macro': 0.9742055700057497,
 'average_precision_score_micro': 0.9770671504489699,
 'average_precision_score_weighted': 0.9740868701197549,
 'balanced_accuracy': 0.9229265668679002,
 'f1_score_macro': 0.9217469706919792,
 'f1_score_micro': 0.92125,
 'f1_score_weighted': 0.9214907633789032,
 'log_loss': 0.28477476979941424,
 'matthews_correlation': 0.8951160112608332,
 'norm_macro_recall': 0.8972354224905335,
 'precision_score_macro': 0.9215807695058326,
 'precision_score_micro': 0.92125,
 'precision_score_weighted': 0.922712428513892,
 'recall_score_macro': 0.9229265668679002,
 'recall_score_micro': 0.92125,
 'recall_score_weighted': 0.92125,
 'weighted_accuracy': 0.9196608356192824}, params={}, tags={'mlflow.parentRunId': 'maroon_boot_jb0dh2

# OBTENEMOS LAS METRICAS DEL MEJOR

In [18]:
# Metrics recorded on the best child run, displayed as the cell output.
best_run_metrics = best_run.data.metrics
best_run_metrics

{'f1_score_macro': 0.9217469706919792,
 'recall_score_micro': 0.92125,
 'norm_macro_recall': 0.8972354224905335,
 'AUC_macro': 0.9914267999587644,
 'accuracy': 0.92125,
 'average_precision_score_micro': 0.9770671504489699,
 'average_precision_score_weighted': 0.9740868701197549,
 'weighted_accuracy': 0.9196608356192824,
 'precision_score_weighted': 0.922712428513892,
 'recall_score_macro': 0.9229265668679002,
 'AUC_weighted': 0.991363427122233,
 'log_loss': 0.28477476979941424,
 'precision_score_micro': 0.92125,
 'average_precision_score_macro': 0.9742055700057497,
 'AUC_micro': 0.9920944010416667,
 'precision_score_macro': 0.9215807695058326,
 'recall_score_weighted': 0.92125,
 'f1_score_micro': 0.92125,
 'balanced_accuracy': 0.9229265668679002,
 'f1_score_weighted': 0.9214907633789032,
 'matthews_correlation': 0.8951160112608332}

# DESCARGAMOS EL MEJOR MODELO

In [19]:
import os

# Local folder that receives the downloaded run artifacts.
local_dir = "./artifact_downloads"

# exist_ok=True makes the cell safe to re-run and avoids the separate
# "check, then create" step of the earlier os.path.exists/os.mkdir version.
os.makedirs(local_dir, exist_ok=True)

In [20]:
# Download the "outputs" artifact folder of the best run into local_dir.
local_path = download_artifacts(
    run_id=best_run.info.run_id,
    artifact_path="outputs",
    dst_path=local_dir,
)
print("Artifacts downloaded in: {}".format(local_path))

# List what was downloaded.
downloaded_names = os.listdir(local_path)
print("Artifacts: {}".format(downloaded_names))

Artifacts downloaded in: /mnt/batch/tasks/shared/LS_root/mounts/clusters/pc2/code/Users/jboris.bsm/Trabajo1/artifact_downloads/outputs
Artifacts: ['conda_env_v_1_0_0.yml', 'engineered_feature_names.json', 'env_dependencies.json', 'featurization_summary.json', 'generated_code', 'internal_cross_validated_models.pkl', 'mlflow-model', 'model.onnx', 'model.pkl', 'model_onnx.json', 'pipeline_graph.json', 'run_id.txt', 'scoring_file_pbi_v_1_0_0.py', 'scoring_file_v_1_0_0.py', 'scoring_file_v_2_0_0.py']


In [None]:
# Show the files of the downloaded MLflow model folder.
mlflow_model_dir = "./artifact_downloads/outputs/mlflow-model"
os.listdir(mlflow_model_dir)

# REGISTRAMOS EL MEJOR MODELO

## 1 CREANDO ENDPOINT

In [21]:
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings,
)
from azure.ai.ml.constants import ModelType

In [22]:
import datetime

# The endpoint name gets a suffix built from the current month, day, hour,
# minute and microseconds, so every run of this cell produces a new name.
name_suffix = datetime.datetime.now().strftime("%m%d%H%M%f")
online_endpoint_name = "mobileClassif-" + name_suffix

# Definition of the key-authenticated managed online endpoint (not yet created).
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    auth_mode="key",
    description="this is a sample online endpoint for mlflow model",
    tags={"foo": "bar"},
)

In [23]:
# Start endpoint creation and wait for it; the result is shown as the cell output.
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://mobileclassif-10300242231395.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://mobileclassif-10300242231395.eastus.inference.ml.azure.com/swagger.json', 'name': 'mobileclassif-10300242231395', 'description': 'this is a sample online endpoint for mlflow model', 'tags': {'foo': 'bar'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/grptrabajo1/providers/microsoft.machinelearningservices/workspaces/grptrabajo1/onlineendpoints/mobileclassif-10300242231395', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oe:f49da61b-a267-4e18-b328-3e9e8da5ea4f:643f15ec-57d8-475b-aa36-7ff24933a5cb?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/f3aa

## 2 Register best model and deploy

In [28]:
model_name = "Mobile-model"

# Location of the MLflow model inside the outputs of the best AutoML child run.
best_model_uri = (
    f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/"
)

model = Model(
    name=model_name,
    path=best_model_uri,
    type=AssetTypes.MLFLOW_MODEL,
    description="my sample classification model",
)

# Register the model in the workspace; its id is used by the deployment below.
registered_model = ml_client.models.create_or_update(model)

# Deployamos

In [33]:
# Probe settings are built first so the deployment definition stays compact.
liveness_settings = ProbeSettings(
    failure_threshold=30,
    success_threshold=1,
    timeout=2,
    period=10,
    initial_delay=2000,
)
readiness_settings = ProbeSettings(
    failure_threshold=10,
    success_threshold=1,
    timeout=10,
    period=10,
    initial_delay=2000,
)

# Deployment of the registered model on a single Standard_DS3_V2 instance
# behind the endpoint defined earlier.
deployment = ManagedOnlineDeployment(
    name="Mobile-deploy",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS3_V2",
    instance_count=1,
    liveness_probe=liveness_settings,
    readiness_probe=readiness_settings,
)

..

In [None]:
# Start the deployment and wait for it; the result is shown as the cell output.
deployment_poller = ml_client.online_deployments.begin_create_or_update(deployment)
deployment_poller.result()

In [None]:
# Send all traffic to the new deployment.
endpoint.traffic = {"Mobile-deploy": 100}

# Wait for the update to finish, like the other begin_* calls in this notebook;
# without .result() the following cells could run before the change is applied.
ml_client.begin_create_or_update(endpoint).result()

# Testeamos

In [None]:
import json

import pandas as pd

# Load the test set and remove the label column before scoring.
test_data = pd.read_csv("./data/test.csv")
test_data = test_data.drop("price_range", axis=1)

# Serialise the rows as JSON records, then build the request payload with the
# json module instead of concatenating string fragments by hand.
test_data_json = test_data.to_json(orient="records", indent=4)
data = json.dumps({"input_data": {"data": json.loads(test_data_json)}})

request_file_name = "mobile.json"

# Explicit encoding so the request file does not depend on the platform default.
with open(request_file_name, "w", encoding="utf-8") as request_file:
    request_file.write(data)

# Score the request file against the deployment; the response is the cell output.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="Mobile-deploy",
    request_file=request_file_name,
)

# Endpoint

In [None]:
# Refresh the endpoint object from the workspace.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# Print the current traffic split, then the scoring URI.
for attribute_name in ("traffic", "scoring_uri"):
    print(getattr(endpoint, attribute_name))

# Cargado de modelo y predicciones

In [None]:
!pip install azureml-mlflow

In [None]:
!pip install mlflow

In [None]:
# MLTable data asset used to retrain the model.
# NOTE(review): the asset name "bankmarketing-retrain-data" does not match the
# mobile-price data used in the rest of this notebook - confirm it is intended.
retrain_data = Data(
    name="bankmarketing-retrain-data",
    description="Updated training dataset, includes validation data.",
    type=AssetTypes.MLTABLE,
    path="./data/retrained-mltable-folder/",
)

# Register the asset and keep the returned, registered version of it.
retrain_data = ml_client.data.create_or_update(retrain_data)

In [None]:
import mlflow

# Look the workspace up through the client and keep its MLflow tracking URI.
current_workspace = ml_client.workspaces.get(name=ml_client.workspace_name)
MLFLOW_TRACKING_URI = current_workspace.mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

# Point MLflow at that URI, then read it back to confirm.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

active_tracking_uri = mlflow.get_tracking_uri()
print("\nCurrent tracking uri: {}".format(active_tracking_uri))

# Descargar Script

In [None]:
# Imported here as well so this cell does not depend on earlier cells for
# os and download_artifacts.
import os

from mlflow.artifacts import download_artifacts
from mlflow.tracking.client import MlflowClient

# Initialize MLFlow client
mlflow_client = MlflowClient()

# Get the parent run, using the submitted job's name as the run id.
job_name = returned_job.name
mlflow_parent_run = mlflow_client.get_run(job_name)
print("Parent Run: ")
print(mlflow_parent_run)

# Get the best model's child run
best_child_run_id = mlflow_parent_run.data.tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)
best_run = mlflow_client.get_run(best_child_run_id)

# Download the run's "outputs" artifacts.
# exist_ok=True makes the cell safe to re-run and avoids the separate
# "check, then create" step of the earlier os.path.exists/os.mkdir version.
local_dir = "./artifact_downloads/"
os.makedirs(local_dir, exist_ok=True)
local_path = download_artifacts(
    run_id=best_run.info.run_id, artifact_path="outputs", dst_path=local_dir
)
print("Artifacts downloaded in: {}".format(local_path))
print("Artifacts: {}".format(os.listdir(local_path)))

In [None]:
import yaml

# MLmodel file of the downloaded MLflow model.
model_file = os.path.join(local_dir, "outputs", "mlflow-model", "MLmodel")

with open(model_file, "r") as model_stream:
    model_yaml = yaml.safe_load(model_stream)

# The environment name reuses the part after the last ":" of the base image
# recorded in the model metadata.
base_image = model_yaml["metadata"]["azureml.base_image"]
image_tag = base_image.split(":")[-1]
training_environment_name = "AzureML-AutoML:" + image_tag
print("Training emvironment {}".format(training_environment_name))

# Comando

In [None]:
# `command` was used here without ever being imported in this notebook.
from azure.ai.ml import command

# Run the generated training script against the retraining data asset.
command_str = f"python script.py --training_dataset_uri {retrain_data.path}"
command_job = command(
    code="./artifact_downloads/outputs/generated_code/",
    command=command_str,
    tags=dict(automl_child_run_id=best_run.info.run_id),
    environment=training_environment_name,
    # The cluster name is held in cpu_compute_target; the previously
    # referenced `compute_name` is not defined anywhere in this notebook.
    compute=cpu_compute_target,
    experiment_name=exp_name,
)

# Submit through the jobs operations, as the AutoML job above does.
script_job = ml_client.jobs.create_or_update(command_job)

In [None]:
# A bare expression is only displayed when it is the last line of a cell, so
# the Studio URL is printed explicitly before streaming the job logs.
print(script_job.studio_url)
ml_client.jobs.stream(script_job.name)