In [2]:
import configparser
import os

from azure.ai.ml import MLClient
from azure.ai.ml import command, Input
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import AmlCompute,Environment
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
from azure.identity import DefaultAzureCredential

In [3]:
# Load the Azure workspace coordinates from the [Azure] section of config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Without this check a missing config.ini would
# only surface later as a misleading "No section: 'Azure'" error.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found or could not be read from the working directory"
    )
subscription_id = config.get('Azure', 'subscription_id')
resource_group = config.get('Azure', 'resource_group')
workspace_name = config.get('Azure', 'workspace')
datastore_name = config.get('Azure', 'datastore_name')
clean_data_path = config.get('Azure', 'clean_data_path')
# NOTE(review): this echoes the subscription id into the saved notebook output;
# consider clearing the output before sharing the notebook.
print(subscription_id, resource_group, workspace_name, datastore_name, clean_data_path)

f3aa5221-5b34-4091-bcec-acf7b816f5b6 GrpTrabajo1 GrpTrabajo1 workspaceblobstore UI/clean_data.csv


In [4]:
# Build the workspace client from the values read from config.ini.
# The previous extra MLClient.from_config(credential) call was removed: its
# result was overwritten immediately, and it made the cell depend on a separate
# config.json file being discoverable even though that client was never used.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

Found the config file in: /config.json


In [5]:
# Name of the compute cluster that the jobs in this notebook are submitted to.
cpu_compute_target = "cluster-trabajo1"

# Reuse the cluster when the lookup succeeds. Any exception raised by the
# lookup is treated as "cluster not available" and triggers provisioning.
# NOTE(review): the broad `except Exception` also hides auth/network failures;
# consider narrowing it to the SDK's not-found error.
try:
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
except Exception:
    # Submit the cluster definition and block until the operation finishes.
    cpu_cluster = ml_client.compute.begin_create_or_update(
        AmlCompute(
            name=cpu_compute_target,
            type="amlcompute",
            size="STANDARD_DS12_V2",
            tier="Dedicated",
            min_instances=0,
            max_instances=6,
            idle_time_before_scale_down=120,
        )
    ).result()

# Configuración de Entorno

In [6]:
env_name = 'sklearn-env'

# Describe the environment (base image plus the conda spec in ./env/conda.yaml)
# and register it in one step. `job_env` holds the object returned by the
# workspace, whose name and version are used to reference it from jobs.
job_env = ml_client.environments.create_or_update(
    Environment(
        name=env_name,
        image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
        conda_file='./env/conda.yaml',
        description="sklearn 0.24.2",
    )
)

In [7]:
# Look up the registered "Mobile-Data" data asset (version "initial") and wrap
# its path as a single-file job input.
dirT = ml_client.data.get(name="Mobile-Data", version="initial")
my_training_data_input = Input(
    path=dirT.path,
    type=AssetTypes.URI_FILE,
)

# Árbol de decisión

In [8]:
# Command job that runs ./src/train.py with one fixed hyperparameter setting.
# The ${{inputs.*}} placeholders are filled from the `inputs` mapping below.
job = command(
    display_name="mobile-exp-decision-tree-hyp",
    experiment_name="mobile-decision-tree-hyp",
    code="./src",
    # Adjacent string literals are concatenated into one command line.
    command=(
        "python train.py"
        " --dataset_path ${{inputs.dataset_path}}"
        " --min_samples_split ${{inputs.min_samples_split}}"
        " --criterion ${{inputs.criterion}}"
    ),
    inputs={
        "dataset_path": my_training_data_input,
        "min_samples_split": 3,
        "criterion": "entropy",
    },
    environment=f"{job_env.name}:{job_env.version}",
    compute=cpu_compute_target,
)


In [8]:
# Submit the decision-tree command job to the workspace and keep the returned
# job object. The same `returned_job` name is reassigned later by the voting
# job submission.
returned_job = ml_client.jobs.create_or_update(job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.0 MBs): 100%|███

In [None]:
# Re-invoke the decision-tree job with search spaces in place of the fixed
# hyperparameter values.
job_for_sweep = job(
    min_samples_split=Choice(values=[3, 5, 7, 9]),
    criterion=Choice(values=["entropy", "gini"]),
)

# Random sampling over the choices above, at most 12 trials, 4 at a time.
# NOTE(review): "F1 Score" presumably has to match the metric name logged by
# train.py - confirm against the training script.
sweep_job = job_for_sweep.sweep(
    primary_metric="F1 Score",
    goal="Maximize",
    sampling_algorithm="random",
    compute=cpu_compute_target,
    max_total_trials=12,
    max_concurrent_trials=4,
)

# Submit the sweep, then stream its logs into the cell output.
returned_sweep_job = ml_client.create_or_update(sweep_job)
ml_client.jobs.stream(returned_sweep_job.name)

# Voting

In [9]:
# Command job that runs ./src/train_voting.py with one fixed hyperparameter
# setting. The ${{inputs.*}} placeholders are filled from `inputs` below.
job_voting = command(
    display_name="mobile-exp-voting-hyp",
    experiment_name="mobile-exp-voting-hyp",
    code="./src",
    # Adjacent string literals are concatenated into one command line.
    command=(
        "python train_voting.py"
        " --dataset_path ${{inputs.dataset_path}}"
        " --n_estimators ${{inputs.n_estimators}}"
        " --voting ${{inputs.voting}}"
    ),
    inputs={
        "dataset_path": my_training_data_input,
        "n_estimators": 100,
        "voting": "hard",
    },
    environment=f"{job_env.name}:{job_env.version}",
    compute=cpu_compute_target,
)

In [10]:
# Submit the voting command job to the workspace. This reassigns
# `returned_job`, so the handle from the earlier decision-tree submission is
# no longer reachable through this name.
returned_job = ml_client.jobs.create_or_update(job_voting)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.01 MBs): 100%|██

In [15]:
# Re-invoke the voting job with search spaces in place of the fixed
# hyperparameter values.
job_voting_for_sweep = job_voting(
    n_estimators=Choice(values=[30, 50, 100]),
    voting=Choice(values=["soft", "hard"]),
)

# Random sampling over the choices above, at most 12 trials, 4 at a time.
# NOTE(review): "F1 Score" presumably has to match the metric name logged by
# train_voting.py - confirm against the training script.
sweep_job_voting = job_voting_for_sweep.sweep(
    primary_metric="F1 Score",
    goal="Maximize",
    sampling_algorithm="random",
    compute=cpu_compute_target,
    max_total_trials=12,
    max_concurrent_trials=4,
)

# Submit the sweep, then stream its logs into the cell output.
returned_sweep_job_voting = ml_client.create_or_update(sweep_job_voting)
ml_client.jobs.stream(returned_sweep_job_voting.name)

RunId: lime_squash_wx817bx72t
Web View: https://ml.azure.com/runs/lime_squash_wx817bx72t?wsid=/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpTrabajo1/workspaces/GrpTrabajo1

Streaming azureml-logs/hyperdrive.txt

[2023-10-30T01:47:35.821238][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space
[2023-10-30T01:47:36.2874705Z][SCHEDULER][INFO]Scheduling job, id='lime_squash_wx817bx72t_0' 
[2023-10-30T01:47:36.4087939Z][SCHEDULER][INFO]Scheduling job, id='lime_squash_wx817bx72t_1' 
[2023-10-30T01:47:36.5678817Z][SCHEDULER][INFO]Scheduling job, id='lime_squash_wx817bx72t_2' 
[2023-10-30T01:47:36.6988950Z][SCHEDULER][INFO]Scheduling job, id='lime_squash_wx817bx72t_3' 
[2023-10-30T01:47:36.602930][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.
[2023-10-30T01:47:36.8170379Z][SCHEDULER][INFO]Successfully scheduled a job. Id='lime_squash_wx817bx72t_0' 
[2023-10-30T01:47:36.8543309Z][SCHEDULER][INFO]Suc