In [1]:
from azureml.core import Workspace, Experiment

# Connect to the workspace described by the local config file and open
# (or create) the experiment that all runs in this notebook belong to.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="alexei-ml")

# Summarise which workspace we are attached to, one field per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print("\n".join(workspace_summary))

# Start an interactive run on the experiment.
run = exp.start_logging()

Workspace name: airline-ws
Azure region: francecentral
Subscription id: af783b88-9530-433e-9520-32a8accf75a5
Resource group: alexeirg


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name and VM size of the AmlCompute cluster used for training.
cluster_name = "airline-cluster"
vm_size = "Standard_D2_V2"

try:
    # Look up an existing compute target by name; the except branch below
    # handles the case where none is registered in the workspace.
    computeTarget = ComputeTarget(workspace=ws, name=cluster_name)
    print('Cluster already exists, reutilize it.')
except ComputeTargetException:
    print('Creating compute cluster from scratch.')
    # Pass vm_size by keyword so the call does not depend on argument order.
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size, max_nodes=2)
    computeTarget = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until provisioning has finished, streaming progress to the cell output.
computeTarget.wait_for_completion(show_output=True)

Cluster compute cluster form scratch.
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [9]:
# Load the registered dataset from the Workspace.
# NOTE: update the key to match the dataset name
# Imported here because `Dataset` is not imported by any earlier cell; without
# it the fallback branch below fails with NameError.
from azureml.core import Dataset

key = "airline-ds"
description_text = "Airline DataSet"

# `found` is kept for any later cell that may read it.
found = key in ws.datasets

if found:
    dataset = ws.datasets[key]
else:
    # Explicit lookup by name; this surfaces the SDK's own error if nothing
    # is registered under `key`.
    dataset = Dataset.get_by_name(ws, name=key)

# Materialise the dataset as a pandas DataFrame on both paths so `df` is
# always defined after this cell.
df = dataset.to_pandas_dataframe()

In [10]:
# Convert the dataset to a pandas DataFrame and preview its first five rows.
df = dataset.to_pandas_dataframe()
df.head(n=5)

Unnamed: 0,Airline,Flight,AirportFrom,AirportTo,DayOfWeek,Time,Length,Delay
0,CO,269,SFO,IAH,3,15,205,1
1,US,1558,PHX,CLT,3,15,222,1
2,AA,2400,LAX,DFW,3,20,165,1
3,AA,2466,SFO,DFW,3,20,195,1
4,AS,108,ANC,SEA,3,30,202,0


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler: random search over the two arguments passed to
# train.py (--C drawn uniformly from [0.0001, 0.001], --max_iter from a fixed set).
parameter_space = {"--C": uniform(0.0001, 0.001), "--max_iter": choice(50, 100, 150)}
ps = RandomParameterSampling(parameter_space)

# Specify an early-termination policy.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Create the training folder if it is missing. exist_ok avoids the separate
# listdir() check (and the race between checking and creating).
os.makedirs("./training", exist_ok=True)

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory=".",
    script='train.py',
    compute_target=cluster_name,
    environment=sklearn_env
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
# NOTE(review): max_concurrent_runs=4 is higher than the cluster's max_nodes=2
# configured when the cluster is created; presumably the extra runs just queue -- confirm.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    primary_metric_name='Accuracy',
    # Use the enum member directly instead of constructing it from its string value.
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
    policy=policy
)

In [13]:
# Launch the HyperDrive sweep on the experiment and attach the monitoring widget.
hyperdrive_run = exp.submit(hyperdrive_config, show_output=True)
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

# Stream logs until the sweep ends, then verify that it finished successfully.
hyperdrive_run.wait_for_completion(show_output=True)

final_status = hyperdrive_run.get_status()
assert final_status == "Completed"

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_2547facb-198f-4d28-b0be-26fb52a3b5af
Web View: https://ml.azure.com/runs/HD_2547facb-198f-4d28-b0be-26fb52a3b5af?wsid=/subscriptions/af783b88-9530-433e-9520-32a8accf75a5/resourcegroups/alexeirg/workspaces/airline-ws&tid=5b38c313-3bf3-4f5b-90e6-8e32480e8986

Streaming azureml-logs/hyperdrive.txt

[2022-08-04T10:06:23.540950][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space
[2022-08-04T10:06:24.5174255Z][SCHEDULER][INFO]Scheduling job, id='HD_2547facb-198f-4d28-b0be-26fb52a3b5af_0' 
[2022-08-04T10:06:24.6770069Z][SCHEDULER][INFO]Scheduling job, id='HD_2547facb-198f-4d28-b0be-26fb52a3b5af_1' 
[2022-08-04T10:06:24.8421976Z][SCHEDULER][INFO]Scheduling job, id='HD_2547facb-198f-4d28-b0be-26fb52a3b5af_2' 
[2022-08-04T10:06:24.8661817Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_2547facb-198f-4d28-b0be-26fb52a3b5af_0' 
[2022-08-04T10:06:24.9199777Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_2547facb-198f-4d28-b0be-26fb52a3b5af_1' 
[2022-08-

To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code CCWGXLY7F to authenticate.


AuthenticationError: AuthenticationError:
	Message: AADSTS70016: OAuth 2.0 device flow error. Authorization is pending. Continue polling.
Trace ID: 203df8a8-0f5b-4e0c-b05a-4967adf74000
Correlation ID: d7e08a1f-2878-4592-ac60-198e62e0b421
Timestamp: 2022-08-04 10:27:42Z
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "AADSTS70016: OAuth 2.0 device flow error. Authorization is pending. Continue polling.\r\nTrace ID: 203df8a8-0f5b-4e0c-b05a-4967adf74000\r\nCorrelation ID: d7e08a1f-2878-4592-ac60-198e62e0b421\r\nTimestamp: 2022-08-04 10:27:42Z"
    }
}

In [None]:
import joblib
# Get your best run and save the model from that run.

best_hyperdrive_run = hyperdrive_run.get_best_run_by_primary_metric()

# No best run is available when no child run logged the primary metric;
# fail with a clear message instead of an AttributeError on None below.
if best_hyperdrive_run is None:
    raise RuntimeError(
        "HyperDrive returned no best run: no child run logged the primary metric."
    )

best_run_metrics = best_hyperdrive_run.get_metrics()

print('Best run:', best_hyperdrive_run)
print('Metrics:', best_run_metrics)

# Register the model file produced by the best run under a fixed model name.
model = best_hyperdrive_run.register_model(model_name="best_hyperdrive_model", model_path="./outputs/model.pkl")

In [None]:
# Delete the compute cluster to stop incurring cost. `cluster_name` is only the
# string name; the delete call must go to the compute target object itself.
computeTarget.delete()