In [1]:
from azureml.core import Workspace

# Load the workspace described by the local config file, then list its
# name, resource group, location and subscription id, one per line.
ws = Workspace.from_config()
workspace_summary = (ws.name, ws.resource_group, ws.location, ws.subscription_id)
print(*workspace_summary, sep='\n')

Found the config file in: /data/home/demouser/notebooks/MTC_AzureAILabs/ML-AML-Walkthrough/aml_config/config.json
jkamlworkshop
jkamlworkshop
southcentralus
952a710c-8d9c-40c1-9fec-f752138cc0b3


In [2]:
# Create an Azure ML Compute cluster, or connect to it if it already exists
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Settings for the cluster: its name, autoscale bounds and VM size.
# NOTE(review): a minimum of 1 node presumably keeps one VM allocated while the
# cluster is idle - confirm this is intended.
cluster_name = "cpu-cluster"
cluster_min_nodes = 1
cluster_max_nodes = 3
vm_size = "STANDARD_DS11_V2"

# Look the name up once and reuse the cluster when the workspace already has it.
existing_targets = ws.compute_targets
if cluster_name in existing_targets:
    compute_target = existing_targets[cluster_name]
    # Stop here instead of continuing with a target of the wrong kind: later
    # cells assign compute_target to the run configuration.
    if not isinstance(compute_target, AmlCompute):
        raise TypeError(
            "A compute target named '{}' was found, but it is not of type AmlCompute "
            "(found {}).".format(cluster_name, type(compute_target).__name__)
        )
    print('Found existing compute target, using this compute target instead of creating:  ' + cluster_name)
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=cluster_min_nodes,
                                                                max_nodes=cluster_max_nodes)

    # Create the cluster.
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning_config)

    # Wait for provisioning, for at most 20 minutes. A minimum node count can be
    # polled for; when none is provided the scale settings of the cluster are used.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of the current cluster status, use the 'status' property.
    print(compute_target.status.serialize())

Found existing compute target, using this compute target instead of creating:  cpu-cluster


In [4]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Run
from azureml.core import ScriptRunConfig

# Start from a fresh run configuration for the Python framework.
run_config = RunConfiguration(framework="python")

# Point the run at the Azure ML Compute cluster.
run_config.target = compute_target

# Automated ML jobs on an Azure ML Compute cluster require Docker, so enable it.
run_config.environment.docker.enabled = True

# No data references are set here: the AutoML configuration is given a
# data script (get_data.py) instead.


In [5]:
import os
# File name of the data script and the local folder it is written to;
# the folder is created when it does not exist yet.
script_name = 'get_data.py'
project_folder = './project'
os.makedirs(name=project_folder, exist_ok=True)

In [6]:
%%writefile $project_folder/get_data.py

from sklearn import datasets
from scipy import sparse
import numpy as np

def get_data():
    """Load the scikit-learn digits dataset as training inputs.

    Returns:
        dict: ``"X"`` holds the ``data`` attribute and ``"y"`` the ``target``
        attribute of the object returned by ``datasets.load_digits()``.
    """
    dataset = datasets.load_digits()
    features, labels = dataset.data, dataset.target
    return {"X": features, "y": labels}

Overwriting ./project/get_data.py


In [11]:
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
import logging


# Keyword settings that are unpacked into AutoMLConfig.
# NOTE(review): max_concurrent_iterations (5) is larger than cluster_max_nodes (3)
# used when this notebook creates the cluster - confirm this is intended.
automl_settings = dict(
    iteration_timeout_minutes=2,
    iterations=20,
    n_cross_validations=5,
    primary_metric='AUC_weighted',
    preprocess=False,
    max_concurrent_iterations=5,
    verbosity=logging.INFO,
)

# Assemble the AutoML classification job: the project folder and data script,
# the run configuration built earlier, and the settings dict unpacked as keywords.
automl_config = AutoMLConfig(
    task='classification',
    path=project_folder,
    data_script=project_folder + "/get_data.py",
    run_configuration=run_config,
    debug_log='automl_errors.log',
    **automl_settings
)

In [13]:
from azureml.core.experiment import Experiment

# Create the experiment object in the workspace under a fixed name.
experiment_name = 'automl-remote-batchai'
experiment = Experiment(workspace=ws, name=experiment_name)


In [14]:

# Submit the AutoML configuration to the experiment with output shown in the notebook.
remote_run = experiment.submit(config=automl_config, show_output=True)

Running on remote compute: cpu-cluster
Parent Run ID: AutoML_9f65abc9-1cbc-43a3-a965-752d3f32b900
*******************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
*******************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper KNN                      0:01:01       0.9982    0.9982
         1   StandardScalerWrapper KNN                      0:01:30       0.9989    0.9989
         2   MaxAbsScaler LightGBM                          0:02:18       0.9968    0.9989
         3   RobustScaler LinearSVM                         0:02:49       0.9846    0.9989
     