![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/NotebookVM/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.png)

In [1]:
import os
import shutil
import requests
import tempfile
import azureml.core
from azureml.core import Workspace, Experiment, Datastore, Environment
from azureml.widgets import RunDetails
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import DockerConfiguration
from azureml.core import Experiment
from azureml.core import Dataset

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

ws = Workspace.from_config()
# env = Environment.get(workspace=ws, name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu")

# Enable Docker
# docker_config = DockerConfiguration(use_docker=True)

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

experiment_name = 'train-on-nasaexperiment'
experiment = Experiment(workspace = ws, name = experiment_name)

SDK version: 1.38.0
mlprojecta
parta-projecta
koreacentral
e6ec79e7-c7e5-4312-85d0-75e8285c09dd


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

aml_compute_target1 = "cpu-cluster-automl1"
try:
    aml_compute1 = AmlCompute(ws, aml_compute_target1)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = 1, 
                                                                max_nodes = 4,
                                                                identity_type="UserAssigned",
                                                                identity_id=['/subscriptions/e6ec79e7-c7e5-4312-85d0-75e8285c09dd/resourcegroups/PartA-projectA/providers/Microsoft.ManagedIdentity/userAssignedIdentities/testidentity'])    
    aml_compute1 = ComputeTarget.create(ws, aml_compute_target1, provisioning_config)
    aml_compute1.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    
print("Azure Machine Learning Compute attached")

aml_compute_target2 = "cpu-cluster-custom1"
try:
    aml_compute2 = AmlCompute(ws, aml_compute_target2)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = 1, 
                                                                max_nodes = 4,
                                                                identity_type="UserAssigned",
                                                                identity_id=['/subscriptions/e6ec79e7-c7e5-4312-85d0-75e8285c09dd/resourcegroups/PartA-projectA/providers/Microsoft.ManagedIdentity/userAssignedIdentities/testidentity'])    
    aml_compute2 = ComputeTarget.create(ws, aml_compute_target2, provisioning_config)
    aml_compute2.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    
print("Azure Machine Learning Compute attached")


found existing compute target.
Azure Machine Learning Compute attached
found existing compute target.
Azure Machine Learning Compute attached


In [3]:
# Use a RunConfiguration to specify some additional requirements for this step.
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# create a new runconfig object
run_config = RunConfiguration()

# enable Docker 
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# specify CondaDependencies obj
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'], 
    pip_packages=['azureml-sdk[automl]', 'numpy', 'pandas', 'matplotlib'])

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [4]:
from azureml.core import ScriptRunConfig

# Uses default values for PythonScriptStep construct.

source_directory = './scripts'
print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))

os.makedirs(source_directory, exist_ok=True)
shutil.copy('../nasa_forecast/helper.py', source_directory)

step1 = PythonScriptStep(name="Auto ML",
                         script_name="auto-ml-forecasting-nasa.py", 
                         compute_target=aml_compute1, 
                         source_directory=source_directory,
                         runconfig=run_config,
                         allow_reuse=False)

# All steps use the same Azure Machine Learning compute target as well
step2 = PythonScriptStep(name="Custom ML",
                         script_name="custom-ml-forecasting-nasa.py", 
                         compute_target=aml_compute2, 
                         source_directory=source_directory,
                         runconfig=run_config,
                         allow_reuse=False)



Source directory for the step is /mnt/batch/tasks/shared/LS_root/mounts/clusters/testj/code/Users/brotheroak/PartA-ProjectA/model/pipeline_test/scripts.


In [5]:
# list of steps to run
steps = [step1, step2]
print("Step lists created")

pipeline1 = Pipeline(workspace=ws, steps=steps)
print ("Pipeline is built")

Step lists created
Pipeline is built


In [6]:
pipeline1.validate()
print("Pipeline validation complete")

Step Auto ML is ready to be created [f3b525c0]Step Custom ML is ready to be created [b50d56df]

Pipeline validation complete


In [7]:
pipeline_run1 = Experiment(ws, 'TimeSeriesMetric').submit(pipeline1, regenerate_outputs=False)
print("Pipeline is submitted for execution")

Created step Auto ML [f3b525c0][6b48130b-1704-49ca-985d-9ea83670f1c0], (This step will run and generate new outputs)Created step Custom ML [b50d56df][5890c13e-cc2f-45e4-830a-1a2160e252d6], (This step will run and generate new outputs)

Submitted PipelineRun b643b6a6-5320-4d54-9655-17c2aadbd64a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/b643b6a6-5320-4d54-9655-17c2aadbd64a?wsid=/subscriptions/e6ec79e7-c7e5-4312-85d0-75e8285c09dd/resourcegroups/parta-projecta/workspaces/mlprojecta&tid=6331b4f9-e453-4d79-805b-a5af2809ef32
Pipeline is submitted for execution


In [8]:
from azureml.core import Dataset

dataset = Dataset.get_by_id(ws, "58e1e0ba-8e66-4b9c-87d8-92d0884ce93b")
df = dataset.to_pandas_dataframe()

In [9]:
cnt4nm = df.count(axis=1)[-1]

In [10]:
# pipeline_run1.get_details()
submitter = pipeline_run1.get_details()['submittedBy'].split('(')[0]
runType   = pipeline_run1.get_details()['properties']['runType']
pipeline_run1.display_name = submitter + '/' + str(cnt4nm) + '/' + runType
pipeline_run1.display_name

'Ji Ho Park/19/SDK'

In [11]:
RunDetails(pipeline_run1).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [12]:
from azureml.core import Run

run = Run(experiment, 'AutoML_cf97d856-dffc-4700-8268-5f34bbc4c290')
run.get_status
# run = Run.get_context()
# runid = Run.id('AutoML_cf97d856-dffc-4700-8268-5f34bbc4c290')
# run.display_name = <new display name>


<bound method Run.get_status of Run(Experiment: train-on-nasaexperiment,
Id: AutoML_cf97d856-dffc-4700-8268-5f34bbc4c290,
Type: automl,
Status: Completed)>

In [13]:
from azureml.train.automl.run import AutoMLRun
run = AutoMLRun(experiment, 'AutoML_cf97d856-dffc-4700-8268-5f34bbc4c290')

In [14]:
best_run, fitted_model = run.get_output()
print(fitted_model)

In [None]:
run.get_best_child()

In [None]:
bestrun = best_run.get_properties()

In [None]:
bestrun['run_algorithm']

In [None]:
bestrun