### Load your Libraries

In [129]:
# Load Azure Libraries
from azureml.core import Workspace, Datastore, Dataset, Experiment, Environment
from azureml.core.authentication import InteractiveLoginAuthentication
import logging
import pandas as pd
import numpy as np
import json
import os

#Load Libraries for Deployment
from azureml.core.model import Model
from azureml.pipeline.steps import PythonScriptStep
from azureml.contrib.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PublishedPipeline, PipelineEndpoint,PipelineParameter,PipelineRun
from azureml.core.runconfig import RunConfiguration, CondaDependencies, DEFAULT_CPU_IMAGE, DEFAULT_GPU_IMAGE
from azureml.widgets import RunDetails

### Check your version of the AzureML Python SDK

In [123]:
# `from azureml.core import ...` does not bind the name `azureml` itself, so the
# package is imported explicitly here; otherwise this cell raises NameError on a
# fresh kernel.
import azureml.core

print("You are currently using version " + azureml.core.VERSION + " of the Azure ML SDK")

You are currently using version 1.9.0 of the Azure ML SDK


### Set your Workspace
The first time you run this step, you will be asked to sign in using the hyperlink and code generated below.<br>How often you are asked to sign in again depends on your organization's policies.<br>This step connects your notebook to your Azure Machine Learning Service Workspace.

In [124]:
# Retrieve your workspace from config.
# No path is passed, so the SDK's default config lookup is used. The first run
# may trigger an interactive sign-in prompt.
ws = Workspace.from_config()

### Set your Datastore
A Datastore is a pointer to an Azure Storage Account or Azure SQL Database that is linked to your AMLS Workspace.

In [41]:
# Look up the Datastore registered in the workspace under this name
datastore_name = "teaching_datalake"  # CHANGE THIS
datastore = Datastore.get(workspace=ws, datastore_name=datastore_name)

### Set your Remote Compute Target

In [42]:
# Retrieve the existing compute target that runs the pipeline steps and AutoML remotely.
# ComputeTarget is already imported in the first cell, so it is not re-imported here.
compute_name = 'lp-training'  # CHANGE THIS
compute_target = ComputeTarget(workspace=ws, name=compute_name)

### Retrieve your Model

In [43]:
# Fetch the registered model by name (no explicit version is requested)
model_name = 'AutoML_Titanic_Classification'
model = Model(workspace=ws, name=model_name)

### Retrieve your Environment

In [44]:
# Look up the Environment registered in the workspace under this name
environment_name = 'automl-environment'  # CHANGE THIS
environment = Environment.get(workspace=ws, name=environment_name)

### Set your Dataset

In [45]:
# Reference the already-registered dataset by name, using its latest version
dataset_name = "titanic_training_transformed_automl"
dataset = Dataset.get_by_name(workspace=ws, name=dataset_name, version='latest')

In [86]:
# Wrap the dataset as a named input (keyed by the dataset's own name) so it can
# be passed through the pipeline
named_input = dataset.as_named_input(name=dataset_name)

### Create a Dataset Registration Script and a Model Registration Script for your Pipeline

In [87]:
# Local folder that holds the pipeline step scripts; no error if it already exists
os.makedirs(name='AutoML_Retraining', exist_ok=True)

### Create your Dataset Registration Step

In [91]:
# Create a Run Configuration that reuses the retrieved environment and runs it in
# a Docker container. This config is passed as `runconfig` to the
# Register_AutoML_Model step.
run_config = RunConfiguration()
run_config.environment = environment
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE # Use DEFAULT_GPU_IMAGE for Deep Learning Jobs

In [92]:
# Dedicated conda/pip dependency set and Docker run configuration; `rc` is the
# runconfig passed to the Register_AutoML_Dataset step.
# NOTE(review): the 'azureml-core<0.1.10' pin and the test package index look
# like leftovers from an early preview sample - confirm they are still intended.
cd = CondaDependencies.create(
    conda_packages=['numpy==1.16.2', 'pandas==0.23.4'],
    pip_packages=['azureml-core<0.1.10', 'azureml-dataprep'],
    pip_indexurl="https://azuremlsdktestpypi.azureedge.net/datasets_uploading",
)

rc = RunConfiguration(conda_dependencies=cd)
rc.environment.docker.enabled = True
rc.environment.docker.base_image = DEFAULT_CPU_IMAGE

In [103]:
%%writefile AutoML_Retraining/AutoML_Dataset_Registration.py
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
# Load in Libaries
import pandas as pd
import numpy as np
import json
import os
import math
import argparse
from azureml.core import Run
from azureml.core import Workspace, Datastore, Dataset, Experiment, Environment

parser = argparse.ArgumentParser()
parser.add_argument("--datastore_name", dest="datastore_name", required=True)
parser.add_argument("--dataset_name", dest="dataset_name", required=True)
args = parser.parse_args()

print("Argument 1(datastore_name): %s" % args.datastore_name)
print("Argument 2(dataset_name): %s" % args.dataset_name)

# Set Workspace from Run Context
ws = Run.get_context().experiment.workspace


# Retrieve your Datastore
datastore_name = args.datastore_name
datastore = Datastore.get(ws, datastore_name)
print('Datastore Set')

dataset = Dataset.get_by_name(ws, name=args.dataset_name)
DF = dataset.to_pandas_dataframe()

#Dataset.Tabular.register_pandas_dataframe(DF, datastore, args.dataset_name, show_progress=True)

print('Dataset Registered')

Overwriting AutoML_Retraining/AutoML_Dataset_Registration.py


In [104]:
# Pipeline step that runs AutoML_Dataset_Registration.py on the remote compute
# target, passing the datastore and dataset names as command-line arguments
register_dataset_step = PythonScriptStep(
    name="Register_AutoML_Dataset",
    script_name="AutoML_Dataset_Registration.py",
    source_directory='AutoML_Retraining',
    arguments=[
        "--datastore_name", datastore_name,
        "--dataset_name", dataset_name,
    ],
    inputs=[],
    compute_target=compute_target,
    runconfig=rc,
    allow_reuse=False,
)

### Create your AutoML Model Training Step

In [105]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep

# Column AutoML should predict
target_column_name = 'Survived'

# Tunable AutoML settings, forwarded to AutoMLConfig as keyword arguments
automl_settings = dict(
    iteration_timeout_minutes=15,
    experiment_timeout_hours=0.5,
    n_cross_validations=3,
    primary_metric='accuracy',
    max_concurrent_iterations=3,
    max_cores_per_iteration=-1,
    verbosity=logging.INFO,
    enable_early_stopping=True,
)

# Classification run on the remote compute target, trained on the registered dataset
automl_config = AutoMLConfig(
    task='classification',
    debug_log='automl_errors.log',
    path=".",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name=target_column_name,
    **automl_settings
)

In [106]:
from azureml.pipeline.core import PipelineData, TrainingOutput

# Names under which the AutoML step's outputs are exposed by the pipeline
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

# Metrics produced by the AutoML step, stored on the datastore
metrics_data = PipelineData(
    name='metrics_data',
    datastore=datastore,
    pipeline_output_name=metrics_output_name,
    training_output=TrainingOutput(type='Metrics'),
)

# Model produced by the AutoML step; consumed by the model registration step
model_data = PipelineData(
    name='model_data',
    datastore=datastore,
    pipeline_output_name=best_model_output_name,
    training_output=TrainingOutput(type='Model'),
)

In [107]:
# AutoML training step; emits the metrics and model outputs and is never reused
# from a previous run
automl_step = AutoMLStep(
    name='automl_module',
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=False,
)

### Create your Model Registration Step

In [108]:
%%writefile AutoML_Retraining/AutoML_Model_Registration.py
from azureml.core.model import Model, Dataset
from azureml.core.run import Run, _OfflineRun
from azureml.core import Workspace
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", dest="model_name", required=True)
parser.add_argument("--model_path", dest="model_path", required=True)
parser.add_argument("--dataset_name", dest="dataset_name", required=True)
args = parser.parse_args()

print("Argument 1(model_name): %s" % args.model_name)
print("Argument 2(model_path): %s" % args.model_path)
print("Argument 3(dataset_name): %s" % args.dataset_name)

run = Run.get_context()
ws = None
if type(run) == _OfflineRun:
    ws = Workspace.from_config()
else:
    ws = run.experiment.workspace

train_ds = Dataset.get_by_name(ws, args.dataset_name)
datasets = [(Dataset.Scenario.TRAINING, train_ds)]

model = Model.register(workspace=ws,
                       model_path=args.model_path,
                       model_name=args.model_name,
                       datasets=datasets)

print("Registered version {0} of model {1}".format(model.version, model.name))

Overwriting AutoML_Retraining/AutoML_Model_Registration.py


In [109]:
# Pipeline step that runs AutoML_Model_Registration.py on the remote compute
# target; it consumes the model output of the AutoML step
register_model_step = PythonScriptStep(
    name="Register_AutoML_Model",
    script_name="AutoML_Model_Registration.py",
    source_directory='AutoML_Retraining',
    arguments=[
        "--model_name", model_name,
        "--model_path", model_data,
        "--dataset_name", dataset_name,
    ],
    inputs=[model_data],
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,
)

### Run your Dataset Registration, AutoML Training, and Model Registration Pipeline

In [110]:
# Chain the three steps so they run strictly in order:
# dataset registration -> AutoML training -> model registration
from azureml.pipeline.core import Pipeline, StepSequence

step_sequence = StepSequence(
    steps=[register_dataset_step, automl_step, register_model_step]
)
pipeline = Pipeline(workspace=ws, steps=step_sequence)



In [111]:
# Submit the pipeline as a run of the named experiment
experiment_name = 'AutoML-Titanic-Pipeline'
pipeline_run = Experiment(workspace=ws, name=experiment_name).submit(
    pipeline,
    pipeline_parameters={},
    show_output=True,
)

Created step Register_AutoML_Dataset [1a08c9e4][79650234-ef5b-4ad6-a69d-e4069fc12ae1], (This step will run and generate new outputs)Created step automl_module [bf1b9ce5][37219641-1ede-4917-bbd9-9a4a704dd0da], (This step will run and generate new outputs)
Created step Register_AutoML_Model [ec52edc6][dbdf7f63-b674-4f52-b671-252291397df2], (This step will run and generate new outputs)

Submitted PipelineRun 8efe12aa-660a-444b-8df3-39b1572555c3
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/AutoML-Titanic-Pipeline/runs/8efe12aa-660a-444b-8df3-39b1572555c3?wsid=/subscriptions/47a7ec0c-37ad-428b-9114-b87ea1057632/resourcegroups/ml-teaching/workspaces/ml-teaching-workspace


In [112]:
# GUI to see your Pipeline Run.
# Show the run-details widget, then wait for the pipeline to complete with
# output shown; the value returned by the last call is displayed as the cell result.
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 8efe12aa-660a-444b-8df3-39b1572555c3
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/AutoML-Titanic-Pipeline/runs/8efe12aa-660a-444b-8df3-39b1572555c3?wsid=/subscriptions/47a7ec0c-37ad-428b-9114-b87ea1057632/resourcegroups/ml-teaching/workspaces/ml-teaching-workspace
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 06c74e49-1577-4859-967d-277b4185723f
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/AutoML-Titanic-Pipeline/runs/06c74e49-1577-4859-967d-277b4185723f?wsid=/subscriptions/47a7ec0c-37ad-428b-9114-b87ea1057632/resourcegroups/ml-teaching/workspaces/ml-teaching-workspace
StepRun( Register_AutoML_Dataset ) Status: NotStarted
StepRun( Register_AutoML_Dataset ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_e3a60113670fd1f1fc642ad0361759fd77047cf292b32824b19d4c4d63eddd94_p.txt
2020-07-08T20:26:37Z Starting output-watcher...
2020-07-08T20:26:37Z IsDedicatedCompute == False, star




StepRun(automl_module) Execution Summary
StepRun( automl_module ) Status: Finished




StepRunId: 2e5f3253-b096-43f4-b9af-aff17d08af37
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/AutoML-Titanic-Pipeline/runs/2e5f3253-b096-43f4-b9af-aff17d08af37?wsid=/subscriptions/47a7ec0c-37ad-428b-9114-b87ea1057632/resourcegroups/ml-teaching/workspaces/ml-teaching-workspace
StepRun( Register_AutoML_Model ) Status: NotStarted

Streaming azureml-logs/20_image_build_log.txt
2020/07/08 21:23:31 Downloading source code...
2020/07/08 21:23:32 Finished downloading source code
StepRun( Register_AutoML_Model ) Status: Running
2020/07/08 21:23:33 Creating Docker network: acb_default_network, driver: 'bridge'
2020/07/08 21:23:33 Successfully set up Docker network: acb_default_network
2020/07/08 21:23:33 Setting up Docker configuration...
2020/07/08 21:23:34 Successfully set up Docker configuration
2020/07/08 21:23:34 Logging in to registry: mlteachingwoe1b28e33.azurecr.io
2020/07/08



Removing intermediate container 4498564a0514
 ---> e9383440d054
Step 9/15 : ENV PATH /azureml-envs/azureml_778c0158c725f306919e19e0e36767df/bin:$PATH
 ---> Running in 55937092a3ad
Removing intermediate container 55937092a3ad
 ---> 82beb6a4c9bf
Step 10/15 : ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml_778c0158c725f306919e19e0e36767df
 ---> Running in 2f251d01714a
Removing intermediate container 2f251d01714a
 ---> cdf7801f9526
Step 11/15 : ENV LD_LIBRARY_PATH /azureml-envs/azureml_778c0158c725f306919e19e0e36767df/lib:$LD_LIBRARY_PATH
 ---> Running in 2d80c7f32bc5
Removing intermediate container 2d80c7f32bc5
 ---> 6da251112a7a
Step 12/15 : COPY azureml-environment-setup/spark_cache.py azureml-environment-setup/log4j.properties /azureml-environment-setup/
 ---> 398ebbd009ee
Step 13/15 : RUN if [ $SPARK_HOME ]; then /bin/bash -c '$SPARK_HOME/bin/spark-submit  /azureml-environment-setup/spark_cache.py'; fi
 ---> Running in 6da84cf6a5bc
Removing intermediate container 6da84cf6a5bc

'Finished'

### Publish your Pipeline

In [131]:
# Match the name to your pipeline experiment
experiment_name = 'AutoML-Titanic-Pipeline'
experiment = Experiment(workspace=ws, name=experiment_name)

# Re-attach to an existing pipeline run by its ID (CHANGE THIS to your own run ID)
pipeline_run = PipelineRun(experiment=experiment, run_id='8efe12aa-660a-444b-8df3-39b1572555c3')

# Publish that run's pipeline
published_pipeline = pipeline_run.publish_pipeline(
    name="Titanic-AutoML-Training-Pipeline",
    description="AutoML Classification Pipeline for ADF Use",
    version="1.0",
)

published_pipeline

Name,Id,Status,Endpoint
Titanic-AutoML-Training-Pipeline,6e1c02ac-164e-4bff-9d0e-eafbe85f11f5,Active,REST Endpoint


In [130]:
# Re-attach to a previous pipeline run using its experiment name and run ID
retrieved_experiment = Experiment(workspace=ws, name='AutoML-Titanic-Pipeline')
pipeline_run_id = '8efe12aa-660a-444b-8df3-39b1572555c3'
retrieved_pipeline = PipelineRun(experiment=retrieved_experiment, run_id=pipeline_run_id)