In [4]:
import os
import azureml

from azureml.train.estimator import Estimator
from azureml.train.dnn import PyTorch
from azureml.core import Workspace, Datastore, Experiment, Model, Run
from azureml.core.compute import ComputeTarget
from azureml.widgets import RunDetails

from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference

# Print the installed Azure ML core SDK version (useful when reproducing this notebook)
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.2.0


# Connect to environment

In [6]:
# Workspace: loaded from the local Azure ML config file
ws = Workspace.from_config()
print("Workspace:", ws.name, "in region", ws.location)

# Compute: attach to the existing cluster that will execute the pipeline steps
cluster = ComputeTarget(
    workspace=ws,
    name="OptimusPrime",
)
print('Compute cluster:', cluster.name)

# Storage: the workspace's default datastore
ds = ws.get_default_datastore()
print("Datastore:", ds.name)

# Experiment: the container under which pipeline runs are tracked
experiment = Experiment(
    workspace=ws,
    name='Simpsons-TF-PyTorch-Pipeline-Heijmans',
)
print("Experiment:", experiment.name)

Workspace: Cybertron in region westeurope
Compute cluster: OptimusPrime
Datastore: workspaceblobstore
Experiment: Simpsons-TF-PyTorch-Pipeline-Heijmans


# Creating the Pipeline

In [7]:
# Parameters make it easy to re-run this training pipeline (including for
# retraining) against a different dataset without rebuilding the pipeline.
source_dataset = DataPath(
    datastore=ds,
    path_on_datastore="simpsonslego-v2")

# The parameterised DataPath is passed around as a
# (PipelineParameter, DataPathComputeBinding) pair; the DataPath above is its default value.
source_dataset_param = (PipelineParameter(name="source_dataset", default_value=source_dataset),
                        DataPathComputeBinding())

# Location for the step scripts
script_folder = "./scripts"

# Name of the model
model_name = "Simpsons-PyTorch-Heijmans"

# Experiment name: derived from the Experiment object created in the connection
# cell rather than repeating the literal, so the pipeline is always submitted to
# the same experiment that is later queried with experiment.get_runs().
experiment_name = experiment.name

## Step 1 - Data preparation

In [9]:
# Intermediate output: where the pre-processed training images are written
training_data_location = PipelineData(
    name="simpsons_training_data",
    datastore=ds,
)

# Pre-process step: runs steps/prep.py on the cluster, reading from the
# parameterised source dataset and writing into training_data_location
preProcessDataStep = PythonScriptStep(
    name="Pre-process data",
    script_name="steps/prep.py",
    compute_target=cluster,
    inputs=[source_dataset_param],
    arguments=[
        '--source_path', source_dataset_param,
        '--destination_path', training_data_location,
    ],
    outputs=[training_data_location],
    source_directory=script_folder,
)

## Step 2 - Train the model

In [10]:
# Intermediate output: where the training step writes the produced model
model_location = PipelineData(
    name="model",
    datastore=ds,
    output_path_on_compute="model",
)

# Command-line arguments passed to the estimator's entry script
estimator_script_params = [
    "--data-folder", training_data_location,
    "--output-folder", model_location,
]

# PyTorch estimator that runs steps/train.py on the cluster with GPU enabled
trainEstimator = PyTorch(
    source_directory=script_folder,
    compute_target=cluster,
    entry_script="steps/train.py",
    use_gpu=True,
    framework_version='1.3',
)

# Wrap the PyTorch estimator in a pipeline step: consumes the pre-processed
# training data and produces the model output
trainOnGpuStep = EstimatorStep(
    name='Train Estimator Step',
    estimator=trainEstimator,
    inputs=[training_data_location],
    outputs=[model_location],
    compute_target=cluster,
    estimator_entry_script_arguments=estimator_script_params,
)


## Step 3 - Register the model

In [11]:
# Registration step: runs steps/register.py on the cluster, passing it the
# model name and the location of the model produced by the training step
registerModelStep = PythonScriptStep(
    name="Register model in Model Management",
    script_name="steps/register.py",
    compute_target=cluster,
    inputs=[model_location],
    arguments=[
        '--model_name', model_name,
        '--model_assets_path', model_location,
    ],
    source_directory=script_folder,
)

## Create the pipeline

In [12]:
# Assemble the three steps into one pipeline
seer_pipeline = Pipeline(
    workspace=ws,
    steps=[
        preProcessDataStep,
        trainOnGpuStep,
        registerModelStep,
    ],
)

# Validate the pipeline; the result is displayed as the cell output
seer_pipeline.validate()

Step Pre-process data is ready to be created [dacf2776]
Step Train Estimator Step is ready to be created [26c0ea72]
Step Register model in Model Management is ready to be created [327fe427]
Data reference workspaceblobstore_e15e1a2b is ready to be created [da13f8a6], (Consumers of this data will generate new runs.)


[]

In [13]:
# Publish the pipeline and report the ID of the published pipeline
mlpipeline = seer_pipeline.publish(
    name="Simpsons-TF-PyTorch-Pipeline-Heijmans - Training pipeline (From Notebook)"
)
print("Pipeline Published ID:" + mlpipeline.id)

Created step Pre-process data [dacf2776][2f18ad52-8a59-4d2a-a68f-84502df78b5a], (This step will run and generate new outputs)
Created step Train Estimator Step [26c0ea72][499c5e52-80e0-4b1b-93a7-3ee6f63fd692], (This step will run and generate new outputs)
Created step Register model in Model Management [327fe427][757f6dd1-1ca6-4621-8ad6-0cb1a754396b], (This step will run and generate new outputs)
Created data reference workspaceblobstore_e15e1a2b for StepId [da13f8a6][c053560b-df77-4ab7-a91c-0bd83632d414], (Consumers of this data will generate new runs.)
Pipeline Published ID:8e86d294-4c31-4804-94a2-96730f5661de


In [14]:
# Submit the published pipeline under the configured experiment name
pipeline_run = mlpipeline.submit(workspace=ws, experiment_name=experiment_name)

# Show the run-monitoring widget for the submitted run
RunDetails(pipeline_run).show()

Submitted PipelineRun 4811c710-71cf-42c6-a119-4000191f9e5d
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/Simpsons-TF-PyTorch-Pipeline-Heijmans/runs/4811c710-71cf-42c6-a119-4000191f9e5d?wsid=/subscriptions/431dbae5-40ca-438a-8daf-77d7d5580b41/resourcegroups/Cybertron-RG/workspaces/Cybertron


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [15]:
# Re-attach to a previously submitted pipeline run (e.g. after a kernel restart)
# without submitting a new one. Change this ID to the run you want to inspect.
old_run_id = '4811c710-71cf-42c6-a119-4000191f9e5d'

# Stop at the first matching run instead of materialising the experiment's
# entire run history into a list first.
oldrun = next((r for r in experiment.get_runs() if r.id == old_run_id), None)
if oldrun is None:
    # Fail with an explicit message rather than an opaque "list index out of range".
    raise LookupError(
        "Run " + old_run_id + " was not found in experiment " + experiment.name
    )

RunDetails(oldrun).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …