In [2]:
import os
import azureml

from azureml.train.estimator import Estimator
from azureml.train.dnn import PyTorch
from azureml.core import Workspace, Datastore, Experiment, Model, Run
from azureml.core.compute import ComputeTarget
from azureml.widgets import RunDetails

from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference

# Report which core SDK version is loaded in this kernel
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.0.85


# Connect to environment

In [3]:
# Workspace handle, obtained through Workspace.from_config()
ws = Workspace.from_config()
print("Workspace:", ws.name, "in region", ws.location)

# Compute cluster, looked up by name inside the workspace
cluster = ComputeTarget(name="OptimusPrime", workspace=ws)
print("Compute cluster:", cluster.name)

# Default datastore of the workspace
ds = ws.get_default_datastore()
print("Datastore:", ds.name)

# Experiment handle used later in the notebook
experiment = Experiment(name="seer-from-notebook", workspace=ws)
print("Experiment:", experiment.name)

Workspace: Cybertron in region westeurope
Compute cluster: OptimusPrime
Datastore: workspaceblobstore
Experiment: seer-from-notebook


# Creating the Pipeline

In [4]:
# Parameters make it easy for us to re-run this training pipeline, including for retraining.
# The source data lives under the "seer" path on the datastore `ds`.
source_dataset = DataPath(datastore=ds, path_on_datastore="seer")

# The DataPath is wrapped in a PipelineParameter (default: source_dataset) and
# paired with a DataPathComputeBinding; the pair is what the steps consume.
source_dataset_param = (
    PipelineParameter(name="source_dataset", default_value=source_dataset),
    DataPathComputeBinding(),
)

# Location for the step scripts
script_folder = "./scripts"

# Name of the model
model_name = "seer"

# Experiment name: derived from the `experiment` handle created in the
# "Connect to environment" cell so the name is spelled in exactly one place
# and the submitted pipeline run cannot drift to a different experiment.
experiment_name = experiment.name

## Step 1 - Data preparation

In [1]:
# Output location for the pre-processed training images
training_data_location = PipelineData(name="seertrainingdata", datastore=ds)

# Pre-process step: runs steps/prep.py from the script folder on the cluster.
# The source dataset parameter is passed as --source_path and the PipelineData
# above as --destination_path; the latter is also declared as the step output.
preProcessDataStep = PythonScriptStep(
    name="Pre-process data",
    source_directory=script_folder,
    script_name="steps/prep.py",
    compute_target=cluster,
    inputs=[source_dataset_param],
    outputs=[training_data_location],
    arguments=[
        "--source_path", source_dataset_param,
        "--destination_path", training_data_location,
    ],
)

NameError: name 'PipelineData' is not defined

## Step 2 - Train the model

In [6]:
# Output location for the produced model
model = PipelineData(name="model", datastore=ds, output_path_on_compute="model")

# Arguments handed to the training entry script: the pre-processed data goes
# in as --data-folder, the model PipelineData as --output-folder
estimator_script_params = [
    "--data-folder", training_data_location,
    "--output-folder", model,
]

# Create the PyTorch estimator (entry script steps/train.py, GPU enabled,
# framework version 1.3)
trainEstimator = PyTorch(
    source_directory=script_folder,
    entry_script="steps/train.py",
    compute_target=cluster,
    framework_version="1.3",
    use_gpu=True,
)

# Create a pipeline step with the PyTorch estimator; it takes the training
# data as input and declares the model location as its output
trainOnGpuStep = EstimatorStep(
    name="Train Estimator Step",
    estimator=trainEstimator,
    estimator_entry_script_arguments=estimator_script_params,
    inputs=[training_data_location],
    outputs=[model],
    compute_target=cluster,
)


## Step 3 - Register the model

In [7]:
# Registration step: runs steps/register.py on the cluster, receiving the model
# name as --model_name and the model PipelineData as --model_assets_path
registerModelStep = PythonScriptStep(
    name="Register model in Model Management",
    source_directory=script_folder,
    script_name="steps/register.py",
    compute_target=cluster,
    inputs=[model],
    arguments=[
        "--model_name", model_name,
        "--model_assets_path", model,
    ],
)

## Create the pipeline

In [8]:
# Assemble the three steps defined above into one pipeline
pipeline_steps = [preProcessDataStep, trainOnGpuStep, registerModelStep]
seer_pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

# Kept as the last expression so the validation result is shown as cell output
seer_pipeline.validate()

Step Pre-process data is ready to be created [61ad8940]
Step Train Estimator Step is ready to be created [c5be9bf0]
Step Register model in Model Management is ready to be created [dea76a15]
Data reference workspaceblobstore_5f4825fd is ready to be created [6e184c9c], (Consumers of this data will generate new runs.)


[]

In [9]:
# Publish the pipeline and keep the handle for submitting runs
mlpipeline = seer_pipeline.publish(name="Seer - Training pipeline")

# Show the id of the published pipeline
published_id = mlpipeline.id
print("Pipeline Published ID:" + published_id)

Created step Pre-process data [61ad8940][b016b5ea-90c7-4060-9128-a9293b431115], (This step will run and generate new outputs)
Created step Train Estimator Step [c5be9bf0][4e2bb733-1e37-4b6c-85e7-f77b41bfd960], (This step will run and generate new outputs)
Created step Register model in Model Management [dea76a15][c5be40b5-be98-4860-98f1-66a44e684394], (This step will run and generate new outputs)
Created data reference workspaceblobstore_5f4825fd for StepId [6e184c9c][17064024-c726-4f02-a201-fe23dd7a1fa4], (Consumers of this data will generate new runs.)
Pipeline Published ID:7e0548e8-2c14-46c5-bbe3-ba81062d65b3


In [10]:
# Submit the published pipeline as a run of the named experiment
pipeline_run = mlpipeline.submit(workspace=ws, experiment_name=experiment_name)

Submitted PipelineRun 6b60360a-89f9-41d7-98db-fd4becf143af
Link to Azure Machine Learning studio: https://ml.azure.com/experiments/seer-from-notebook/runs/6b60360a-89f9-41d7-98db-fd4becf143af?wsid=/subscriptions/431dbae5-40ca-438a-8daf-77d7d5580b41/resourcegroups/Cybertron-RG/workspaces/Cybertron


In [11]:
# Display the run-monitoring widget for the submitted pipeline run
run_widget = RunDetails(pipeline_run)
run_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [None]:
# Calls cancel() on the pipeline run submitted earlier in this notebook.
# NOTE(review): with "Run All" this cell would execute right after the submit
# cell — presumably it is meant to be run manually only; confirm.
pipeline_run.cancel()

In [None]:
# Look up one earlier run of this experiment by its id.
# next() stops at the first matching run instead of first collecting every
# run of the experiment into a list.
old_run_id = '4742f337-8e80-4a0e-aa5e-b862d8b257bd'
oldrun = next((r for r in experiment.get_runs() if r.id == old_run_id), None)
if oldrun is None:
    # Same exception type the original `[...][0]` lookup raised on a miss,
    # but with a message that says which run was not found and where.
    raise IndexError(
        "Run {} not found in experiment {}".format(old_run_id, experiment.name)
    )

In [None]:
# Print the plain-text representation of the run looked up in the previous cell
print(oldrun)