In [166]:
import os
import azureml
from shutil import copyfile, rmtree
from pathlib import Path
import urllib.request

from azureml.train.estimator import Estimator
from azureml.core import Workspace, Datastore, Experiment, Environment, Model, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.conda_dependencies import CondaDependencies 
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.runconfig import CondaDependencies, RunConfiguration
from azureml.data.data_reference import DataReference

from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import DataTransferStep, PythonScriptStep, EstimatorStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.pipeline.core import Pipeline, PipelineData

from azureml.train.dnn import PyTorch

# Report which Azure ML SDK release is loaded in this kernel
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.0.81


# Connect to environment

In [167]:
# Workspace handle; from_config() is called without arguments, so the SDK's
# default configuration lookup decides which workspace is used
ws = Workspace.from_config()
print("Workspace:", ws.name, "in region", ws.location)

# Compute cluster, looked up by name in the workspace
cluster = ComputeTarget(ws, "OptimusPrime")
print("Compute cluster:", cluster.name)

# Default datastore of the workspace
ds = ws.get_default_datastore()
print("Datastore:", ds.name)

# Experiment handle used for the runs in this notebook
experiment = Experiment(ws, "seer-from-notebook")
print("Experiment:", experiment.name)

Workspace: Cybertron in region westeurope
Compute cluster: OptimusPrime
Datastore: workspaceblobstore
Experiment: seer-from-notebook


# Creating the Pipeline

In [179]:
# Parameters make it easy to re-run this training pipeline, including for retraining.
# The default input location is the "seer" path on the datastore `ds`.
source_dataset = DataPath(
    datastore=ds,
    path_on_datastore="seer")

# The parameter is paired with a compute binding; the tuple as a whole is what
# the pre-process step receives as both its input and its --source_path argument.
source_dataset_param = (PipelineParameter(name="source_dataset", default_value=source_dataset),
                        DataPathComputeBinding())

# Location for the step scripts
script_folder = "./scripts"

# Name of the model
model_name = "seer"

# Experiment name, taken from the `experiment` handle created in the connection cell
# so that pipeline submissions and `experiment` can never point at different experiments.
experiment_name = experiment.name

## Step 1 - Data preparation

In [180]:
# Run configuration for the step: conda dependencies that pip-install azureml-sdk
cd = CondaDependencies.create(pip_packages=["azureml-sdk"])
amlcompute_run_config = RunConfiguration(conda_dependencies=cd)

# Pipeline output that receives the pre-processed training images
training_data_location = PipelineData(name="seertrainingdata", datastore=ds)

# Pre-processing step: runs steps/prep.py from the script folder, reading from the
# source dataset parameter and writing into training_data_location
preProcessDataStep = PythonScriptStep(
    name="Pre-process data",
    source_directory=script_folder,
    script_name="steps/prep.py",
    arguments=[
        "--source_path", source_dataset_param,
        "--destination_path", training_data_location,
    ],
    inputs=[source_dataset_param],
    outputs=[training_data_location],
    compute_target=cluster,
    runconfig=amlcompute_run_config,
)

## Step 2 - Train the model

In [181]:
# Pipeline output that receives the trained model
model = PipelineData(name="model", datastore=ds, output_path_on_compute="model")

# Command-line arguments passed to the training entry script
estimator_script_params = [
    "--data-folder", training_data_location,
    "--output-folder", model,
]

# PyTorch estimator (framework version 1.3, GPU enabled) running steps/train.py
trainEstimator = PyTorch(
    source_directory=script_folder,
    entry_script="steps/train.py",
    compute_target=cluster,
    framework_version="1.3",
    use_gpu=True,
)

# Pipeline step wrapping the PyTorch estimator: consumes the pre-processed data
# and produces the model output
trainOnGpuStep = EstimatorStep(
    name="Train Estimator Step",
    estimator=trainEstimator,
    estimator_entry_script_arguments=estimator_script_params,
    inputs=[training_data_location],
    outputs=[model],
    compute_target=cluster,
)


## Step 3 - Register the model

In [187]:
# Registration step: runs steps/register.py with the trained model output as input.
# NOTE(review): the flag is called --dataset_name but is given model_name;
# confirm this matches what steps/register.py expects.
registerStep = PythonScriptStep(
    name="Register model for deployment",
    source_directory=script_folder,
    script_name="steps/register.py",
    arguments=[
        "--dataset_name", model_name,
        "--model_assets_path", model,
    ],
    inputs=[model],
    compute_target=cluster,
)

## Create the pipeline

In [188]:
# Assemble the three steps into one pipeline
seer_pipeline = Pipeline(
    workspace=ws,
    steps=[
        preProcessDataStep,
        trainOnGpuStep,
        registerStep,
    ],
)

# Left as the last expression so the validation result is shown as the cell output
seer_pipeline.validate()

Step Pre-process data is ready to be created [d63d07d7]
Step Train Estimator Step is ready to be created [c2f52cad]
Step Register model for deployment is ready to be created [1cd3d359]


[]

In [189]:
# Publish the pipeline so it can be submitted again later by reference
mlpipeline = seer_pipeline.publish(name="Marge Or Homer - Training pipeline")

# Show the id of the published pipeline
print("Pipeline Published ID:", mlpipeline.id, sep="")

Created step Pre-process data [d63d07d7][2dffdd24-324f-4326-88db-a7bc27f3636d], (This step will run and generate new outputs)
Created step Train Estimator Step [c2f52cad][d3aee0f2-47f4-4820-9d19-0d978b8648b6], (This step will run and generate new outputs)
Created step Register model for deployment [1cd3d359][00de07cc-dc58-427a-a0b0-111be96291a4], (This step will run and generate new outputs)
Using data reference workspaceblobstore_c4dac6e3 for StepId [99fecf20][5c16f627-8adc-4ca0-91f2-c2ff80b83a8a], (Consumers of this data are eligible to reuse prior runs.)
Pipeline Published ID:6cb06fa5-5086-49be-acc1-923cc78dcaed


In [190]:
# Submit the published pipeline as a run under the configured experiment name
pipeline_run = mlpipeline.submit(workspace=ws, experiment_name=experiment_name)

Submitted PipelineRun 4742f337-8e80-4a0e-aa5e-b862d8b257bd
Link to Azure Machine Learning studio: https://ml.azure.com/experiments/seer-from-notebook/runs/4742f337-8e80-4a0e-aa5e-b862d8b257bd?wsid=/subscriptions/431dbae5-40ca-438a-8daf-77d7d5580b41/resourcegroups/Cybertron-RG/workspaces/Cybertron


In [194]:
# Render the run-details widget for the submitted pipeline run
run_details_widget = RunDetails(pipeline_run)
run_details_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [101]:
# Fetch an earlier pipeline run of this experiment by its id.
# The id is hard-coded, so this only works in a workspace that contains that run.
old_run_id = '4742f337-8e80-4a0e-aa5e-b862d8b257bd'

# next() stops at the first match instead of walking every run in the experiment.
oldrun = next((r for r in experiment.get_runs() if r.id == old_run_id), None)
if oldrun is None:
    # Same exception type as indexing an empty list, but with a useful message.
    raise IndexError(
        "No run with id {} found in experiment {}".format(old_run_id, experiment.name)
    )

In [102]:
# Print the text summary of the retrieved run (experiment, id, type, status)
print(oldrun)

Run(Experiment: seer-from-notebook,
Id: 23eaef17-495f-4f3a-98f7-6d1b2ff89252,
Type: azureml.PipelineRun,
Status: Completed)
