# Azure Machine Learning - Pipeline

In [None]:
import os
import azureml

from azureml.train.estimator import Estimator
from azureml.train.dnn import PyTorch
from azureml.core import Workspace, Datastore, Experiment, Model, Run
from azureml.core.compute import ComputeTarget
from azureml.widgets import RunDetails

from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference

# Report which Azure ML SDK release this notebook is running against.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

# Connect to environment

In [None]:
# Workspace handle, loaded through Workspace.from_config()
ws = Workspace.from_config()
print("Workspace:", ws.name, "in region", ws.location)

# Compute cluster, looked up by name inside the workspace
cluster = ComputeTarget(
    workspace=ws,
    name="OptimusPrime",
)
print("Compute cluster:", cluster.name)

# The workspace's default datastore
ds = ws.get_default_datastore()
print("Datastore:", ds.name)

# Experiment handle (used further down to look up an earlier run)
experiment = Experiment(
    workspace=ws,
    name="Simpsons-PyTorch-Pipeline",
)
print("Experiment:", experiment.name)

# Creating the Pipeline

In [None]:
# Folder that contains the step scripts (steps/prep.py, steps/train.py, ...)
script_folder = "./scripts"

# Model name handed to the registration step via --model_name
model_name = "Simpsons-PT-Notebook"

# Experiment name under which the published pipeline is submitted
experiment_name = "Simpsons-PT-Pipeline-Notebook"

# Default location of the source images on the datastore
source_dataset = DataPath(datastore=ds, path_on_datastore="simpsonslego-v2")

# Exposing the data path as a pipeline parameter lets the same pipeline be
# re-run (e.g. for retraining) against a different dataset; the parameter is
# paired with a DataPathComputeBinding so it can be used as a step input.
source_dataset_param = (
    PipelineParameter(name="source_dataset", default_value=source_dataset),
    DataPathComputeBinding(),
)

## Step 1 - Data preparation

In [None]:
# Intermediate pipeline output that receives the pre-processed training images
training_data_location = PipelineData(
    name="simpsons_training_data",
    datastore=ds,
)

# Command-line arguments passed to steps/prep.py
prep_arguments = [
    '--source_path', source_dataset_param,
    '--destination_path', training_data_location,
]

# Pre-processing step: reads the source dataset parameter and writes its
# result to training_data_location
preProcessDataStep = PythonScriptStep(
    name="Pre-process data",
    source_directory=script_folder,
    script_name="steps/prep.py",
    compute_target=cluster,
    inputs=[source_dataset_param],
    outputs=[training_data_location],
    arguments=prep_arguments,
)

## Step 2 - Train the model

In [None]:
# Pipeline output that receives the trained model
model_location = PipelineData(
    name="model",
    datastore=ds,
    output_path_on_compute="model",
)

# Arguments forwarded to the training entry script
estimator_script_params = [
    "--data-folder", training_data_location,
    "--output-folder", model_location,
]

# PyTorch estimator that runs steps/train.py on the cluster
trainEstimator = PyTorch(
    source_directory=script_folder,
    entry_script="steps/train.py",
    compute_target=cluster,
    framework_version='1.3',
    use_gpu=True,
)

# Wrap the PyTorch estimator in a pipeline step: it consumes the prepared
# training data and produces the model output
trainOnGpuStep = EstimatorStep(
    name='Train Estimator Step',
    estimator=trainEstimator,
    estimator_entry_script_arguments=estimator_script_params,
    inputs=[training_data_location],
    outputs=[model_location],
    compute_target=cluster,
)


## Step 3 - Register the model

In [None]:
# Command-line arguments passed to steps/register.py
register_arguments = [
    '--model_name', model_name,
    '--model_assets_path', model_location,
]

# Registration step: takes the model output of the training step as its input
registerModelStep = PythonScriptStep(
    name="Register model in Model Management",
    source_directory=script_folder,
    script_name="steps/register.py",
    compute_target=cluster,
    inputs=[model_location],
    arguments=register_arguments,
)

## Create the pipeline

In [None]:
# Assemble the three steps into one pipeline and validate it before publishing
seer_pipeline = Pipeline(
    workspace=ws,
    steps=[
        preProcessDataStep,
        trainOnGpuStep,
        registerModelStep,
    ],
)
seer_pipeline.validate()

In [None]:
# Publish the pipeline so it can be submitted again later, and print its id
mlpipeline = seer_pipeline.publish(
    name="Simpsons-PyTorch-Pipeline - Training pipeline (From Notebook)"
)
print("Pipeline Published ID:" + mlpipeline.id)

In [None]:
# Submit the published pipeline under experiment_name and show the run widget
pipeline_run = mlpipeline.submit(
    workspace=ws,
    experiment_name=experiment_name,
)
RunDetails(pipeline_run).show()

In [None]:
# Re-attach to an earlier run so its details can be inspected without
# submitting the pipeline again.
# NOTE(review): the lookup uses `experiment` ("Simpsons-PyTorch-Pipeline"),
# while the cell above submits under `experiment_name`
# ("Simpsons-PT-Pipeline-Notebook") - confirm the run id below belongs to the
# former experiment.
old_run_id = 'f8936c45-8697-4017-b8dc-940bef32a215'

# Stop at the first match instead of materialising the whole run history.
oldrun = next(
    (r for r in experiment.get_runs() if r.id == old_run_id),
    None,
)
if oldrun is None:
    # IndexError is what the previous `[...][0]` lookup raised; keep the type
    # but say which run is missing and where it was searched for.
    raise IndexError(
        f"Run {old_run_id!r} not found in experiment {experiment.name!r}"
    )

RunDetails(oldrun).show()