# Azure Machine Learning - Pipeline

In [None]:
import os
import azureml

from azureml.train.estimator import Estimator
from azureml.train.dnn import PyTorch
from azureml.core import Workspace, Datastore, Experiment, Model, Run, Environment, ScriptRunConfig
from azureml.core.compute import ComputeTarget
from azureml.widgets import RunDetails

from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference

from azureml.core.runconfig import RunConfiguration

# Show which version of the Azure ML core SDK this notebook is running against
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

# Connect to environment

In [None]:
# Workspace handle, obtained through Workspace.from_config()
ws = Workspace.from_config()
print("Workspace:",ws.name,"in region", ws.location)

# Existing compute target, looked up by name; every pipeline step below runs on it
compute_cluster_name = "OptimusPrime"
cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
print('Compute cluster:', cluster.name)

# Default datastore of the workspace; holds the source data and the step outputs
ds = ws.get_default_datastore()
print("Datastore:",ds.name)

# Experiment handle; the last cell uses it to look up an earlier run
tracking_experiment_name = 'Simpsons-PyTorch-Pipeline'
experiment = Experiment(workspace=ws, name=tracking_experiment_name)
print("Experiment:",experiment.name)

# Creating the Pipeline

In [None]:
# Experiment under which the published pipeline is submitted
experiment_name = "Simpsons-PT-Pipeline-Notebook"

# Name passed to the registration step via --model_name
model_name = "Simpsons-PT-Notebook"

# Source directory containing the step scripts (steps/prep.py, steps/train.py, ...)
script_folder = "./scripts"

In [None]:
# Default location of the source data: "simpsonslego-v3" on the datastore `ds`
source_dataset = DataPath(datastore=ds, path_on_datastore="simpsonslego-v3")

# Exposing the path as a pipeline parameter makes it easy to re-run this
# training pipeline (including for retraining) with a different value.
source_path_parameter = PipelineParameter(
    name="source_dataset",
    default_value=source_dataset,
)

# The (parameter, compute binding) pair is what the pre-process step takes
# both as an input and as a script argument.
source_dataset_param = (source_path_parameter, DataPathComputeBinding())

## Step 1 - Data preparation

In [None]:
# Pipeline output that receives the pre-processed training images
training_data_location = PipelineData(name="simpsons_training_data", datastore=ds)

# Command-line arguments handed to steps/prep.py
prep_arguments = [
    '--source_path', source_dataset_param,
    '--destination_path', training_data_location,
]

# Pre-process step: reads the parameterised source path and writes its
# result to training_data_location.
preProcessDataStep = PythonScriptStep(
    name="Pre-process data",
    source_directory=script_folder,
    script_name="steps/prep.py",
    arguments=prep_arguments,
    inputs=[source_dataset_param],
    outputs=[training_data_location],
    compute_target=cluster,
)

## Step 2 - Train the model

In [None]:
# Pipeline output that receives the trained model
model_location = PipelineData(
    name="model",
    datastore=ds,
    output_path_on_compute="model",
)

# Run configuration for training: same cluster, with the
# "AzureML-PyTorch-1.6-GPU" environment fetched from the workspace.
aml_run_config = RunConfiguration()
aml_run_config.target = cluster
training_environment = Environment.get(workspace=ws, name="AzureML-PyTorch-1.6-GPU")
aml_run_config.environment = training_environment

# Command-line arguments handed to steps/train.py
script_params = ['--data-folder', training_data_location,
                 "--output-folder", model_location]

# Training step: consumes the pre-processed data and produces the model output
trainOnGpuStep = PythonScriptStep(
    name='Train Model',
    source_directory=script_folder,
    script_name='steps/train.py',
    arguments=script_params,
    inputs=[training_data_location],
    outputs=[model_location],
    compute_target=cluster,
    runconfig=aml_run_config,
)


## Step 3 - Register the model

In [None]:
# Command-line arguments handed to steps/register.py
register_arguments = [
    '--model_name', model_name,
    '--model_assets_path', model_location,
]

# Registration step: takes the model output of the training step as its input
registerModelStep = PythonScriptStep(
    name="Register model in Model Management",
    source_directory=script_folder,
    script_name="steps/register.py",
    arguments=register_arguments,
    inputs=[model_location],
    compute_target=cluster,
)

## Create the pipeline

In [None]:
# Assemble the three steps defined above into one pipeline
pipeline_steps = [preProcessDataStep, trainOnGpuStep, registerModelStep]
simpsons_pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

# Left as the last bare expression so the notebook displays the validation result
simpsons_pipeline.validate()

In [None]:
# Publish the pipeline under a fixed display name and report the resulting id
published_name = "Simpsons-PyTorch-Pipeline - Training pipeline (From Notebook)"
mlpipeline = simpsons_pipeline.publish(name=published_name)

published_id_line = "Pipeline Published ID:" + mlpipeline.id
print(published_id_line)

In [None]:
# Submit the published pipeline under experiment_name
pipeline_run = mlpipeline.submit(ws, experiment_name)

# Display the run-details widget for the submitted run
run_widget = RunDetails(pipeline_run)
run_widget.show()

In [None]:
# Re-open the run-details widget for an earlier run of `experiment`.
old_run_id = '2d50e69d-7615-407f-a27a-fd71371e3085'

# Stop at the first run whose id matches instead of first collecting every
# run of the experiment into a list.
oldrun = next((r for r in experiment.get_runs() if r.id == old_run_id), None)

if oldrun is None:
    # Same exception type as the former `[...][0]` lookup, but the message now
    # says which run was missing and where it was searched for.
    raise IndexError(
        "Run {!r} was not found in experiment {!r}".format(old_run_id, experiment.name)
    )

RunDetails(oldrun).show()