# Creating an Azure Machine Learning Pipeline

To create an AML Pipeline, we first create the step objects and the intermediate data connections that link them. Next, we create and submit the pipeline.

In [None]:
# Replace the placeholder with your own name; it is embedded in every
# per-user resource name defined below.
USER_NAME = "ENTER_YOUR_NAME_HERE"

# Per-user names of the workspace assets used by this notebook.
ENVIRONMENT_NAME = f"sklearn-{USER_NAME}"
EXPERIMENT_NAME = f"diabetes-{USER_NAME}"
DATASET_NAME = f"diabetes-{USER_NAME}"
MODEL_NAME = f"diabetes-{USER_NAME}"
PIPELINE_ENDPOINT_NAME = f"diabetes-retraining-{USER_NAME}"

# Name of the compute target the steps run on (not user-specific).
COMPUTE_TARGET_NAME = "cpu-cluster"

# Data locations; not referenced by the cells visible in this notebook.
DATA_PATH = "./data"
BLOB_PATH = f"/data/{USER_NAME}"

In [None]:
from azureml.core import Workspace, Experiment, RunConfiguration
from azureml.pipeline.core import (Pipeline, PipelineData, PipelineParameter,
                                   PublishedPipeline)
from azureml.pipeline.steps import PythonScriptStep

# Connect to the workspace and open the experiment that the pipeline run
# will be submitted under.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name=EXPERIMENT_NAME)

# Look up the assets the pipeline steps depend on by name. These lookups
# assume the dataset, environment and compute target already exist in the
# workspace.
cpu_cluster = ws.compute_targets[COMPUTE_TARGET_NAME]
sklearn_env = ws.environments[ENVIRONMENT_NAME]
diabetes_dataset = ws.datasets[DATASET_NAME]

In [None]:
# Run configuration shared by every pipeline step below; it runs each step's
# script in the sklearn environment looked up from the workspace.
RC = RunConfiguration()
RC.environment = sklearn_env

## Create PipelineStep 1: Training Step

In [None]:
# Training step: consumes the registered dataset as its input and writes
# pipeline data (the model metadata folder) as its output.

# Intermediate output on the workspace's default datastore; the later steps
# receive this same object as an input.
model_metadata = PipelineData(
    name='model_metadata',
    datastore=ws.get_default_datastore(),
)

train_step = PythonScriptStep(
    name="Train Model",
    source_directory='./train/',
    script_name="ridge-train.py",
    arguments=[
        "--data-path",
        diabetes_dataset.as_named_input('TrainingData').as_mount(),
        '--model-metadata-folder',
        model_metadata,
    ],
    outputs=[model_metadata],
    compute_target=cpu_cluster,
    runconfig=RC,
    allow_reuse=True,
)

## Create PipelineStep 2: Evaluation Step

In [None]:
# Evaluation step: reads the model metadata written by the training step and
# writes its registration decision to a second pipeline-data folder.
recommend_register = PipelineData(
    name='recommend_register',
    datastore=ws.get_default_datastore(),
)

evaluate_step = PythonScriptStep(
    name="Evaluate Model",
    source_directory='./evaluate/',
    script_name="evaluate.py",
    arguments=[
        "--validation-data-path",
        diabetes_dataset.as_named_input('ValidationData').as_mount(),
        '--model-metadata-folder',
        model_metadata,
        "--existing-model-name",
        MODEL_NAME,
        "--registration-decision-folder",
        recommend_register,
    ],
    inputs=[model_metadata],
    outputs=[recommend_register],
    compute_target=cpu_cluster,
    runconfig=RC,
    allow_reuse=True,
)


## Create PipelineStep 3: Register Model Step

In [None]:
# Registration step: takes both intermediate folders (model metadata and the
# evaluation step's decision) as inputs and produces no pipeline outputs.
# NOTE(review): "--force" and "--skip" are given the Python boolean False as
# their values; confirm register_model.py parses these as intended.
register_model = PythonScriptStep(
    name="Register Model",
    source_directory='./register/',
    script_name="register_model.py",
    arguments=[
        "--force",
        False,
        "--skip",
        False,
        '--model-metadata',
        model_metadata,
        "--model-name",
        MODEL_NAME,
        "--register-model-folder",
        recommend_register,
    ],
    inputs=[model_metadata, recommend_register],
    compute_target=cpu_cluster,
    runconfig=RC,
    allow_reuse=True,
)


## Create and Submit the Pipeline

In [None]:
# Only the final step is listed; the training and evaluation steps are
# expected to be pulled in through its PipelineData input dependencies.
pipeline = Pipeline(workspace=ws, steps=[register_model])

In [None]:
# Submit the pipeline to the experiment, tagging the run so it can be
# identified as a pipeline run.
pipeline_run = exp.submit(
    config=pipeline,
    tags={"Context": "Pipeline"},
)

In [None]:
from azureml.widgets import RunDetails
# Show the run-details widget for the pipeline run submitted above.
RunDetails(pipeline_run).show()

## Create [PipelineEndpoint](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline_endpoint.pipelineendpoint?view=azure-ml-py)


In [None]:
from azureml.pipeline.core import PipelineEndpoint

# Publish the pipeline behind a named endpoint.
# The pipeline argument can be either a Pipeline or a PublishedPipeline.
# NOTE(review): re-running this cell with an endpoint name that already
# exists may fail; confirm the intended behavior for repeated publishes.
pipeline_endpoint = PipelineEndpoint.publish(
    workspace=ws,
    name=PIPELINE_ENDPOINT_NAME,
    pipeline=pipeline,
    description="Diabetes retraining pipeline endpoint",
)