In [1]:
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# `from azureml.core import ...` binds only the imported names, not the
# top-level `azureml` package, so import it explicitly here; without this the
# line below raises NameError when the notebook is run on a fresh kernel.
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.53


# Register/Reference a Datastore

In [2]:
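# One-time setup (left commented out): registers a blob container as the
# 'seerdata' datastore. It needs `ws`, which is defined in the next cell.
# Never commit a real account key -- read it from an environment variable.
#datastore = Datastore.register_azure_blob_container(workspace=ws, 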
#                                             datastore_name='seerdata', 
#                                             container_name='your azure blob container name',
#                                             account_name='your storage account name', 
#                                             account_key='your storage account key',
#                                             create_if_not_exists=True)

In [3]:
# workspace
# Build the Workspace handle via Workspace.from_config(), then end the cell
# with `ws.datastores` so the notebook displays the datastore mapping.
ws = Workspace.from_config()
ws.datastores

{'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x1a159de73c8>,
 'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1a159dee278>,
 'halworkspacestorage__datasets': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1a159de7e10>,
 'seerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1a159de7d30>}

In [4]:
# data
# Look up the 'seerdata' datastore by name; a missing key raises KeyError.
datastore = ws.datastores['seerdata']

# compute target
# Look up the compute target named 'gandalf'; every pipeline step below
# passes this object as its compute_target.
compute = ws.compute_targets['gandalf']

# Define Pipeline!
The following will be created and then run:
1. Pipeline Parameters
2. Data Fetch Step
3. Data Process Step
4. Training Step
5. Model Registration Step


## Pipeline Parameters
We need to tell the Pipeline what it needs to learn to see!

In [23]:
# Default input location: the 'burrito_tacos' path on the datastore.
datapath = DataPath(
    datastore=datastore,
    path_on_datastore='burrito_tacos',
)

# Two-element tuple: the "data" pipeline parameter (defaulting to `datapath`)
# paired with a mount-mode compute binding.
data_path_pipeline_param = (
    PipelineParameter(name="data", default_value=datapath),
    DataPathComputeBinding(mode='mount'),
)

## Data Process Step

In [24]:
# Directory-valued intermediate output stored on `datastore`; produced by the
# preparation step (presumably TFRecords written by parse.py -- confirm).
seer_tfrecords = PipelineData(
    "tfrecords_set",
    datastore=datastore,
    is_directory=True,
)

# Estimator that runs parse.py from the current directory on `compute`, with
# pip requirements taken from requirements.txt.
prep = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='parse.py',
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Pipeline step wrapping `prep`: reads the data-path parameter and writes to
# `seer_tfrecords`; both are also passed to the script as CLI arguments.
prepStep = EstimatorStep(
    name='Data Preparation',
    estimator=prep,
    estimator_entry_script_arguments=[
        "--source_path", data_path_pipeline_param,
        "--target_path", seer_tfrecords,
    ],
    inputs=[data_path_pipeline_param],
    outputs=[seer_tfrecords],
    compute_target=compute,
)

## Training Step

In [25]:
# Directory-valued intermediate output stored on `datastore`; produced by the
# training step.
seer_training = PipelineData(
    "train",
    datastore=datastore,
    is_directory=True,
)

# Estimator that runs train.py from the current directory on `compute`, with
# pip requirements taken from requirements.txt.
train = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='train.py',
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Pipeline step wrapping `train`: consumes `seer_tfrecords`, writes to
# `seer_training`, and forwards the hyperparameters below as CLI arguments.
trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    estimator_entry_script_arguments=[
        "--source_path", seer_tfrecords,
        "--target_path", seer_training,
        "--epochs", 5,
        "--batch", 10,
        "--lr", 0.001,
    ],
    inputs=[seer_tfrecords],
    outputs=[seer_training],
    compute_target=compute,
)

## Model Registration Step

In [29]:
# Directory-valued output stored on `datastore`; produced by the registration
# step.
seer_model = PipelineData(
    "model",
    datastore=datastore,
    is_directory=True,
)

# Estimator that runs register.py from the current directory on `compute`.
# Unlike the other two estimators, no pip requirements file is passed.
register = Estimator(
    source_directory='.',
    compute_target=compute,
    entry_script='register.py',
    use_gpu=True,
)

# Pipeline step wrapping `register`: consumes `seer_training` and writes to
# `seer_model`; both are also passed to the script as CLI arguments.
registerStep = EstimatorStep(
    name='Model Registration',
    estimator=register,
    estimator_entry_script_arguments=[
        "--source_path", seer_training,
        "--target_path", seer_model,
    ],
    inputs=[seer_training],
    outputs=[seer_model],
    compute_target=compute,
)

## Test Run

In [30]:
# Assemble the preparation, training and registration steps into one pipeline
# bound to the workspace.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[prepStep, trainStep, registerStep],
)

In [31]:
# Submit the pipeline under the 'seer' experiment, then display the run
# details widget for the resulting run.
pipeline_run1 = Experiment(workspace=ws, name='seer').submit(pipeline1)
RunDetails(pipeline_run1).show()

Created step Data Preparation [02604895][4b0bb612-e14f-48b9-9b19-128eccef855a], (This step will run and generate new outputs)
Created step Model Training [45a9879e][684eba98-2534-4ae5-aaef-1970b280c524], (This step will run and generate new outputs)
Created step Model Registration [8dc3f5d1][aa6b60cd-128d-4d27-9c5e-f3775ace5450], (This step will run and generate new outputs)
Using data reference seerdata_9193bbcf for StepId [21012319][09ab9570-092e-42a4-beba-13183967e861], (Consumers of this data are eligible to reuse prior runs.)
Submitted pipeline run: 05c520ed-bf59-4618-9d58-c9f2e9b71031


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [20]:
#pipeline_run1.cancel()

In [33]:
# Publish the pipeline under a fixed name and description; the returned
# object is kept in `published_pipeline1`.
published_pipeline1 = pipeline1.publish(
    name="Seer Pipeline",
    description="Transfer learned image classifier. Uses folders as labels.",
)