In [1]:
# `import azureml.core` is required in addition to the from-imports below:
# `from azureml.core import ...` does not bind the top-level name `azureml`,
# so the version print at the end of this cell would otherwise raise
# NameError on a fresh kernel.
import azureml.core
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# Record the SDK version this notebook was run against.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.53


# Register/Reference a Datastore

In [2]:
# One-time setup template: registers an Azure blob container as the datastore
# named 'seerdata'. It is left commented out; the cells below look the
# datastore up by name through ws.datastores instead.
# NOTE(review): this call needs `ws`, which is only created in a later cell
# (Workspace.from_config()) - run that first if you uncomment this.
# NOTE(review): the quoted values are placeholders; supply the real ones at
# run time and avoid committing a storage account key to the notebook.
#datastore = Datastore.register_azure_blob_container(workspace=ws, 
#                                             datastore_name='seerdata', 
#                                             container_name='your azure blob container name',
#                                             account_name='your storage account name', 
#                                             account_key='your storage account key',
#                                             create_if_not_exists=True)

In [3]:
# workspace
# Build the Workspace handle via Workspace.from_config(); every later cell
# that talks to Azure ML goes through `ws`.
ws = Workspace.from_config()
# Bare expression on the last line: the datastores mapping becomes the cell
# output, so the available datastore names (including 'seerdata') are visible.
ws.datastores

{'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x1daf3c13748>,
 'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1daf3bf36a0>,
 'halworkspacestorage__datasets': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1daf3bdca58>,
 'seerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1daf3bdcc88>}

In [4]:
# Datastore that the pipeline outputs below are written to.
datastore = ws.datastores["seerdata"]

# Reference to the 'burrito_tacos' path on that datastore.
datareference = DataReference(
    path_on_datastore="burrito_tacos",
    data_reference_name="seerdata",
    datastore=datastore,
)

# Compute target on which every pipeline step in this notebook runs.
compute = ws.compute_targets["gandalf"]

# Define Pipeline!
The following will be created and then run:
1. Pipeline Parameters
2. Data Fetch Step
3. Data Process Step
4. Training Step
5. Model Registration Step


## Pipeline Parameters
The `categories` parameter tells the pipeline which categories it should learn to see.

In [13]:
# Pipeline-level parameter forwarded to the fetch step as `--categories`.
# NOTE(review): the default looks like a space-separated list of category
# names - confirm against how fetch.py parses it.
categories_param = PipelineParameter(
    name="categories",
    default_value="tacos burrito",
)

## Data Fetch Step

In [19]:
# Directory-valued pipeline output on the datastore; the fetch step produces
# it and the preparation step consumes it.
seer_dataset = PipelineData("training_set", datastore=datastore, is_directory=True)

# First step: runs fetch.py from the current directory on the compute target.
# It takes no pipeline inputs; the output location and the categories
# parameter are passed to the script as command-line arguments.
fetchStep = PythonScriptStep(
    name="Data Fetch",
    source_directory=".",
    script_name="fetch.py",
    compute_target=compute,
    inputs=[],
    outputs=[seer_dataset],
    arguments=[
        "--target_path", seer_dataset,
        "--categories", categories_param,
    ],
)

## Data Process Step

In [20]:
# Directory-valued pipeline output on the datastore; the preparation step
# produces it and the training step consumes it.
seer_tfrecords = PipelineData("tfrecords_set", datastore=datastore, is_directory=True)

# Estimator that runs prep.py from the current directory, installing the
# packages listed in requirements.txt.
prep = Estimator(
    source_directory=".",
    entry_script="prep.py",
    compute_target=compute,
    pip_requirements_file="requirements.txt",
    use_gpu=True,
)

# Second step: reads the fetched dataset and writes the prepared records.
prepStep = EstimatorStep(
    name="Data Preparation",
    estimator=prep,
    compute_target=compute,
    inputs=[seer_dataset],
    outputs=[seer_tfrecords],
    estimator_entry_script_arguments=[
        "--source_path", seer_dataset,
        "--target_path", seer_tfrecords,
    ],
)

## Training Step

In [21]:
# Directory-valued pipeline output on the datastore; the training step
# writes to it.
seer_models = PipelineData("models", datastore=datastore, is_directory=True)

# Estimator that runs train.py from the current directory, installing the
# packages listed in requirements.txt.
train = Estimator(
    source_directory=".",
    entry_script="train.py",
    compute_target=compute,
    pip_requirements_file="requirements.txt",
    use_gpu=True,
)

# Third step: consumes the prepared records and emits the models directory.
# The hyperparameters (epochs, batch, lr) are fixed literals here.
trainStep = EstimatorStep(
    name="Model Training",
    estimator=train,
    compute_target=compute,
    inputs=[seer_tfrecords],
    outputs=[seer_models],
    estimator_entry_script_arguments=[
        "--source_path", seer_tfrecords,
        "--target_path", seer_models,
        "--epochs", 5,
        "--batch", 10,
        "--lr", 0.001,
    ],
)

## Test Run

In [22]:
# Assemble the three steps (fetch, prepare, train) into one pipeline.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[fetchStep, prepStep, trainStep],
)

In [23]:
# Submit the pipeline under the 'seer' experiment, then open the run widget
# to follow its progress.
seer_experiment = Experiment(ws, 'seer')
pipeline_run1 = seer_experiment.submit(pipeline1)

run_widget = RunDetails(pipeline_run1)
run_widget.show()

Created step Data Fetch [0ed90c24][e6a0a414-8e3d-42f4-9c2a-9c08fe471f34], (This step will run and generate new outputs)
Created step Data Preparation [7457464d][d784a97a-6ff0-478e-a725-c497ec9b6e5f], (This step will run and generate new outputs)
Created step Model Training [dca2f414][3d2773bb-5549-4337-9360-40917e8e4f8d], (This step will run and generate new outputs)
Submitted pipeline run: aa5cf36d-72ee-421d-81b5-980d094480e1


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [25]:
# Publish the pipeline so it can be invoked again later; the returned handle
# is kept in `published_pipeline1`.
published_pipeline1 = pipeline1.publish(
    description="Sub optimal approach to generating Seer AI",
    name="Sub Optimal Seer",
)