Copyright (c) Microsoft Corporation. All rights reserved. 
Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/NotebookVM/how-to-use-azureml/machine-learning-pipelines/parallel-run/file-dataset-image-inference-mnist.png)

In [72]:
from azureml.core import Workspace

# Connect to the Azure ML workspace using the settings that
# Workspace.from_config() discovers; `ws` is reused by the cells below.
ws = Workspace.from_config()

In [73]:
import os
from azureml.core.compute import AmlCompute, ComputeTarget

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpu-cluster")
# Environment variables are always strings, so convert the node counts to int;
# otherwise an override would hand e.g. "4" instead of 4 to the provisioning call.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

# Read the workspace's compute targets once and reuse the result below.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # The name is taken by something that is not an AmlCompute cluster.
        # The target is still used (as before), but no longer silently.
        print('warning: compute target ' + compute_name
              + ' is not an AmlCompute cluster: ' + type(compute_target).__name__)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. cpu-cluster


In [75]:
# Folder and file name of the script that the pipeline step will run.
scripts_folder = "Code"
dataset_script_file = "create_file_dataset.py"

# Print the script's source so the reader can see what the step executes.
script_path = os.path.join(scripts_folder, dataset_script_file)
with open(script_path) as script_file:
    script_source = script_file.read()
print(script_source)

import argparse
from azureml.core import Dataset, Run
from azureml.core.dataset import Dataset
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.pipeline.core import PublishedPipeline

parser = argparse.ArgumentParser()
parser.add_argument("--process_folder_param", type=str, help="process folder path")
args = parser.parse_args()

run = Run.get_context()
ws = run.experiment.workspace

def_data_store = ws.get_default_datastore()
mnist_ds_name = 'mnist_version_10_ds_'+ args.process_folder_param
print(mnist_ds_name)

path_on_datastore = def_data_store.path(args.process_folder_param)
input_mnist_ds = Dataset.File.from_files(path=path_on_datastore, validate=False)

# input_mnist_ds = input_mnist_ds.register(workspace=ws,
#                                  name= mnist_ds_name,
#                                  description='mnist images')

experiment = Experiment(ws, 'digit_identification')
published_pipeline = PublishedPipeline.get(workspace=ws, id="880353cd

In [76]:
from azureml.core import Environment
from azureml.core.runconfig import CondaDependencies, DEFAULT_CPU_IMAGE

# Environment the pipeline step runs in: a Docker container based on the
# default CPU image.
batch_env = Environment(name="aml_environment")
batch_env.docker.enabled = True
batch_env.docker.base_image = DEFAULT_CPU_IMAGE

# Pip packages installed into the environment for the step's script.
batch_conda_deps = CondaDependencies.create(
    pip_packages=["azureml-core", "azureml-pipeline"],
)
batch_env.python.conda_dependencies = batch_conda_deps

In [77]:
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.core import PipelineData
from azureml.core import RunConfiguration

# Run configuration for the step; it executes inside `batch_env`.
run_config = RunConfiguration()
run_config.environment = batch_env

# Pipeline parameter forwarded to the script's --process_folder_param argument.
process_folder_param = PipelineParameter(
    name="process_folder_param",
    default_value="mnist/version_1",
)

# Single step that runs `dataset_script_file` from `scripts_folder` on the
# compute target.
inference_step = PythonScriptStep(
    name="single-inference",
    script_name=dataset_script_file,
    source_directory=scripts_folder,
    arguments=["--process_folder_param", process_folder_param],
    compute_target=compute_target,
    runconfig=run_config,
)

In [78]:
from azureml.core import Experiment
from azureml.pipeline.core.pipeline import Pipeline

# Build a one-step pipeline and submit it under the
# 'digit_identification' experiment.
experiment = Experiment(workspace=ws, name='digit_identification')
pipeline = Pipeline(
    workspace=ws,
    steps=[inference_step],
)
pipeline_run = experiment.submit(pipeline)

Created step single-inference [3bdde023][40d38723-ed2f-40be-9c96-f392618ab4d9], (This step will run and generate new outputs)
Submitted PipelineRun 67e3ea25-9375-45cf-8df0-dd7a6bda36f7
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/digit_identification/runs/67e3ea25-9375-45cf-8df0-dd7a6bda36f7?wsid=/subscriptions/d661a889-c8b8-41f2-93ab-99b3ed99b6e7/resourcegroups/AMLPoC/workspaces/amlpocmlws


### Monitor the run

The pipeline run status can be checked in the Azure Machine Learning portal (https://ml.azure.com). The link to the pipeline run can be retrieved by inspecting the `pipeline_run` object.

In [62]:
# Evaluating the run object as the cell's last expression displays a summary
# of the pipeline run, including the link to its details page in the portal.
pipeline_run

Experiment,Id,Type,Status,Details Page,Docs Page
digit_identification,f9ce4d75-688b-4311-a1d3-8ebc1f94c840,azureml.PipelineRun,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


### Optional: View detailed logs (streaming) 

In [34]:
# Wait for the pipeline run to complete and show its output log in the console.
pipeline_run.wait_for_completion(show_output=True)

PipelineRunId: ab497d76-e504-4937-b0a0-6fc26137824e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/digit_identification/runs/ab497d76-e504-4937-b0a0-6fc26137824e?wsid=/subscriptions/d661a889-c8b8-41f2-93ab-99b3ed99b6e7/resourcegroups/AMLPoC/workspaces/amlpocmlws
PipelineRun Status: Running


Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment






PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'ab497d76-e504-4937-b0a0-6fc26137824e', 'status': 'Completed', 'startTimeUtc': '2020-12-17T14:48:55.844577Z', 'endTimeUtc': '2020-12-17T15:07:15.18294Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{"batch_size_param":"5","process_count_param":"2"}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://amlpocwsstorage.blob.core.windows.net/azureml/ExperimentRun/dcid.ab497d76-e504-4937-b0a0-6fc26137824e/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=6iWTXU5jT9OuZd9SloQT4oLLTDM%2B8hT%2FkdcbDZEH1c8%3D&st=2020-12-17T14%3A39%3A20Z&se=2020-12-17T22%3A49%3A20Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://amlpocwsstorage.blob.core.windows.net/azureml/ExperimentRun/dcid.ab497d76-e504-4937-b0a0-6fc26137824e/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=TI3UDTFv%2B%2FyKtMHuJlHomZ43O%2BSLfdk

'Finished'

## Cleanup Compute resources

For recurring jobs, it may be wise to keep the compute resources and allow the compute nodes to scale down to 0. However, since this is just a single-run job, we are free to release the allocated compute resources.

In [None]:
# WARNING: this call is live (not commented out) and deletes the compute target.
# Run this cell only if the compute resources are no longer needed.
compute_target.delete() 