## Part 5 - Batch Scoring

**Connect to workspace**

In [1]:
from azureml.core import Workspace

# Workspace handle used by the later cells to look up the datastore, dataset,
# compute target and registered model, and to create the pipeline/experiment.
workspace = Workspace.from_config()

**Retrieve datastore, dataset, compute target, and model**

In [12]:
# Look up the assets this notebook needs; each indexed lookup is by name and
# will raise if the name is not present in the workspace.
# NOTE(review): presumably these were registered in earlier parts of the workshop - confirm.
datastore = workspace.get_default_datastore()        # target store for the pipeline output
cifar_dataset = workspace.datasets['CIFAR-10 Dataset']  # input data to score
compute_target = workspace.compute_targets['d3v2cluster']  # cluster that runs the batch step
model = workspace.models['cifar-classifier']         # model passed to the scoring step

**Retrieve registered model** (already done in the cell above: `model` is the registered `cifar-classifier`)

**Define output directory**

In [7]:
from azureml.pipeline.core import Pipeline, PipelineData

# Named pipeline output for the batch predictions. The name 'cifarinferences'
# is the key used later to fetch the output of the finished step.
output_dir = PipelineData(
    name='cifarinferences',
    datastore=datastore,
    output_path_on_compute='cifar/results',
)

**Create pipeline config to wrap scoring script**

In [8]:
# Show the scoring script that the parallel run config below uses as its entry script.
%pycat batch/batch-score.py

In [23]:
from azureml.core import Environment
from azureml.core.runconfig import CondaDependencies, DEFAULT_CPU_IMAGE
from azureml.contrib.pipeline.steps import ParallelRunStep, ParallelRunConfig

# Pinned pip packages for the scoring environment.
dependencies = CondaDependencies.create(
    pip_packages=[
        'torch==1.4.0',
        'torchvision==0.5.0',
        'Pillow==6.2.0',
    ]
)

# Docker-enabled environment on the default CPU base image, carrying the
# dependencies declared above.
batch_env = Environment(name='batch_environment')
batch_env.docker.enabled = True
batch_env.docker.base_image = DEFAULT_CPU_IMAGE
batch_env.python.conda_dependencies = dependencies

# Wraps batch/batch-score.py so it can run across 4 nodes of the compute target.
# NOTE(review): mini_batch_size / error_threshold semantics are defined by
# ParallelRunConfig - confirm the units against the SDK reference.
parallel_run_config = ParallelRunConfig(
    source_directory='batch',
    entry_script='batch-score.py',
    environment=batch_env,
    compute_target=compute_target,
    node_count=4,
    mini_batch_size='5',
    error_threshold=10,
    output_action='append_row',
)

**Create pipeline step**

In [24]:
# Expose the CIFAR dataset to the step under the input name 'cifardataset'.
scoring_input = cifar_dataset.as_named_input('cifardataset')

# Single pipeline step: runs the parallel scoring config over the dataset with
# the registered model and writes its results to the pipeline output.
parallel_run_step = ParallelRunStep(
    name='cifar-batch-prediction',
    parallel_run_config=parallel_run_config,
    inputs=[scoring_input],
    output=output_dir,
    models=[model],
    arguments=[],
    allow_reuse=True,
)

**Run batch scoring pipeline**

In [39]:
from azureml.core import Experiment

# Build a one-step pipeline (Pipeline was imported in the output-directory cell)
# and submit it under the 'cifar-batch-scoring' experiment. The returned
# pipeline_run is what the following cells monitor and read results from.
pipeline = Pipeline(workspace=workspace, steps=[parallel_run_step])
experiment = Experiment(workspace, 'cifar-batch-scoring')
pipeline_run = experiment.submit(pipeline)

Created step cifar-batch-prediction [511c8069][3c1077bc-0b5c-4bc7-b7d5-a52de0019190], (This step will run and generate new outputs)
Using data reference cifardataset_0 for StepId [5aacde26][39a5a7fb-5aa5-41ce-a172-e4a0ca93253a], (Consumers of this data are eligible to reuse prior runs.)
Submitted PipelineRun 96a905db-907f-446a-9750-5d9e593a923b
Link to Azure Machine Learning studio: https://ml.azure.com/experiments/cifar-batch-scoring/runs/96a905db-907f-446a-9750-5d9e593a923b?wsid=/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourcegroups/john/workspaces/uae-workshop


In [40]:
from azureml.widgets import RunDetails
# Render the notebook widget for the pipeline run submitted above.
RunDetails(pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

**View prediction results**

In [41]:
import os
import shutil

import pandas as pd

# Download the output of the batch-scoring step and load the predictions into
# a DataFrame with one row per scored file.

# remove previous run results, if present
shutil.rmtree('cifar_results', ignore_errors=True)

# The pipeline has a single step, so its first child run is the batch step.
batch_run = next(pipeline_run.get_children())
batch_output = batch_run.get_output_data('cifarinferences')
batch_output.download(local_path='cifar_results')

# Locate the results file inside the downloaded tree. Start from None so that
# a value left over from an earlier execution of this cell is never reused;
# if several files match, the last one visited wins (as before).
result_file = None
for root, _dirs, files in os.walk('cifar_results'):
    for file in files:
        if file.endswith('parallel_run_step.txt'):
            result_file = os.path.join(root, file)

if result_file is None:
    raise FileNotFoundError(
        "No file ending in 'parallel_run_step.txt' was found under 'cifar_results'; "
        "check that the pipeline run completed and produced output."
    )

# Each line is split on ':' into filename and prediction. Values are kept
# exactly as read, so a prediction may carry the whitespace that followed the
# colon (e.g. ' airplane') - apply .str.strip() before comparing labels.
df = pd.read_csv(result_file, delimiter=":", header=None)
df.columns = ['Filename', 'Prediction']
df.head(10)

Unnamed: 0,Filename,Prediction
0,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,airplane
1,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,airplane
2,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,airplane
3,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,airplane
4,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,airplane
5,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,ship
6,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,ship
7,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,ship
8,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,ship
9,/mnt/batch/tasks/shared/LS_root/jobs/uae-works...,ship


In [45]:
# Inspect the first row. Only the last expression of a cell is rendered, so
# the Filename lookup is evaluated but its value is not shown.
df['Filename'].iloc[0]
# The displayed prediction has a leading space (' airplane');
# presumably the file uses ': ' between fields while the frame was split on ':' - confirm.
df['Prediction'].iloc[0]

' airplane'