In [1]:
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.pipeline.core import Pipeline
from azureml.data.data_reference import DataReference

from modules.ingestion.data_ingestion_step import data_ingestion_step
from modules.preprocess.data_preprocess_step import data_preprocess_step
from modules.train.train_step import train_step
from modules.evaluate.evaluate_step import evaluate_step
from modules.deploy.deploy_step import deploy_step

# Aliases for the step factories. The step cells below rebind the plain names
# (e.g. `train_step`) to the objects the factories return, so the factories are
# also imported under `build_*` names that are never reassigned.
from modules.ingestion.data_ingestion_step import data_ingestion_step as build_data_ingestion_step
from modules.preprocess.data_preprocess_step import data_preprocess_step as build_data_preprocess_step
from modules.train.train_step import train_step as build_train_step
from modules.evaluate.evaluate_step import evaluate_step as build_evaluate_step
from modules.deploy.deploy_step import deploy_step as build_deploy_step

# Connect to the Azure ML workspace (Workspace.from_config resolves it from the
# saved workspace configuration) and look up its default datastore.
# Both names are consumed by later cells.
print('Connecting to Workspace ...')
workspace = Workspace.from_config()
datastore = workspace.get_default_datastore()

Connecting to Workspace ...
If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [2]:
# Provision the CPU cluster; later cells pass `cpu_compute_target` to the
# ingestion, preprocessing and deployment steps.
print('Creating CPU compute target ...')
cpu_cluster_name = 'ds3cluster'

# Cluster sizing, kept in one place so it is easy to tune.
cpu_cluster_settings = {
    'vm_size': 'STANDARD_DS3_V2',
    'min_nodes': 0,
    'max_nodes': 2,
    'idle_seconds_before_scaledown': 1200,
}
cpu_compute_config = AmlCompute.provisioning_configuration(**cpu_cluster_settings)

cpu_compute_target = ComputeTarget.create(
    workspace,
    cpu_cluster_name,
    cpu_compute_config,
)
# Block until provisioning has finished, echoing progress into the cell output.
cpu_compute_target.wait_for_completion(show_output=True)

Creating CPU compute target ...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
# Provision the GPU cluster; later cells pass `gpu_compute_target` to the
# training and evaluation steps.
print('Creating GPU compute target ...')
gpu_cluster_name = 'k80cluster'

# Cluster sizing, kept in one place so it is easy to tune.
gpu_cluster_settings = {
    'vm_size': 'Standard_NC6',
    'min_nodes': 0,
    'max_nodes': 2,
    'idle_seconds_before_scaledown': 1200,
}
gpu_compute_config = AmlCompute.provisioning_configuration(**gpu_cluster_settings)

gpu_compute_target = ComputeTarget.create(
    workspace,
    gpu_cluster_name,
    gpu_compute_config,
)
# Block until provisioning has finished, echoing progress into the cell output.
gpu_compute_target.wait_for_completion(show_output=True)

Creating GPU compute target ...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Get datastore reference
# Build the mounted DataReference from a freshly fetched default datastore
# rather than from the current value of `datastore`: this cell rebinds
# `datastore`, so wrapping the existing value would nest a DataReference inside
# another DataReference if the cell were executed a second time.
# The name `datastore` is kept because the ingestion cell reads it.
datastore = DataReference(workspace.get_default_datastore(), mode='mount')

In [5]:
# Step 1: Data ingestion
# The factory is called through its `build_data_ingestion_step` import alias.
# This cell rebinds `data_ingestion_step` to the step object the factory
# returns, so calling the plain name would invoke that object instead of the
# factory if the cell were executed again.
# Inputs: the datastore reference and the CPU cluster.
# Outputs: the step (added to the pipeline later) and a mapping of its outputs.
data_ingestion_step, data_ingestion_outputs = build_data_ingestion_step(
    datastore,
    cpu_compute_target,
)

In [6]:
# Step 2: Data preprocessing
# The factory is called through its `build_data_preprocess_step` import alias.
# This cell rebinds `data_preprocess_step` to the step object the factory
# returns, so calling the plain name would invoke that object instead of the
# factory if the cell were executed again.
# Inputs: the 'raw_data_dir' output of the ingestion step and the CPU cluster.
# Outputs: the step and a mapping of its outputs (later cells read
# 'train_dir', 'valid_dir' and 'test_dir' from it).
data_preprocess_step, data_preprocess_outputs = build_data_preprocess_step(
    data_ingestion_outputs['raw_data_dir'],
    cpu_compute_target,
)

In [7]:
# Step 3: Train Model
# The factory is called through its `build_train_step` import alias. This cell
# rebinds `train_step` to the step object the factory returns, so calling the
# plain name would invoke that object instead of the factory if the cell were
# executed again.
# Inputs: the 'train_dir' and 'valid_dir' preprocessing outputs, the GPU
# cluster and the workspace.
# Outputs: the step and a mapping of its outputs (later cells read 'model_dir').
train_step, train_outputs = build_train_step(
    data_preprocess_outputs['train_dir'],
    data_preprocess_outputs['valid_dir'],
    gpu_compute_target,
    workspace,
)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [8]:
# Step 4: Evaluate Model
# The factory is called through its `build_evaluate_step` import alias. This
# cell rebinds `evaluate_step` to the step object the factory returns, so
# calling the plain name would invoke that object instead of the factory if the
# cell were executed again.
# Inputs: the 'model_dir' training output, the 'test_dir' preprocessing output,
# the GPU cluster and the workspace.
# Outputs: the step and a mapping of its outputs (the deploy cell reads
# 'accuracy_file').
evaluate_step, evaluate_outputs = build_evaluate_step(
    train_outputs['model_dir'],
    data_preprocess_outputs['test_dir'],
    gpu_compute_target,
    workspace,
)

In [9]:
# Step 5: Deploy Model
# The factory is called through its `build_deploy_step` import alias. This cell
# rebinds `deploy_step` to the step object the factory returns, so calling the
# plain name would invoke that object instead of the factory if the cell were
# executed again.
# Inputs: the 'model_dir' training output, the 'accuracy_file' evaluation
# output, the 'test_dir' preprocessing output and the CPU cluster.
# Outputs: the step and a mapping of its outputs.
deploy_step, deploy_outputs = build_deploy_step(
    train_outputs['model_dir'],
    evaluate_outputs['accuracy_file'],
    data_preprocess_outputs['test_dir'],
    cpu_compute_target,
)


In [10]:
# Assemble the five steps into a pipeline and submit it as an experiment run.
print('Submitting pipeline ...')

# Values handed to the run through `pipeline_parameters` at submission time.
pipeline_parameters = dict(
    num_images=100,
    image_dim=200,
    num_epochs=10,
    batch_size=16,
    learning_rate=0.001,
    momentum=0.9,
)

# Steps listed in the order they were built in the cells above.
pipeline_steps = [
    data_ingestion_step,
    data_preprocess_step,
    train_step,
    evaluate_step,
    deploy_step,
]
pipeline = Pipeline(workspace=workspace, steps=pipeline_steps)

experiment = Experiment(workspace, 'object-recognition-pipeline')
pipeline_run = experiment.submit(pipeline, pipeline_parameters=pipeline_parameters)


Submitting pipeline ...
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.
Created step data_ingestion.py [26474016][5c5ddd07-983a-49f6-ba92-6ccbf6c1c529], (This step is eligible to reuse a previous run's output)
Created step data_preprocess.py [4744b5ed][9c731824-6d68-4901-840a-ffc9fe6bf0d2], (This step is eligible to reuse a previous run's output)
Created step train-step [51603ddb][13405cbd-870e-41d6-95a5-8b0563d0f06c], (This step is eligible to reuse a previous run's output)
Created step evaulate-step [fa726159][c8236d51-3a55-4e9e-8c13-b2bd039c0957], (This step is eligible to reuse a previous run's output)
Created step deploy.py [f782bdf3][fd37d7ec-0741-442e-b723-15724d031cde], (This step will run and generate new outputs)
Using data reference workspaceblobstore for StepId [9a1bf7a1][2e9aceb1-5642-47cb-8b85-c04af5d94de2], (Consumers of this data are eligible to reuse prior runs.)
Submitted PipelineRun 474de9