In [1]:
import os

from azureml.core import Workspace, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from azureml.pipeline.wrapper import Module, dsl
from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path
# Load the workspace from the local Azure ML configuration file.
# To target another workspace, call Workspace.get(name=..., subscription_id=..., resource_group=...)
# with identifiers read from configuration instead of hard-coding them in the notebook.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Prepare compute: reuse the named cluster when it exists, otherwise provision it.
pipeline_compute = "gpu-cluster"
try:
    aml_compute = AmlCompute(ws, pipeline_compute)
    print("Found existing compute target: {}".format(pipeline_compute))
except ComputeTargetException:
    # Only a failed compute-target lookup triggers provisioning. Other errors
    # (authentication, networking, Ctrl-C) propagate instead of being mistaken
    # for "cluster not found".
    print("Creating new compute target: {}".format(pipeline_compute))

    # NOTE(review): STANDARD_D2_V2 is a CPU VM size although the target is named
    # "gpu-cluster", and min_nodes=1 keeps one node allocated while idle -- confirm
    # both are intended.
    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                                min_nodes=1,
                                                                max_nodes=4)
    aml_compute = ComputeTarget.create(ws, pipeline_compute, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.scriptrun = azureml.core.script_run:ScriptRun._from_run_dto with exception (azure-mgmt-network 11.0.0 (c:\users\biyi\anaconda3\envs\aml\lib\site-packages), Requirement.parse('azure-mgmt-network~=10.0')).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.PipelineRun = azureml.pipeline.core.run:PipelineRun._from_dto with exception (azure-mgmt-network 11.0.0 (c:\users\biyi\anaconda3\envs\aml\lib\site-packages), Requirement.parse('azure-mgmt-network~=10.0'), {'azureml-core'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.ReusedStepRun = azureml.pipeline.core.run:StepRun._from_reused_dto with exception (azure-mgmt-network 11.0.0 (c:\users\biyi\anaconda3\envs\aml\lib\site-packages), Requirement.parse('azure-mgmt-network~=10.0'), {'azureml-core'}).
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.StepRun

DesignerTest-EUS
DesignerTestRG
eastus
4faaaf21-663f-4391-96fd-47197c630979
Found existing compute target: gpu-cluster


In [2]:
# prepare module functions
base_module_dir = os.path.join('.', 'modules', 'train-score-eval')
MODULE_NAMESPACE = 'microsoft.com/aml/samples'


def _load_or_register_module(workspace, name, yaml_file_name):
    """Return the module function for ``name``, registering it from YAML if loading fails.

    Args:
        workspace: workspace passed to ``Module.load`` / ``Module.register``.
        name: module name looked up under ``MODULE_NAMESPACE``.
        yaml_file_name: file name of the module spec inside ``base_module_dir``,
            used only when loading fails.

    Returns:
        Whatever ``Module.load`` or ``Module.register`` returns.
    """
    try:
        return Module.load(workspace, namespace=MODULE_NAMESPACE, name=name)
    except Exception:
        # Any load failure falls back to registration (presumably the module is not
        # registered yet -- the specific exception type is not visible here).
        return Module.register(workspace, yaml_file=os.path.join(base_module_dir, yaml_file_name))


train_module_func = _load_or_register_module(ws, 'MPI Train', 'mpi_train.yaml')
score_module_func = _load_or_register_module(ws, 'Score', 'score.yaml')
eval_module_func = _load_or_register_module(ws, 'Evaluate', 'evaluate.yaml')
compare_module_func = _load_or_register_module(ws, 'Compare 2 Models', 'compare2.yaml')

In [3]:
# prepare dataset
training_data_name = 'aml_module_training_data'

# Read the workspace's registered datasets once and reuse the mapping for both the
# membership test and the lookup (each access of ws.datasets presumably queries the service).
registered_datasets = ws.datasets

if training_data_name not in registered_datasets:
    print('Registering a training dataset for sample pipeline ...')
    train_data = Dataset.File.from_files(path=['https://dprepdata.blob.core.windows.net/demo/Titanic.csv'])
    # Keep the object returned by register() so that train_data is the registered
    # dataset on the first run as well, matching what the else-branch yields on later runs.
    train_data = train_data.register(workspace=ws,
                                     name=training_data_name,
                                     description='Training data (just for illustrative purpose)')
    print('Registerd')
else:
    train_data = registered_datasets[training_data_name]
    print('Training dataset found in workspace')

# Test data is looked up by name/path through the wrapper's global-dataset helper.
test_data = get_global_dataset_by_path(ws, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)')

Training dataset found in workspace


In [4]:
@dsl.pipeline(name='A sub pipeline including train/score/eval',
              description='train model and evaluate model perf')
def training_pipeline(input_data, test_data, learning_rate):
    """Sub-pipeline chaining the train, score and evaluate modules.

    Args:
        input_data: forwarded to the train module as ``training_data``.
        test_data: forwarded to the score module as ``test_data``.
        learning_rate: forwarded to the train module as ``learning_rate``.

    Returns:
        dict with ``'eval_output'`` (the evaluate module's ``eval_output``) and
        ``'model_output'`` (the train module's ``model_output``).
    """
    # NOTE(review): local names (including `eval`, which shadows the builtin) are kept
    # unchanged because the DSL may derive node names from them -- confirm before renaming.
    train = train_module_func(
        training_data=input_data,
        max_epochs=5,
        learning_rate=learning_rate,
    )

    # Run settings for the train step: 2 processes per node on 2 nodes.
    train.runsettings.configure(process_count_per_node=2, node_count=2)

    score = score_module_func(
        model_input=train.outputs.model_output,
        test_data=test_data,
    )

    eval = eval_module_func(scoring_result=score.outputs.score_output)

    return {
        'eval_output': eval.outputs.eval_output,
        'model_output': train.outputs.model_output,
    }

@dsl.pipeline(name = 'A dummy pipeline that trains multiple models and output the best one',
              description = 'select best model trained with different learning rate',
              default_compute_target = pipeline_compute)
def dummy_automl_pipeline():
    """Top-level pipeline: run ``training_pipeline`` twice and compare the results.

    Uses the notebook-level ``train_data`` and ``test_data`` as inputs to both
    sub-pipelines, with learning rates 0.01 and 0.02.

    Returns:
        dict with ``'best_model'``, the compare module's ``best_model`` output.
    """
    # Two instances of the same sub-pipeline, differing only in learning rate.
    # NOTE(review): local names are left untouched (including the "evalute" spelling)
    # because the DSL may derive node names from them -- confirm before renaming.
    train_and_evalute_model1 = training_pipeline(train_data, test_data, 0.01)
    train_and_evalute_model2 = training_pipeline(train_data, test_data, 0.02)

    # Feed each sub-pipeline's model and evaluation result into the compare module.
    compare = compare_module_func(
        model1=train_and_evalute_model1.outputs.model_output,
        eval_result1=train_and_evalute_model1.outputs.eval_output,
        model2=train_and_evalute_model2.outputs.model_output,
        eval_result2=train_and_evalute_model2.outputs.eval_output
    )

    return {'best_model': compare.outputs.best_model}


In [5]:
# Build the pipeline object by calling the decorated pipeline function.
pipeline = dummy_automl_pipeline()
# Validate the pipeline; as the cell's last expression, the returned result is displayed.
pipeline.validate()


<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_87e9715e-37f9-4adc-bfc6-1ca437213537_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [6]:
# Export the pipeline to the ./data directory (relative to the notebook's working directory).
# As the cell's last expression, the value returned by export() is displayed.
pipeline.export(directory=os.path.join('.', 'data'))

Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\a_sub_pipeline_including_train_score_eval.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\modules\mpi_train.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\modules\score.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\modules\evaluate.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\modules\compare_2_models.yaml
Successfully dump yaml file at .\data\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one\datasets\aml_module_training_data.ya

'.\\data\\a_dummy_pipeline_that_trains_multiple_models_and_output_the_best_one'