In [2]:
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import ScriptRunConfig
from azureml.core import Model
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core.graph import PipelinePrameter

from azureml.exceptions import ComputeTargetException

# own modules
import sys
sys.path.append('../')
from config.config_training import script_params_remote

In [3]:
# important parameters
compute_name = 'dp-100-v2'
source_directory = '../src'
remote = False

# 1. Configure Workspace

In [4]:
ws = Workspace.from_config('../config/config.json')

# 2. Configure Compute Target

In [9]:
if remote:
    try:
        comput_target = ComputeTarget(workspace=ws, name= compute_name)
        print('Comput target exists!')
    except:
        config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_VS2'
                                                              ,vm_priority='lowpriority'
                                                              ,min_nodes=0
                                                              , max_nodes=1)
        compute_target = ComputeTarget.create(workspace=ws, name=compute_name, provisioning_configuration=config)
        compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
else:
    compute_target = 'local'

# 3. Configurate Dependencies

In [6]:
if remote:
    env = Environment.from_conda_specification(name = 'sklearn-env', file_path = '../config/conda_dependencies.yml')
else:
    env = Environment('user-managed-env')
    env.python.user_managed_dependencies = True

# 4. Get Data

Run this in conda shell before starting jupyter lab: __export LC_ALL=en_US.UTF-8__

In [7]:
tab_ds = Dataset.get_by_name(ws, 'diabetes dataset')
script_params_remote.extend(['--ds', tab_ds.as_named_input('diabetes_dataset')])

# 5. Configure Pipeline

In [None]:
# Define PipelineData object to store data between steps
data_store = ws.get_default_datastore()
prepped_data = PipelineData('prepped', datastore=data_store)

# Add Arguments for the scripts
script_params_remote.extend(['--out_folder', prepped_data])
script_params_remote.extend(['--in_folder', prepped_data])

In [None]:
# Define Pipeline Parameter that is changable during interference
reg_param = PipelineParameter(name='reg_param', default_value=0.01)
script_params_remote.extend(['--reg_param', reg_param])

In [23]:
step_1 = PythonScriptStep(name='preperation'
                          ,source_directory=source_directory
                          ,script_name='preperation.py'
                          ,compute_target=compute_target
                          ,arguments=script_params_remote
                          ,outputs=[prepped_data]
                         )

In [None]:
step_2 = PythonScriptStep(name='training'
                          ,source_directory=source_directory
                          ,script_name='training.py'
                          ,compute_target=compute_target
                          ,inputs=[prepped_data]
                          # Allow step reuse
                          allow_reuse = True
                         )

In [24]:
train_pipeline = Pipeline(workspace=ws, steps=[step_1, step_2])

# 6. Configure Experiment

In [None]:
experiment = Experiment(workspace=ws, name'pipeline_test_v1')

# 7. Run Experiment und Monitor Training

In [25]:
pipeline_run = experiment.submit(train_pipeline)
pipeline_run.wait_for_completion(show_output=True)

RunId: test_v5_1612218008_453d7884
Web View: https://ml.azure.com/experiments/test_v5/runs/test_v5_1612218008_453d7884?wsid=/subscriptions/c2cbd114-566e-42ab-a9f3-ae8000df45a0/resourcegroups/certification_dp100/workspaces/dp_100_v2

Streaming azureml-logs/70_driver_log.txt

[2021-02-01T22:20:18.892933] Entering context manager injector.
[context_manager_injector.py] Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError'], invocation=['src/train.py', '--param_1', '0.8', '--remote_execution', 'True', '--path_trained_model', './trained_models/', '--ds', '326c9866-03cb-4a7c-b329-f029c8530614'])
Script type = None
Starting the daemon thread to refresh tokens in background for process with pid = 5580
Entering Run History Context Manager.
[2021-02-01T22:20:24.770482] Current directory: /private/var/folders/2x/5phlmdsd0td5pb7srj3fpqth0000gn/T/azureml_runs/test_v5_

{'runId': 'test_v5_1612218008_453d7884',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2021-02-01T22:20:16.68975Z',
 'endTimeUtc': '2021-02-01T22:21:31.625676Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '6eb6c8bb-547b-499a-a64c-4150aba3ad0b',
  'azureml.git.repository_uri': 'https://github.com/carlov93/azure_dp_100.git',
  'mlflow.source.git.repoURL': 'https://github.com/carlov93/azure_dp_100.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': '3fd65c16ec916106ec4981e5f5517af4b90acd9c',
  'mlflow.source.git.commit': '3fd65c16ec916106ec4981e5f5517af4b90acd9c',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': '326c9866-03cb-4a7c-b329-f029c8530614'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'diabetes_dataset', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'src/train.py',
  'command': '',
  'useAbsolutePath': False,
  '

# 8. Publish a Pipeline

In [None]:
published_pipeline = train_pipeline.publish(name='training_pipeline'
                                           ,description='first try'
                                           ,version='1.0')

# 9. Add trained motel to workspace

In [22]:
model = Model.register(workspace=ws
                       ,model_path='../trained_models/diabetes_model.pkl'
                       ,model_name='Diabetes_Classifier'
                       ,description='First trained model with Azure ML'
                       ,tags={'data-format': 'csv'}
                       ,properties={'Accuracy': run.get_metrics()['Accuracy']}
                       ,model_framework=Model.Framework.SCIKITLEARN  # Framework used to create the model.
                       ,model_framework_version='0.23.2'
                       ,datasets=[('training_data', tab_ds)]
                       )

KeyError: 'Accuracy'