In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.pipeline.steps import AutoMLStep
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core import PipelineData, TrainingOutput, Pipeline
from azureml.widgets import RunDetails
import json
import pickle

In [None]:
# Names used throughout the notebook.
experiment_name = 'RoadSafety-automl'
project_folder = './pipeline-project'

# Connect to the Azure ML workspace and open the experiment that will
# collect every pipeline run submitted below.
ws = Workspace.get(name='MLDEV')
experiment = Experiment(workspace=ws, name=experiment_name)
experiment

In [None]:
amlcompute_cluster_name = 'automl'

# Reuse the cluster if it already exists in the workspace; otherwise
# provision a new one with the configuration below.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=amlcompute_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing cluster')

# Block until the cluster is ready (runs for both the found and created case).
compute_target.wait_for_completion(show_output=True)

In [None]:
key = 'RoadSafety'

# Look the registered datasets up once and fail fast with a clear message.
# Previously a missing dataset left `dataset` unbound and the cell died
# further down with an unrelated NameError.
registered_datasets = ws.datasets
found = key in registered_datasets
if not found:
    raise LookupError(
        "Dataset '{}' is not registered in the workspace".format(key)
    )
dataset = registered_datasets[key]

# Materialise the dataset locally for a quick summary.
df = dataset.to_pandas_dataframe()
df.describe()

In [None]:
# Run limits and optimisation target for the AutoML experiment.
automl_settings = {
    "experiment_timeout_minutes": 20,
    # Must not exceed the number of cluster nodes; the cluster provisioned
    # in this notebook is created with max_nodes=4.
    "max_concurrent_iterations": 4,
    # 'accuracy' is a classification metric and is not valid for a
    # regression task; use a supported regression metric instead.
    "primary_metric": "normalized_root_mean_squared_error"
}

# Regression on the casualty count, trained remotely on `compute_target`.
automl_config = AutoMLConfig(compute_target = compute_target,
                            task = 'regression',
                            training_data = dataset,
                            label_column_name = 'Number_of_Casualties',
                            path = project_folder,
                            enable_early_stopping = True,
                            featurization = 'auto',
                            **automl_settings)

In [None]:
# Pipeline outputs are written to the workspace's default datastore.
ds = ws.get_default_datastore()

# Names under which the two outputs are retrieved from the finished run.
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

# Output holding the metrics produced by the AutoML step.
metrics_data = PipelineData(
    name='metrics_data',
    datastore=ds,
    pipeline_output_name=metrics_output_name,
    training_output=TrainingOutput(type='Metrics'),
)

# Output holding the model produced by the AutoML step.
model_data = PipelineData(
    name='model_data',
    datastore=ds,
    pipeline_output_name=best_model_output_name,
    training_output=TrainingOutput(type='Model'),
)

# Single AutoML step wired to both outputs.
automl_step = AutoMLStep(
    name='automl_module',
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=True,
)

pipeline = Pipeline(
    description='RoadSafety_autoML',
    workspace=ws,
    steps=[automl_step],
)

In [None]:
# Submit the pipeline to the experiment; the returned run handle is used by
# the cells below to monitor the run and fetch its outputs.
pipeline_run = experiment.submit(pipeline)

In [None]:
# Show the live monitoring widget for the submitted run.
RunDetails(pipeline_run).show()

# The widget does not block, so under "Run All" the download cells below
# would execute before any output exists. Wait for the run to finish first.
pipeline_run.wait_for_completion(show_output=True)

In [None]:
# Fetch the metrics output of the finished run and download it, passing '.'
# as the download target.
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress=True)

# NOTE(review): `_path_on_datastore` is a private attribute; it is used here
# as the local relative path of the downloaded file. Confirm this still
# holds when upgrading the SDK.
with open(metrics_output._path_on_datastore) as metrics_file:
    metrics_output_result = metrics_file.read()

# Parse the JSON payload and show it as a table.
deserialized_metrics_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrics_output)
df

In [None]:
# Fetch the model output of the finished run and download it, passing '.'
# as the download target.
best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress=True)

# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# unpickle artifacts produced by a run and workspace you trust.
# NOTE(review): `_path_on_datastore` is a private attribute used here as the
# local relative path of the download; confirm on SDK upgrades.
with open(best_model_output._path_on_datastore, 'rb') as model_file:
    best_model = pickle.load(model_file)
best_model

In [None]:
# Display the `steps` attribute of the unpickled model
# (presumably a scikit-learn style pipeline; verify against the model type).
best_model.steps