In [3]:
import json
import logging
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter, defaultdict
from typing import List, Union, Tuple, Callable
import os
import csv
import numpy as np
from sklearn import datasets
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.pipeline.steps import AutoMLStep

# Record the installed Azure ML SDK version alongside the notebook output.
print(f"Azure SDK version {azureml.core.VERSION}")

Azure SDK version 1.19.0


In [4]:
# Connect to the workspace described by the local configuration, then echo
# the identifying attributes so the reader can confirm the target workspace.
ws = Workspace.from_config()
workspace_summary = (
    f"Workspace name {ws.name}",
    f"Workspace location {ws.location}",
    f"Workspace Subscription key {ws.subscription_id}",
    f"Workspace resource group {ws.resource_group}",
)
print(*workspace_summary, sep="\n")


Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code REJKC8FLD to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name quick-starts-ws-133109
Workspace location southcentralus
Workspace Subscription key 1b944a9b-fdae-4f97-aeb1-b7eea0beac53
Workspace resource group aml-quickstarts-133109


In [5]:
# Experiment handle under which the AutoML pipeline run is submitted.
experiment_name = "bike-count-pred"
exp = Experiment(workspace=ws, name=experiment_name)
exp

Name,Workspace,Report Page,Docs Page
bike-count-pred,quick-starts-ws-133109,Link to Azure Machine Learning studio,Link to Documentation


In [8]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "demo-cluster1"
try:
    # Prefer a compute target that already exists under this name.
    compute_target = ComputeTarget(name=cluster_name, workspace=ws)
except ComputeTargetException:
    # The lookup failed: provision a new AmlCompute target under the same name.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
compute_target

Name,Workspace,State,Location,VmSize,Application URI,Docs
demo-cluster1,quick-starts-ws-133109,Running,southcentralus,STANDARD_DS3_V2,Jupyter JupyterLab RStudio,Doc


In [10]:
# Load the registered bike-count prediction dataset from the workspace.
ds_name = "bike-dataset1"
dataset_dict = ws.datasets

# `found` records whether a dataset is registered under `ds_name`.
found = ds_name in dataset_dict
if not found:
    # Fail here with an explicit message. Otherwise `d_set` would be left
    # undefined (or stale from an earlier execution) and the failure would
    # only surface in a later cell.
    raise LookupError(
        f"Dataset not found: '{ds_name}' is not registered in workspace '{ws.name}'"
    )
d_set = dataset_dict[ds_name]


In [12]:
# Materialise the dataset as a pandas DataFrame and display summary
# statistics for its numeric columns.
df = d_set.to_pandas_dataframe()
df.describe()

Unnamed: 0,instant,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,2.997264,1.395349,0.495385,0.474354,0.627894,0.190486,848.176471,3656.172367,4504.348837
std,211.165812,1.110807,0.500342,3.451913,2.004787,0.544894,0.183051,0.162961,0.142429,0.077498,686.622488,1560.256377,1937.211452
min,1.0,1.0,0.0,1.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,2.0,20.0,22.0
25%,183.5,2.0,0.0,4.0,1.0,1.0,0.337083,0.337842,0.52,0.13495,315.5,2497.0,3152.0
50%,366.0,3.0,1.0,7.0,3.0,1.0,0.498333,0.486733,0.626667,0.180975,713.0,3662.0,4548.0
75%,548.5,3.0,1.0,10.0,5.0,2.0,0.655417,0.608602,0.730209,0.233214,1096.0,4776.5,5956.0
max,731.0,4.0,1.0,12.0,6.0,3.0,0.861667,0.840896,0.9725,0.507463,3410.0,6946.0,8714.0


In [13]:
# Preview the first two rows to check column names and value formats.
df.head(n=2)

Unnamed: 0,instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801


In [31]:
## AutoML settings, unpacked as keyword arguments into AutoMLConfig
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=4,
    primary_metric="normalized_mean_absolute_error",
    n_cross_validations=3,
)

In [32]:
# The label `cnt` is the sum of `casual` and `registered` (the first row shown
# by df.head has 331 + 654 = 985), so keeping those two columns as features
# leaks the target into training. Drop them before handing the data to AutoML.
leakage_columns = ["casual", "registered"]

# Regression AutoML run on the chosen compute target; the remaining options
# come from `automl_settings`.
automl_config = AutoMLConfig(
    compute_target=compute_target,
    task="regression",
    training_data=d_set.drop_columns(leakage_columns),
    label_column_name="cnt",
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings
)

In [33]:
## Define the outputs of the AutoML pipeline

from azureml.pipeline.core import PipelineData, TrainingOutput

# Names under which the pipeline exposes the two AutoML outputs.
metrics_output_name = "regression_metrics"
model_output_name = "regression_models"

# Both outputs are written to the workspace's default datastore.
data_store = ws.get_default_datastore()

metrics_data = PipelineData(
    name="metrics_data",
    datastore=data_store,
    pipeline_output_name=metrics_output_name,
    training_output=TrainingOutput(type="Metrics"),
)

models_data = PipelineData(
    name="models_data",
    datastore=data_store,
    pipeline_output_name=model_output_name,
    training_output=TrainingOutput(type="Model"),
)

In [34]:
## Define an AutoML pipeline step
# Wraps `automl_config` as a pipeline step whose metrics and model outputs are
# captured by the two PipelineData objects.
automl_step = AutoMLStep(
    name="automl_step",
    automl_config=automl_config,
    outputs=[metrics_data, models_data],
    # The documented keyword is `allow_reuse`; the previous spelling
    # `allow_reused` did not match any AutoMLStep parameter.
    allow_reuse=True,
)

In [35]:
# Assemble a pipeline whose only step is the AutoML step
from azureml.pipeline.core import Pipeline

automl_pipeline = Pipeline(
    workspace=ws,
    steps=[automl_step],
    description="Pipeline with automl regression step",
)

In [36]:
# Run the AutoML pipeline: submit it to the experiment `exp` and keep the
# returned run handle for monitoring and publishing.

pipeline_run = exp.submit(automl_pipeline)

Created step automl_step [f5efdb20][f3c2bc0b-d00f-4f0f-baf8-32cc5b8ae4ac], (This step will run and generate new outputs)
Submitted PipelineRun 542014ad-ff66-4d90-81f4-a616559ff0e4
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/bike-count-pred/runs/542014ad-ff66-4d90-81f4-a616559ff0e4?wsid=/subscriptions/1b944a9b-fdae-4f97-aeb1-b7eea0beac53/resourcegroups/aml-quickstarts-133109/workspaces/quick-starts-ws-133109


In [37]:
# Wait for the submitted pipeline run, streaming its output into the cell;
# the call's return value is displayed as the cell result.
pipeline_run.wait_for_completion(show_output=True)

PipelineRunId: 542014ad-ff66-4d90-81f4-a616559ff0e4
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/bike-count-pred/runs/542014ad-ff66-4d90-81f4-a616559ff0e4?wsid=/subscriptions/1b944a9b-fdae-4f97-aeb1-b7eea0beac53/resourcegroups/aml-quickstarts-133109/workspaces/quick-starts-ws-133109
PipelineRun Status: Running




PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '542014ad-ff66-4d90-81f4-a616559ff0e4', 'status': 'Completed', 'startTimeUtc': '2021-01-03T10:17:13.792469Z', 'endTimeUtc': '2021-01-03T10:44:16.439302Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://mlstrg133109.blob.core.windows.net/azureml/ExperimentRun/dcid.542014ad-ff66-4d90-81f4-a616559ff0e4/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=nMTztdOQdiFOum%2FYvh24UwDrDgy8gSw9jXfbcSuYCaA%3D&st=2021-01-03T10%3A12%3A09Z&

This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment


'Finished'

In [38]:
# Publish the pipeline behind this run so it can be triggered over REST.
published_pipe = pipeline_run.publish_pipeline(
    name="published-bike-cnt",
    description="Pipeline to perform automl on bike dataset",
    version="1.0",
)

In [39]:
# Authenticate for consuming the endpoint.
# `auth_header` is later passed as the `headers` of the HTTP request that
# triggers the published pipeline.
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

In [41]:
# Trigger the pipeline via an HTTP request.
# `rest_endpoint` is the published pipeline's endpoint URL; it is displayed
# here and used as the POST target in the next cell.
import requests

rest_endpoint = published_pipe.endpoint
rest_endpoint

'https://southcentralus.api.azureml.ms/pipelines/v1.0/subscriptions/1b944a9b-fdae-4f97-aeb1-b7eea0beac53/resourceGroups/aml-quickstarts-133109/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-133109/PipelineRuns/PipelineSubmit/9aae066a-357f-4637-a83c-8ab078367399'

In [42]:
# Name of the experiment under which the HTTP-triggered run is recorded.
exp_name = "http-triggered-pipe"

# Experiment handle for the triggered runs; not used further in this cell.
exp_pipe = Experiment(workspace=ws, name=exp_name)

# POST to the published pipeline's endpoint to start a run. requests applies
# no timeout by default, so set one explicitly to avoid blocking the kernel
# indefinitely if the service does not answer.
response = requests.post(
    rest_endpoint,
    headers=auth_header,
    json={"ExperimentName": exp_name},
    timeout=60,
)
response.status_code

200