In [1]:
# Try and do the aml exercise independently
# Create/USe a workspace

from azureml.core import Workspace

In [2]:
# Get the Workspace
ws = Workspace.from_config()

In [3]:
print(f'Name: {ws.name}, Subscription Id: {ws.subscription_id}, Resorce Group: {ws.resource_group}, Location: {ws.location}')

Name: quick-starts-ws-152995, Subscription Id: 81cefad3-d2c9-4f77-a466-99a7f541c7bb, Resorce Group: aml-quickstarts-152995, Location: southcentralus


In [4]:
# Create an experiment for the run
from azureml.core import Experiment

exp = Experiment(workspace=ws, name="my-automl-exp")

In [5]:
exp

Name,Workspace,Report Page,Docs Page
my-automl-exp,quick-starts-ws-152995,Link to Azure Machine Learning studio,Link to Documentation


In [6]:
# Next, we will create/attach a compute cluster
# Access the cluster. if non-existent, then create one
compute_cluster_name = "my-cc"

from azureml.core.compute import ComputeTarget
from azureml.exceptions import ComputeTargetException
from azureml.core.compute import AmlCompute




In [7]:
try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    # Cluster does not exist - create it
    prov_cfg = AmlCompute.provisioning_configuration(vm_size="Standard_DS3_v2", max_nodes=4, description="Pipeline cfg")

    compute_cluster = ComputeTarget.create(ws, compute_cluster_name, prov_cfg)

In [8]:
# Wait for compute cluster to be built
compute_cluster.wait_for_completion(show_output=True)

Creating......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [9]:
# We next to retrieve the Dataset
# 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'
# Once retrieved - this dataset will subsequently be used for the model

# Loop through the WorkSpace datasets and see if our datset already exists
# if not, create the dataset
from azureml.core.dataset import Dataset
uri = 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'

ds = Dataset.Tabular.from_delimited_files(uri)


In [10]:
# We next specify the Pipeline, starting with the configuration
ds

{
  "source": [
    "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ]
}

In [11]:
df = ds.to_pandas_dataframe()

In [12]:
df.describe()

Unnamed: 0,instant,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,2.997264,1.395349,0.495385,0.474354,0.627894,0.190486,848.176471,3656.172367,4504.348837
std,211.165812,1.110807,0.500342,3.451913,2.004787,0.544894,0.183051,0.162961,0.142429,0.077498,686.622488,1560.256377,1937.211452
min,1.0,1.0,0.0,1.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,2.0,20.0,22.0
25%,183.5,2.0,0.0,4.0,1.0,1.0,0.337083,0.337842,0.52,0.13495,315.5,2497.0,3152.0
50%,366.0,3.0,1.0,7.0,3.0,1.0,0.498333,0.486733,0.626667,0.180975,713.0,3662.0,4548.0
75%,548.5,3.0,1.0,10.0,5.0,2.0,0.655417,0.608602,0.730209,0.233214,1096.0,4776.5,5956.0
max,731.0,4.0,1.0,12.0,6.0,3.0,0.861667,0.840896,0.9725,0.507463,3410.0,6946.0,8714.0


In [13]:
# Register the dataset that is created
key = "Bikesharing Dataset"
description = "Bike Sharing Data"
reg_ds = ds.register(ws, name=key, description=description)

In [14]:
reg_ds

{
  "source": [
    "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "1f44d6c5-36e3-46e2-8c9f-c823af6b4b76",
    "name": "Bikesharing Dataset",
    "version": 1,
    "description": "Bike Sharing Data",
    "workspace": "Workspace.create(name='quick-starts-ws-152995', subscription_id='81cefad3-d2c9-4f77-a466-99a7f541c7bb', resource_group='aml-quickstarts-152995')"
  }
}

In [15]:
reg_df = reg_ds.to_pandas_dataframe()
reg_df.describe()

Unnamed: 0,instant,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,2.997264,1.395349,0.495385,0.474354,0.627894,0.190486,848.176471,3656.172367,4504.348837
std,211.165812,1.110807,0.500342,3.451913,2.004787,0.544894,0.183051,0.162961,0.142429,0.077498,686.622488,1560.256377,1937.211452
min,1.0,1.0,0.0,1.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,2.0,20.0,22.0
25%,183.5,2.0,0.0,4.0,1.0,1.0,0.337083,0.337842,0.52,0.13495,315.5,2497.0,3152.0
50%,366.0,3.0,1.0,7.0,3.0,1.0,0.498333,0.486733,0.626667,0.180975,713.0,3662.0,4548.0
75%,548.5,3.0,1.0,10.0,5.0,2.0,0.655417,0.608602,0.730209,0.233214,1096.0,4776.5,5956.0
max,731.0,4.0,1.0,12.0,6.0,3.0,0.861667,0.840896,0.9725,0.507463,3410.0,6946.0,8714.0


In [16]:
reg_df.columns

Index(['instant', 'date', 'season', 'yr', 'mnth', 'weekday', 'weathersit',
       'temp', 'atemp', 'hum', 'windspeed', 'casual', 'registered', 'cnt'],
      dtype='object')

In [17]:
if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 


In [18]:
print(f'{key}')

Bikesharing Dataset


In [19]:
from azureml.pipeline.core import Pipeline


In [20]:
ml_pipeline = Pipeline(
    description=description,
    workspace=ws,
    steps=[]
)

In [21]:
# We will specify the AutoMLStep in this instance
from azureml.pipeline.steps.automl_step import AutoMLStep
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import TrainingOutput
from azureml.train.automl.automlconfig import AutoMLConfig


In [22]:
project_folder = 'pipeline-bike-folder'

automl_settings={
    'experiment_timeout_minutes':20,
    'max_concurrent_iterations':5,
    'primary_metric':'normalized_root_mean_squared_error',
    'n_cross_validations':5,
}

automl_config = AutoMLConfig(
    compute_target = compute_cluster,
    task='forecasting',
    training_data=reg_ds,
    time_column_name='date',
    label_column_name='cnt',
    path=project_folder,
    enable_early_stopping=True,
    debug_log='automl_errors.log',
    **automl_settings,
)


In [26]:

ds = ws.get_default_datastore()

# Define metrics_data, model_data to be used below in
# AutoMLStep
metrics_data = PipelineData(
    name='metrics_data',
    datastore=ds,
    pipeline_output_name='metrics_output',
    training_output=TrainingOutput(type='Metrics')
)

model_data = PipelineData(
    name='model_data',
    datastore=ds,
    pipeline_output_name='best_model_output',
    training_output=TrainingOutput(type='Model')

)

automlstep = AutoMLStep(
    name="auto_ml_pipeline",
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=True
)

In [27]:
# The pipeline next needs to be submitted
ml_pipeline = Pipeline(
    description='Bike Sharing Pipeline',
    workspace=ws,
    steps=[automlstep]
)
pipeline_run = exp.submit(ml_pipeline)

Created step auto_ml_pipeline [d13ef921][fcf3c024-50ec-49bf-8261-772c768e2ad3], (This step will run and generate new outputs)
Submitted PipelineRun 5072be7c-ff2f-434c-95e9-1eb067d33374
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5072be7c-ff2f-434c-95e9-1eb067d33374?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-152995/workspaces/quick-starts-ws-152995&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254


In [28]:
from azureml.widgets import RunDetails

RunDetails(pipeline_run).show()


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [29]:
pipeline_run.wait_for_completion(show_output=True)

PipelineRunId: 5072be7c-ff2f-434c-95e9-1eb067d33374
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5072be7c-ff2f-434c-95e9-1eb067d33374?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-152995/workspaces/quick-starts-ws-152995&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254
PipelineRun Status: Running


StepRunId: aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-152995/workspaces/quick-starts-ws-152995&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254
StepRun( auto_ml_pipeline ) Status: Running

StepRun(auto_ml_pipeline) Execution Summary
StepRun( auto_ml_pipeline ) Status: Finished



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '5072be7c-ff2f-434c-95e9-1eb067d33374', 'status': 'Completed', 'startTimeUtc': '2021-07-30T06:43:59.027405Z', 'endTimeUt

'Finished'

In [31]:
assert(pipeline_run.get_status() == "Finished")

In [33]:
# Retievemetrics
metrics_output = pipeline_run.get_pipeline_output('metrics_output')

In [34]:
num_downloaded = metrics_output.download('.', show_progress=True)

Downloading azureml/aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a/metrics_data
Downloaded azureml/aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a/metrics_data, 1 files out of an estimated total of 1


In [35]:
import json

In [36]:
with open(metrics_output._path_on_datastore) as f:
    metrics_output_result=f.read()

In [37]:
deserialized_metrics_output = json.loads(metrics_output_result)

In [39]:
import pandas as pd

In [40]:
df = pd.DataFrame(deserialized_metrics_output)

In [41]:
df

Unnamed: 0,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_2,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_0,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_4,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_6,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_1,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_5,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_3,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_7,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_9,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_10,...,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_26,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_30,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_8,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_16,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_20,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_22,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_32,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_33,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_34,aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a_38
normalized_root_mean_squared_error,[0.13336401288541186],[0.07953417447378228],[0.26447434221752675],[0.08499332147579457],[2.5737099526532125e-05],[0.07603379133521793],[0.1097100782328578],[5.231791887787208e-17],[0.024588103771260687],[0.021215429913336713],...,[0.021215429913336713],[0.00011343738235796933],[4.247070633954834e-07],[0.026411520869068027],[0.026783673105838096],[0.024188663249259876],[0.00014301275789730918],[7.433784190706861e-05],[0.00010647384107473642],[1.5409804953297425e-07]
mean_absolute_error,[1159.2],[691.3110445261157],[2298.8109825547426],[738.7619502676064],[0.2237068690846172],[660.8857142857144],[953.6],[4.547473508864641e-13],[213.71979797979793],[184.4045168067227],...,[184.4045168067227],[0.9859977274554694],[0.003691553795033542],[229.56893939393936],[232.80368663594473],[210.24786096256685],[1.2430668916434116],[0.6461445218562403],[0.925470626621609],[0.0013394202465406124]
r2_score,[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.2],[0.0],[0.0],...,[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0]
normalized_mean_absolute_error,[0.13336401288541186],[0.07953417447378228],[0.26447434221752675],[0.08499332147579457],[2.5737099526532125e-05],[0.07603379133521793],[0.1097100782328578],[5.231791887787208e-17],[0.024588103771260687],[0.021215429913336713],...,[0.021215429913336713],[0.00011343738235796933],[4.247070633954834e-07],[0.026411520869068027],[0.026783673105838096],[0.024188663249259876],[0.00014301275789730918],[7.433784190706861e-05],[0.00010647384107473642],[1.5409804953297425e-07]
normalized_median_absolute_error,[0.13336401288541186],[0.07953417447378228],[0.26447434221752675],[0.08499332147579457],[2.5737099526532125e-05],[0.07603379133521793],[0.1097100782328578],[5.231791887787208e-17],[0.024588103771260687],[0.021215429913336713],...,[0.021215429913336713],[0.00011343738235796933],[4.247070633954834e-07],[0.026411520869068027],[0.026783673105838096],[0.024188663249259876],[0.00014301275789730918],[7.433784190706861e-05],[0.00010647384107473642],[1.5409804953297425e-07]
root_mean_squared_log_error,[0.6985293680914578],[0.31186941381654715],[0.7546133762185778],[0.3586573816185693],[0.00011028662632259767],[0.3214089607663372],[0.43670407984373905],[0.0],[0.09770103875253593],[0.08922462510476734],...,[0.08922462510476734],[0.0005254697934496732],[1.969182268091174e-06],[0.11646089406595621],[0.10123651218671252],[0.10317703711885215],[0.0004809562967002634],[0.00030160603604265643],[0.00038413995973787254],[7.149066021838734e-07]
root_mean_squared_error,[1159.2],[691.3110445261157],[2298.8109825547426],[738.7619502676064],[0.2237068690846172],[660.8857142857144],[953.6],[4.547473508864641e-13],[213.71979797979793],[184.4045168067227],...,[184.4045168067227],[0.9859977274554694],[0.003691553795033542],[229.56893939393936],[232.80368663594473],[210.24786096256685],[1.2430668916434116],[0.6461445218562403],[0.925470626621609],[0.0013394202465406124]
explained_variance,[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],...,[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0],[1.0]
median_absolute_error,[1159.2],[691.3110445261157],[2298.8109825547426],[738.7619502676064],[0.2237068690846172],[660.8857142857144],[953.6],[4.547473508864641e-13],[213.71979797979793],[184.4045168067227],...,[184.4045168067227],[0.9859977274554694],[0.003691553795033542],[229.56893939393936],[232.80368663594473],[210.24786096256685],[1.2430668916434116],[0.6461445218562403],[0.925470626621609],[0.0013394202465406124]
spearman_correlation,[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],...,[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0],[-1.0]


In [42]:
model_output = pipeline_run.get_pipeline_output('best_model_output')

In [43]:
num_download = model_output.download('.', show_progress=True)

Downloading azureml/aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a/model_data
Downloaded azureml/aadd9200-a29d-4b07-ad33-bfaf3a9a5d8a/model_data, 1 files out of an estimated total of 1


In [44]:
import pickle

In [45]:
with open(model_output._path_on_datastore, "rb") as f:
    best_model = pickle.load(f)

In [46]:
best_model

ForecastingPipelineWrapper(pipeline=Pipeline(memory=None,
                                             steps=[('timeseriestransformer',
                                                     TimeSeriesTransformer(country_or_region=None, drop_column_names=[], featurization_config=FeaturizationConfig(
    blocked_transformers=None,
    column_purposes=None,
    transformer_params=None,
    dataset_language=None,
    drop_columns=None
), force_time_index_features=None, freq='D', gr...
    timeseries_param_dict={'time_column_name': 'date', 'grain_column_names': None, 'drop_column_names': [], 'overwrite_columns': True, 'dropna': False, 'transform_dictionary': {'min': '_automl_target_col', 'max': '_automl_target_col', 'mean': '_automl_target_col'}, 'max_horizon': 1, 'origin_time_colname': 'origin', 'country_or_region': None, 'n_cross_validations': 5, 'short_series_handling': True, 'max_cores_per_iteration': 1, 'feature_lags': None, 'target_aggregation_function': None, 'seasonality': 7, 'use_st