In [1]:
# Try to do the AML exercise independently
# Create/use a workspace

from azureml.core import Workspace

In [2]:
# Get the workspace described by the saved Azure ML configuration.
ws = Workspace.from_config()

In [3]:
# Summarise which workspace the configuration resolved to.
# NOTE(review): the subscription id is written into the saved cell output;
# clear or redact the output before sharing the notebook.
print(
    f'Name: {ws.name}, Subscription Id: {ws.subscription_id}, '
    f'Resource Group: {ws.resource_group}, Location: {ws.location}'
)

Name: quick-starts-ws-152981, Subscription Id: 61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30, Resorce Group: aml-quickstarts-152981, Location: southcentralus


In [4]:
# Create (or attach to) the experiment that will hold the runs of this notebook.
from azureml.core import Experiment

exp = Experiment(workspace=ws, name="my-automl-exp")

In [5]:
# Display the experiment summary (name, workspace and links).
exp

Name,Workspace,Report Page,Docs Page
my-automl-exp,quick-starts-ws-152981,Link to Azure Machine Learning studio,Link to Documentation


In [6]:
# Next, create or attach a compute cluster.
# The cluster is looked up by this name; if it does not exist yet, it is
# created in the following cell.
compute_cluster_name = "my-cc"

from azureml.core.compute import ComputeTarget
from azureml.exceptions import ComputeTargetException
from azureml.core.compute import AmlCompute




In [7]:
# Attach to the cluster registered under `compute_cluster_name`; when the
# lookup raises ComputeTargetException, provision a new cluster instead.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    # No such cluster yet: describe the one we want, then create it.
    prov_cfg = AmlCompute.provisioning_configuration(
        vm_size="Standard_DS3_v2",
        max_nodes=4,
        description="Pipeline cfg",
    )
    compute_cluster = ComputeTarget.create(
        workspace=ws,
        name=compute_cluster_name,
        provisioning_configuration=prov_cfg,
    )

In [8]:
# Block until the cluster operation has finished, echoing its progress to the
# cell output.
compute_cluster.wait_for_completion(show_output=True)

Creating......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [9]:
# Next, retrieve the dataset from
# 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'
# Once retrieved, this dataset will subsequently be used for the model.

# Build a tabular dataset directly from the CSV file at `uri`.
# NOTE(review): this cell does not check whether the dataset already exists
# in the workspace; the lookup by name only happens in a later cell.
from azureml.core.dataset import Dataset
uri = 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'

ds = Dataset.Tabular.from_delimited_files(uri)


In [10]:
# Display the dataset definition; the pipeline configuration follows in
# later cells.
ds

{
  "source": [
    "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ]
}

In [11]:
# Load the dataset into a pandas DataFrame for local inspection.
df = ds.to_pandas_dataframe()

In [12]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,instant,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,2.997264,1.395349,0.495385,0.474354,0.627894,0.190486,848.176471,3656.172367,4504.348837
std,211.165812,1.110807,0.500342,3.451913,2.004787,0.544894,0.183051,0.162961,0.142429,0.077498,686.622488,1560.256377,1937.211452
min,1.0,1.0,0.0,1.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,2.0,20.0,22.0
25%,183.5,2.0,0.0,4.0,1.0,1.0,0.337083,0.337842,0.52,0.13495,315.5,2497.0,3152.0
50%,366.0,3.0,1.0,7.0,3.0,1.0,0.498333,0.486733,0.626667,0.180975,713.0,3662.0,4548.0
75%,548.5,3.0,1.0,10.0,5.0,2.0,0.655417,0.608602,0.730209,0.233214,1096.0,4776.5,5956.0
max,731.0,4.0,1.0,12.0,6.0,3.0,0.861667,0.840896,0.9725,0.507463,3410.0,6946.0,8714.0


In [13]:
# Register the dataset in the workspace under the name `key`, so that it can
# be looked up by that name later on.
key = "Bikesharing Dataset"
description = "Bike Sharing Data"
reg_ds = ds.register(ws, name=key, description=description)

In [14]:
# Display the registered dataset; its repr now carries a "registration"
# section (id, name, version, description, workspace).
reg_ds

{
  "source": [
    "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "4959b7b0-f7a7-4f86-8612-5c2dd14948e5",
    "name": "Bikesharing Dataset",
    "version": 1,
    "description": "Bike Sharing Data",
    "workspace": "Workspace.create(name='quick-starts-ws-152981', subscription_id='61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30', resource_group='aml-quickstarts-152981')"
  }
}

In [15]:
# Load the registered dataset and summarise it, so the statistics can be
# compared with those of the unregistered dataset.
reg_df = reg_ds.to_pandas_dataframe()
reg_df.describe()

Unnamed: 0,instant,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0,731.0
mean,366.0,2.49658,0.500684,6.519836,2.997264,1.395349,0.495385,0.474354,0.627894,0.190486,848.176471,3656.172367,4504.348837
std,211.165812,1.110807,0.500342,3.451913,2.004787,0.544894,0.183051,0.162961,0.142429,0.077498,686.622488,1560.256377,1937.211452
min,1.0,1.0,0.0,1.0,0.0,1.0,0.05913,0.07907,0.0,0.022392,2.0,20.0,22.0
25%,183.5,2.0,0.0,4.0,1.0,1.0,0.337083,0.337842,0.52,0.13495,315.5,2497.0,3152.0
50%,366.0,3.0,1.0,7.0,3.0,1.0,0.498333,0.486733,0.626667,0.180975,713.0,3662.0,4548.0
75%,548.5,3.0,1.0,10.0,5.0,2.0,0.655417,0.608602,0.730209,0.233214,1096.0,4776.5,5956.0
max,731.0,4.0,1.0,12.0,6.0,3.0,0.861667,0.840896,0.9725,0.507463,3410.0,6946.0,8714.0


In [20]:
# List every column: the describe() summary does not show 'date', which is
# the column used as the time column for forecasting.
reg_df.columns

Index(['instant', 'date', 'season', 'yr', 'mnth', 'weekday', 'weathersit',
       'temp', 'atemp', 'hum', 'windspeed', 'casual', 'registered', 'cnt'],
      dtype='object')

In [16]:
# Look the dataset up in the workspace by its registered name.
# `found` and `dataset` are always bound afterwards, so later cells can test
# them even when nothing is registered under `key`; previously both names
# stayed undefined in that case.
registered_datasets = ws.datasets  # fetch the name -> dataset mapping once
found = key in registered_datasets
dataset = registered_datasets[key] if found else None


In [17]:
# Echo the name the dataset is registered under.
print(key)

Bikesharing Dataset


In [18]:
from azureml.pipeline.core import Pipeline


In [19]:
# Assemble the pipeline object for this workspace.
# NOTE(review): `steps` is empty, so the pipeline built here has nothing to
# run; the AutoML step is only defined in a later cell.
ml_pipeline = Pipeline(workspace=ws, steps=[], description=description)

In [22]:
# We will specify the AutoMLStep in this instance
from azureml.pipeline.steps.automl_step import AutoMLStep
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import TrainingOutput
from azureml.train.automl.automlconfig import AutoMLConfig


In [23]:
# Folder handed to AutoMLConfig through its `path` argument.
project_folder = 'pipeline-bike-folder'

# Tunable AutoML limits, kept together so they are easy to adjust.
automl_settings = {
    'experiment_timeout_minutes': 20,
    # Capped at 4 to match the `max_nodes=4` the cluster is provisioned with:
    # an AmlCompute node runs one iteration at a time, so a fifth concurrent
    # iteration could only wait in the queue.
    'max_concurrent_iterations': 4,
    'primary_metric': 'normalized_root_mean_squared_error',
    'n_cross_validations': 5,
}

# Forecast `cnt` over the `date` time column of the registered dataset.
# NOTE(review): `casual` and `registered` are still part of the training
# data; they look like a breakdown of `cnt` (their means add up to the mean
# of `cnt`) and would leak the label. Confirm, and drop them if so.
automl_config = AutoMLConfig(
    compute_target=compute_cluster,
    task='forecasting',
    training_data=reg_ds,
    time_column_name='date',
    label_column_name='cnt',
    path=project_folder,
    enable_early_stopping=True,
    debug_log='automl_errors.log',
    **automl_settings,
)


In [26]:

# Default datastore of the workspace; both pipeline outputs below use it.
# NOTE(review): this rebinds `ds`, which earlier cells use for the tabular
# dataset. Re-running those cells after this one would see the datastore.
ds = ws.get_default_datastore()

# Pipeline output that receives the metrics produced by the AutoML step.
metrics_data = PipelineData(
    'metrics_data',
    datastore=ds,
    pipeline_output_name='metrics_output',
    training_output=TrainingOutput(type='Metrics'),
)

# Pipeline output that receives the model produced by the AutoML step.
model_data = PipelineData(
    'model_data',
    datastore=ds,
    pipeline_output_name='best_model_output',
    training_output=TrainingOutput(type='Model'),
)

# The AutoML step itself, wired to the two outputs declared above.
automlstep = AutoMLStep(
    name="auto-ml-pipeline",
    automl_config=automl_config,
    allow_reuse=True,
    outputs=[metrics_data, model_data],
)