In [2]:
import azureml.core
from azureml.core import Workspace, Dataset, Datastore
from azureml.core import Experiment
from azureml.core.compute import ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.widgets import RunDetails

import pandas as pd
import numpy as np

# Report which Azure ML SDK release this kernel has loaded.
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.40.0


In [3]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Identifiers of the Azure ML workspace this notebook connects to.
# The tenant id is listed under Azure Active Directory -> Properties.
tenant_id = '198c7d8c-e010-45ce-a018-ec2d9a33f58f'
subscription_id = '4d278f3d-b4fd-4fa2-86b6-d34b96bc888f'
resource_group = 'Foxy_Resources'
ws_name = 'automlbook'

# Authenticate interactively against the tenant, then fetch the Workspace object.
ia = InteractiveLoginAuthentication(tenant_id=tenant_id)
ws = Workspace.get(
    name=ws_name,
    auth=ia,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

# Echo the resolved workspace details, one value per line.
workspace_details = (ws.name, ws.resource_group, ws.location, ws.subscription_id)
print('\n'.join(map(str, workspace_details)))

automlbook
Foxy_Resources
centralus
4d278f3d-b4fd-4fa2-86b6-d34b96bc888f


In [4]:
# Get the workspace's blob datastore by its registered name.
# It is looked up by name (rather than via Datastore.get_default) because
# datastore_name is also passed to the training script as an argument;
# fetching the default datastore first would only be overwritten here.
datastore_name = 'workspaceblobstore'
datastore = Datastore.get(ws, datastore_name)

In [5]:
# Fetch the latest version of the dataset registered under this name
dataset_name = 'automlbook Titanic Training Data A'
dataset = Dataset.get_by_name(workspace=ws, name=dataset_name, version='latest')

# Names of the dataset's columns (not otherwise used in this cell)
dataset_columns = [
    'Survived', 'Pclass', 'Sex', 'Age', 'SibSp',
    'Parch', 'Fare', 'Cabin', 'Embarked',
]

# Preview the first 10 records as a pandas DataFrame
dataset.take(10).to_pandas_dataframe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,False,3,male,22.0,1,0,7.25,,S
1,True,1,female,38.0,1,0,71.2833,C85,C
2,True,3,female,26.0,0,0,7.925,,S
3,True,1,female,35.0,1,0,53.1,C123,S
4,False,3,male,35.0,0,0,8.05,,S
5,False,3,male,,0,0,8.4583,,Q
6,False,1,male,54.0,0,0,51.8625,E46,S
7,False,3,male,2.0,3,1,21.075,,S
8,True,3,female,27.0,0,2,11.1333,,S
9,True,2,female,14.0,1,0,30.0708,,C


In [6]:
# Build the user-managed environment used for the local run
from azureml.core import Environment

# Flag the environment as user-managed: dependencies are expected to be
# provided by the Python environment the run executes in.
user_managed_env = Environment(name="user-managed-env")
user_managed_env.python.user_managed_dependencies = True

# To target a specific interpreter, point interpreter_path at it, e.g.:
#user_managed_env.python.interpreter_path = '/home/johndoe/miniconda3/envs/myenv/bin/python'

In [59]:
# Submit the training script as a local run of the experiment
from azureml.core import ScriptRunConfig
import datetime

# Experiment under which the run is recorded
experiment_name = 'Local_Training_AutoML'
experiment = Experiment(workspace=ws, name=experiment_name)

# Execute on the local machine ('local'), not on a remote compute cluster
compute_target = 'local'
source_directory = './scripts'
script_name = 'localTrainingAutoML.py'

# Timestamp suffix for the output model file name. A fixed strftime pattern is
# used because str(datetime.now()) drops the fractional seconds whenever the
# microsecond field is 0, and its ' ' and ':' characters had to be replaced.
# The resulting name looks like
# 'DecisionTreeClassifier_Titanic_local-2022-04-17_21-40-36.114550.pkl'
suffix = 'local-' + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S.%f')
out_model_file_name = 'DecisionTreeClassifier_Titanic_{}.pkl'.format(suffix)

# Command-line arguments handed to the training script. The workspace,
# datastore and dataset identifiers are the ones defined in the earlier cells.
script_arguments = [
    "--tenant-id", tenant_id,
    "--ws-name", ws_name,
    "--subscription-id", subscription_id,
    "--resource-group", resource_group,
    "--datastore-name", datastore_name,
    "--dataset-name", dataset_name,
    "--out-model-file-name", out_model_file_name,
]
scriptRunConfig = ScriptRunConfig(
    source_directory=source_directory,
    script=script_name,
    arguments=script_arguments,
    environment=user_managed_env,
    compute_target=compute_target,
)

# Submit the run and show its progress widget
AutoML_run = experiment.submit(scriptRunConfig)
RunDetails(AutoML_run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [60]:
# Register the model produced by AutoML_run
description = "Best Local AutoML Classification Run using Titanic Sample Data."
# NOTE(review): "metric" names a regression metric although "task" is
# classification; left unchanged because the metric the training script
# actually reports is not visible here -- confirm and correct.
tags = {
    "project": "Local Training AutoML",
    "creator": "fox",
    "task": "classification",
    "dataset": "automlbook Titanic Training Data A",
    "metric": "normalized_root_mean_squared_error",
}

# Block until the submitted run has finished so that its output files exist
# before registration; without this, running all cells in one go attempts to
# register while the script is still executing. Raises if the run failed.
AutoML_run.wait_for_completion(show_output=False)

# TODO finish todos in python script for this registration of file in ./outputs to work
AutoML_run.register_model(model_path='./outputs', model_name=out_model_file_name, description=description, tags=tags)

Model(workspace=Workspace.create(name='automlbook', subscription_id='4d278f3d-b4fd-4fa2-86b6-d34b96bc888f', resource_group='Foxy_Resources'), name=DecisionTreeClassifier_Titanic_local-2022-04-24_22-36-56.692285.pkl, id=DecisionTreeClassifier_Titanic_local-2022-04-24_22-36-56.692285.pkl:1, version=1, tags={'project': 'Local Training AutoML', 'creator': 'fox', 'task': 'classification', 'dataset': 'automlbook Titanic Training Data A', 'metric': 'normalized_root_mean_squared_error'}, properties={})