In [1]:
# Check core SDK version number
import azureml.core

print("SDK version:", azureml.core.VERSION)

SDK version: 1.18.0


In [3]:
from azureml.core import Workspace, Experiment

#ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config()
ws.write_config(path='.azureml')
exp = Experiment(workspace=ws,name="udacity-project")



print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()


Workspace name: quick-starts-ws-126582
Azure region: southcentralus
Subscription id: ebee3a56-4c54-406a-b732-174015826780
Resource group: aml-quickstarts-126582


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
import pandas as pd
# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

output = {}
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Experiment Name'] = exp.name
pd.set_option('display.max_colwidth', -1)
outputDF = pd.DataFrame(data=output,index=[''])
outputDF.T

from azureml.core.compute_target import ComputeTargetException
### YOUR CODE HERE ###
cpu_cluster_name = 'cpu-cluster-1'

#verify cluster does not already exsist
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print ('Found Exsisting Cluster')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
    max_nodes=2)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True)



Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [16]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os
import shutil


# Specify parameter sampler
from azureml.train.hyperdrive import choice, loguniform
ps = RandomParameterSampling( {
    "--C": choice(0.5, 1, 1.5),
    "--max_iter": choice(50,100,150)
    }
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
shutil.copy('train.py' , './training')
est = SKLearn(
    source_directory = '.',
    compute_target = compute_target,
    entry_script = 'train.py'
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    estimator = est,
    hyperparameter_sampling =ps,
    policy = policy,
    primary_metric_name='Accuracy', 
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
    max_total_runs=20,
    max_concurrent_runs=4, 
    )
                         
                                    


In [91]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hdr = exp.submit(config=hyperdrive_config)
RunDetails(hdr).show()
hdr.wait_for_completion(show_output=True)
assert (hdr.get_status() == 'Completed')




_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_c55fadf8-bb65-4a54-b823-4a91c945794d
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_c55fadf8-bb65-4a54-b823-4a91c945794d?wsid=/subscriptions/2e801a3e-ca8a-42a8-9ea2-f4d5dc924517/resourcegroups/demoresource/workspaces/demoworkspace

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-11-14T06:55:30.253076][API][INFO]Experiment created<END>\n"<START>[2020-11-14T06:55:31.1912125Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2020-11-14T06:55:32.872559][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-11-14T06:55:33.067177][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_c55fadf8-bb65-4a54-b823-4a91c945794d
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_c55fadf8-bb65-4a54-b823-4a91c945794d?wsid=/subscriptions/2e801a3e-ca8a-42a8-9ea2-

In [92]:
import joblib
# Get your best run and save the model from that run.

best_run = hdr.get_best_run_by_primary_metric()
print (best_run.get_details()['runDefinition']['arguments'])
print (best_run.get_file_names())


['--C', '0.5', '--max_iter', '150']
['azureml-logs/55_azureml-execution-tvmps_957fb65feb44a64d6e0e1a8cabbab9986191b061072f553878aea4f527eb0d78_d.txt', 'azureml-logs/65_job_prep-tvmps_957fb65feb44a64d6e0e1a8cabbab9986191b061072f553878aea4f527eb0d78_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_957fb65feb44a64d6e0e1a8cabbab9986191b061072f553878aea4f527eb0d78_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/103_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.joblib']


In [93]:
model = best_run.register_model(model_name='First', model_path='outputs/model.joblib')

In [7]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

data = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=data)


In [17]:
import train
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x)
scaled_data = scaler.transform(x)
scaled_data = pd.DataFrame(scaled_data, columns= x.columns)
x_train, x_test, y_train, y_test = train_test_split(scaled_data, y, test_size=0.2, random_state=1)
x_df = x_train.assign(y_train=y_train.values)
df = pd.DataFrame(x_df)
df.to_csv("training/train_data.csv", index=False)

ds = ws.get_default_datastore()
ds.upload(src_dir='./training', target_path='udacity-project', overwrite=True, show_progress=True)
from azureml.core import Dataset
train_data = Dataset.Tabular.from_delimited_files(path=ds.path('udacity-project/train_data.csv'))

Uploading an estimated of 2 files
Uploading ./training/train.py
Uploaded ./training/train.py, 1 files out of an estimated total of 2
Uploading ./training/train_data.csv
Uploaded ./training/train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files


In [18]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "n_cross_validations": 2,
    "primary_metric": "AUC_weighted",
   
}

automl_config = AutoMLConfig(
    task = 'classification',
    training_data = train_data,
    label_column_name = 'y_train',
    **automl_settings
)


In [19]:
# Submit your automl run

remote_run = exp.submit(automl_config, show_output = False)
remote_run.wait_for_completion()




{'runId': 'AutoML_9376c28f-1266-4ce2-864b-a1b01b0d1cd6',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-11-17T07:19:41.354174Z',
 'endTimeUtc': '2020-11-17T07:54:37.849676Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'local',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"70aa63ef-31fc-4b5e-9f36-79327722cf3c\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"udacity-project/train_data.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-126582\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"ebee3a56-4c54-406a-b732-174015826780\\\\\\", \\\\\\"w

In [22]:
best_run_customized, fitted_model_customized = remote_run.get_output()

from azureml.widgets import RunDetails
RunDetails(remote_run).show()

from azureml.core.run import Run
model_explainability_run_id = remote_run.id + "_" + "ModelExplain"
print(model_explainability_run_id)
model_explainability_run = Run(experiment=exp, run_id=model_explainability_run_id)
model_explainability_run.wait_for_completion()

# Get the best run object
best_run, fitted_model = remote_run.get_output()
model = best_run.register_model(model_name='train', model_path='outputs/model_automl')

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

AutoML_9376c28f-1266-4ce2-864b-a1b01b0d1cd6_ModelExplain


ServiceException: ServiceException:
	Code: 404
	Message: (UserError) Run AutoML_9376c28f-1266-4ce2-864b-a1b01b0d1cd6_ModelExplain was not found
	Details:

	Headers: {
	    "Date": "Tue, 17 Nov 2020 08:55:21 GMT",
	    "Content-Type": "application/json; charset=utf-8",
	    "Transfer-Encoding": "chunked",
	    "Connection": "keep-alive",
	    "Vary": "Accept-Encoding",
	    "Request-Context": "appId=cid-v1:2d2e8e63-272e-4b3c-8598-4ee570a0e70d",
	    "x-ms-response-type": "error",
	    "x-ms-client-request-id": "d7ff261d-bfde-45ad-a9cc-1cce0516212d",
	    "x-ms-client-session-id": "",
	    "X-Content-Type-Options": "nosniff",
	    "x-request-time": "0.045",
	    "Strict-Transport-Security": "max-age=15724800; includeSubDomains; preload",
	    "Content-Encoding": "gzip"
	}
	InnerException: {
    "additional_properties": {},
    "error": {
        "additional_properties": {
            "debugInfo": null
        },
        "code": "UserError",
        "severity": null,
        "message": "Run AutoML_9376c28f-1266-4ce2-864b-a1b01b0d1cd6_ModelExplain was not found",
        "message_format": null,
        "message_parameters": null,
        "reference_code": null,
        "details_uri": null,
        "target": null,
        "details": [],
        "inner_error": {
            "additional_properties": {},
            "code": "NotFoundError",
            "inner_error": null
        }
    },
    "correlation": {
        "operation": "3bcf0235e1a5f748b745649a74df2769",
        "request": "e852b4f5b013fe45"
    },
    "environment": "southcentralus",
    "location": "southcentralus",
    "time": {},
    "component_name": "run-history"
}

In [None]:
compute_target.delete()