In [50]:
# Check core SDK version number
import azureml.core

print("SDK version:", azureml.core.VERSION)

SDK version: 1.16.0


In [53]:
from azureml.core import Workspace, Experiment

#ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config()
exp = Experiment(workspace=ws,name="udacity-project")

subscription_id = '2e801a3e-ca8a-42a8-9ea2-f4d5dc924517'
resource_group = 'demoresource'
workspace_name = 'demoworkspace'

workspace = Workspace(subscription_id, resource_group, workspace_name)

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()


Workspace name: demoworkspace
Azure region: southcentralus
Subscription id: 2e801a3e-ca8a-42a8-9ea2-f4d5dc924517
Resource group: demoresource


In [54]:
from azureml.core.compute import ComputeTarget, AmlCompute
import pandas as pd
# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

output = {}
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Experiment Name'] = exp.name
pd.set_option('display.max_colwidth', -1)
outputDF = pd.DataFrame(data=output,index=[''])
outputDF.T

from azureml.core.compute_target import ComputeTargetException
### YOUR CODE HERE ###
cpu_cluster_name = 'cpu-cluster-1'

#verify cluster does not already exsist
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print ('Found Exsisting Cluster')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
    max_nodes=2)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True)



Found Exsisting Cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [114]:
from azureml.core.environment import CondaDependencies

myenv = Environment(name="myenv")
conda_dep = CondaDependencies()
conda_dep.add_conda_package("scikit-learn")



In [55]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os


# Specify parameter sampler
from azureml.train.hyperdrive import choice, loguniform
ps = RandomParameterSampling( {
    "--C": choice(0.5, 1, 1.5),
    "--max_iter": (50,100,150)
    }
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
shutil.copy('train.py' , './training')
est = SKLearn(
    source_directory = './training',
    compute_target = compute_target,
    entry_script = 'train.py'
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    estimator = est,
    hyperparameter_sampling =ps,
    policy = policy,
    primary_metric_name='Accuracy', 
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
    max_total_runs=12,
    max_concurrent_runs=4, 
    )
                         
                                    


NameError: name 'shutil' is not defined

In [130]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hdr = experiment.submit(hyperdrive_config)
RunDetails(hdr).show()
hdr.wait_for_completion(show_output=True)
assert (hdr.get_status() == 'Completed')


HyperDriveScenarioNotSupportedException: HyperDriveScenarioNotSupportedException:
	Message: Automated hyperparameter tuning is not supported for types DatabricksCompute and local.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "inner_error": {
            "code": "ValidationError",
            "inner_error": {
                "code": "ScenarioNotSuported"
            }
        },
        "message": "Automated hyperparameter tuning is not supported for types DatabricksCompute and local."
    }
}

In [55]:
import joblib
# Get your best run and save the model from that run.

best_run = hdr.get_best_run_by_primary_metric()
print (best_run.get_details()['runDefinition']['arguments'])
print (best_run.get_file_names())


In [None]:
model = best_run.register_model(model_name='first', model_path='outputs/model.joblib')

In [58]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

data = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=data)


In [61]:
import train
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(x)
scaled_data = scaler.transform(x)
scaled_data = pd.DataFrame(scaled_data, columns=x.columns)
x_train, y_train, x_test, y_test = train_test_split(scaled_data, y,test_size=0.2, random_state=1)
x_df = x_train.assign(y_train=y_train.values)
df = pd.DataFrame(x_df)
df.to_csv('training/train_data.csv',index=False)

ds = ws.get_default_datastore()
ds.upload(src_dir='./training',target_path='udacity-project',overwrite=True, show_progress=True)
from azureml.core import Dataset
train_data = Dataset.Tabular.from_delimited_files(path=ds.path('udacity-project/train_data.csv'))


NameError: name 'DataPath' is not defined

In [62]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes=30",
    "n_cross_validations": 2,
    "primary_metric": "AUC_weighted"
   
}

automl_config = AutoMLConfig(
    task = 'classification',
    training_data = training_data,
    label_column_name = 'y_train',
    **automl_settings
)


SyntaxError: invalid syntax (<ipython-input-62-7c46704807ee>, line 9)

In [2]:
# Submit your automl run

remote_run = exp.submit(automl_config, show_output = False)
remote_run.wait_for_completion()


In [None]:
# Retrieve and save your best automl model.

best_run_customized, fitted_model_customized = remote_run.get_output()
from azureml.widgets import RunDetails
RunDetails(remote_run).show()
from azureml.run import Run
model_explanability_run_id = remote_run.id + '_' + 'ModelExplain'
print (model_explanability_run_id)
model_explanability_run = Run(experiment=exp, run_id=model_explanability_run_id)
model_explanability_run.wait_for_completion()

#Get the best run object
best_run, fitted_model = remote_run.get_output()


In [None]:
best_run, onnx_mdl = remote_run.get_output(return_onnx_model=True)

from azureml.automl.runtime.onnx_convert import OnnxConverter
onnx_fl_path = './best_model.onnx'
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)


In [None]:
compute_target.delete()