In [1]:
from azureml.core import Workspace, Experiment

# Load the workspace from the local config file and open the project experiment.
ws = Workspace.from_config()
exp = Experiment(ws, "udacity-project")

# One line per workspace attribute, same text as a multi-argument print with sep='\n'.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start a logging run on the experiment. Note that `run` is rebound to the
# HyperDrive run when the sweep is submitted later in this notebook.
run = exp.start_logging()

Workspace name: quick-starts-ws-188517
Azure region: southcentralus
Subscription id: 6971f5ac-8af1-446e-8034-05acea24681f
Resource group: aml-quickstarts-188517


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "Wilkens-Cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
try:
    # Reuse the cluster when one with this name already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Only a "compute target not found" style failure should trigger provisioning;
    # anything else (auth problems, interrupts, typos) must surface instead of
    # silently attempting to create a new cluster.
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until the (existing or newly created) cluster reports its provisioning state.
compute_target.wait_for_completion(show_output=True)



InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
### YOUR CODE HERE ###
# Random sampling over a discrete grid: 2 values of --C x 3 values of --max_iter.
ps = RandomParameterSampling(
    {"--C": choice(0.5, 1.0),
     "--max_iter": choice(50, 100, 150)})

# Specify a Policy
### YOUR CODE HERE ###
# Early-termination policy checked at every evaluation interval with a 0.1 slack factor.
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.1)

# Create the local ./training folder if needed. makedirs(exist_ok=True) is
# idempotent, so the cell can be re-run without a separate existence check.
os.makedirs("./training", exist_ok=True)

# Setup environment for your training run
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-migrate-from-estimators-to-scriptrunconfig
# https://knowledge.udacity.com/questions/423888
# NOTE(review): the SKLearn estimator is deprecated (this cell emits a deprecation
# warning); the migration guide linked above describes the ScriptRunConfig replacement.

# Get Estimator
est = SKLearn(source_directory='.', entry_script='train.py', compute_target=cluster_name)

# Sweep configuration: maximise the 'Accuracy' metric over at most 8 runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    hyperparameter_sampling=ps,
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    primary_metric_name='Accuracy',
    policy=policy,
    max_total_runs=8,
    max_concurrent_runs=4,
    estimator=est)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
# `run` is rebound here: from this point on it refers to the HyperDrive parent run.
run = exp.submit(hyperdrive_config)

# Render the live monitoring widget for the sweep.
details_widget = RunDetails(run)
details_widget.show()

# Block until the sweep finishes; the returned run details become the cell output.
run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_6ed891fd-f76d-436a-88e6-f7bdf4450203
Web View: https://ml.azure.com/runs/HD_6ed891fd-f76d-436a-88e6-f7bdf4450203?wsid=/subscriptions/6971f5ac-8af1-446e-8034-05acea24681f/resourcegroups/aml-quickstarts-188517/workspaces/quick-starts-ws-188517&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-03-10T13:46:46.661726][API][INFO]Experiment created<END>\n""<START>[2022-03-10T13:46:47.444947][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2022-03-10T13:46:48.023273][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_6ed891fd-f76d-436a-88e6-f7bdf4450203
Web View: https://ml.azure.com/runs/HD_6ed891fd-f76d-436a-88e6-f7bdf4450203?wsid=/subscriptions/6971f5ac-8af1-446e-8034-05acea24681f/resourcegroups/aml-quickstarts-188517/workspaces/quick-starts-ws-188517&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_6ed891fd-f76d-436a-88e6-f7bdf4450203',
 'target': 'Wilkens-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-03-10T13:46:46.498274Z',
 'endTimeUtc': '2022-03-10T14:01:51.578974Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '01de1467-7f54-4b90-9044-5fb2829d683d',
  'user_agent': 'python/3.8.1 (Linux-5.4.0-1068-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.38.0',
  'space_size': '6',
  'score': '0.9094081942336874',
  'best_child_run_id': 'HD_6ed891fd-f76d-436a-88e6-f7bdf4450203_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg188517.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_6ed891fd-f76d-436a-88e6-f7bdf4450203/azureml-logs/h

In [19]:
import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
# Child run with the best primary metric, and the metrics it logged.
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print("Best Run:", best_run.id)
print("\n accuracy:", best_run_metrics['Accuracy'])
print("\n file name :", best_run.get_file_names())

# Ensure the local ./outputs folder exists. makedirs(exist_ok=True) is idempotent,
# so re-running the cell is safe and no separate existence check is needed.
os.makedirs("./outputs", exist_ok=True)

# NOTE(review): no model file is written in this cell. The previous commented-out
# joblib.dump call would have pickled the run id string rather than a trained model.



Best Run: HD_6ed891fd-f76d-436a-88e6-f7bdf4450203_2

 accuracy: 0.9094081942336874

 file name : ['azureml-logs/20_image_build_log.txt', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/rslex.log', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'system_logs/lifecycler/vm-bootstrapper.log', 'user_logs/std_log.txt']


In [20]:
# Display the metrics logged by the best run. Only the last expression of a cell is
# rendered, so the former bare best_run.get_file_names() call here was a discarded,
# redundant service round-trip (the file list is already printed in the previous cell).
best_run_metrics

{'Regularization Strength:': 0.5,
 'Max iterations:': 50,
 'Accuracy': 0.9094081942336874}

In [21]:
# Register a model named 'hyperdrive_model' from the best HyperDrive run.
# NOTE(review): model_path='.' points at the run's root rather than a single model
# file (an earlier draft used 'outputs/model.joblib' with tags=best_run_metrics);
# confirm which artifact is actually meant to be registered.
best_model = best_run.register_model(
    model_name='hyperdrive_model',
    model_path='.',
)

In [24]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at:
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
# Public CSV with the bank-marketing training data.
data_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

# Build the tabular dataset directly from the delimited file at that URL.
ds = TabularDatasetFactory.from_delimited_files(path=data_url)

In [25]:
import pandas as pd
from train import clean_data
from sklearn.model_selection import train_test_split
from azureml.core import Dataset

# Use the clean_data function to clean your data.
# clean_data comes from train.py; presumably x holds the features and y the label.
x, y = clean_data(ds)

# Put the label back next to the features as a column named 'y'.
y_dataframe = pd.DataFrame(y, columns=['y'])
all_data = pd.concat([x, y_dataframe], axis="columns")

# Seeded 80/20 split of the combined frame; both parts still contain the 'y' column.
x_train, x_test = train_test_split(all_data, test_size=0.2, random_state=1)

# Only the training part is written out (without the index column).
x_train.to_csv("training/train_data.csv", index=False)

# Upload the local ./training folder to the workspace's default datastore ...
data_store = ws.get_default_datastore()
data_store.upload(
    src_dir="./training",
    target_path='udacity-project',
    overwrite=True,
    show_progress=True,
)

# ... and expose the uploaded CSV as a tabular dataset for AutoML.
uploaded_csv = data_store.path("udacity-project/train_data.csv")
train_data = Dataset.Tabular.from_delimited_files(path=uploaded_csv)



Uploading an estimated of 1 files
Uploading ./training/train_data.csv
Uploaded ./training/train_data.csv, 1 files out of an estimated total of 1
Uploaded 1 files


In [26]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.

# Classification on the uploaded training data, predicting column 'y', scored by
# accuracy with 4-fold cross-validation, executed on the compute cluster.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    training_data=train_data,
    label_column_name='y',
    n_cross_validations=4,
    compute_target=compute_target,
    experiment_timeout_minutes=30,
)


In [None]:
# Submit your automl run

### YOUR CODE HERE ###
# Submit the AutoML configuration to the experiment and stream progress to the cell.
automl_run = exp.submit(config=automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on Wilkens-Cluster with default configuration
Running on remote compute: Wilkens-Cluster


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_fbd093f3-9346-4e67-b585-6b0239f90425,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+-------------------------------------

In [None]:
# Retrieve and save your best automl model.
from azureml.automl.runtime.onnx_convert import OnnxConverter
### YOUR CODE HERE ###
import os

import joblib

# get_output() is called without return_onnx_model=True, so it returns the best
# child run together with its fitted (non-ONNX) model.
best_run_automl, best_model_automl = automl_run.get_output()

# OnnxConverter.save_onnx_model expects an ONNX model object, which this run does
# not provide (ONNX output is not requested in the AutoML configuration or in
# get_output), so the fitted model is persisted with joblib instead.
# makedirs(exist_ok=True) makes sure the target folder exists on a fresh kernel.
os.makedirs('./outputs', exist_ok=True)
joblib.dump(best_model_automl, './outputs/best_automl_model.joblib')



In [None]:
# Delete the compute target (the cluster referenced by `compute_target`), not a model.
compute_target.delete()
