# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [83]:
# import dependencies
from azureml.core import Workspace, Experiment
from azureml.core.dataset import Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

In [84]:
# Name under which every run in this notebook is grouped.
experiment_name = 'ad-hyperdrive-experiment'

# Connect to the workspace described by the local config file, then open
# the experiment with the name chosen above inside that workspace.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [85]:
# Load the advertising dataset: reuse the copy registered in the workspace
# when there is one, otherwise build it from the CSV file and register it.
# NOTE: update the key to match the dataset name
# Data Source : https://www.kaggle.com/fayomi/advertising?select=advertising.csv
key = "Advertising Data"
description_text = "Kaggle Advertising Dataset"

# True when a dataset with this name is already registered in the workspace.
found = key in ws.datasets.keys()

if found:
    dataset = ws.datasets[key]
else:
    # Create a tabular AML dataset from the remote CSV file ...
    example_data = 'https://raw.githubusercontent.com/chamsun-imoggo/udacityms-3rdproject/main/data/advertising_training.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    # ... and register it so later sessions can look it up by name.
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialise as a pandas DataFrame and display summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Male,Month,Date,Hour,Minute,Second,Clicked on Ad
count,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0,950.0
mean,65.251811,35.985263,55100.793063,180.084484,0.48,3.836842,15.430526,11.655789,29.114737,29.664211,0.494737
std,15.869,8.754454,13393.245643,43.718621,0.499863,1.93185,8.713259,6.975982,17.200562,16.852233,0.500236
min,32.6,19.0,13996.5,104.78,0.0,1.0,1.0,0.0,0.0,0.0,0.0
25%,51.565,29.0,47272.555,138.905,0.0,2.0,8.0,6.0,14.0,15.0,0.0
50%,68.645,35.0,57228.185,183.65,0.0,4.0,15.0,12.0,30.0,30.0,0.0
75%,78.695,42.0,65499.1425,218.745,1.0,5.0,23.0,18.0,43.0,44.0,1.0
max,91.43,61.0,79484.8,269.96,1.0,7.0,31.0,23.0,59.0,59.0,1.0


In [86]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "compute-ml"

# Reuse the cluster when it already exists in the workspace; otherwise
# provision a new one.
try:
    compute_target = ComputeTarget(ws, amlcompute_cluster_name)
    # Report the name that was actually looked up. The previous message
    # referenced an undefined variable, which raised NameError inside the
    # try block and forced the creation branch even for an existing cluster.
    print(f"{amlcompute_cluster_name} exists already")
except ComputeTargetException:
    # Only the failed lookup triggers provisioning; any other error now
    # surfaces instead of being swallowed by a bare `except:`.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_DS12_v2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# Wait for the (existing or newly created) cluster, streaming status output.
compute_target.wait_for_completion(show_output=True)


Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [87]:
# Early termination policy (not required when using Bayesian sampling).
# Bandit policy: after the first 5 intervals, check every 2 intervals and
# stop runs that fall outside the 0.2 slack factor of the best run so far.
early_termination_policy = BanditPolicy(
    evaluation_interval=2,
    slack_factor=0.2,
    delay_evaluation=5,
)

# Discrete search space, sampled at random. The keys are passed through to
# the entry script -- presumably train.py parses them as command-line
# arguments; confirm against the script.
hyperparameter_space = {
    "--C": choice(0.01, 0.1, 1, 10, 100),
    "--max_iter": choice(50, 75, 100, 125, 150, 175, 200),
    "--solver": choice("liblinear", "sag", "lbfgs", "saga"),
}
param_sampling = RandomParameterSampling(hyperparameter_space)

# Estimator that runs train.py from the current directory on the cluster.
estimator = SKLearn(
    source_directory='./',
    compute_target=compute_target,
    entry_script='train.py',
)

# Sweep configuration: maximise the primary metric over at most 50 runs,
# bounded to 25 minutes in total.
# NOTE(review): the recorded best-run lookup later in this notebook printed
# None -- confirm that train.py logs a metric named exactly 'AUC_weighted'.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='AUC_weighted',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_duration_minutes=25,
)



In [88]:
# Submit the HyperDrive sweep to the experiment.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

# Show the live monitoring widget, then block until the sweep finishes
# while streaming its logs; the returned details are the cell's output.
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82
Web View: https://ml.azure.com/experiments/ad-hyperdrive-experiment/runs/HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82?wsid=/subscriptions/d7f39349-a66b-446e-aba6-0053c2cf1c11/resourcegroups/aml-quickstarts-139198/workspaces/quick-starts-ws-139198

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-20T15:23:03.062322][API][INFO]Experiment created<END>\n""<START>[2021-02-20T15:23:03.723875][GENERATOR][INFO]Trying to sample '50' jobs from the hyperparameter space<END>\n""<START>[2021-02-20T15:23:04.296017][GENERATOR][INFO]Successfully sampled '50' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-20T15:23:04.4942807Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82
Web View: https://ml.azure.com/experiments/ad-hyperdrive-experiment/runs/HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82?wsid=

{'runId': 'HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82',
 'target': 'compute-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-02-20T15:23:02.803602Z',
 'endTimeUtc': '2021-02-20T15:40:16.536915Z',
 'properties': {'primary_metric_config': '{"name": "AUC_weighted", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'fde7b2fd-224c-4df1-b0a5-2f26b7b103cf'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg139198.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=uGK%2F5Gu3TaHkQ7W8JMKuv8G3WnQiAARIugclUIRKko0%3D&st=2021-02-20T15%3A30%3A21Z&se=2021-02-20T23%3A40%3A21Z&sp=r'},
 'submittedBy': 'ODL_User 139198'}

In [89]:
# get run details
# Re-display the monitoring widget for the submitted sweep.
RunDetails(hyperdrive_run).show()
# Called without show_output, so no logs are streamed; the returned run
# details are displayed as the cell's result.
hyperdrive_run.wait_for_completion()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82',
 'target': 'compute-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-02-20T15:23:02.803602Z',
 'endTimeUtc': '2021-02-20T15:40:16.536915Z',
 'properties': {'primary_metric_config': '{"name": "AUC_weighted", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'fde7b2fd-224c-4df1-b0a5-2f26b7b103cf'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg139198.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_db7a0357-5526-4a5a-bf5d-676a97a8ac82/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=uGK%2F5Gu3TaHkQ7W8JMKuv8G3WnQiAARIugclUIRKko0%3D&st=2021-02-20T15%3A30%3A21Z&se=2021-02-20T23%3A40%3A21Z&sp=r'},
 'submittedBy': 'ODL_User 139198'}

In [90]:
# Look up the child run that scored best on the configured primary metric.
# NOTE(review): the recorded output of this cell is `None`, i.e. no best run
# was found -- presumably no child run logged the primary metric under the
# configured name; verify against train.py.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run)

None


In [None]:
# get best model
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise RuntimeError(
        "get_best_run_by_primary_metric() returned None; verify that train.py "
        "logs the primary metric configured in HyperDriveConfig ('AUC_weighted')."
    )

# Show the winning run, the arguments it was launched with, and the files
# it produced.
print(best_run)
print(best_run.get_details()['runDefinition']['arguments'])
print(best_run.get_file_names())

# Use the same artifact name as the model registration cell (no leading
# './'), and pass a destination file path rather than a directory.
os.makedirs('outputs', exist_ok=True)
best_run.download_file('outputs/model.joblib', output_file_path='outputs/model.joblib')

In [None]:
# Save the best model: register the best run's model file in the workspace
# under the name 'hyperdrive_best_run'.
model = best_run.register_model(
    model_path='outputs/model.joblib',
    model_name='hyperdrive_best_run',
)

In [None]:
# delete the service
# No cell in this notebook deploys a web service, so `service` only exists if
# it was created elsewhere in the session. Guard the call so that
# Restart & Run All does not stop here with a NameError.
if "service" in globals():
    service.delete()
else:
    print("No deployed service found in this session; nothing to delete.")

In [None]:
# delete the compute target
# Cleanup step: removes the cluster created or reused earlier in this
# notebook. Run this only once all work on the cluster is finished.
compute_target.delete()