# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [3]:
import logging
import os

import numpy as np
import pandas as pd

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace

In [1]:
# List the files and directories in the notebook's current working directory.
os.listdir(os.curdir)


['.ipynb_aml_checkpoints',
 'hyperparameter_tuning (1).ipynb',
 'keras_env.yml',
 'keras_train.py']

# Initialize the Workspace and an Experiment


In [4]:
# Load the workspace described by the local Azure ML config file and echo its
# identifying details, one per line.
ws = Workspace.from_config()
print('\n'.join(str(detail) for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id)))

# Experiment name and the folder that is submitted as the run's source directory.
experiment_name = 'keras_housing'
project_folder = '.'
os.makedirs(project_folder, exist_ok=True)

# Create (or attach to) the experiment; leaving it as the last expression renders its summary.
experiment = Experiment(workspace=ws, name=experiment_name)
experiment

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code RG8ACEZFQ to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
quick-starts-ws-127698
aml-quickstarts-127698
southcentralus
7a5e5192-86c5-4374-9780-5ddf26e7d9e1


Name,Workspace,Report Page,Docs Page
keras_housing,quick-starts-ws-127698,Link to Azure Machine Learning studio,Link to Documentation


# Create or Attach a Compute Resource

In [5]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException


amlcompute_cluster_name = "compute-gpu"

# Attach to the cluster when it already exists in the workspace; provision it otherwise.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # No cluster with this name yet: request a GPU VM size ("STANDARD_NC6") with up to 4 nodes.
    # vm_priority='lowpriority' may optionally be added to the configuration.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=amlcompute_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Wait for the compute target operation to finish, showing its status output.
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.

Running


# Environment Set Up

In [6]:
from azureml.core import Environment

# Build the training environment from the conda specification in keras_env.yml.
keras_env = Environment.from_conda_specification(name = 'keras-2.3.1', file_path = 'keras_env.yml')

# Specify a GPU base image
# Runs are executed inside Docker, on top of a CUDA 10.0 / cuDNN 7 Ubuntu 18.04 image.
keras_env.docker.enabled = True
keras_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.0-cudnn7-ubuntu18.04'

In [7]:
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive import choice, loguniform
from azureml.widgets import RunDetails

## Dataset

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the raw housing data; the first row of the CSV holds the column names.
data = pd.read_csv("housing.csv", header=0)

def prepare_data(data):
    """Separate the housing frame into model features and the regression target.

    The categorical ``ocean_proximity`` column is replaced by one indicator
    column per category (prefixed ``ocp_``), appended after the remaining
    columns. The input frame itself is not modified.

    Args:
        data: DataFrame containing at least the ``ocean_proximity`` and
            ``median_house_value`` columns.

    Returns:
        Tuple ``(x, y)``: ``x`` is the feature DataFrame without the target
        column, ``y`` is the ``median_house_value`` Series.
    """
    label_column = "median_house_value"

    # One-hot encode the category and swap it in for the original text column.
    proximity_dummies = pd.get_dummies(data["ocean_proximity"], prefix="ocp")
    encoded = data.join(proximity_dummies).drop(columns=["ocean_proximity"])

    features = encoded.drop(columns=[label_column])
    labels = encoded[label_column]
    return features, labels

def split_scale(x, y, test_size=0.2, random_state=42):
    """Split into train/test sets, impute missing features and min-max scale them.

    All preprocessing statistics come from the training split only: missing
    values in BOTH splits are filled with the training column means, and the
    scaler is fitted on the training features. This keeps test-set statistics
    out of preprocessing and makes the test features go through exactly the
    transformation the model saw during training.

    Args:
        x: DataFrame of numeric feature columns.
        y: Series of target values aligned with ``x``.
        test_size: Fraction of rows held out for the test split.
        random_state: Seed for the shuffle, so the split is reproducible.

    Returns:
        Tuple ``(scaled_x_train, scaled_x_test, y_train, y_test)`` where the
        features are the scaler's transformed arrays and the targets are the
        underlying value arrays of the split Series.
    """
    # With DataFrame/Series inputs the split returns DataFrames and Series.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    # Impute with training means only; the test split must not supply its own statistics.
    train_means = x_train.mean()
    x_train = x_train.fillna(train_means)
    x_test = x_test.fillna(train_means)

    # Fit the scaler on the training split and apply the same scaling to both splits.
    scaler_x = MinMaxScaler().fit(x_train)
    scaled_x_train = scaler_x.transform(x_train)
    scaled_x_test = scaler_x.transform(x_test)

    return scaled_x_train, scaled_x_test, y_train.values, y_test.values
# Build features/target from the raw frame, then split them and scale the features.
x,y = prepare_data(data)
x_train, x_test, y_train, y_test  = split_scale(x,y)

In [9]:
# Wrap the scaled feature arrays in DataFrames again, reusing the column names of x.
x_train_frame=pd.DataFrame(x_train, columns=x.columns)
x_test_frame=pd.DataFrame(x_test, columns=x.columns)
x_train_frame.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocp_<1H OCEAN,ocp_INLAND,ocp_ISLAND,ocp_NEAR BAY,ocp_NEAR OCEAN
0,0.729084,0.017021,0.627451,0.079455,0.097145,0.06438,0.102286,0.190322,0.0,0.0,0.0,0.0,1.0
1,0.616534,0.129787,0.941176,0.085966,0.121974,0.036744,0.124157,0.228452,0.0,0.0,0.0,0.0,1.0
2,0.385458,0.224468,0.058824,0.048197,0.05121,0.025561,0.05509,0.252162,0.0,0.0,0.0,0.0,1.0
3,0.721116,0.014894,0.686275,0.03609,0.056797,0.039659,0.058214,0.099488,0.0,0.0,0.0,0.0,1.0
4,0.453187,0.45,0.823529,0.060532,0.066729,0.024412,0.062325,0.210638,0.0,1.0,0.0,0.0,0.0


In [10]:
# Append the target as the last column so each frame carries features and label together.
x_train_frame["median_house_value"] = y_train
x_test_frame["median_house_value"] = y_test

# Only a cell's last expression is displayed automatically, so the shapes are printed explicitly.
print(x_train_frame.shape, x_test_frame.shape)
x_train_frame.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocp_<1H OCEAN,ocp_INLAND,ocp_ISLAND,ocp_NEAR BAY,ocp_NEAR OCEAN,median_house_value
0,0.729084,0.017021,0.627451,0.079455,0.097145,0.06438,0.102286,0.190322,0.0,0.0,0.0,0.0,1.0,103000.0
1,0.616534,0.129787,0.941176,0.085966,0.121974,0.036744,0.124157,0.228452,0.0,0.0,0.0,0.0,1.0,382100.0
2,0.385458,0.224468,0.058824,0.048197,0.05121,0.025561,0.05509,0.252162,0.0,0.0,0.0,0.0,1.0,172600.0
3,0.721116,0.014894,0.686275,0.03609,0.056797,0.039659,0.058214,0.099488,0.0,0.0,0.0,0.0,1.0,93400.0
4,0.453187,0.45,0.823529,0.060532,0.066729,0.024412,0.062325,0.210638,0.0,1.0,0.0,0.0,0.0,96500.0


In [11]:
# Persist both frames (scaled features plus the target column) as pickle files
# in the working directory.
x_train_frame.to_pickle('train.pkl')
x_test_frame.to_pickle('test.pkl')

In [12]:
# Read the training pickle back as a sanity check. The file was written by this
# notebook, so unpickling it is safe; do not unpickle files from untrusted sources.
train=pd.read_pickle("train.pkl")
train.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocp_<1H OCEAN,ocp_INLAND,ocp_ISLAND,ocp_NEAR BAY,ocp_NEAR OCEAN,median_house_value
0,0.729084,0.017021,0.627451,0.079455,0.097145,0.06438,0.102286,0.190322,0.0,0.0,0.0,0.0,1.0,103000.0
1,0.616534,0.129787,0.941176,0.085966,0.121974,0.036744,0.124157,0.228452,0.0,0.0,0.0,0.0,1.0,382100.0
2,0.385458,0.224468,0.058824,0.048197,0.05121,0.025561,0.05509,0.252162,0.0,0.0,0.0,0.0,1.0,172600.0
3,0.721116,0.014894,0.686275,0.03609,0.056797,0.039659,0.058214,0.099488,0.0,0.0,0.0,0.0,1.0,93400.0
4,0.453187,0.45,0.823529,0.060532,0.066729,0.024412,0.062325,0.210638,0.0,1.0,0.0,0.0,0.0,96500.0


In [13]:
# Upload the training set to the workspace's default datastore.
# overwrite=True keeps the cell re-runnable: with the default (overwrite=False) a blob
# left over from an earlier run is not replaced, so the datastore copy could be stale.
datastore = ws.get_default_datastore()
datastore.upload_files(['train.pkl'], overwrite=True)

Uploading an estimated of 1 files
Uploading train.pkl
Uploaded train.pkl, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_workspaceblobstore

In [14]:
# Upload the test set as well; overwrite=True replaces a blob left over from an
# earlier run instead of keeping the stale copy (the default is overwrite=False).
datastore.upload_files(['test.pkl'], overwrite=True)

Uploading an estimated of 1 files
Uploading test.pkl
Uploaded test.pkl, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_workspaceblobstore

In [15]:
# List the working directory again to confirm the pickle files were written.
os.listdir(os.curdir)

['.ipynb_aml_checkpoints',
 'housing.csv',
 'hyperparameter_tuning (1).ipynb',
 'keras_env.yml',
 'keras_train.py',
 'test.pkl',
 'train.pkl']

## Hyperdrive Configuration

TODO: Explain the model you are using and the reasons for choosing the different hyperparameters, termination policy and config settings.

In [18]:
# Early-termination policy: a Bandit policy with a slack factor of 0.1, applied at
# every 2nd evaluation interval with no initial delay.
policy = BanditPolicy(
    evaluation_interval=2,
    slack_factor=0.1,
    slack_amount=None,
    delay_evaluation=0,
)

# Random sampling over discrete options. The keys are the command-line flags
# handed to the training script.
ps = RandomParameterSampling({
    '--batch-size': choice(25, 50, 100),
    '--number-epochs': choice(5, 10, 15),
    '--first-layer-neurons': choice(range(2, 12, 2)),   # 2, 4, 6, 8, 10
    '--second-layer-neurons': choice(range(2, 12, 2)),  # 2, 4, 6, 8, 10
})

# Run configuration shared by every child run: the training script, the compute
# target it runs on and the environment it runs in.
src = ScriptRunConfig(
    source_directory=project_folder,
    script='keras_train.py',
    compute_target=compute_target,
    environment=keras_env,
)

# HyperDrive settings: minimise the 'MAE' metric over at most 8 runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='MAE',
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=8,
    max_concurrent_runs=4,
)

## Submit run and Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [19]:
# Submit the HyperDrive configuration to the experiment and show the run's
# progress in the RunDetails widget.
hyperdrive_run = experiment.submit(hyperdrive_config, show_output=True)
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [20]:
# Block until the HyperDrive run finishes, streaming its output into the cell.
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_066c962e-314e-4361-b540-3c31a24c5144
Web View: https://ml.azure.com/experiments/keras_housing/runs/HD_066c962e-314e-4361-b540-3c31a24c5144?wsid=/subscriptions/7a5e5192-86c5-4374-9780-5ddf26e7d9e1/resourcegroups/aml-quickstarts-127698/workspaces/quick-starts-ws-127698

Execution Summary
RunId: HD_066c962e-314e-4361-b540-3c31a24c5144
Web View: https://ml.azure.com/experiments/keras_housing/runs/HD_066c962e-314e-4361-b540-3c31a24c5144?wsid=/subscriptions/7a5e5192-86c5-4374-9780-5ddf26e7d9e1/resourcegroups/aml-quickstarts-127698/workspaces/quick-starts-ws-127698

{
  "error": {
    "code": "UserError",
    "severity": null,
    "message": "User errors were found in at least one of the child runs.",
    "messageFormat": null,
    "messageParameters": {},
    "referenceCode": null,
    "detailsUri": null,
    "target": null,
    "details": [],
    "innerError": null,
    "debugInfo": null
  },
  "correlation": null,
  "environment": null,
  "location": null,
  "time": "0001-01-01T0

{'runId': 'HD_066c962e-314e-4361-b540-3c31a24c5144',
 'target': 'compute-gpu',
 'status': 'Completed',
 'startTimeUtc': '2020-11-24T12:21:20.797169Z',
 'endTimeUtc': '2020-11-24T12:27:02.432036Z',
 'error': {'error': {'code': 'UserError',
   'message': 'User errors were found in at least one of the child runs.',
   'messageParameters': {},
   'details': []},
  'time': '0001-01-01T00:00:00.000Z'},
   'message': '{\n  "error": {\n    "code": "UserError",\n    "severity": null,\n    "message": "User errors were found in at least one of the child runs.",\n    "messageFormat": null,\n    "messageParameters": {},\n    "referenceCode": null,\n    "detailsUri": null,\n    "target": null,\n    "details": [],\n    "innerError": null,\n    "debugInfo": null\n  },\n  "correlation": null,\n  "environment": null,\n  "location": null,\n  "time": "0001-01-01T00:00:00+00:00",\n  "componentName": null\n}'}],
 'properties': {'primary_metric_config': '{"name": "MAE", "goal": "minimize"}',
  'resume_from':

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [21]:
# Pick the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# The lookup yields None when no child run logged the primary metric (for example
# when every child run failed); fail with a clear message rather than an
# AttributeError on None further down.
if best_run is None:
    raise RuntimeError(
        "No child run reported the primary metric; inspect the child run logs before continuing."
    )

best_run_metrics = best_run.get_metrics()
best_run_metrics

{'Batch Size': 25,
 'Epochs': 10,
 'First hidden layer': 4,
 'Second hidden layer': 4,
 'Loss': 50654265725.023254,
 'MAE': 193928.734375}

In [22]:
# Report the identifier and the MAE of the best run.
print('Best Run ID', best_run.id)
print('\n MAE', best_run_metrics['MAE'])

Bets Run ID HD_066c962e-314e-4361-b540-3c31a24c5144_2

 MAE 193928.734375


In [23]:
# Show the command-line arguments (the sampled hyperparameters) the best run was started with.
print(best_run.get_details()['runDefinition']['arguments'])

['--batch-size', '25', '--first-layer-neurons', '4', '--number-epochs', '10', '--second-layer-neurons', '4']


In [24]:
# List the working directory; the entry script used for deployment (scoring.py) must be present.
os.listdir(os.curdir)

['.ipynb_aml_checkpoints',
 'housing.csv',
 'hyperparameter_tuning (1).ipynb',
 'keras_env.yml',
 'keras_train.py',
 'scoring.py',
 'test.pkl',
 'train.pkl']

In [26]:
# Save the best model by registering it in the workspace.
# NOTE(review): model_path is './outputs/model' — presumably the location where
# keras_train.py saved the model within the run; confirm against the script.

# Register model
model = best_run.register_model(model_name='keras-housing', model_path='./outputs/model')

In [27]:
# Display the name of the registered model.
model.name

'keras-housing'

In [28]:
from azureml.core.conda_dependencies import CondaDependencies

# Assemble the conda specification for the inference environment: TensorFlow,
# h5py (<= 2.10.0) and keras (<= 2.3.1) as conda packages, azureml-defaults via pip.
cd = CondaDependencies.create()
cd.add_tensorflow_conda_package()
cd.add_conda_package('h5py<=2.10.0')
cd.add_conda_package('keras<=2.3.1')
cd.add_pip_package("azureml-defaults")
# Write the specification to ./myenv.yml so it can be loaded as an Environment.
cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')

# Print the generated YAML for inspection.
print(cd.serialize_to_string())

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  - azureml-defaults
- tensorflow=1.13.1
- h5py<=2.10.0
- keras<=2.3.1
channels:
- anaconda
- conda-forge



In [29]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model
from azureml.core.environment import Environment


# Inference setup: the environment built from myenv.yml plus the scoring entry script.
myenv = Environment.from_conda_specification(name="myenv", file_path="myenv.yml")
inference_config = InferenceConfig(entry_script="scoring.py", environment=myenv)

# Azure Container Instance sizing and metadata; auth_enabled generates API keys
# to secure access to the endpoint.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    tags={'name': 'housing', 'framework': 'Keras'},
    description='Keras MLP on california housing',
)

# Deploy the registered model as a web service and wait for the deployment to finish.
service = Model.deploy(
    workspace=ws,
    name='keras-housing-svc',
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)

service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running................................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [30]:

# Print the endpoint URL of the deployed service.
print(service.scoring_uri)

http://d16dbe64-9b27-48b9-9cf8-3bc605fec3c7.southcentralus.azurecontainer.io/score


## Test the deployed model

In [45]:
# Take the first five scaled test rows as plain nested lists so they can be JSON-serialised.
test_s=x_test[:5].tolist()

In [48]:
import json

# Serialise the samples as a UTF-8 encoded JSON document with a single "data" field.
test_samples = json.dumps({"data": test_s}).encode('utf8')

# Send the payload to the deployed service and keep its response.
result = service.run(input_data=test_samples)


In [50]:
from sklearn.metrics import mean_absolute_error
# Mean absolute error between the first five true targets and the service's response.
# NOTE(review): assumes `result` holds one prediction per input row, in input order —
# confirm against scoring.py.
mae_test = mean_absolute_error(y_test[:5], np.array(result))
print(round(mae_test, 3))

218020.2


In [55]:

# Look the registered model up by name in the workspace and report its id.
# Note that this rebinds `model` to whatever the workspace returns for that name.
model = ws.models['keras-housing']
print("Model: {}, ID: {}".format('keras-housing', model.id))
    
# Look the deployed web service up by name and report its scoring URI.
webservice = ws.webservices['keras-housing-svc']
print("Webservice: {}, scoring URI: {}".format('keras-housing-svc', webservice.scoring_uri))

Model: keras-housing, ID: keras-housing:2
Webservice: keras-housing-svc, scoring URI: http://d16dbe64-9b27-48b9-9cf8-3bc605fec3c7.southcentralus.azurecontainer.io/score


## Clean Up

In [56]:
# Delete the deployed web service.
service.delete()

In [57]:
# Delete the compute cluster.
compute_target.delete()