In [9]:
# Create a new workspace and define an experiment.

from azureml.core import Workspace, Experiment

#ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config()
ws.get_details()

# Choose a name for the experiment
experiment_name = 'udacity-project'
exp = Experiment(workspace=ws, name= experiment_name)

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-141428
Azure region: southcentralus
Subscription id: a0a76bad-11a1-4a2d-9887-97a29122c8ed
Resource group: aml-quickstarts-141428


In [10]:
# Createa  compute cluster to provision VM Resources.

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

# Choose a name for the cluster
cpu_cluster_name = 'cpu-cluster-01'

#Verify that the culster does not exist already
try:
    compute_target = ComputeTarget(workspace = ws, name = cpu_cluster_name)
    print('Found existing cluster, use it')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2', max_nodes = 4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Creating...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [None]:
# Setup Hyperparameter Tuning with Hyperdrive.

from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os

#Define the parameter search space/method
# Specify parameter sampler 
ps = RandomParameterSampling({
    'learning_rate': uniform(0.05,0.1),
    'batch_size': choice(16,32,64,128)})

# Specify an early termination Policy
# Other options are median policy
# Bandit policy stops if its less than 10% of best model, starting and interval 5
early_termination_policypolicy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est = ### YOUR CODE HERE ###

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=script_run_config,
     hyperparameter_sampling=param_sampling,
     policy=early_termination_policy,
     primary_metric_name="accuracy",
     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
     max_total_runs=100,
     max_concurrent_runs=4)

In [None]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

from azureml.core.experiment import Experiment

experiment = Experiment(workspace, experiment_name)
hyperdrive_run = experiment.submit(hd_config)

In [None]:
import joblib

# Get your best run and save the model from that run.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['Aruguments']

print('Best Run id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['accuracy'])
print('\n learning rate', parameter_values[3])
print('\n keep probability:', parametr_vlues[5])
print('\n batch size:', parameter_values[7])

In [15]:
from azureml.data.dataset_factory import TabularDatasetFactory
import pandas as pd

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

#  path to URL from Chrome DevTools Console
url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

#  read remote URL data to DataFrame
ds = pd.read_csv(url)

In [21]:
def clean_data(data):
    # Dict for cleaning data
    months = {"jan":1, "feb":2, "mar":3, "apr":4, "may":5, "jun":6, "jul":7, "aug":8, "sep":9, "oct":10, "nov":11, "dec":12}
    weekdays = {"mon":1, "tue":2, "wed":3, "thu":4, "fri":5, "sat":6, "sun":7}

    # Clean and one hot encode data
    #x_df = data.to_pandas_dataframe().dropna(), not required from original code, data is already a dataframe.
    x_df = data.dropna()
    jobs = pd.get_dummies(x_df.job, prefix="job")
    x_df.drop("job", inplace=True, axis=1)
    x_df = x_df.join(jobs)
    x_df["marital"] = x_df.marital.apply(lambda s: 1 if s == "married" else 0)
    x_df["default"] = x_df.default.apply(lambda s: 1 if s == "yes" else 0)
    x_df["housing"] = x_df.housing.apply(lambda s: 1 if s == "yes" else 0)
    x_df["loan"] = x_df.loan.apply(lambda s: 1 if s == "yes" else 0)
    contact = pd.get_dummies(x_df.contact, prefix="contact")
    x_df.drop("contact", inplace=True, axis=1)
    x_df = x_df.join(contact)
    education = pd.get_dummies(x_df.education, prefix="education")
    x_df.drop("education", inplace=True, axis=1)
    x_df = x_df.join(education)
    x_df["month"] = x_df.month.map(months)
    x_df["day_of_week"] = x_df.day_of_week.map(weekdays)
    x_df["poutcome"] = x_df.poutcome.apply(lambda s: 1 if s == "success" else 0)

    y_df = x_df.pop("y").apply(lambda s: 1 if s == "yes" else 0)
    return x_df, y_df #two outputs, x_df and y_df

In [22]:
#from train import clean_data

# Use the clean_data function to clean your data.

x, y = clean_data(ds)


In [23]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task= 'classification'
    primary_metric = 'AUC_weighted',
    training_data= ds,
    label_column_name = 'y',
    n_cross_validations = 5)

SyntaxError: invalid syntax (<ipython-input-23-725e6deadd7b>, line 10)

In [2]:
# Submit your automl run

from azureml.core.experiment import Experiment

experiment = Experiment(ws,"automl_udacity_project")
run.experiment.submit(config = automml_config, show_output = TRUE)

In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###