In [1]:
from azureml.core import Workspace, Experiment

# Attach to the Azure ML workspace (from_config looks for a config.json
# workspace configuration file) and open the experiment that groups the
# runs started from this notebook.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# Summarise which workspace we are connected to, one field per line.
workspace_summary = [
    "Workspace name: " + ws.name,
    "Azure region: " + ws.location,
    "Subscription id: " + ws.subscription_id,
    "Resource group: " + ws.resource_group,
]
print("\n".join(workspace_summary))

# Start an interactive run on the experiment for logging from the notebook.
run = exp.start_logging()


UserErrorException: UserErrorException:
	Message: The workspace configuration file config.json, could not be found in /home/jan/workspace/deep-learning-journey/udacity/azure-ml/udacity-azure-ml-engineer-project-1 or its parent directories. Please check whether the workspace configuration file exists, or provide the full path to the configuration file as an argument. You can download a configuration file for your workspace, via http://ml.azure.com and clicking on the name of your workspace in the right top.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "The workspace configuration file config.json, could not be found in /home/jan/workspace/deep-learning-journey/udacity/azure-ml/udacity-azure-ml-engineer-project-1 or its parent directories. Please check whether the workspace configuration file exists, or provide the full path to the configuration file as an argument. You can download a configuration file for your workspace, via http://ml.azure.com and clicking on the name of your workspace in the right top."
    }
}

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

from azureml.core.compute_target import ComputeTargetException

# Azure ML compute names may only contain letters, digits and dashes
# (2-16 characters), so the name must not use underscores.
cluster_name = "my-aml-cluster"

# Cluster sizing, taken from the project brief:
#   - vm_size must be "Standard_D2_V2"
#   - max_nodes should be no greater than 4
MAX_NODES = 3
VM_SIZE = "Standard_D2_V2"

# Boilerplate from the lecture: reuse the cluster if it exists, else create it.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster, use it.")
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. A bare `except:`
    # would also swallow unrelated errors and try to create a cluster anyway.
    print(f"Creating ({MAX_NODES} of {VM_SIZE})")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=VM_SIZE, max_nodes=MAX_NODES
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until the cluster is ready, then report its status.
compute_target.wait_for_completion(show_output=True)
print("Init compute resource done")
print(compute_target.get_status())


In [None]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Hyperparameter search space, sampled at random for each child run:
#   --C         drawn from a continuous uniform range
#   --max_iter  picked from a fixed set of values
# Both are passed to train.py as command-line arguments.
ps = RandomParameterSampling(
    {
        "--C": uniform(0.01, 10.0),
        "--max_iter": choice(64, 128, 256, 512),
    }
)

# Early-termination policy.
# see https://azure.github.io/azureml-sdk-for-r/reference/bandit_policy.html#:~:text=Bandit%20is%20an%20early%20termination,the%20best%20performing%20training%20run.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=5)

# Make sure the ./training directory exists; exist_ok makes this safe to
# re-run without listing the working directory first.
os.makedirs("./training", exist_ok=True)

# Environment for the training run, built from the conda specification file.
sklearn_env = Environment.from_conda_specification(
    name="sklearn-env", file_path="conda_dependencies.yml"
)

# ScriptRunConfig: which script to run, from where, on which compute target
# and in which environment.
src = ScriptRunConfig(
    source_directory=".",
    script="train.py",
    compute_target=compute_target,
    environment=sklearn_env,
)

# HyperDriveConfig ties together the script config, the sampler and the
# termination policy, and names the metric to maximise.
# NOTE(review): "Accuracy" must match the metric name logged by train.py - confirm.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    policy=policy,
)


In [None]:
# Launch the HyperDrive sweep on the experiment, attach the notebook widget
# to follow its progress, then block until the run has completed.
hd_run = exp.submit(config=hyperdrive_config)

hd_widget = RunDetails(hd_run)
hd_widget.show()

hd_run.wait_for_completion(show_output=True)

In [None]:
import joblib

# Get the best child run of the HyperDrive sweep and register its model.
best_run = hd_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run logged the primary metric, so there is nothing to register.
    raise RuntimeError(
        "HyperDrive run has no child run that logged the primary metric."
    )

# Read the metrics from the best child run itself. hd_run.get_metrics()
# returns the metrics of all child runs keyed by run id, so it has no
# top-level "Accuracy" entry.
metrics = best_run.get_metrics()

print(f"Best Accuracy: {metrics['Accuracy']}")

# Save (register) the model from the best run.
# NOTE(review): model_path="./" is kept as-is; confirm it matches the location
# where train.py writes the model inside the run.
the_model = best_run.register_model("best_hd_model", model_path="./")
the_model

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset with TabularDatasetFactory from the bank-marketing
# training CSV (per the original note, the same source as in train.py).
bank_marketing_csv = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(bank_marketing_csv)



In [None]:
from train import clean_data, TEST_SIZE, LUCKY_NUMBER
from sklearn.model_selection import train_test_split
import pandas as pd

# Clean the raw dataset with the clean_data helper imported from train.py.
x, y = clean_data(ds)

# Split with the same test size and random seed constants that train.py
# exports, so the split matches the one used there.
split_parts = train_test_split(
    x,
    y,
    test_size=TEST_SIZE,
    random_state=LUCKY_NUMBER,
)
x_train, x_test, y_train, y_test = split_parts

# Put the training features and the training label side by side in one frame.
combine = pd.concat(objs=[x_train, y_train], axis="columns")


In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML settings, collected in one dict and unpacked into AutoMLConfig.
# NOTE: DO NOT CHANGE experiment_timeout_minutes OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment for longer requires your own Azure tenant, which
# will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": "classification",
    "primary_metric": "accuracy",
    "training_data": combine,
    "label_column_name": "y",
    "n_cross_validations": 5,
}

automl_config = AutoMLConfig(**automl_settings)


In [2]:
# Submit the AutoML configuration under its own experiment.
automl_run = Experiment(workspace=ws, name="udacity-prj-1-automl").submit(automl_config)

# Show the progress widget for the run just submitted. The original passed the
# undefined name `aml_run`, which raises NameError. The widget is shown before
# the blocking wait so progress is visible while the run executes, matching
# the HyperDrive cell.
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###
