In [1]:
from azureml.core import Workspace, Experiment
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')
 
run = exp.start_logging()

Workspace name: epe-venkat-aiml
Azure region: centralus
Subscription id: 16bc73b5-82be-47f2-b5ab-f2373344794c
Resource group: kalakkad-ddl


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
 
from azureml.core.compute_target import ComputeTargetException
cluster_name = "ClusterKal"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.


try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # To use a different region for the compute, add a location='<region>' parameter
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)
 

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling( 
    {
    'C': uniform(0.001, 1.0),  
    'max_iter': choice(50, 100, 500, 1000)  
    }
) 


In [4]:
 
 
# Specify a Policy
e_policy =   BanditPolicy(slack_factor = 0.1, delay_evaluation = 5, evaluation_interval = 1)

In [5]:
print(os.listdir())
print(os.getcwd())

['.amlignore', '.amlignore.amltmp', '.azureml', '.config', '.ipynb_aml_checkpoints', '.ipynb_checkpoints', 'automl.log', 'azureml_automl.log', 'conda_dependencies.yml', 'Current-copy.ipynb', 'current-copy.ipynb.amltmp', 'current.ipynb.amltmp', 'LATESTCOPY.ipynb', 'model_outputs', 'outputs', 'Tosubmitfile.ipynb', 'train.py', 'training', '__pycache__']
/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource/training


In [6]:
if "training" not in os.listdir():
    os.mkdir("./training")


# Setup environment for your training run
sklearn_env = Environment.from_conda_specification('sklearn-env',"conda_dependencies.yml")



In [7]:
# Create a ScriptRunConfig Object to specify the configuration details of your training job
config = ScriptRunConfig(source_directory='.',
                            command=['python', 'train.py'],
                            compute_target=cpu_cluster,
                            environment=sklearn_env)



In [8]:
# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config =  HyperDriveConfig(run_config=config, 
                             hyperparameter_sampling=ps,
                             policy= e_policy,
                             primary_metric_name='Accuracy', 
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
                             max_total_runs=8,
                             max_concurrent_runs=4)

In [85]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

prun = exp.submit( config=hyperdrive_config)
RunDetails(prun).show()
prun.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_a889e53e-ae33-415c-b82a-b199770fe4ed
Web View: https://ml.azure.com/runs/HD_a889e53e-ae33-415c-b82a-b199770fe4ed?wsid=/subscriptions/16bc73b5-82be-47f2-b5ab-f2373344794c/resourcegroups/kalakkad-ddl/workspaces/epe-venkat-aiml&tid=db05faca-c82a-4b9d-b9c5-0f64b6755421

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-06-07T18:31:17.238576][API][INFO]Experiment created<END>\n""<START>[2022-06-07T18:31:18.098203][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2022-06-07T18:31:18.8357653Z][SCHEDULER][INFO]Scheduling job, id='HD_a889e53e-ae33-415c-b82a-b199770fe4ed_1'<END><START>[2022-06-07T18:31:18.8912594Z][SCHEDULER][INFO]Scheduling job, id='HD_a889e53e-ae33-415c-b82a-b199770fe4ed_0'<END><START>[2022-06-07T18:31:18.9183167Z][SCHEDULER][INFO]Scheduling job, id='HD_a889e53e-ae33-415c-b82a-b199770fe4ed_2'<END><START>[2022-06-07T18:31:19.0057382Z][SCHEDULER][INFO]Scheduling job, id='HD_a889e53e-ae33-415c-b82a-b199770fe4ed_3'<END>"<START>[20

{'runId': 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed',
 'target': 'ClusterKal',
 'status': 'Completed',
 'startTimeUtc': '2022-06-07T18:31:16.919397Z',
 'endTimeUtc': '2022-06-07T18:35:21.819199Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '49547846-8ef0-4fe3-8d38-9147bd0c1b18',
  'user_agent': 'python/3.8.5 (Linux-5.4.0-1074-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.40.0',
  'space_size': 'infinite_space_size',
  'score': '0.9126707132018209',
  'best_child_run_id': 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_0',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_a889e53e-ae33-415c-b82a-b199770fe4ed_0'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://epevenkataiml9756921869.b

In [86]:
prun.get_metrics()


{'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_7': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_6': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_5': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_4': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_3': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_0': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e-ae33-415c-b82a-b199770fe4ed_2': {'Regularization Strength:': 1.0,
  'Max iterations:': 100,
  'Accuracy': 0.9126707132018209},
 'HD_a889e53e

In [103]:

print(os. getcwd())
path_parent = os. path. dirname(os.getcwd())
print(path_parent)
 
br  = prun.get_best_run_by_primary_metric()
Best_model = br.register_model(model_name = "BestHyperdriveModel",model_path="outputs")
 
#using this step since not clear how to use joblib to save best run

/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource/training
/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource


In [88]:
metrics_for_best_run =  br.get_metrics()
 
print (metrics_for_best_run)

#Hyperparameters
 


{'Regularization Strength:': 1.0, 'Max iterations:': 100, 'Accuracy': 0.9126707132018209}


In [15]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files("https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv")

In [16]:
print(os.getcwd() )
from train  import clean_data


# Use the clean_data function to clean your data.
x, y = clean_data(ds)
 

/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource/training


In [17]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task="classification",
    compute_target = cpu_cluster,
    primary_metric="accuracy",
    training_data=ds,
    validation_size = 0.2,
    label_column_name="y",
    n_cross_validations=8)

In [18]:
# Submit your automl run

aml_run = exp.submit(automl_config)
RunDetails(aml_run).show()
aml_run.wait_for_completion(show_output=True)
 

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_e31cf009-7fff-4c15-bac0-e69c172e8771,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_e31cf009-7fff-4c15-bac0-e69c172e8771,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Received interrupt. Returning now.

{'runId': 'AutoML_e31cf009-7fff-4c15-bac0-e69c172e8771',
 'target': 'ClusterKal',
 'status': 'Running',
 'startTimeUtc': '2022-06-07T15:55:14.968874Z',
 'services': {},
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0.2',
  'acquisition_parameter': '0',
  'num_cross_validation': '8',
  'target': 'ClusterKal',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"7c00fadb-be6b-40f7-aaef-e34cf3d1bebb\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.40.0", "azureml-training-tabular": "1.40.0", "azureml-train": "1.40.0", "azureml-train-restclients-hyperdrive": "1.40.0", "azureml-train-core": "1.40.0", "azureml-train-automl": "1.40.0", "azureml-train-automl-runtime": "1.40.0.post1", "azureml-train-automl-client": "

In [100]:
# Retrieve and save your best automl model.
best_run, fitted_model = aml_run.get_output()
print(best_run)

 

Package:azureml-automl-runtime, training version:1.41.1, current version:1.40.0
Package:azureml-core, training version:1.41.0.post3, current version:1.40.0
Package:azureml-dataprep, training version:3.1.3, current version:3.0.0
Package:azureml-dataprep-rslex, training version:2.5.4, current version:2.4.0
Package:azureml-dataset-runtime, training version:1.41.0, current version:1.40.0
Package:azureml-defaults, training version:1.41.0, current version:1.40.0
Package:azureml-interpret, training version:1.41.0, current version:1.40.0
Package:azureml-mlflow, training version:1.41.0, current version:1.40.0
Package:azureml-pipeline-core, training version:1.41.0, current version:1.40.0
Package:azureml-responsibleai, training version:1.41.0, current version:1.40.0
Package:azureml-telemetry, training version:1.41.0, current version:1.40.0
Package:azureml-train-automl-client, training version:1.41.0, current version:1.40.0
Package:azureml-train-automl-runtime, training version:1.41.1, current ver

Run(Experiment: udacity-project,
Id: AutoML_e31cf009-7fff-4c15-bac0-e69c172e8771_24,
Type: azureml.scriptrun,
Status: Completed)


In [104]:
# Save the trained model
import joblib
if "model_outputs" not in os.listdir():
    os.mkdir("model_outputs")
path_parent = os. path. dirname(os.getcwd())
print(path_parent)
os.chdir("model_outputs")
joblib.dump(fitted_model,"AUTOML_BEST.pkl")


/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource


['AUTOML_BEST.pkl']

In [102]:
os.getcwd()

'/mnt/batch/tasks/shared/LS_root/mounts/clusters/nanokal/code/Users/vkalakk1/newsource/training'

In [106]:
 
 cpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

