### Titanic

In [1]:
# connect workspace
import azureml.core
from azureml.core import Workspace
ws = Workspace.from_config()
print('Ready to use Azure ML {} to use {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.13.0 to use ML01


In [2]:
# prepare data
from azureml.core import Datastore
titanic_datastore = Datastore.get(ws, 'titanic_data')
print(titanic_datastore.name, ':', titanic_datastore.datastore_type \
     + ' (' + titanic_datastore.account_name + ')')

titanic_data : AzureBlob (exxxtitanic)


In [3]:
# set default datastore
ws.set_default_datastore('titanic_data')
default_ds = ws.get_default_datastore()
print(default_ds.name)

titanic_data


In [4]:
# data_ref = default_ds.path('azml_titanic.csv').as_download(path_on_compute='titanic_data')
# print(data_ref)

# Prepare a Compute Target

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "cluster02"

try:
    # Check for existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)


Found existing cluster, use it.


# use auto ML

In [6]:
# prepare data from dataset
titanic_ds = ws.datasets.get('titanic dataset')
train_ds, test_ds = titanic_ds.random_split(percentage=0.7, seed=1234)
print("Data ready!")

Data ready!


In [7]:
from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(name='Titanic Automated ML ',
                             task='classification',
                             compute_target=training_cluster,
                             training_data = train_ds,
                             validation_data = test_ds,
                             label_column_name='Survived',
                             iterations=20,
                             primary_metric = 'AUC_weighted',
                             max_concurrent_iterations=4,
                             featurization='auto'
                             )

print("Ready for Auto ML run.")

Ready for Auto ML run.


# Run an Automated ML

In [8]:
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

print('Submitting Auto ML experiment...')
automl_experiment = Experiment(ws, 'titanic_automl')
automl_run = automl_experiment.submit(automl_config)
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

Submitting Auto ML experiment...
Running on remote or ADB.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the current run and use a script to customize the handling of missing feature values that may be more appropriate based on the data type and b

{'runId': 'AutoML_5944fa12-04a1-442f-a4c0-b2a2af41908a',
 'target': 'cluster02',
 'status': 'Completed',
 'startTimeUtc': '2020-09-14T20:38:37.534934Z',
 'endTimeUtc': '2020-09-14T20:47:37.042165Z',
 'properties': {'num_iterations': '20',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'cluster02',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"ebfd84d6-bacc-4d73-8334-49ba40ee8f59\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"UI/09-14-2020_071420_UTC/azml_titanic2.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"dp100\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"514404cd-0583-4780-8e00-0e352b1266bf\\\\\\", \\\\\\

# chose best model

In [1]:
# best_run, fitted_model = automl_run.get_output()
# print(best_run)
# print(fitted_model)
# best_run_metrics = best_run.get_metrics()
# for metric_name in best_run_metrics:
#     metric = best_run_metrics[metric_name]
#     print(metric_name, metric)