In [1]:
# Receive input parameters from the run context.
# model_name = dbutils.widgets.get("model_name")
#
# If `model_name` is already bound in the notebook's namespace, keep that
# value; otherwise fall back to the default registration name.
try:
    model_name = model_name
except NameError:
    # Only an undefined name should trigger the default. A bare `except:`
    # would also swallow KeyboardInterrupt/SystemExit and hide real errors.
    model_name = 'AutoML_Model'

import azureml.core
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core import Workspace, Dataset

print("SDK version:", azureml.core.VERSION)

# Databricks secret scope from which every credential below is read.
keyVaultScope = "databricks-aml-demo"

# AML Workspace: non-secret identifiers.
workspace_name = "amls-databricks"
resource_group = "jp-databricks"

# Secret values are fetched at run time, so nothing sensitive is stored
# in the notebook itself.
tenant_id = dbutils.secrets.get(keyVaultScope, "azure-tenant-id")
subscription_id = dbutils.secrets.get(keyVaultScope, "azure-subscription-id")
service_principal_id = dbutils.secrets.get(
    keyVaultScope, "databricks-aml-demo-sp-client-id"
)
service_principal_password = dbutils.secrets.get(
    keyVaultScope, "databricks-aml-demo-sp-client-key"
)

# Non-interactive authentication using the service principal credentials.
svc_pr = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)

# Handle to the workspace; used by the later cells as `ws`.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
    auth=svc_pr,
)

print("Found workspace {} at location {}".format(ws.name, ws.location))

In [2]:
# import os
import logging

# sample_projects_folder = './amls-databricks-example'

# if not os.path.isdir(sample_projects_folder):
#     os.mkdir(sample_projects_folder)
    
# print('Sample projects will be created in {}.'.format(sample_projects_folder))

In [3]:
from azureml.data.datapath import DataPath

# Use the workspace's default datastore as the data source.
datastore = ws.get_default_datastore()

# Load data: one DataPath whose glob matches every parquet file under the
# prepared-data folder.
datastore_path = [
    DataPath(datastore, '/population-vs-price/data-geo-prepped/*.parquet'),
]
dataset = Dataset.Tabular.from_parquet_files(path=datastore_path)

# Set up AutoML job
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

# Choose a name for the experiment and specify the project folder.
# experiment_name = 'ADBExampleExperimentAutoML'
# project_folder = './amls-databricks-example/automl-adb-regression'

# The experiment name is read from the '--AZUREML_ARM_PROJECT_NAME'
# notebook widget rather than being hard-coded.
experiment_name = dbutils.widgets.get('--AZUREML_ARM_PROJECT_NAME')
experiment = Experiment(ws, experiment_name)

# All AutoML settings gathered in one place and unpacked into AutoMLConfig.
automl_settings = {
    # task can be one of classification, regression, forecasting
    'task': 'regression',
    'debug_log': 'automl_errors.log',
    'primary_metric': 'normalized_root_mean_squared_error',
    'iteration_timeout_minutes': 3,
    'experiment_timeout_minutes': 20,
    'enable_early_stopping': True,
    'iterations': 3,
    'featurization': 'auto',
    'n_cross_validations': 5,
    # change it based on number of worker nodes
    'max_concurrent_iterations': 2,
    'verbosity': logging.INFO,
    # databricks/spark related
    'spark_context': sc,
    'training_data': dataset,
    'label_column_name': '2015_median_sales_price',
}
automl_config = AutoMLConfig(**automl_settings)

# Submit the run; show_output=True is passed through unchanged.
automl_run = experiment.submit(automl_config, show_output=True)


In [4]:
# Register best model in Workspace

description = "Regression model to predict house prices"
model_tags = {'area': "prices", 'type': "regression"}

# `model_name` comes from the parameter cell at the top of the notebook.
automl_run.register_model(
    model_name=model_name,
    description=description,
    tags=model_tags,
)

# Print the model id that the run exposes after registration.
print(automl_run.model_id)