In [None]:
import azureml.core

from azureml.core import Experiment, Workspace, Dataset, Datastore, Run
from azureml.train.automl import AutoMLConfig
from notebookutils import mssparkutils
from azureml.data.dataset_factory import TabularDatasetFactory

In [None]:
# Names that identify the Synapse linked service and the Azure ML experiment.
experiment_name = "train_nyc_taxi"
linkedService_name = "AzureMLService"

# Resolve the workspace through the linked service, then get an experiment
# handle in that workspace; later cells submit the AutoML run through it.
ws = mssparkutils.azureML.getWorkspace(linkedService_name)
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Default datastore of the workspace: target for the registered dataset.
datastore = Datastore.get_default(ws)

# Load the full training table through Spark SQL.
df = spark.sql("SELECT * FROM default.train_nyc_taxi")

# Register the Spark DataFrame as a tabular dataset named after the experiment.
dataset_name = f"{experiment_name}-dataset"
dataset = TabularDatasetFactory.register_spark_dataframe(df, datastore, name=dataset_name)

In [None]:
# AutoML settings gathered in one place so they are easy to review and tune.
automl_settings = {
    "spark_context": sc,
    "task": "regression",
    "training_data": dataset,
    "label_column_name": "fareAmount",
    "primary_metric": "spearman_correlation",
    "experiment_timeout_hours": 3,
    "max_concurrent_iterations": 2,
    "enable_onnx_compatible_models": False,
}

automl_config = AutoMLConfig(**automl_settings)

In [None]:
# Submit the AutoML configuration; `run` is the handle used by later cells.
run = experiment.submit(config=automl_config)

In [None]:
# Show a link to the submitted run, labelled with the run id.
portal_link_template = "<a href={} target='_blank'>Your experiment in Azure Machine Learning portal: {}</a>"
displayHTML(portal_link_template.format(run.get_portal_url(), run.id))

In [None]:
import mlflow

# Block until the AutoML run submitted earlier has finished.
run.wait_for_completion()

# Get best model from automl run
best_run, non_onnx_model = run.get_output()

artifact_path = experiment_name + "_artifact"
registered_model_name = "synapseezi1n72-train_nyc_taxi-20221114013337-Best"

# Point MLflow at the workspace's tracking server and select the experiment.
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
mlflow.set_experiment(experiment_name)

# The MLflow run gets its own name so that the notebook-level `run` keeps
# referring to the AutoML run. Rebinding `run` here would break any re-run of
# this cell (or of the portal-link cell), because the MLflow run object is a
# different kind of object from the AutoML run.
with mlflow.start_run() as mlflow_run:
    # Log the best model as an artifact of the MLflow run.
    mlflow.sklearn.log_model(non_onnx_model, artifact_path)

    # Register the model to AML model registry
    model_uri = "runs:/" + mlflow_run.info.run_id + "/" + artifact_path
    mlflow.register_model(model_uri, registered_model_name)