In [None]:
# Load the workspace and create the experiment that will hold the AutoML run
from azureml.core import Workspace, Experiment

# Workspace connection details are read from the local Azure ML config file.
ws = Workspace.from_config()

# Experiment under which the AutoML run in this notebook is submitted.
exp = Experiment(workspace=ws, name='alexei-AutoML')

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory
import os

# Build a TabularDataset directly from the hosted Bank Marketing training CSV.
path_data = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path_data)

In [None]:
from train import clean_data
from sklearn.model_selection import train_test_split
import pandas as pd

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

# Fixed seed so the 80/20 split is identical on every Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

# Re-attach the label column to each feature split.
train_df = pd.concat([x_train, y_train], axis=1)
test_df = pd.concat([x_test, y_test], axis=1)

# Full cleaned dataset (features + label); this is the frame the AutoMLConfig
# cell passes as training_data.
data = pd.concat([x, y], axis=1)

# Preview the training split. This has to be the last expression in the cell,
# otherwise the notebook does not render it.
train_df.head()

In [None]:
# Write both splits to ./training as CSV, then upload that folder to the
# workspace's default datastore under the 'bankmarketing' path.
os.makedirs('training', exist_ok=True)
train_df.to_csv("training/train_df.csv", index=False)
test_df.to_csv("training/test_df.csv", index=False)

data_store = ws.get_default_datastore()
data_store.upload(
    src_dir='./training',
    target_path='bankmarketing',
    overwrite=True,
    show_progress=True,
)

In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML settings, gathered in one dict so they are easy to scan and tweak.
# Per the Azure ML docs, supplying both validation_size and n_cross_validations
# requests Monte Carlo cross-validation (5 random 70/30 splits here).
# No compute_target is passed; presumably the run executes on local compute.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": "classification",
    "primary_metric": "accuracy",
    "training_data": data,
    "validation_size": 0.3,
    "label_column_name": 'y',
    "n_cross_validations": 5,
    "enable_onnx_compatible_models": True,
}
automl_config = AutoMLConfig(**automl_settings)

# Submit the experiment, streaming progress into the cell, then block until
# the run has finished.
automl_run = exp.submit(automl_config, show_output=True)
automl_run.wait_for_completion()

In [None]:
# Retrieve and save best automl model.
# get_output() returns a (best child run, fitted model) pair: the first element
# is an Azure ML Run object, not the model itself, so it is named accordingly.
best_automl_run, fitted_model = automl_run.get_output()

# Backward-compatible alias for any later cell that still uses the old name.
best_automl_model = best_automl_run

print(best_automl_run)
print(fitted_model)

# Register under an AutoML-specific name. The previous "best_hyperdrive_model"
# name mislabelled this model as the output of a HyperDrive run.
model = best_automl_run.register_model(
    model_name="best_automl_model",
    model_path="./outputs/model.pkl",
)

In [None]:
# Clean-up: calls .delete() on whatever `cluster_name` is bound to.
# NOTE(review): `cluster_name` is not assigned in any cell visible in this
# notebook section, and the AutoMLConfig call passes no compute_target.
# Confirm that this name refers to a compute target object (not a plain name
# string) defined elsewhere; otherwise this cell will fail on a fresh kernel.
cluster_name.delete()