In [None]:
import azureml.core
from azureml.core import Workspace, Environment, Experiment, Datastore, Dataset, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute, DatabricksCompute
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
from azureml.exceptions import ComputeTargetException
from azureml.pipeline.steps import HyperDriveStep, HyperDriveStepRun, PythonScriptStep, DatabricksStep
from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import PipelineParameter

import os
import shutil
import urllib
import numpy as np
import matplotlib.pyplot as plt

import mlflow
import mlflow.sklearn

# Print the installed azureml-core SDK version so the environment this notebook
# ran against is recorded in the cell output.
print("SDK version:", azureml.core.VERSION)


In [None]:
# Connect to the workspace described by the saved configuration, then echo its
# identifying fields one per line.
ws = Workspace.from_config()
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

In [None]:
# Experiment in the workspace named 'AML_Pipeline'.
exp = Experiment(name='AML_Pipeline', workspace=ws)

# Folder passed as `source_directory` to every pipeline step / script config.
script_folder = './scripts'

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "cpu-cluster"

# Reuse the cluster when it already exists. On a fresh workspace the lookup
# raises ComputeTargetException; in that case provision the cluster so the
# notebook still runs top to bottom.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print(f"Found existing compute target '{cluster_name}'.")
except ComputeTargetException:
    print(f"Compute target '{cluster_name}' not found; creating it.")
    # min_nodes=0 lets the cluster scale down to zero nodes while idle.
    # NOTE(review): VM size and max_nodes are defaults chosen for this demo —
    # adjust to the subscription's quota and workload.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS3_V2",
        min_nodes=0,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning_config)
    compute_target.wait_for_completion(show_output=True)

# Show the cluster's current status as a plain dictionary.
print(compute_target.get_status().serialize())


In [None]:
# Use the default blob storage
def_blob_store = ws.get_default_datastore()
print('Datastore {} will be used'.format(def_blob_store.name))


In [None]:
def register_dataset(datastore, dataset_name, local_path='./data/titanic.csv'):
    """Upload a local delimited file and register it as a tabular dataset.

    The file is uploaded to ``dataset-demo/<dataset_name>/`` on ``datastore``
    (overwriting an existing file of the same name), a tabular dataset is
    built over that remote folder, and the dataset is registered under
    ``dataset_name`` as a new version.

    Args:
        datastore: Datastore to upload to. The dataset is registered in the
            workspace the datastore belongs to (``datastore.workspace``), so
            the function does not depend on a notebook-level ``ws`` variable.
        dataset_name: Registration name; also used as the remote sub-folder.
        local_path: Path of the local file to upload. Defaults to the Titanic
            CSV used by this notebook.

    Returns:
        The registered dataset object returned by ``register``.

    Raises:
        FileNotFoundError: If ``local_path`` is not an existing file.
    """
    # Fail early with a clear message instead of a less obvious upload error.
    if not os.path.isfile(local_path):
        raise FileNotFoundError(f'Local data file not found: {local_path}')

    remote_path = f'dataset-demo/{dataset_name}/'
    datastore.upload_files(files=[local_path],
                           target_path=remote_path,
                           overwrite=True,
                           show_progress=False)

    # The dataset points at the remote folder, not at a single file.
    dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, remote_path)])
    return dataset.register(datastore.workspace,
                            name=dataset_name,
                            create_new_version=True)

In [None]:
# Upload the local CSV to the default datastore and register it as 'titanic_raw'.
ds_titanic_raw = register_dataset(
    datastore=def_blob_store,
    dataset_name='titanic_raw',
)


In [None]:
from azureml.data import OutputFileDatasetConfig

# Output handed to the data-prep step: files written to the default datastore
# are read back as delimited data and registered as 'titanic_processed' once
# the producing run completes.
# NOTE(review): the destination path is an empty string — confirm where the SDK
# places the output in that case.
output = (
    OutputFileDatasetConfig(
        name="titanic_processed",
        destination=(def_blob_store, ""),
    )
    .read_delimited_files()
    .register_on_complete('titanic_processed')
)



In [None]:
# Fetch the named environment from the workspace; the step run configurations
# and the HyperDrive script config all use it.
env = Environment.get(
    workspace=ws,
    name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu",
)

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Run configuration for the data-prep step: defaults plus the shared environment.
rcfg = RunConfiguration()
rcfg.environment = env

# Data-prep step: runs data_prep.py on the cluster with the raw dataset as a
# named input and the processed-data output config as its destination.
data_prep = PythonScriptStep(
    name="Data_Prep",
    source_directory=script_folder,
    script_name='data_prep.py',
    arguments=[
        '--input_data', ds_titanic_raw.as_named_input('input_data'),
        '--processed_data', output,
    ],
    compute_target=compute_target,
    runconfig=rcfg,
    allow_reuse=True,
)


In [None]:
from azureml.core import ScriptRunConfig

# Script configuration that HyperDrive launches for each trial: hyperdrive.py
# receives the processed data (the data-prep step's output) as its input.
src = ScriptRunConfig(
    source_directory=script_folder,
    script='hyperdrive.py',
    compute_target=compute_target,
    environment=env,
    arguments=[
        '--titanic_processed',
        output.as_input('titanic_processed'),
    ],
)


In [None]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

# Discrete search space sampled at random. The '--'-prefixed keys presumably
# match the command-line flags parsed by hyperdrive.py — verify against the script.
param_sampling = RandomParameterSampling({
    '--solver': choice('liblinear', 'lbfgs'),
    '--penalty': choice('l2'),
    '--tol': choice(0.0001, 0.0002),
    '--max_iter': choice(50, 100),
})

# Sweep settings: maximize the 'Accuracy' metric over at most two trials,
# both of which may run at the same time.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=2,
    max_concurrent_runs=2,
)


In [None]:
# Names used for the pipeline outputs and for the HyperDrive step itself.
metrics_output_name = 'metrics_output'
model_output_name = 'model_output'
hd_step_name = 'hd_step01'

# Training output carrying the sweep's metrics.
metrics_data = PipelineData(
    name='metrics_data',
    datastore=def_blob_store,
    pipeline_output_name=metrics_output_name,
    training_output=TrainingOutput("Metrics"),
)

# Training output carrying the model file at outputs/model/model.pkl.
saved_model = PipelineData(
    name='saved_model',
    datastore=def_blob_store,
    pipeline_output_name=model_output_name,
    training_output=TrainingOutput(
        "Model",
        model_file="outputs/model/model.pkl",
    ),
)

# HyperDrive sweep as a pipeline step; reuse of earlier results is disabled.
hd_step = HyperDriveStep(
    name=hd_step_name,
    hyperdrive_config=hyperdrive_config,
    outputs=[metrics_data, saved_model],
    allow_reuse=False,
)


In [None]:
# Fresh run configuration for the registration step, using the shared environment.
rcfg = RunConfiguration()
rcfg.environment = env

python_script_name = "register_model.py"

# Registration step: consumes the HyperDrive metrics and model outputs.
# NOTE(review): the input locations are not passed in `arguments`; presumably
# register_model.py locates them another way — confirm against the script.
register_model_step = PythonScriptStep(
    name="register_model_step",
    source_directory=script_folder,
    script_name=python_script_name,
    arguments=['--model_name', 'titanic_model'],
    inputs=[metrics_data, saved_model],
    compute_target=compute_target,
    runconfig=rcfg,
    allow_reuse=True,
)


In [None]:
# List every step explicitly so the pipeline's contents do not depend on the
# SDK discovering upstream steps through the final step's inputs. Execution
# order is still determined by the data dependencies between the steps
# (data prep -> HyperDrive sweep -> model registration).
steps = [data_prep, hd_step, register_model_step]
pipeline = Pipeline(workspace=ws, steps=steps)


In [None]:
# Publish the pipeline to the workspace so it can be attached to an endpoint.
published_pipeline = pipeline.publish(
    name='pipelinepublish',
    description='pipelinepublish',
)


In [None]:
from azureml.pipeline.core import PipelineEndpoint

pipeline_name = "ProdEndpoint"

# Create the endpoint when no listed endpoint has this name; otherwise fetch the
# existing one and add the newly published pipeline as its default version.
if not any(endpoint.name == pipeline_name for endpoint in PipelineEndpoint.list(ws)):
    pipeline_endpoint = PipelineEndpoint.publish(
        workspace=ws,
        name=pipeline_name,
        pipeline=published_pipeline,
        description="Test description Notebook",
    )
else:
    pipeline_endpoint = PipelineEndpoint.get(ws, name=pipeline_name)
    pipeline_endpoint.add_default(published_pipeline)

In [None]:
# Bare expression: display the resulting pipeline endpoint as the cell output.
pipeline_endpoint
