In [107]:
import os

# Switch the working directory to the notebooks folder; later cells use paths
# such as "../data" and "../src" that are relative to the working directory.
try:
    os.chdir('Users/giosue.cotugno/mlops_titanic/notebooks/')
except OSError:
    # The relative target could not be entered. Presumably this happens when
    # the kernel is already inside the notebooks folder - TODO confirm.
    # Only OSError is caught so interrupts and unrelated bugs are not hidden.
    print('already moved')

already moved


# Imports

In [108]:

from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
import os
from azureml.core import Workspace,Experiment, Environment
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Defining utility functions

In [109]:
def getOrCreateCompute(ws:Workspace):
    """Return the "testcot" compute target of ``ws``, creating it when missing.

    The target is first looked up by name. If the lookup raises
    ``ComputeTargetException``, a new AmlCompute cluster (STANDARD_D2_V2,
    0 to 4 nodes) is provisioned and the call waits up to 20 minutes for
    provisioning to complete.

    Args:
        ws: Workspace in which the compute target is looked up or created.

    Returns:
        The existing or newly created compute target.

    Raises:
        Any error raised while creating the cluster or waiting for it is
        propagated to the caller instead of being replaced or swallowed.
    """
    aml_compute_target = "testcot"
    try:
        aml_compute = AmlCompute(ws, aml_compute_target)
        print("found existing compute target.")
    except ComputeTargetException:
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2",
            min_nodes=0,
            max_nodes=4,
        )
        aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
        aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Returned after the try block rather than from a `finally` clause: a
    # `return` inside `finally` discards any in-flight exception, and would
    # raise UnboundLocalError when creation failed before assignment.
    return aml_compute

In [110]:
def createRunConfig(ws):
    """Build the run configuration used by the pipeline steps.

    Args:
        ws: Workspace from which the registered environment is fetched.

    Returns:
        A RunConfiguration whose environment is version '1' of the
        environment named 'TITANIC' in ``ws``.
    """
    # Only the SDK v1 names that are actually used are imported here. The
    # previous unused imports from azure.ai.ml / azure.identity made this
    # helper fail whenever those packages were not installed.
    from azureml.core.runconfig import RunConfiguration
    from azureml.core import Environment

    run_config = RunConfiguration()
    run_config.environment = Environment.get(workspace=ws, name='TITANIC', version='1')
    return run_config

# Build dev pipeline

In [111]:
# Load the workspace from the local Azure ML configuration and echo its identity.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

# Default datastore (Azure blob storage)
def_blob_store = ws.get_default_datastore()
print("Blobstore's name: {}".format(def_blob_store.name))

# Local CSV holding the data set; the path is relative to the working directory.
file_name = "../data/rawdata/train.csv"

# Fail early, before anything is uploaded. isfile() also rejects a directory
# that happens to have this name, and FileNotFoundError (still an Exception
# subclass) states the actual problem.
if not os.path.isfile(file_name):
    raise FileNotFoundError(
        'Could not find CSV dataset at "%s". '
        % file_name
    )

mlops-aml-ws
mlops-rg
westeurope
f90533aa-280d-40b9-9949-a7ba0ee9511f
Blobstore's name: workspaceblobstore


# Uploading data to blob storage

In [112]:
# Upload the local CSV into the "training-data/" folder of the default datastore.
target_path = "training-data/"
def_blob_store.upload_files(
    files=[file_name],
    target_path=target_path,
    overwrite=True,
    show_progress=False,
)

# Reference to the uploaded file. The datastore path is derived from
# target_path and the local file name so it cannot drift from the upload
# above; with the current values it is "training-data/train.csv".
blob_input_data = DataReference(
    datastore=def_blob_store,
    data_reference_name="test_data",
    path_on_datastore=target_path + os.path.basename(file_name),
)

# Objects shared by every pipeline step defined below.
aml_compute = getOrCreateCompute(ws)
run_config = createRunConfig(ws)
# NOTE(review): an intermediate output for the preprocessing step is disabled here.
#processed_data1 = PipelineData("processed_data1",datastore=def_blob_store)
models_data = PipelineData("models_data", datastore=def_blob_store)

found existing compute target.


## Preprocess step

In [113]:
# Preprocessing step: runs preprocess.py from ../src/preprocess/ on the shared
# compute target.
source_directory="../src/preprocess/"
preprocess_step = PythonScriptStep(
    script_name="preprocess.py",
    name="Preprocessing Step",
    source_directory=source_directory,
    # The data reference is declared as an input and also passed as the value
    # of the --data argument.
    inputs=[blob_input_data],
    arguments=["--data", blob_input_data],
    runconfig=run_config,
    compute_target=aml_compute,
    allow_reuse=True,
)

In [114]:
# NOTE(review): Pipeline is already imported in the imports cell; this re-import is redundant.
from azureml.pipeline.core import Pipeline
# Builds a pipeline from the preprocessing step alone. The object is not
# assigned to a name, so it is only shown as this cell's output.
Pipeline(ws, [preprocess_step])

<azureml.pipeline.core.pipeline.Pipeline at 0x7f473a040160>

## Training step

In [115]:
# Training step: runs train.py from ../src/train/ on the shared compute target.
source_directory="../src/train/"
train_step = PythonScriptStep(
    script_name="train.py",
    name="Train Model Step",
    source_directory=source_directory,
    # models_data is declared as this step's output and passed as the value
    # of the --model argument.
    arguments=["--model", models_data],
    outputs=[models_data],
    runconfig=run_config,
    compute_target=aml_compute,
    allow_reuse=True,
)

## Evaluation step

In [116]:
# Evaluation step: runs eval.py from ../src/evaluation/ on the shared compute target.
source_directory="../src/evaluation/"
evaluate_step = PythonScriptStep(
    script_name="eval.py",
    name="Evaluate Model Step",
    source_directory=source_directory,
    # models_data (the training step's output) is declared as an input and
    # passed as the value of the --model_path argument.
    arguments=["--model_path", models_data],
    inputs=[models_data],
    runconfig=run_config,
    compute_target=aml_compute,
    allow_reuse=True,
)

## Registration step

In [117]:
# Registration step: runs register.py from ../src/register/ on the shared compute target.
source_directory="../src/register/"
register_step = PythonScriptStep(
    script_name="register.py",
    name="Registration Model Step",
    source_directory=source_directory,
    # models_data is declared as an input and passed as the value of the
    # --model_path argument.
    arguments=["--model_path", models_data],
    inputs=[models_data],
    runconfig=run_config,
    compute_target=aml_compute,
    # Unlike the other steps, reuse of a previous run's output is disabled here.
    allow_reuse=False,
)

# Pipeline creation

In [118]:
from azureml.pipeline.core import Pipeline

# Steps in the order they must execute.
steps = [preprocess_step,train_step, evaluate_step,register_step]

# Chain each step to its predecessor explicitly:
# preprocess -> train -> evaluate -> register.
for upstream, downstream in zip(steps, steps[1:]):
    downstream.run_after(upstream)

train_pipeline = Pipeline(workspace=ws, steps=steps)


In [119]:
# Validate the assembled pipeline before publishing it.
train_pipeline.validate()
# Publish the pipeline to the workspace under a fixed name and description.
published_pipeline = train_pipeline.publish(
    name="preproc-train-register pipeline",
    description="Model training/retraining pipeline"
)
# Echo the name of the published pipeline as confirmation.
print(f"Published pipeline: {published_pipeline.name}")

Step Evaluate Model Step is ready to be created [7726e3da]
Step Registration Model Step is ready to be created [a984e0d1]
Created step Preprocessing Step [aaef40e6][a499580b-0362-4649-9672-de20520987dc], (This step is eligible to reuse a previous run's output)
Created step Train Model Step [47e6535f][495e622c-4469-4b8d-b0f0-0d5209d34274], (This step is eligible to reuse a previous run's output)
Created step Evaluate Model Step [7726e3da][fcde025d-b63a-4e98-9f55-79314e367eec], (This step will run and generate new outputs)
Created step Registration Model Step [a984e0d1][50484aed-f455-4a32-8ff3-0abb133bf6bc], (This step will run and generate new outputs)
Using data reference test_data for StepId [5006b6bb][ee17b8e7-aad7-4670-a604-b0b5a122ca4f], (Consumers of this data are eligible to reuse prior runs.)
Published pipeline: preproc-train-register pipeline


In [120]:
# Submit the pipeline as a run of the 'titanic-pipeline' experiment.
experiment = Experiment(ws, 'titanic-pipeline')
pipeline_run1 = experiment.submit(train_pipeline)

Submitted PipelineRun 969b34c2-d1bd-49c5-9ee5-33764d38ffcc
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/969b34c2-d1bd-49c5-9ee5-33764d38ffcc?wsid=/subscriptions/f90533aa-280d-40b9-9949-a7ba0ee9511f/resourcegroups/mlops-rg/workspaces/mlops-aml-ws&tid=e99647dc-1b08-454a-bf8c-699181b389ab


In [121]:
# Wait for the submitted pipeline run while showing its output. The call is
# the cell's last expression, so its return value is displayed as the cell result.
pipeline_run1.wait_for_completion(show_output=True)

PipelineRunId: 969b34c2-d1bd-49c5-9ee5-33764d38ffcc
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/969b34c2-d1bd-49c5-9ee5-33764d38ffcc?wsid=/subscriptions/f90533aa-280d-40b9-9949-a7ba0ee9511f/resourcegroups/mlops-rg/workspaces/mlops-aml-ws&tid=e99647dc-1b08-454a-bf8c-699181b389ab
PipelineRun Status: Running


Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment






Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment








Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment







Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment






PipelineRun Execution Summary
PipelineRun Status: Finished

Failed to post metric due to validation failure. Metric column found with a malformed type



'Finished'