# imports

In [79]:

from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
import os

from azureml.core import Workspace,Experiment, Environment




# Defining utility functions

In [80]:
def getOrCreateCompute(ws:Workspace, compute_name:str="testcot"):
    """Return the AmlCompute target named ``compute_name``, creating it if the lookup fails.

    Args:
        ws: Workspace in which the compute target is looked up or created.
        compute_name: Name of the compute target. Defaults to "testcot",
            the name this notebook has always used.

    Returns:
        The existing compute target, or the newly created one after
        waiting (up to 20 minutes) for provisioning to complete.

    Raises:
        Any exception raised while creating the target or waiting for it
        propagates to the caller. The previous version returned from a
        ``finally`` clause, which silently discarded such errors (or
        replaced them with ``UnboundLocalError``).
    """
    from azureml.core.compute import AmlCompute
    from azureml.core.compute import ComputeTarget
    from azureml.core.compute_target import ComputeTargetException

    try:
        aml_compute = AmlCompute(ws, compute_name)
        print("found existing compute target.")
    except ComputeTargetException:
        # The lookup failed, so provision a new auto-scaling cluster (0-4 nodes).
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                    min_nodes = 0,
                                                                    max_nodes = 4)
        aml_compute = ComputeTarget.create(ws, compute_name, provisioning_config)
        aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Return outside of try/except/finally so that failures above are not masked.
    return aml_compute

In [81]:
def createRunConfig(ws, env_name='TITANIC', env_version='1'):
    """Build a RunConfiguration that uses a registered workspace environment.

    Args:
        ws: Workspace from which the environment is fetched.
        env_name: Name of the registered environment. Defaults to 'TITANIC'.
        env_version: Version of the registered environment. Defaults to '1'.

    Returns:
        A new RunConfiguration whose ``environment`` attribute is set to the
        environment returned by ``Environment.get``.
    """
    # Only the SDK v1 (azureml-core) names are needed here; the previously
    # imported azure.ai.ml / azure.identity names were never used.
    from azureml.core.runconfig import RunConfiguration
    from azureml.core import Environment

    run_config = RunConfiguration()
    run_config.environment = Environment.get(workspace=ws, name=env_name, version=env_version)

    return run_config

In [82]:
#os.chdir('Users/giosue.cotugno/mlops_titanic/notebooks/')

# Build dev pipeline

In [83]:
# Connect to the workspace described by the local Azure ML config file.
# NOTE(review): this prints the subscription id into the saved cell output;
# consider clearing the output before sharing the notebook.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')


# Default datastore (Azure blob storage)
def_blob_store = ws.get_default_datastore()
print("Blobstore's name: {}".format(def_blob_store.name))

# Local CSV that holds the data set; it is uploaded in a later cell.
file_name = "../data/rawdata/train.csv"

# Fail early, with a specific exception type, when the CSV is not where we expect it.
if not os.path.exists(file_name):
    raise FileNotFoundError(
        'Could not find CSV dataset at "%s". '
        % file_name
    )
# The file is uploaded to the workspace's default datastore in the next cell.

mlops-aml-ws
mlops-rg
westeurope
f90533aa-280d-40b9-9949-a7ba0ee9511f
Blobstore's name: workspaceblobstore


# Uploading data to blob storage

In [84]:
# Upload the local CSV into the "training-data/" folder of the default datastore.
target_path = "training-data/"
def_blob_store.upload_files(
    files=[file_name],
    target_path=target_path,
    overwrite=True,
    show_progress=False,
)

# Reference to the uploaded CSV. The datastore path is derived from the upload
# target and the local file name so the two cannot drift apart
# (it evaluates to "training-data/train.csv" for the current values).
blob_input_data = DataReference(
    datastore=def_blob_store,
    data_reference_name="test_data",
    path_on_datastore=target_path + os.path.basename(file_name),
)


# Compute target and run configuration shared by all pipeline steps.
aml_compute = getOrCreateCompute(ws)
run_config = createRunConfig(ws)
#processed_data1 = PipelineData("processed_data1",datastore=def_blob_store)
# Pipeline data written by the training step and read by the evaluation step.
models_data = PipelineData("models_data",datastore=def_blob_store)

found existing compute target.


## Preprocess step

In [85]:
# Folder that contains preprocess.py.
source_directory="../src/preprocess/"

# Preprocessing step: runs preprocess.py on the shared compute target and
# receives the uploaded CSV reference through the --data argument.
preprocess_step = PythonScriptStep(
    script_name="preprocess.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
    inputs=[blob_input_data],
    arguments=["--data", blob_input_data],
    allow_reuse=True,
)

In [86]:
# Build a throwaway pipeline from the preprocess step alone; the cell output is
# the resulting Pipeline object. `Pipeline` is already imported in the imports cell.
Pipeline(ws, [preprocess_step])

<azureml.pipeline.core.pipeline.Pipeline at 0x7fd5a218ee50>

## Training step

In [87]:
# Folder that contains train.py.
source_directory="../src/train/"

# Training step: runs train.py and declares models_data as its output;
# the same object is passed to the script through the --model argument.
train_step = PythonScriptStep(
    name="Train Model",
    script_name="train.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
    arguments=["--model", models_data],
    outputs=[models_data],
    allow_reuse=False,
)

## Validation step

In [88]:
# Folder that contains eval.py.
source_directory="../src/evaluation/"

# Evaluation step: runs eval.py with models_data as its input, passed to the
# script through the --model_path argument.
# NOTE(review): the step name ends with a trailing space; kept as-is here.
evaluate_step = PythonScriptStep(
    name="Evaluate Model ",
    script_name="eval.py",
    source_directory=source_directory,
    compute_target=aml_compute,
    runconfig=run_config,
    arguments=["--model_path", models_data],
    inputs=[models_data],
    allow_reuse=False,
)

# Pipeline creation

In [89]:
# `Pipeline` is already imported in the imports cell, so no re-import is needed here.

# The training step declares no inputs, so its position after preprocessing
# has to be stated explicitly.
train_step.run_after(preprocess_step)
# Evaluation already reads models_data, which training writes; the explicit
# ordering is kept as well (presumably redundant, but harmless).
evaluate_step.run_after(train_step)
steps = [preprocess_step,train_step, evaluate_step]

train_pipeline = Pipeline(workspace=ws, steps=steps)


In [90]:
# Submit the pipeline under the 'titanic-pipeline' experiment.
titanic_experiment = Experiment(ws, 'titanic-pipeline')
pipeline_run1 = titanic_experiment.submit(train_pipeline)

Created step preprocess.py [e49eb4ab][2c63d416-1b63-40da-a0f6-5f6e2a4067b1], (This step is eligible to reuse a previous run's output)
Created step Train Model [d791a49a][7ca2f8ce-f336-4ff6-b7d3-e401ca5a0cef], (This step will run and generate new outputs)
Created step Evaluate Model  [5938d295][28fa0393-7e81-4441-af92-fba48aad6584], (This step will run and generate new outputs)
Using data reference test_data for StepId [5d8b7444][ee17b8e7-aad7-4670-a604-b0b5a122ca4f], (Consumers of this data are eligible to reuse prior runs.)
Submitted PipelineRun 0416612c-07cb-4379-8b8b-7968c4934b2e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0416612c-07cb-4379-8b8b-7968c4934b2e?wsid=/subscriptions/f90533aa-280d-40b9-9949-a7ba0ee9511f/resourcegroups/mlops-rg/workspaces/mlops-aml-ws&tid=e99647dc-1b08-454a-bf8c-699181b389ab


In [91]:
# Wait for the submitted pipeline run, printing its progress as it goes.
# In the recorded run this call raised ActivityFailedException because a step failed.
pipeline_run1.wait_for_completion(
    show_output=True,
)

PipelineRunId: 0416612c-07cb-4379-8b8b-7968c4934b2e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0416612c-07cb-4379-8b8b-7968c4934b2e?wsid=/subscriptions/f90533aa-280d-40b9-9949-a7ba0ee9511f/resourcegroups/mlops-rg/workspaces/mlops-aml-ws&tid=e99647dc-1b08-454a-bf8c-699181b389ab
PipelineRun Status: NotStarted
PipelineRun Status: Running


Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment







Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment






PipelineRun Execution Summary
PipelineRun Status: Failed


ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "Pipeline has some failed steps. See child run or execution logs for more details.",
        "messageFormat": "Pipeline has some failed steps. {0}",
        "messageParameters": {},
        "referenceCode": "PipelineHasStepJobFailed",
        "details": []
    },
    "environment": "westeurope",
    "location": "westeurope",
    "time": "2022-12-15T16:42:59.71229Z",
    "componentName": ""
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Pipeline has some failed steps. See child run or execution logs for more details.\",\n        \"messageFormat\": \"Pipeline has some failed steps. {0}\",\n        \"messageParameters\": {},\n        \"referenceCode\": \"PipelineHasStepJobFailed\",\n        \"details\": []\n    },\n    \"environment\": \"westeurope\",\n    \"location\": \"westeurope\",\n    \"time\": \"2022-12-15T16:42:59.71229Z\",\n    \"componentName\": \"\"\n}"
    }
}