In [1]:
import configparser

from azure.ai.ml import Input, load_component, MLClient
from azure.ai.ml.entities import AmlCompute
from azure.ai.ml.dsl import pipeline
from azure.identity import DefaultAzureCredential

In [2]:
# Read the Azure workspace coordinates from a local INI file so that no
# identifiers are hard-coded in the notebook.
config_path = 'config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Without this check a missing file would only surface
# below as a NoSectionError that never mentions the file name.
if not config.read(config_path):
    raise FileNotFoundError(f"Could not read configuration file: {config_path!r}")

# All four values are expected under the [Azure] section.
subscription_id = config.get('Azure', 'subscription_id')
resource_group = config.get('Azure', 'resource_group')
workspace_name = config.get('Azure', 'workspace')
datastore_name = config.get('Azure', 'datastore_name')

In [3]:
# Build the credential object and open a client on the workspace identified
# by the values read from the configuration file.
credential = DefaultAzureCredential()
workspace_scope = {
    "subscription_id": subscription_id,
    "resource_group_name": resource_group,
    "workspace_name": workspace_name,
}
ml_client = MLClient(credential=credential, **workspace_scope)

In [4]:
# Name of the compute cluster every pipeline step runs on by default.
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the cluster if the workspace already has one with this name.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    # NOTE(review): any failure of the lookup (not only "cluster not found")
    # lands here and triggers a creation attempt; consider narrowing the
    # exception type once the SDK's not-found exception is imported.
    print("Creating a new cpu compute target...")
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        size="Standard_D11_V2",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=180,
        tier="Dedicated",
    )
    print(
        f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}"
    )
    # begin_create_or_update only starts a long-running operation and returns
    # a poller. Wait on .result() so that cpu_cluster is the provisioned
    # compute entity (not the poller) and provisioning has finished before
    # later cells submit work to the cluster.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

You already have a cluster named cpu-cluster, we'll reuse it as is.


In [5]:
# Names of the component folders under ./components; each folder is expected
# to be called "<name>_component" and to contain "<name>.yml".
component_names = [
    'description',
    'clean',
    'correlation',
    'split',
    'logistic_regression_train',
    'decission_trees_train',
    'score',
    'evaluate',
]

# Load each component definition from its YAML file and register it in the
# workspace, keeping the returned component keyed by its short name.
components = {}
for component_name in component_names:
    spec_path = f"./components/{component_name}_component/{component_name}.yml"
    components[component_name] = ml_client.create_or_update(
        load_component(source=spec_path)
    )


In [6]:
# Pipeline definition: correlation analysis on the raw data, plus a
# clean -> split -> train (logistic regression) -> score -> evaluate chain.
# Steps run on `cpu_compute_target` unless a component overrides the compute.
#
# NOTE(review): documented with comments only. The dsl decorator may inspect
# the function's docstring, annotations and local variable names — confirm
# before adding a docstring or renaming the node variables.
@pipeline(
    default_compute=cpu_compute_target
)
def water_potability_logistic_regression(pipeline_input_data):
    # The correlation step reads the raw input directly, not the cleaned data.
    correlation_node = components['correlation'](
        data=pipeline_input_data
    )
    # Cleaning also starts from the raw input, so it does not depend on the
    # correlation step.
    clean_node = components['clean'](
        data=pipeline_input_data
    )
    # Split the cleaned data; the outputs used below are train_output and
    # test_output.
    split_node = components['split'](
        split_data=clean_node.outputs.clean_data_output
    )
    # Train on the training split; 'Potability' is passed as the objective.
    logistic_regression_train_node = components['logistic_regression_train'](
        train_data=split_node.outputs.train_output,
        objective='Potability'
    )
    # Score the trained model against the test split.
    score_node = components['score'](
        model=logistic_regression_train_node.outputs.model_output,
        test_data=split_node.outputs.test_output,
    )
    # Evaluate the predictions against the same test split.
    evaluate_node = components['evaluate'](
        test_data=split_node.outputs.test_output,
        predict_data=score_node.outputs.predict_output,
        objective='Potability',
        target_name='Potable',
    )
    # Pipeline-level outputs: the keys are the names under which the results
    # are exposed on the submitted job.
    return {
        'pairplot': correlation_node.outputs.results_output,
        'model': logistic_regression_train_node.outputs.model_output,
        'report': evaluate_node.outputs.report_output
    }

In [7]:
data_asset = ml_client.data.get(name="water-potability", version='1')
water_potability = Input(type="uri_file", path=data_asset.path)
pipeline = water_potability_logistic_regression(pipeline_input_data=water_potability)

In [8]:
pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    experiment_name='pipeline-exp',
)
ml_client.jobs.stream(pipeline_job.name)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


RunId: bubbly_lettuce_cjb56vt8l8
Web View: https://ml.azure.com/runs/bubbly_lettuce_cjb56vt8l8?wsid=/subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpPipeline4/workspaces/Pipeline4

Streaming logs/azureml/executionlogs.txt

[2023-11-08 19:58:37Z] Submitting 2 runs, first five are: 21cab911:9b49c2f5-92a1-456b-8217-fe15256454e0,72d862ff:6e890019-285d-4914-870b-796cf2c2739d
[2023-11-08 19:58:40Z] Completing processing run id 9b49c2f5-92a1-456b-8217-fe15256454e0.
[2023-11-08 20:07:15Z] Completing processing run id 6e890019-285d-4914-870b-796cf2c2739d.
[2023-11-08 20:07:16Z] Submitting 1 runs, first five are: 5843539d:5075315e-4cf2-4e8c-af0f-fc6776225e00
[2023-11-08 20:08:05Z] Completing processing run id 5075315e-4cf2-4e8c-af0f-fc6776225e00.
[2023-11-08 20:08:06Z] Submitting 1 runs, first five are: f3750b2a:6840f6a7-230b-48c7-8cfe-2e3f3200a08e
[2023-11-08 20:08:48Z] Completing processing run id 6840f6a7-230b-48c7-8cfe-2e3f3200a08e.
[2023-11-08 20:08:48Z] Submitting 1 ru

In [10]:
# Download all outputs and artifacts of the submitted pipeline job into
# ./pipeline_output.
# NOTE(review): jobs.download appears to return nothing useful, so `output`
# is probably always None — confirm before relying on it.
output = ml_client.jobs.download(
    name=pipeline_job.name,
    download_path='./pipeline_output',
    all=True,
)

Downloading artifact azureml://subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpPipeline4/workspaces/Pipeline4/datastores/workspaceblobstore/paths/azureml/b01383f9-6a1b-46ef-aaaf-95fc70eb3e1b/results_output/ to pipeline_output/named-outputs/pairplot
Downloading artifact azureml://subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpPipeline4/workspaces/Pipeline4/datastores/workspaceblobstore/paths/azureml/6840f6a7-230b-48c7-8cfe-2e3f3200a08e/model_output to pipeline_output/named-outputs/model
Downloading artifact azureml://subscriptions/f3aa5221-5b34-4091-bcec-acf7b816f5b6/resourcegroups/GrpPipeline4/workspaces/Pipeline4/datastores/workspaceblobstore/paths/azureml/da42fdb8-5840-44dd-b43a-723cef7bd970/report_output to pipeline_output/named-outputs/report
Downloading artifact azureml://datastores/workspaceartifactstore/paths/ExperimentRun/dcid.bubbly_lettuce_cjb56vt8l8/ to pipeline_output/artifacts
Bad pipe message: %s [b'UA\xaa\xa8\x89\x94j\xbe\xff\\\