In [2]:
from azure.ai.ml import load_component
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import MLClient, Input
from azure.identity import DefaultAzureCredential, EnvironmentCredential
from azure.ai.ml.entities import AmlCompute
import os

In [3]:
# Get a handle to the Azure ML workspace.
# MLClient.from_config reads the workspace details from a config file (the cell
# output below reports which config.json it found); authentication goes through
# DefaultAzureCredential.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [4]:
# Define a compute target
def get_comput_target(ml_client, name="cpu-cluster", family='Standard_D2_v2'):
    """Return the named compute cluster, creating it when it cannot be fetched.

    Args:
        ml_client: Workspace client exposing ``compute.get`` and
            ``compute.begin_create_or_update``.
        name: Name of the compute cluster to look up or create.
        family: VM size used only when a new cluster has to be created.

    Returns:
        The object returned by ``ml_client.compute.get(name)`` when the lookup
        succeeds; otherwise the result of the create-or-update operation for a
        newly defined ``AmlCompute`` cluster.
    """
    try:
        # Reuse the cluster if the workspace already has one with this name.
        return ml_client.compute.get(name)
    except Exception:
        # Any failure of the lookup is treated as "cluster does not exist",
        # matching the original best-effort behaviour.
        cluster_spec = AmlCompute(
            name=name,
            type="amlcompute",
            size=family,
            min_instances=0,
            max_instances=4,
            idle_time_before_scale_down=180,
            tier="Dedicated",
        )

        # begin_create_or_update starts the operation; .result() waits for it
        # and yields the resulting compute object, which is handed back to the
        # caller (previously it was dropped and the function returned None).
        return ml_client.compute.begin_create_or_update(cluster_spec).result()

In [5]:
# Look up the default "cpu-cluster" compute; the helper tries to create it when
# the lookup fails.
# NOTE(review): the pipeline below selects its compute by name
# (default_compute='cpu-cluster'); `compute_target` itself is not referenced in
# the cells visible here.
compute_target = get_comput_target(ml_client)

In [6]:
# Components
# Load each pipeline step from its YAML spec. The specs are loaded in the order
# listed and bound to the component names used by the pipeline definition.
(
    split_component,
    train_rf_component,
    train_svm_component,
    score_component,
    eval_component,
    pr_projection_cc_component,
) = [
    load_component(source=spec_path)
    for spec_path in (
        "./split-component/split.yml",
        "./train_RandomForest_component/train_RandomForest.yml",
        "./train_SupportVectorMachine_component/train_SupportVectorMachine.yml",
        "./score-component/score.yml",
        "./eval-component/eval.yml",
        "./pr_projection_cc_component/pr_projection_cc.yml",
    )
]

In [7]:
# Define the pipeline: split, train RF, train SVM, score, eval, project.
#
# Inputs:
#   pipeline_input_data   -- passed to the split component as `data_set`.
#   project_pr_input_data -- passed to both projection nodes as `data_pr_cc`.
# Returns a dict mapping pipeline-level output names to node outputs
# (both model pickles, both eval outputs, both projections).
#
# NOTE(review): documentation is kept in `#` comments on purpose; the @pipeline
# decorator may read the function's docstring/annotations as job metadata --
# confirm before converting these notes into a docstring.
@pipeline(
    default_compute='cpu-cluster',
)
def pipeline_downscaling_pr(pipeline_input_data,
                            project_pr_input_data):     
        
    # Split the input data set; the node exposes `data_train` and `data_test`.
    split_node=split_component(
        data_set = pipeline_input_data,
    )
    
    # Train the RandomForest component on the training split.
    train_rf_node=train_rf_component(
        data_train = split_node.outputs.data_train,
        criterion = "squared_error"     
    )

    # Train the SupportVectorMachine component on the same training split.
    train_svm_node=train_svm_component(
        data_train = split_node.outputs.data_train,
        kernel = "rbf"
    )

    # Score each trained model against the test split (same score component,
    # one node per model).
    score_rf_node=score_component(
        model_input=train_rf_node.outputs.model_output_rf_pickle,
        test_data=split_node.outputs.data_test,        
    )

    score_svm_node=score_component(
        model_input=train_svm_node.outputs.model_output_svm_pickle,
        test_data=split_node.outputs.data_test,             
    )

    # Evaluate the scoring output of each model.
    eval_rf_node=eval_component(
        score_result=score_rf_node.outputs.score_output
    )

    eval_svm_node = eval_component(
        score_result=score_svm_node.outputs.score_output   
    )

    # Feed the projection data set and each trained model to the projection
    # component (one node per model).
    pr_projection_rf_node=pr_projection_cc_component(
        data_pr_cc=project_pr_input_data,
        model_input=train_rf_node.outputs.model_output_rf_pickle
    )

    pr_projection_svm_node=pr_projection_cc_component(
        data_pr_cc=project_pr_input_data,
        model_input=train_svm_node.outputs.model_output_svm_pickle,
    )


    # Pipeline-level outputs: these keys are the named outputs of the job
    # (they appear as `named-outputs/<key>` in the download log further down).
    return {
        "pipeline_model_pkl_rf_output" : train_rf_node.outputs.model_output_rf_pickle,     
        "pipeline_model_pkl_svm_output" : train_svm_node.outputs.model_output_svm_pickle,
        "pipeline_eval_rf_output": eval_rf_node.outputs.eval_output,
        "pipeline_eval_svm_output": eval_svm_node.outputs.eval_output,
        "pipeline_pr_projection_cc_rf_output": pr_projection_rf_node.outputs.projected_downscaling_pr,
        "pipeline_pr_projection_cc_svm_output": pr_projection_svm_node.outputs.projected_downscaling_pr

    }


In [8]:
# Create the pipeline job from two CSV inputs stored in the workspace blob store.

# Data set handed to the split/train/score/eval branch
# (file name indicates the historical 1976-2005 series).
pr_angostura_ds = Input(
    path="azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc/datastores/workspaceblobstore/paths/UI/2023-12-03_190648_UTC/az_cordex_ICHEC-EC-EARTH_historical_1976_2005.csv",
    type="uri_file",
)

# Data set handed to the projection nodes (file name indicates an rcp85 run).
pr_angostura_diez = Input(
    path="azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc/datastores/workspaceblobstore/paths/UI/2023-12-03_212926_UTC/az_SAM-44_ICHEC-EC-EARTH_rcp85_r12i1p1_SMHI-RCA4_v3_day.csv",
    type="uri_file",
)

# Calling the pipeline function builds the job object; it is submitted in a later cell.
pipeline_job = pipeline_downscaling_pr(
    project_pr_input_data=pr_angostura_diez,
    pipeline_input_data=pr_angostura_ds,
)

In [9]:
# Submit the pipeline job under the "pipeline_pr_downscaling_p1" experiment.
# NOTE: `pipeline_job` is rebound to the object returned by create_or_update,
# so re-running this cell without re-running the previous one passes that
# returned object (not the freshly built pipeline) back in.
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="pipeline_pr_downscaling_p1"
)
# Bare expression so the notebook renders the job summary as the cell output.
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Experiment,Name,Type,Status,Details Page
pipeline_pr_downscaling_p1,maroon_soccer_bnjsz1q1y8,pipeline,Preparing,Link to Azure Machine Learning studio


In [10]:
# Wait until the job completes: stream the submitted job's logs into the cell
# output, addressing the job by its generated name.
ml_client.jobs.stream(pipeline_job.name)

RunId: maroon_soccer_bnjsz1q1y8
Web View: https://ml.azure.com/runs/maroon_soccer_bnjsz1q1y8?wsid=/subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc

Streaming logs/azureml/executionlogs.txt

[2023-12-03 23:01:02Z] Completing processing run id dbd65f68-d251-46fc-97a3-4d8f62da0493.
[2023-12-03 23:01:03Z] Completing processing run id fdf3e5c5-5b14-4da6-bc68-36a4b7df4ac9.
[2023-12-03 23:01:03Z] Completing processing run id 0f09fb13-4b31-481e-95e6-bbbe56974b83.
[2023-12-03 23:01:06Z] Completing processing run id f7aacbe7-21ba-437c-8167-8cf27edece78.
[2023-12-03 23:01:06Z] Completing processing run id 7502b535-4629-4453-9791-9b52eb9ddabe.
[2023-12-03 23:01:06Z] Completing processing run id 34f01c9c-b8ae-4049-abe3-7fb9719c85c2.
[2023-12-03 23:01:06Z] Completing processing run id 93a6b265-266f-4d0a-b890-a4e95f2baad0.
[2023-12-03 23:01:09Z] Completing processing run id cae4b7ba-60c2-4945-83b0-ee7f2cd1a27b.
[2023-12-03 23:01:09Z]

In [11]:
# Download all the outputs of the job (all=True) into ./pipeline_output; the
# log below shows named outputs landing under pipeline_output/named-outputs/.
# NOTE(review): `output` is not used in the cells visible here -- confirm
# whether download() returns anything worth keeping.
output = ml_client.jobs.download(name=pipeline_job.name, download_path='./pipeline_output', all=True)

Downloading artifact azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc/datastores/workspaceblobstore/paths/azureml/a1d47945-b32e-4e0f-85e7-f91fe1e1e55f/model_output_rf_pickle/ to pipeline_output/named-outputs/pipeline_model_pkl_rf_output
Downloading artifact azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc/datastores/workspaceblobstore/paths/azureml/d16875e2-b467-46f3-883b-cb531f43e5b6/model_output_svm_pickle/ to pipeline_output/named-outputs/pipeline_model_pkl_svm_output
Downloading artifact azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-1645c3ccd539/resourcegroups/dpl_proj_downscaling/workspaces/downscaling-pr-cc/datastores/workspaceblobstore/paths/azureml/fbb9383c-d07f-499d-83ba-8b871e143141/eval_output/ to pipeline_output/named-outputs/pipeline_eval_rf_output
Downloading artifact azureml://subscriptions/3deaa453-5a6c-4bcd-85f1-164