In [2]:
# ---------------------------------------------------------
# NOTEBOOK: 3_pipeline_trigger.ipynb
# PART 1: SETUP COMPUTE & ENVIRONMENT (ROBUST)
# ---------------------------------------------------------
from azure.ai.ml import MLClient
from azure.ai.ml.entities import AmlCompute, Environment
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# 1. ROBUST AUTHENTICATION (The "Beginner's Trap" Fix)
# Try the non-interactive credential first. If it cannot obtain a token
# (for example because you aren't logged in locally), fall back to a
# browser popup so you can sign in.
try:
    credential = DefaultAzureCredential()
    # Request a management-plane token right away so a broken credential is
    # detected here rather than on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    print("Silent login failed. Falling back to Interactive Login...")
    # Report why the silent attempt failed instead of discarding the error;
    # only the first line is shown to keep the cell output short.
    reason_lines = str(ex).strip().splitlines()
    reason = reason_lines[0] if reason_lines else "(no message)"
    print(f"Reason: {type(ex).__name__}: {reason}")
    credential = InteractiveBrowserCredential()

# 2. CONNECT TO WORKSPACE
# Note: path="../config.json" tells the code to look in the ROOT folder,
# because this notebook is inside the 'notebooks' subfolder.
try:
    ml_client = MLClient.from_config(credential=credential, path="../config.json")
except Exception as e:
    # The failure is not necessarily a missing file, so report the actual
    # error alongside the most common fix.
    print("ERROR: Could not connect to the workspace using '../config.json'!")
    print(f"Reason: {type(e).__name__}: {e}")
    print("Make sure 'config.json' is in the main project folder (one level up).")
    # Bare raise re-raises the active exception with its original traceback.
    raise
else:
    print(f"Connected to Workspace: {ml_client.workspace_name}")

# 3. CREATE COMPUTE CLUSTER (The Engine)
# Reuse the cluster when the lookup by name succeeds; otherwise define a new
# one, submit it, and wait for the create operation to finish.
cluster_name = "clinical-cluster-cpu"

try:
    cluster = ml_client.compute.get(cluster_name)
except Exception:
    # NOTE(review): every lookup failure lands here, not only "cluster does
    # not exist" -- consider narrowing the exception type.
    print(f"Creating new cluster '{cluster_name}'...")
    cluster = AmlCompute(
        name=cluster_name,
        type="amlcompute",
        size="STANDARD_DS11_V2",
        min_instances=0,
        max_instances=2,
        # presumably seconds -- TODO confirm against the AmlCompute reference
        idle_time_before_scale_down=120,
    )
    create_poller = ml_client.compute.begin_create_or_update(cluster)
    create_poller.result()  # block until the long-running operation completes
    print("Cluster created!")
else:
    print(f"Cluster '{cluster_name}' already exists. Using it.")

# 4. REGISTER ENVIRONMENT (The Operating System)
# Define the job environment from a base Docker image plus the project's
# conda specification, then register it in the workspace.
env_name = "clinical-prep-env"

job_env = Environment(
    name=env_name,
    description="Environment for Clinical Data Prep",
    conda_file="../src/conda.yml", # Pointing to src folder
    # NOTE(review): the ':latest' tag is not pinned, so the base image can
    # change between registrations.
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest"
)

print(f"Registering Environment: {env_name}...")
# Keep the returned entity so the registered version is visible; the job
# cells reference this environment by an explicit version number.
registered_env = ml_client.environments.create_or_update(job_env)
print("Environment Registered.")
print(f"Registered as: azureml:{registered_env.name}:{registered_env.version}")

Found the config file in: ..\config.json
Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Connected to Workspace: AML-Clinical-Readmission
Cluster 'clinical-cluster-cpu' already exists. Using it.
Registering Environment: clinical-prep-env...
Environment Registered.


In [None]:
#--------------------------------------------
# Part 2: PREP JOB
#--------------------------------------------
from azure.ai.ml import command, Input, Output
from azure.ai.ml.constants import AssetTypes

# Registered data asset that the prep step reads.
data_asset_name = "diabetes-clinical-enriched-130us"
version = "1127_1028"   # latest you registered

# Version of the registered environment this job is pinned to.
prep_env_version = "2"

my_input_data = Input(
    type=AssetTypes.MLTABLE,
    path=f"azureml:{data_asset_name}:{version}",
)

# Command job: runs prep.py from ../src with one MLTable input and two folder
# outputs (train / test).
prep_job = command(
    code="../src",
    command=(
        "python prep.py "
        "--data ${{inputs.raw_data}} "
        "--train_data ${{outputs.train_data}} "
        "--test_data ${{outputs.test_data}}"
    ),
    inputs={
        "raw_data": my_input_data
    },
    outputs={
        # Use the AssetTypes constants, matching the input definition above.
        "train_data": Output(type=AssetTypes.URI_FOLDER),
        "test_data": Output(type=AssetTypes.URI_FOLDER),
    },
    # Reuse the names defined in the setup cell instead of re-typing them, so
    # the job cannot drift from the cluster/environment created there.
    environment=f"azureml:{env_name}:{prep_env_version}",
    compute=cluster_name,
    display_name="Clinical_Prep_Binary_001",
    experiment_name="Clinical_Readmission_Pipeline",
)

print("Submitting PREP job...")
# create_or_update submits the job; this cell does not wait for it to finish.
returned_job = ml_client.jobs.create_or_update(prep_job)
print("PREP job submitted:", returned_job.studio_url)


Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Submitting PREP job...


Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.02 MBs): 100%|##########| 19499/19499 [00:00<00:00, 170151.61it/s]
[39m

pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


PREP job submitted: https://ml.azure.com/runs/mighty_glass_8np6kdy7wl?wsid=/subscriptions/3aeb63fe-f831-47f0-8175-3732f2efd2a1/resourcegroups/RG-Clinical-Readmission/workspaces/AML-Clinical-Readmission&tid=deb5bf9d-8bb0-4783-8f54-42a424392492


In [None]:
#--------------------------------------------
# Part 3: TRAIN JOB
#--------------------------------------------
from azure.ai.ml import command, Input, Output

# Name of the PREP run whose outputs are used for training. Both asset paths
# below are derived from it, so this is the only value to change when
# training on a different prep run.
# NOTE(review): this is a fixed run name; a freshly submitted prep job gets a
# different name, so update this value (and make sure that run has finished)
# before re-running this cell.
prep_job_name = "mighty_glass_8np6kdy7wl"

path_to_train = f"azureml:azureml_{prep_job_name}_output_data_train_data:1"
path_to_test  = f"azureml:azureml_{prep_job_name}_output_data_test_data:1"

# Version of the registered environment this job is pinned to.
train_env_version = "2"

# Command job: runs train.py from ../src on the prep outputs and writes the
# model to a folder output.
train_job = command(
    code="../src",
    command=(
        "python train.py "
        "--train_data ${{inputs.train_data}} "
        "--test_data ${{inputs.test_data}} "
        "--model ${{outputs.model}}"
    ),
    inputs={
        "train_data": Input(type="uri_folder", path=path_to_train),
        "test_data": Input(type="uri_folder", path=path_to_test),
    },
    outputs={
        "model": Output(type="uri_folder"),
    },
    # Reuse the names defined in the setup cell instead of re-typing them.
    environment=f"azureml:{env_name}:{train_env_version}",
    compute=cluster_name,
    display_name="Clinical_Train_Run_001_autolog",
    experiment_name="Clinical_Readmission_Pipeline",
)

returned_job = ml_client.jobs.create_or_update(train_job)
print(returned_job.studio_url)


pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


https://ml.azure.com/runs/brave_bulb_jnjkwj40qn?wsid=/subscriptions/3aeb63fe-f831-47f0-8175-3732f2efd2a1/resourcegroups/RG-Clinical-Readmission/workspaces/AML-Clinical-Readmission&tid=deb5bf9d-8bb0-4783-8f54-42a424392492
