In [4]:
from azure.ai.ml import MLClient, dsl, Input, Output, command
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
from azure.ai.ml.sweep import Choice, Uniform
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
# ---------------------------------------------------------
# 1. CONNECT TO WORKSPACE (silent auth first, then interactive)
# ---------------------------------------------------------
try:
    credential = DefaultAzureCredential()
    # Probe for a management-plane token to see whether silent auth works.
    credential.get_token("https://management.azure.com/.default")
except Exception as auth_error:
    # Deliberately broad: any failure of the silent chain triggers the
    # interactive fallback. The cause is reported rather than discarded so a
    # misconfigured environment can be diagnosed from the cell output.
    print("DefaultAzureCredential failed; falling back to InteractiveBrowserCredential...")
    # Only the first line of the message is shown to keep the output short.
    first_line = str(auth_error).partition("\n")[0]
    print(f"  cause: {type(auth_error).__name__}: {first_line}")
    credential = InteractiveBrowserCredential()

# config.json should be one level up from this notebook (../config.json)
ml_client = MLClient.from_config(credential=credential, path="../config.json")
print(f"Connected to workspace: {ml_client.workspace_name}")


def _register_uri_folder(name, description, path):
    """Register the local folder at ``path`` as a URI_FOLDER data asset.

    Builds a ``Data`` entity from the given name, description and path, submits
    it through ``ml_client.data.create_or_update`` and returns whatever that
    call returns.
    """
    folder_asset = Data(
        name=name,
        description=description,
        type=AssetTypes.URI_FOLDER,
        path=path,
    )
    return ml_client.data.create_or_update(folder_asset)


# TRAIN folder (created by prep.py; contains train.csv)
train_data_asset = _register_uri_folder(
    name="diabetes-train-130us",
    description="Train split for clinical readmission (from Data Prep step).",
    path="../data/processed_icd/train",
)
print(f"Train asset: {train_data_asset.name}:{train_data_asset.version}")

# TEST folder (contains test.csv)
test_data_asset = _register_uri_folder(
    name="diabetes-test-130us",
    description="Test split for clinical readmission (from Data Prep step).",
    path="../data/processed_icd/test",
)
print(f"Test asset:  {test_data_asset.name}:{test_data_asset.version}")


Found the config file in: ..\config.json
Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Connected to workspace: AML-Clinical-Readmission


[32mUploading train (16.82 MBs): 100%|##########| 16816144/16816144 [00:02<00:00, 6122656.75it/s]
[39m



Train asset: diabetes-train-130us:1


[32mUploading test (4.2 MBs): 100%|##########| 4204771/4204771 [00:00<00:00, 7905578.13it/s]
[39m



Test asset:  diabetes-test-130us:1


In [5]:
# Environment reference in "azureml:<name>:<version>" form; it is passed as the
# `environment` of the training command job defined later in this notebook.
ENV_STR = "azureml:clinical-prep-env:2"
# Echo the value so the pinned environment version is visible in the cell output.
print(ENV_STR)


azureml:clinical-prep-env:2


In [6]:
# Use the versions of the data assets registered earlier in this notebook
# instead of hand-edited literals, so re-registering the assets cannot leave a
# stale version pinned here.
TRAIN_VER = train_data_asset.version
TEST_VER = test_data_asset.version

# Baseline training job: runs train.py from ../src once, with fixed
# hyperparameters, against the registered train/test folders.
base_train_job = command(
    display_name="xgb_manual_cli_baseline",
    description="Baseline XGBoost training using train.py on registered train/test assets.",
    code="../src",  # folder that contains train.py
    # Every ${{inputs.*}} / ${{outputs.*}} placeholder below is bound to the
    # matching key in `inputs` / `outputs`.
    command=(
        "python train.py "
        "--train_data ${{inputs.train_data}} "
        "--test_data ${{inputs.test_data}} "
        "--model_output ${{outputs.model_output}} "
        "--max_depth ${{inputs.max_depth}} "
        "--learning_rate ${{inputs.learning_rate}} "
        "--n_estimators ${{inputs.n_estimators}} "
        "--subsample ${{inputs.subsample}} "
        "--colsample_bytree ${{inputs.colsample_bytree}}"
    ),
    inputs={
        # data inputs: the registered URI_FOLDER assets
        "train_data": Input(
            type="uri_folder",
            path=f"azureml:diabetes-train-130us:{TRAIN_VER}",
        ),
        "test_data": Input(
            type="uri_folder",
            path=f"azureml:diabetes-test-130us:{TEST_VER}",
        ),

        # hyperparameters as inputs (fixed values for this baseline run)
        "max_depth": 6,
        "learning_rate": 0.1,
        "n_estimators": 200,
        "subsample": 1.0,
        "colsample_bytree": 1.0,
    },
    outputs={
        "model_output": Output(type="uri_folder")
    },
    environment=ENV_STR,
    compute="clinical-cluster-cpu",
)

# Submit the job and print the link to the run in Azure ML studio.
returned_job = ml_client.jobs.create_or_update(
    base_train_job,
    experiment_name="Clinical_Readmission_XGB_Manual",
)

print(returned_job.studio_url)


Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
pathOnCompute is not a known attribute

https://ml.azure.com/runs/helpful_tree_6v8xw2b7kf?wsid=/subscriptions/3aeb63fe-f831-47f0-8175-3732f2efd2a1/resourcegroups/RG-Clinical-Readmission/workspaces/AML-Clinical-Readmission&tid=deb5bf9d-8bb0-4783-8f54-42a424392492


In [8]:

# Distributions to sample each hyperparameter from. The keys are the same
# names as the hyperparameter inputs declared on base_train_job. Choice and
# Uniform are the distribution classes from azure.ai.ml.sweep.
hyperparameter_space = {
    "max_depth": Choice(values=[3, 5, 7, 9]),
    "learning_rate": Uniform(min_value=0.01, max_value=0.3),
    "n_estimators": Choice(values=[100, 200, 400]),
    "subsample": Uniform(min_value=0.7, max_value=1.0),
    "colsample_bytree": Uniform(min_value=0.7, max_value=1.0),
}

# Turn the baseline command job into a sweep over the space above.
sweep_job = base_train_job.sweep(
    compute="clinical-cluster-cpu",
    sampling_algorithm="random",        # or "bayesian" later
    primary_metric="auc_custom",        # must match mlflow.log_metric("auc_custom", ...)
    goal="Maximize",                    # or "Minimize"
    search_space=hyperparameter_space,
)

# Cap the sweep at 20 trials in total, with at most 4 running at once.
sweep_job.set_limits(
    max_total_trials=20,
    max_concurrent_trials=4,
)

sweep_job.display_name = "xgb_manual_random_sweep_auc"

# Submit the sweep and print the link to the parent run in Azure ML studio.
returned_sweep = ml_client.jobs.create_or_update(
    sweep_job,
    experiment_name="Clinical_Readmission_XGB_Sweep",
)

print(returned_sweep.studio_url)


pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


https://ml.azure.com/runs/green_garden_qmyn1f0wrt?wsid=/subscriptions/3aeb63fe-f831-47f0-8175-3732f2efd2a1/resourcegroups/RG-Clinical-Readmission/workspaces/AML-Clinical-Readmission&tid=deb5bf9d-8bb0-4783-8f54-42a424392492
