### Load Libraries ###

In [142]:
import os

from azure.identity import InteractiveBrowserCredential
from azure.ai.ml import MLClient, command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes, ModelType
from azure.ai.ml.entities import Environment, AmlCompute, Data, Model


from load_secrets import load_secrets

### Connect to Azure

In [143]:
# Look up the tenant id in the local secrets file (kept out of the notebook),
# then build a browser-based interactive credential for that tenant.
tenant_id = load_secrets("secrets.ini")["tenant_id"]
credential = InteractiveBrowserCredential(
    tenant_id=tenant_id,
)

In [144]:
# Build the workspace client from the JSON config file that sits next to the
# notebook, authenticating with the credential created above.
client = MLClient.from_config(
    file_name="./azure-config.json",
    credential=credential,
)

Found the config file in: .\./azure-config.json


### Get workspace

In [145]:
# Fetch the workspace entity through the client and print its name as a quick
# connectivity check.
# NOTE(review): get() is called without a name - presumably it resolves to the
# workspace configured in azure-config.json; confirm.
workspace = client.workspaces.get()
print(workspace.name)

mlops-e2e-03


In [146]:
# client.workspaces.get(client.workspace_name).mlflow_tracking_uri

### Create Environment

In [147]:
# Local folder that holds the conda specification for the job environment.
# Creating it is a no-op when the folder is already there.
dependencies_dir = "./dependencies"
os.makedirs(name=dependencies_dir, exist_ok=True)

In [148]:
# %%writefile {dependencies_dir}/conda.yaml
# name: model-env
# channels:
#   - conda-forge
# dependencies:
#   - python=3.11.5
#   - pip
#   - pip:
#     - pandas==2.2.2
#     - numpy==1.26.4
#     - seaborn==0.13.2
#     - matplotlib==3.8.4
#     - torch==2.3.0
#     - pillow==10.3.0
#     - opencv-python==4.9.0.80
#     - torchvision==0.18.0
#     - tqdm==4.66.2
#     - scikit-learn==1.4.2
#     - mlflow==2.12.1
#     - azureml-mlflow==1.56.0

In [149]:
# Register the job environment: a base Docker image plus the conda
# specification stored in the dependencies folder.
env_name = "mlopse2eenv"

env = Environment(
    name=env_name,
    # Reuse the folder variable from the previous cell so the conda file
    # location is defined in exactly one place.
    conda_file=os.path.join(dependencies_dir, "conda.yaml"),
    # image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest"
    image="mcr.microsoft.com/azureml/curated/mldesigner:24"
)

# `env` is rebound to the object returned by the service; later cells read
# its name and version from it.
env = client.environments.create_or_update(env)

In [150]:
# Confirm which environment name/version the workspace handed back.
print(
    f"Environment with name {env.name} is registered to workspace, "
    f"the environment version is {env.version}"
)

Environment with name mlopse2eenv is registered to workspace, the environment version is 1


### Create Compute

In [151]:
# Definition of the training cluster: it may scale between 0 and 4 instances
# of the given VM size; idle_time_before_scale_down is set to 120.
compute = AmlCompute(
    name="compute1",
    type="amlcompute",
    size="STANDARD_DS3_v2",
    min_instances=0,
    max_instances=4,
    idle_time_before_scale_down=120,
)

In [152]:
# begin_create_or_update only starts a long-running operation and returns a
# poller. Block on .result() so that `compute` holds the provisioned compute
# entity (not the poller) and later cells do not race the provisioning.
compute = client.compute.begin_create_or_update(compute).result()

In [153]:
# Resolve the cluster explicitly by the name it was created with. Taking the
# first element of client.compute.list() would silently pick some other
# compute target whenever the workspace contains more than one.
compute_name = client.compute.get("compute1").name
print(compute_name)

compute1


### Create data asset

In [154]:
# Get-or-create for version "1" of the CAPTCHA data asset.
try:
    data_asset = client.data.get(version="1", name="mlopse2edata")
    print("Data asset already exists")
except Exception:
    # NOTE(review): any failure of the lookup (not only "asset missing")
    # lands here and triggers a registration attempt from the local folder.
    data_asset = Data(
        name="mlopse2edata",
        version="1",
        description="CAPTCHA data",
        type=AssetTypes.URI_FOLDER,
        path="./data/raw_data/",
    )
    client.data.create_or_update(data_asset)
    print("Created data asset")

# Always re-read the asset from the workspace so that `data_asset` is the
# registered entity rather than the local definition built above.
data_asset = client.data.get(version="1", name="mlopse2edata")
print("Created data asset found")


[32mUploading raw_data (8.97 MBs): 100%|##########| 8973240/8973240 [00:16<00:00, 541914.64it/s] 
[39m



Created data asset
Created data asset found


In [229]:
# Keep the registered asset's path in its own variable; the job input
# definitions below pass it as the `path` of their "data" input.
data_path = data_asset.path
print(data_path)

azureml://subscriptions/6202d817-1165-4812-9031-c0f106108096/resourcegroups/mlops-e2e/workspaces/mlops-e2e-03/datastores/workspaceblobstore/paths/LocalUpload/62043ef1c8d68645d2daf3f24e844eff/raw_data/


### Create pipelines

In [186]:
# Inputs of the plain training job: the data folder (mounted read-only) and
# the number of epochs.
train_inputs = dict(
    data=Input(
        path=data_path,
        type=AssetTypes.URI_FOLDER,
        mode=InputOutputModes.RO_MOUNT,
    ),
    epochs=50,
)

# train_outputs = {
#     "model_info": Output(
#         type=AssetTypes
#     )
# }

In [187]:
# Command job that runs the training pipeline script from ./src on the
# cluster, inside the latest version of the registered environment.
# The ${{inputs.*}} placeholders refer to the keys of `train_inputs`.
train_job = command(
    name="train_job02",
    display_name="train_job",
    description="Train job for MLOPS E2E",
    experiment_name="mlopse2e",
    code="./src",
    command="python ./pipeline/train_pipeline.py --data ${{inputs.data}} --epochs ${{inputs.epochs}}",
    inputs=train_inputs,
    # outputs=
    environment=f"{env.name}@latest",
    compute=compute_name,
)

In [188]:
# Submit the training job. `train_job` is rebound to the object returned by
# the client, which is what the following cell reads name and status from.
train_job = client.create_or_update(train_job)

[32mUploading src (0.06 MBs): 100%|##########| 59207/59207 [00:09<00:00, 6046.26it/s]  
[39m



In [190]:
# Display the submitted job's name together with the status it had when the
# submit call returned (the value is not refreshed here).
train_job.name, train_job.status

('train_job02', 'Starting')

## Train job with hyperparameter optimization

In [204]:
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy

In [241]:
# Inputs of the sweep's trial command. learning_rate and batch_size are
# plain values here; the sweep cell below rebinds them to search
# distributions.
train_inputs_sweep = dict(
    data=Input(
        path=data_path,
        type=AssetTypes.URI_FOLDER,
        mode=InputOutputModes.RO_MOUNT,
    ),
    epochs=2,
    learning_rate=0.001,
    batch_size=32,
)

In [242]:
# Command template for the hyperparameter sweep: the same training script as
# the plain training job, with learning rate and batch size exposed as inputs
# so that they can be bound to search distributions.
train_job_sweep_command = command(
    name="train_job_sweep",
    description="Train job for MLOPS E2E with sweep",
    display_name="train_job_sweep",
    inputs=train_inputs_sweep,
    # outputs=
    code="./src",
    # Adjacent string literals are concatenated, so every argument is
    # separated by exactly one explicit space. Backslash continuations inside
    # a single literal would embed the source indentation into the command
    # and make argument separation depend on that indentation.
    command=(
        "python ./pipeline/train_pipeline.py "
        "--data ${{inputs.data}} "
        "--epochs ${{inputs.epochs}} "
        "--learning_rate ${{inputs.learning_rate}} "
        "--batch_size ${{inputs.batch_size}}"
    ),
    compute=compute_name,
    experiment_name="mlopse2e",
    environment=f"{env.name}@latest",
)

In [243]:
# Call the command template with search-space expressions in place of the
# fixed input values: learning_rate is drawn from Uniform(0.001, 0.01) and
# batch_size is chosen from [16, 32]. Inputs not overridden here (data,
# epochs) keep the values given in train_inputs_sweep.
train_job_sweep = train_job_sweep_command(
    learning_rate=Uniform(min_value=0.001, max_value=0.01),
    batch_size=Choice(values=[16, 32])
    )

In [244]:
# Turn the parameterised command into a sweep: random sampling over the
# search space, maximising the metric named "accuracy".
# NOTE(review): the training script must log a metric with exactly this name;
# the script is not visible from this notebook - confirm.
train_sweep_job = train_job_sweep.sweep(
    primary_metric="accuracy",
    goal="Maximize",
    sampling_algorithm="random",
    compute=compute_name,
)

# Budget for the sweep: 20 trials in total, up to 10 requested concurrently,
# timeout value 7200.
# NOTE(review): the cluster is defined with max_instances=4, so fewer than 10
# trials may actually run at the same time - confirm this is intended.
train_sweep_job.set_limits(
    timeout=7200,
    max_total_trials=20,
    max_concurrent_trials=10,
)


In [245]:
# Submit the sweep. The returned object is stored as `sweep_train_job`, a
# separate name from the local definition `train_sweep_job` (note the swapped
# word order).
sweep_train_job = client.create_or_update(train_sweep_job)

## Evaluate the model

In [197]:
# mlflow_model_path = f"{client.jobs.get(job.name).id}/model"

# Build the "runs:/" style model path for the training job. The name of the
# submitted training job is used as the run id. The variable is `train_job`
# (created and submitted in the "Create pipelines" section); no variable
# called `job` is defined anywhere in this notebook.
mlflow_model_path = f"runs:/{train_job.name}/model"
mlflow_model_path

'runs:/train_job02/model'

In [198]:
# Inputs of the evaluation job: the same read-only data mount used for
# training, plus the model path string built in the previous cell.
eval_inputs = dict(
    data=Input(
        path=data_path,
        type=AssetTypes.URI_FOLDER,
        mode=InputOutputModes.RO_MOUNT,
    ),
    model_path=mlflow_model_path,
)

In [201]:
# Command job that runs the inference pipeline script from ./src against the
# data folder and the model path, on the same cluster and environment as
# training. The ${{inputs.*}} placeholders refer to the keys of `eval_inputs`.
eval_job = command(
    name="eval_job03",
    display_name="eval_job",
    description="Eval job for MLOPS E2E",
    experiment_name="mlopse2e",
    code="./src",
    command="python ./pipeline/inference_pipeline.py --data ${{inputs.data}} --model_path ${{inputs.model_path}}",
    inputs=eval_inputs,
    environment=f"{env.name}@latest",
    compute=compute_name,
)

In [202]:
# Submit the evaluation job; `eval_job` is rebound to the object returned by
# the client.
eval_job = client.create_or_update(eval_job)

### Register and save the model

Default model location in AzureML:
`azureml://jobs/<run-id>/outputs/artifacts/paths/model/`


The run id is the name of the training job (`train_job.name`).

In [78]:
# Location of the model artifacts written by the training job, following the
# default "azureml://jobs/<run-id>/outputs/artifacts/paths/model/" layout.
# The run id is the training job's name; the variable holding that job is
# `train_job` (no variable called `job` is defined in this notebook).
model_path = f"azureml://jobs/{train_job.name}/outputs/artifacts/paths/model/"

In [111]:
# Ad-hoc lookup that displays the resource id of a job fetched by name.
# NOTE(review): "train_job04" is not submitted anywhere in the visible
# notebook - presumably a leftover from an earlier session; confirm or remove.
client.jobs.get("train_job04").id

'/subscriptions/6202d817-1165-4812-9031-c0f106108096/resourceGroups/mlops-e2e/providers/Microsoft.MachineLearningServices/workspaces/mlops-e2e-02/jobs/train_job04'

In [87]:
# Describe the model to register: the artifacts produced by the training job
# (model_path), registered under a fixed name as an MLflow model.
run_model = Model(
    path=model_path,
    name="mlopse2e_model",
    description="Model from run.",
    # Use the SDK constant (same value as the "mlflow_model" literal) so the
    # asset type is spelled the same way as the other asset types in this
    # notebook and cannot be mistyped.
    type=AssetTypes.MLFLOW_MODEL
)

In [88]:
# Register run_model through client.models; the entity returned by the
# service is kept as `model`.
model = client.models.create_or_update(run_model)

In [92]:
# Display the first entry yielded by client.models.list().
# NOTE(review): this is not necessarily the model registered in the previous
# cell - the ordering of list() is not established here; confirm, or display
# `model` instead.
next(client.models.list())

Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': True, 'auto_delete_setting': None, 'name': 'azureml_train_job02_output_mlflow_log_model_909613730', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/6202d817-1165-4812-9031-c0f106108096/resourceGroups/mlops-e2e/providers/Microsoft.MachineLearningServices/workspaces/mlops-e2e-02/models/azureml_train_job02_output_mlflow_log_model_909613730', 'Resource__source_path': '', 'base_path': 'd:\\MLOps', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x0000017C3BBCF350>, 'serialize': <msrest.serialization.Serializer object at 0x0000017C3C1B0C10>, 'version': None, 'latest_version': None, 'path': None, 'datastore': None, 'utc_time_created': None, 'flavors': None, 'arm_type': 'model_version', 'type': 'custom_model', 'stage': None})