# Job creation and submission to Azure

**!Important!** 
This script uses **Azure SDKv2** for the creation of the training job. 
The notebook used for uploading the data uses **Azure SDKv1**

In [21]:
import os
from azure.ai.ml import MLClient, Input
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Environment
from azure.ai.ml import command

In [2]:
from src.utils import load_json

config = load_json("./../../configs/azure_config.json")
config

{'subscription_id': 'bc82dccd-f19d-42cb-9ce3-0d5df33ef086',
 'resource_group': 'a0047-STUFIIT-ML01',
 'workspace_name': 'a0047stufiitml01'}

In [3]:
# Authenticate and create a MLClient instance
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=config["subscription_id"],
    resource_group_name=config["resource_group"],
    workspace_name=config["workspace_name"]
)

In [4]:
# Check connection to workspace
ws = ml_client.workspaces.get(ml_client.workspace_name)
print(ws.name, ws.location, ws.resource_group, sep='\t')

a0047stufiitml01	westeurope	a0047-STUFIIT-ML01


In [5]:
# Get created Dataset as Data Asset
dataset = ml_client.data.get(name="xsivakm1_tiger_segmentation", version="1")

print(dataset.name)
print(dataset.path)

xsivakm1_tiger
azureml://subscriptions/bc82dccd-f19d-42cb-9ce3-0d5df33ef086/resourcegroups/a0047-STUFIIT-ML01/workspaces/a0047stufiitml01/datastores/workspaceblobstore/paths/datasets/sivak-tiger/


In [18]:
# Prepare Environment
env = Environment(
    name="tiger-segmentation",
    description="For segmentation training on TIGER dataset",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="./azure_conda.yaml"
)

print(env)

conda_file:
  channels:
  - defaults
  - conda-forge
  dependencies:
  - python=3.12
  - pip
  - pip:
    - torch
    - torchvision
    - pytorch-lightning
    - segmentation-models-pytorch
    - wandb
    - opencv-python-headless
    - pyyaml
    - json5
  name: tiger-segmentation
description: For segmentation training on TIGER dataset
image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
name: tiger-segmentation
tags: {}


In [22]:
mask_dirs = os.listdir("./../../data/processed-train/patches/masks")
mask_dirs

['normalized_blur_otsu',
 'hematoxylin_blur_adaptive',
 'raw_otsu',
 'raw_eq_blur_otsu',
 'normalized_adaptive',
 'raw_adaptive',
 'normalized_blur_adaptive',
 'hematoxylin_adaptive',
 'raw_blur_otsu',
 'normalized_eq_otsu',
 'hematoxylin_otsu',
 'hematoxylin_eq_blur_otsu',
 'hematoxylin_blur_otsu',
 'raw_eq_otsu',
 'raw_eq_adaptive',
 'raw_blur_adaptive',
 'raw_eq_blur_adaptive',
 'normalized_eq_blur_otsu',
 'normalized_eq_blur_adaptive',
 'normalized_eq_adaptive',
 'normalized_otsu',
 'hematoxylin_eq_otsu',
 'hematoxylin_eq_blur_adaptive',
 'hematoxylin_eq_adaptive']

In [19]:
# Define job params
wandb_key = "80d7df7ab330e7fe22301afbab951204c5c0b33a"
model_name = "Unet"

for mask_dir_name in mask_dirs:
    # Define jobs
    job = command(
        code="../../",  # Project folder (contains main.py)
        command=f"python main.py --data_path ${{{{inputs.input_data}}}} --wandb {wandb_key} --model_name {model_name} --mask_dir_name {mask_dir_name}",
        inputs={
            "input_data": Input(
                type="uri_folder",
                path=dataset.path,
                mode="ro_mount"  # Mount it like .as_mount() in old SDK
            )
        },
        environment=env,
        experiment_name=f"xsivakm1_segmentation_{model_name}",
        compute="xsivakm1-24-v100",  # compute cluster name
        display_name=f"{mask_dir_name}"  # run display name
    )
    
    # Submit the job
    ml_client.jobs.create_or_update(job)

Uploading bc_project (2.05 MBs): 100%|██████████| 2048370/2048370 [00:10<00:00, 197295.15it/s]




Experiment,Name,Type,Status,Details Page
xsivakm1_segmentation_Unet,tender_papaya_0s19f7s7zs,command,Starting,Link to Azure Machine Learning studio
