#### Import Packages

In [None]:
import webbrowser

from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities import AmlCompute
from azure.ai.ml import command
from azure.ai.ml import Input
from azure.ai.ml.entities import ResourceConfiguration

#### Connect to AzureML

In [None]:
# Authenticate against Azure. DefaultAzureCredential is tried first; the
# get_token call is only a probe to confirm the credential can actually obtain
# a management-plane token before it is handed to MLClient.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception as auth_error:
    # Fall back to an interactive browser login instead of aborting the
    # notebook when no default credential is usable on this machine.
    print(f"DefaultAzureCredential failed ({auth_error}); falling back to interactive browser login...")
    credential = InteractiveBrowserCredential()

In [None]:
# Initialize a client to connect to the Azure ML workspace.
# Replace the placeholder values with the identifiers of your own workspace.
workspace_settings = {
    "subscription_id": "<SUBSCRIPTION_ID>",
    "resource_group_name": "<RESOURCE_GROUP>",
    "workspace_name": "<AML_WORKSPACE_NAME>",
}
ml_client = MLClient(credential=credential, **workspace_settings)

# Handles for the compute clusters; filled in by the cluster cells below.
cpu_cluster = gpu_cluster = None

#### Create CPU Cluster

In [None]:
# Name of the CPU cluster to reuse or create in the workspace.
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the CPU cluster if it already exists in the workspace
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(f"Reusing {cpu_compute_target}...")

except Exception:
    # The lookup failed (the cluster is assumed not to exist yet), so define
    # a new one. `except Exception` rather than a bare `except` so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    print("Creating a new cpu compute target...")

    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        size="STANDARD_DS3_V2",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=180,
        tier="Dedicated",
    )

    # begin_create_or_update starts a long-running operation and returns a
    # poller; .result() blocks until provisioning finishes and yields the
    # compute entity, whose .name / .size are read below.
    cpu_cluster = ml_client.begin_create_or_update(cpu_cluster).result()

print(f"AMLCompute with name {cpu_cluster.name} is created, the compute size is {cpu_cluster.size}")

#### Create GPU Cluster

In [None]:
# Name of the GPU cluster to reuse or create in the workspace.
gpu_compute_target = "gpu-cluster"

try:
    # Reuse the GPU cluster if it already exists in the workspace
    gpu_cluster = ml_client.compute.get(gpu_compute_target)
    print(f"Reusing {gpu_compute_target}...")

except Exception:
    # The lookup failed (the cluster is assumed not to exist yet), so define
    # a new GPU cluster. `except Exception` rather than a bare `except` so
    # that KeyboardInterrupt / SystemExit are not swallowed.
    print("Creating a new gpu compute target...")

    gpu_cluster = AmlCompute(
        name=gpu_compute_target,
        type="amlcompute",
        size="STANDARD_NC6s_v3",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=180,
        tier="Dedicated",
    )

    # begin_create_or_update starts a long-running operation and returns a
    # poller; .result() blocks until provisioning finishes and yields the
    # compute entity, whose .name / .size are read below.
    gpu_cluster = ml_client.begin_create_or_update(gpu_cluster).result()

print(f"AMLCompute with name {gpu_cluster.name} is created, the compute size is {gpu_cluster.size}")

#### Define Train Job

In [None]:
# Define the distributed PyTorch training job that is submitted in the next cell.
training_job = command(
    # Folder containing the training code that is uploaded with the job
    code="./src/pytorch_dl_train/",

    # Command executed on the compute to start training
    command="python dlip_train.py",

    # Curated environment the job runs in
    environment="AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu@latest",

    # Compute target the job runs on (the trailing comma was missing here,
    # which made the whole call a SyntaxError)
    compute="gpu-cluster",

    # Distributed-training settings: PyTorch launcher, one process per instance
    distribution={
        "type": "PyTorch",
        "process_count_per_instance": 1,
    },

    # Number of compute instances (nodes) used by the job
    instance_count=2,

    # Display name and description
    display_name="shade_dlip_training",
    description="Training dlip",
)

#### Submit Train Job

In [None]:
# Send the training job to the workspace under the "shade_dlip" experiment.
returned_job = ml_client.jobs.create_or_update(training_job, experiment_name="shade_dlip")

# Show the studio link for monitoring the run and open it in the default browser.
studio_url = returned_job.studio_url
print("Url to see live job running (sdk returned):")
print(studio_url)
webbrowser.open(studio_url)

# Report the name assigned to the submitted job.
print(f"Pipeline details: {returned_job.name}")