In [None]:
# Import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml.entities import AmlCompute

In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# Choose the credential used by the rest of the notebook: try
# DefaultAzureCredential first and fall back to an interactive browser login.
try:
    credential = DefaultAzureCredential()
    # Check that the credential can actually obtain a token.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # DefaultAzureCredential did not work; fall back to
    # InteractiveBrowserCredential, which opens a browser page for login.
    credential = InteractiveBrowserCredential()

In [None]:
# Build the workspace client. MLClient.from_config is tried first; if it
# fails, the workspace details below are written to a config file and the
# client is loaded from that file instead.
import json
import os

try:
    ml_client = MLClient.from_config(credential=credential)
except Exception:
    # NOTE: Update the following workspace information if it has not been
    # configured correctly before.
    client_config = {
        "subscription_id": "<SUBSCRIPTION_ID>",
        "resource_group": "<RESOURCE_GROUP>",
        "workspace_name": "<AML_WORKSPACE_NAME>",
    }

    # Refuse to write a config while ANY value is still a "<...>" placeholder,
    # not just the subscription id, so a half-filled config is never persisted.
    if any(value.startswith("<") for value in client_config.values()):
        print(
            "please update your <SUBSCRIPTION_ID> <RESOURCE_GROUP> <AML_WORKSPACE_NAME> in notebook cell"
        )
        raise  # re-raise the original from_config failure unchanged

    # Write the config file, then reload the client from it.
    config_path = "../.azureml/config.json"
    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    with open(config_path, "w") as fo:
        json.dump(client_config, fo)
    ml_client = MLClient.from_config(credential=credential, path=config_path)
print(ml_client)

In [None]:
# Name of the AML compute cluster used for training.
gpu_compute_target = "gpu-cluster"

# Look the cluster up; when the lookup raises, create the cluster and wait
# for the create operation to finish.
# NOTE(review): any exception from the lookup (not only "not found") leads to
# the creation path -- confirm this broad fallback is intended.
try:
    ml_client.compute.get(gpu_compute_target)
except Exception:
    print("Creating a new gpu compute target...")
    compute = AmlCompute(
        name=gpu_compute_target,
        size="STANDARD_NC12",
        min_instances=0,
        max_instances=3,
    )
    creation_poller = ml_client.compute.begin_create_or_update(compute)
    creation_poller.result()  # block until the operation completes

In [None]:
# Define the command job that runs train.py with a TensorFlow distribution
# configuration.
# NOTE(review): "added_property" does not look like a TensorFlow distribution
# setting -- confirm it is intentional.
job = command(
    code="./src",  # local path where the code is stored
    command="python train.py",
    environment="AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu@latest",
    # Reuse the cluster name set in the compute cell instead of repeating the
    # literal, so the job always targets the cluster that cell checked/created.
    compute=gpu_compute_target,
    instance_count=2,
    distribution={
        "type": "tensorflow",
        "parameter_server_count": 1,
        "worker_count": 2,
        "added_property": 7,
    },
    display_name="tensorflow_lstm_2workers",
    experiment_name="tensorflow-distributed-test",
)

In [None]:
# Submit the command job defined above through the workspace client and keep
# the object returned by the service in `returned_job`.
returned_job = ml_client.create_or_update(job)