In [14]:
#Imports
import os
from azure.identity import ClientSecretCredential
from azure.ai.ml import MLClient, Input, command
from azure.ai.ml.entities import Environment


In [15]:
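# Option 1 (currently disabled): authenticate as a service principal. The tenant ID,
# app ID and app password are read from environment variables (loaded via dotenv).
# NOTE: the Key Vault URI is built below, but no secret is actually fetched from the vault here.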
# from dotenv import load_dotenv

# try:
#     # Get Configuration Settings
#     load_dotenv()
#     ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
#     key_vault_name = os.getenv('KEY_VAULT')
#     app_tenant = os.getenv('TENANT_ID')
#     app_id = os.getenv('APP_ID')
#     app_password = os.getenv('APP_PASSWORD')

#     # Get Azure AI services key from keyvault using the service principal credentials
#     key_vault_uri = f"https://geologicmodeli4807379474.vault.azure.net/"
#     credential = ClientSecretCredential(app_tenant, app_id, app_password)

#     print(credential)

# except Exception as ex:
#     print(ex)

In [16]:
# Alternative - get credential interactively in web browser

# Authentication package
from azure.identity import InteractiveBrowserCredential
credential = InteractiveBrowserCredential()

In [17]:
# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id="c86805c3-3059-4256-b792-9a7f1b58c4fc",
    resource_group_name="mltest-rg",
    workspace_name="mltest-aml",
)

print(ml_client)

MLClient(credential=<azure.identity._credentials.browser.InteractiveBrowserCredential object at 0x000001E9789E6D10>,
         subscription_id=c86805c3-3059-4256-b792-9a7f1b58c4fc,
         resource_group_name=mltest-rg,
         workspace_name=mltest-aml)


In [18]:
from azure.ai.ml.entities import AmlCompute

gpu_compute_target = "gpu-cluster"

try:
    # let's see if the compute target already exists
    gpu_cluster = ml_client.compute.get(gpu_compute_target)
    print(
        f"You already have a cluster named {gpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    print("Creating a new gpu compute target...")

    # Let's create the Azure ML compute object with the intended parameters
    gpu_cluster = AmlCompute(
        # Name assigned to the compute cluster
        name="gpu-cluster",
        # Azure ML Compute is the on-demand VM service
        type="amlcompute",
        # VM Family
        size="Standard_NC8as_T4_v3",#Standard_E4ds_v4",#"STANDARD_NC6s_v3",
        # Minimum running nodes when there is no job running
        min_instances=0,
        # Nodes in cluster
        max_instances=1,
        # How many seconds will the node running after the job termination
        idle_time_before_scale_down=180,
        # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
        tier="Dedicated",
    )

    # Now, we pass the object to MLClient's create_or_update method
    gpu_cluster = ml_client.begin_create_or_update(gpu_cluster).result()

print(
    f"AMLCompute with name {gpu_cluster.name} is created, the compute size is {gpu_cluster.size}"
)

You already have a cluster named gpu-cluster, we'll reuse it as is.
AMLCompute with name gpu-cluster is created, the compute size is Standard_NC8as_T4_v3


In [19]:
# Preconfigured (curated) environment - kept for reference only:
#curated_env_name = "AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu@latest"
# It won't work for this job because it does not have PyTorch Geometric installed.

In [20]:
#Create conda yaml file with custom dependencies

dependencies_dir = "./AML/dependencies"
os.makedirs(dependencies_dir, exist_ok=True)

In [21]:
%%writefile {dependencies_dir}/conda.yaml
# Conda specification for the custom job environment; this notebook passes
# the written file as `conda_file` when it registers the environment.
name: model-env
channels:
  # conda-forge is the only channel listed.
  - conda-forge
dependencies:
  # Both entries are pinned to exact versions.
  - python=3.10.13
  - pytorch_geometric=2.4.0

Overwriting ./AML/dependencies/conda.yaml


In [22]:
from azure.ai.ml.entities import Environment

# The custom environment is defined by the pytorch/pytorch:latest image plus
# the conda specification stored in the dependencies folder.
custom_env_name = "aml-pytorch-geometric"
conda_spec_path = os.path.join(dependencies_dir, "conda.yaml")

custom_job_env = Environment(
    image="pytorch/pytorch:latest",
    conda_file=conda_spec_path,
    name=custom_env_name,
    description="Custom environment for geomodeling with pytorch geometric",
    tags={"pytorch_geometric": "2.4.0"},
)

# Register the definition with the workspace; the returned object replaces
# the local one and carries the name and version reported below.
custom_job_env = ml_client.environments.create_or_update(custom_job_env)

registration_message = f"Environment with name {custom_job_env.name} is registered to workspace, the environment version is {custom_job_env.version}"
print(registration_message)

Environment with name aml-pytorch-geometric is registered to workspace, the environment version is 5


In [23]:
# Define the training job: `python TrainModel.py` with ./AML/ as the code
# folder, on the GPU compute target, using the custom environment
# (custom_job_env) rather than the curated one (curated_env_name).
# No script arguments are passed; a commented-out draft of them was:
#   --input-dir ${{input_dir}} --output-dir ${{output_dir}} --num-epoch ${{num_epoch}}
command_job = command(
    command="python TrainModel.py",
    code="./AML/",
    environment=custom_job_env,
    compute=gpu_compute_target,
    display_name="pytorch-geomodeling",
    experiment_name="pytorch-geomodeling-ex",
)

In [24]:
# Submit the command job to the workspace.
returned_job = ml_client.jobs.create_or_update(command_job)

# Last expression of the cell: the studio URL where the job status can be followed.
returned_job.studio_url



'https://ml.azure.com/runs/good_thread_1qcdvbdkk1?wsid=/subscriptions/c86805c3-3059-4256-b792-9a7f1b58c4fc/resourcegroups/mltest-rg/workspaces/mltest-aml&tid=1b16ab3e-b8f6-4fe3-9f3e-2db7fe549f6a'