# Initialization

In [25]:
import azureml.core

# Record which Azure ML SDK version this notebook was executed with.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.30.0


In [26]:
from azureml.core.workspace import Workspace

# Obtain the workspace handle through Workspace.from_config().
ws = Workspace.from_config()

# Print the identifying fields of the workspace, one per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

Workspace name: mlwork
Azure region: eastus
Subscription id: 2699b46d-b293-4398-af19-3f902cf2d287
Resource group: ml_resource_group_pn


In [27]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the cluster to reuse, or to create when the lookup fails.
# Original note: the name must be between 6-16 characters.
# NOTE(review): confirm the exact length limit against the Azure ML docs.
cluster_name = 'gpu-backup'

try:
    # Look the cluster up in the workspace by name.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The lookup raised, so provision a new single-node low-priority cluster.
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_ND6s',
        vm_priority="lowpriority",
        idle_seconds_before_scaledown=30,
        max_nodes=1,
    )

    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
else:
    print('Found existing compute target.')

# Show the detailed status of whichever compute target is now in use.
print(compute_target.get_status().serialize())

Found existing compute target.
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-06-29T13:13:50.668000+00:00', 'errors': None, 'creationTime': '2021-06-28T07:35:27.587430+00:00', 'modifiedTime': '2021-06-28T15:51:47.157067+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 1, 'nodeIdleTimeBeforeScaleDown': 'PT60S'}, 'vmPriority': 'LowPriority', 'vmSize': 'STANDARD_ND6S'}


In [28]:
from azureml.core import Experiment

# Experiment handle that the training run is later submitted through.
experiment_name = "azureml-train"
experiment = Experiment(name=experiment_name, workspace=ws)

In [29]:
from azureml.widgets import RunDetails
from azureml.core import Environment
from azureml.core.runconfig import DockerConfiguration

# Docker runtime settings for the run (Docker enabled).
docker_config = DockerConfiguration(use_docker=True)

# Fetch the environment with this name from the workspace.
# Alternative environment sources, currently disabled:
#   env = Environment.from_conda_specification("arielenv", "environment.yml")
#   env = Environment.get(ws, "ArielEnv")
env = Environment.get(
    ws,
    "AzureML-pytorch-1.7-ubuntu18.04-py37-cuda11-gpu",
)

# Disabled base-image override:
#   env.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04"

In [30]:
from azureml.core import Dataset

# Fetch the registered dataset by name. Dataset.get_by_name raises when no
# dataset with this name is registered in the workspace, whereas the previous
# ws.datasets.get(...) lookup returned None and deferred the failure to the
# later ds.as_download() call in the run-configuration cell.
ds = Dataset.get_by_name(ws, name="Ariel Space Dataset")
ds

{
  "source": [
    "('ariel_space', 'aug_noisy_train_img/*.png')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "738233d5-2880-46b1-86d8-ae1f3ab84dd2",
    "name": "Ariel Space Dataset",
    "version": 1,
    "description": "Ariel Space Challenge",
    "tags": {
      "format": "PNG"
    },
    "workspace": "Workspace.create(name='mlwork', subscription_id='2699b46d-b293-4398-af19-3f902cf2d287', resource_group='ml_resource_group_pn')"
  }
}

In [31]:
from azureml.core import Workspace, Experiment, ScriptRunConfig

# Describe the training job: which script to run, where, and in what environment.
# The dataset is handed to the script through the --ds argument.
config = ScriptRunConfig(
    script='transfer_learning.py',
    source_directory='.',
    arguments=['--ds', ds.as_download()],
    environment=env,
    docker_runtime_config=docker_config,
    compute_target=compute_target,
)

# Submit the job through the experiment and print the link to the run page.
run = experiment.submit(config)
portal_url = run.get_portal_url()
print(portal_url)  # link to ml.azure.com

# Optional, currently disabled:
# run.wait_for_completion(show_output=True)

https://ml.azure.com/runs/azureml-train_1624977165_c2965db9?wsid=/subscriptions/2699b46d-b293-4398-af19-3f902cf2d287/resourcegroups/ml_resource_group_pn/workspaces/mlwork&tid=ce1926b3-4f07-45ee-a67c-e02806b3f612


In [32]:
from azureml.widgets import RunDetails

# Build the details widget for the submitted run, then display it.
run_details = RunDetails(run)
run_details.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [16]:
from azureml.widgets import RunDetails

# NOTE(review): this cell repeats the preceding RunDetails cell, and its
# execution count (16) is out of sequence with the cells around it. It looks
# like a leftover from an earlier session; confirm whether it is still needed.
run_details = RunDetails(run)
run_details.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…