In [1]:
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Report the installed Azure ML core SDK version so the environment is visible in the output.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.0.10


In [2]:
from azureml.train.dnn import PyTorch

In [3]:
# Load the workspace described by the local config file.
ws = Workspace.from_config()

# Print the identifying workspace fields, tab separated. The original passed
# ws.location twice, so the region appeared twice in the output.
print(ws.name, ws.location, ws.resource_group, sep='\t')


Found the config file in: /home/shanuv/Documents/696/aml_config/config.json
AmherstWorkspace	eastus2	AmherstRG	eastus2


In [4]:
# Import kept in this cell so the cell still runs on its own.
from azureml.core import Experiment

# Experiment handle in workspace `ws`; the training run is submitted through it later.
experiment_name = 'cifar10-classification'
exp = Experiment(name=experiment_name, workspace=ws)

In [5]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings. Each value can be overridden through an environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")

# Environment variables are always strings, so coerce the node counts to int;
# otherwise an overridden value would reach the provisioning call as a str.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# The default is a CPU VM size (STANDARD_D2_V2). For a GPU VM, set the variable
# below to a GPU size such as STANDARD_NC6.
# NOTE(review): the variable name AML_COMPUTE_CLUSTER_STANDARD_NC6 reads like a
# typo for a "SKU" variable; it is kept unchanged for compatibility - confirm.
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_STANDARD_NC6", "STANDARD_D2_V2")

# Read the workspace's compute targets once and reuse the result for both the
# membership test and the lookup.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # Previously this case was silent; surface it so a wrong target type is
        # noticed before a run is submitted to it.
        print('warning: compute target ' + compute_name
              + ' exists but is not an AmlCompute cluster ('
              + type(compute_target).__name__ + ')')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. cpucluster


In [6]:
# Default datastore of the workspace; print its type, storage account and container.
ds = ws.get_default_datastore()
datastore_summary = (ds.datastore_type, ds.account_name, ds.container_name)
print(*datastore_summary)

# One-time upload of the local dataset folder to the 'cifar10' path of the datastore.
# Left commented out; uncomment to (re)upload the data.
#ds.upload(src_dir='../682/assignment2/cs682/datasets', target_path='cifar10', overwrite=True, show_progress=True)

AzureBlob amherstwstorageinnganzr azureml-blobstore-fe92660d-c6c1-4086-b2f7-71f9c508e6c7


In [7]:
from azureml.train.dnn import PyTorch

# Command-line arguments for the entry script: the data folder is the mounted datastore.
# NOTE(review): this mounts the datastore root, while the commented-out upload
# targets the 'cifar10' path - confirm cnn_pytorch.py looks in that subfolder.
script_params = {'--data-folder': ds.as_mount()}

# Estimator that runs cnn_pytorch.py from the current directory on the compute target.
# source_directory='./' submits the whole notebook directory with the run.
# NOTE(review): use_gpu=True is requested although the default cluster is named
# 'cpucluster' with a CPU VM size - confirm the target actually has GPUs.
pt_est = PyTorch(
    entry_script='cnn_pytorch.py',
    source_directory='./',
    script_params=script_params,
    compute_target=compute_target,
    use_gpu=True,
)

In [8]:
# Submit the estimator to the experiment; `run` is the handle used by the
# following cells to display and wait on the job.
run = exp.submit(pt_est)

Submitting /home/shanuv/Documents/696 directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


In [9]:
from azureml.widgets import RunDetails
# Show the notebook widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

In [None]:
# Wait for the submitted run to complete before continuing. Kept as the last
# expression of the cell so its return value is displayed.
run.wait_for_completion(show_output=False) # specify True for a verbose log