In [5]:
import azureml.core

# Show the SDK version the notebook was authored against next to the
# version installed in the running kernel, so a mismatch is easy to spot.
print("This notebook was created using version 1.38.0 of the Azure ML SDK")
print(f"You are currently using version {azureml.core.VERSION} of the Azure ML SDK")

This notebook was created using version 1.38.0 of the Azure ML SDK
You are currently using version 1.38.0 of the Azure ML SDK


In [6]:
import os

# Workspace coordinates. Each value is read from the environment variable
# named below and falls back to the literal default when it is not set.
# NOTE(review): the fallback subscription ID is hard-coded in the notebook;
# consider supplying SUBSCRIPTION_ID through the environment before sharing.
subscription_id = os.environ.get(
    "SUBSCRIPTION_ID", "8864b28b-e6bd-4055-91e7-96394abaa575"
)
resource_group = os.environ.get(
    "RESOURCE_GROUP", "machine_learning_resource_group"
)
workspace_name = os.environ.get(
    "WORKSPACE_NAME", "machine_learning_workspace"
)
workspace_region = os.environ.get("WORKSPACE_REGION", "uksouth")

In [7]:
from azureml.core import Workspace

# Try to attach to an already-existing workspace using the parameters
# defined earlier in the notebook. On success the connection details are
# persisted with write_config(); on failure the notebook keeps running and
# tells the user what to do next.
try:
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    # write the details of the workspace to a configuration file to the notebook library
    ws.write_config()
    print("Workspace configuration succeeded. Skip the workspace creation steps below")
except Exception as err:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate, and surface the
    # underlying cause instead of silently discarding it.
    print("Workspace not accessible. Change your parameters or create a new workspace below")
    print(f"Reason: {type(err).__name__}: {err}")

Workspace configuration succeeded. Skip the workspace creation steps below


In [8]:
from azureml.core import Workspace

# Collect the creation parameters in one place, then create the workspace.
workspace_settings = {
    "name": workspace_name,
    "subscription_id": subscription_id,
    "resource_group": resource_group,
    "location": workspace_region,
    "create_resource_group": True,
    "sku": 'basic',
    "exist_ok": True,
}
ws = Workspace.create(**workspace_settings)

# Fetch the workspace details; the returned value is neither stored nor
# displayed by this cell.
ws.get_details()

# Persist the workspace details to a configuration file in the notebook library.
ws.write_config()

In [9]:
# NOTE: everything in this cell is commented out, so running it does nothing.
# Uncomment the lines below to look up (or create) a CPU cluster named
# "cpu-cluster". Any saved output shown under this cell comes from an
# earlier run made while the code was still active.
#
# from azureml.core.compute import ComputeTarget, AmlCompute
# from azureml.core.compute_target import ComputeTargetException
#
# # Choose a name for your CPU cluster
# cpu_cluster_name = "cpu-cluster"
#
# # Verify that cluster does not exist already
# try:
#     cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
#     print("Found existing cpu-cluster")
# except ComputeTargetException:
#     print("Creating new cpu-cluster")
#
#     # Specify the configuration for the new cluster
#     compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
#                                                            min_nodes=0,
#                                                            max_nodes=4)
#
#     # Create the cluster with the specified name and configuration
#     cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
#
#     # Wait for the cluster to complete, show the output log
#     cpu_cluster.wait_for_completion(show_output=True)

Creating new cpu-cluster
InProgress.....
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [26]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name used to look up (and, if needed, create) the GPU cluster
gpu_cluster_name = "gpu-cluster"

# Reuse the cluster when the lookup succeeds; a ComputeTargetException from
# the lookup is treated as "not there yet" and triggers provisioning.
try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
except ComputeTargetException:
    print("Creating new gpu-cluster")

    # Cluster shape: STANDARD_NC6 VMs, between 0 and 4 nodes
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",
        min_nodes=0,
        max_nodes=4,
    )

    # Provision the cluster, then block until provisioning completes,
    # streaming the progress log into the cell output
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)
    gpu_cluster.wait_for_completion(show_output=True)
else:
    print("Found existing gpu cluster")

Creating new gpu-cluster
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [11]:
# Report the version of the Azure ML core SDK loaded in this kernel
import azureml.core

print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.38.0


In [14]:
from azureml.core.workspace import Workspace

# Load the workspace via Workspace.from_config() and print its identifying
# attributes, one per line.
ws = Workspace.from_config()
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

Workspace name: machine_learning_workspace
Azure region: uksouth
Subscription id: 8864b28b-e6bd-4055-91e7-96394abaa575
Resource group: machine_learning_resource_group


In [20]:
# Make sure the local project directory exists; an already-existing
# directory is not an error because exist_ok is set.

import os

project_folder = './pytorch-image'
os.makedirs(name=project_folder, exist_ok=True)

In [21]:
import shutil

# Copy the training script into the project directory. The call is the
# cell's last expression, so its return value (the destination path) is
# what the cell displays.
shutil.copy(src='pytorch_train.py', dst=project_folder)

'./pytorch-image/pytorch_train.py'

In [22]:
from azureml.core import Experiment

# Experiment handle in workspace `ws`, identified by its name
experiment_name = 'pytorch-image'
experiment = Experiment(workspace=ws, name=experiment_name)

In [29]:
%%writefile conda_dependencies.yml

# Conda environment specification written to conda_dependencies.yml by the
# %%writefile cell magic above.
channels:
- conda-forge
dependencies:
- python=3.6.2
- pip=21.3.1
- pip:
  # Entries in this nested list are installed with pip.
  - azureml-defaults
  - torch==1.6.0
  - torchvision==0.7.0
  - future==0.17.1
  # NOTE(review): pillow (like azureml-defaults) carries no version pin;
  # consider pinning it so environment builds are reproducible.
  - pillow

Overwriting conda_dependencies.yml


In [31]:
from azureml.core import Environment

# Build the environment from the conda specification file on disk.
pytorch_env = Environment.from_conda_specification(
    name='pytorch-1.6-gpu',
    file_path='./conda_dependencies.yml',
)

# Docker settings: enable Docker and select a CUDA base image.
# NOTE(review): the SDK reports `enabled` as deprecated (see the warning
# emitted by this cell) and points to
# azureml.core.runconfig.DockerConfiguration(use_docker=...) as the
# replacement; that object is supplied to the run configuration rather
# than to the Environment, so the flag is kept here unchanged.
docker_section = pytorch_env.docker
docker_section.enabled = True
docker_section.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [34]:
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import DockerConfiguration

# Describe the training job: which script to run, with which arguments,
# on which compute target and in which environment.
#
# Docker is requested explicitly through DockerConfiguration(use_docker=True),
# which is the replacement the SDK names in its deprecation warning for
# Environment.docker.enabled. Passing it here keeps the run Docker-based
# without depending on the deprecated flag.
src = ScriptRunConfig(source_directory=project_folder,
                      script='pytorch_train.py',
                      arguments=['--num_epochs', 30, '--output_dir', './outputs'],
                      compute_target=gpu_cluster,
                      environment=pytorch_env,
                      docker_runtime_config=DockerConfiguration(use_docker=True))

In [35]:
# Submit the configured job to the experiment and print the returned run
# object.
run = experiment.submit(config=src)
print(run)

Run(Experiment: pytorch-image,
Id: pytorch-image_1644746409_d0ae3c17,
Type: azureml.scriptrun,
Status: Preparing)
