In [19]:
!pip install azureml-core
!pip install azure-ml-component



# Connect to AzureML Workspace

An AzureML (AML) workspace is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. The workspace holds all your experiments, compute targets, models, datastores, etc.

First, download the workspace configuration file (`config.json`) from the Azure portal. In the upper-right corner, click on the "Visual Studio Enterprise Subscription" tab, then click on the "Download config file" link.
![](images/config_download.png)

In [12]:
from azureml.core import Workspace, Experiment, Run, Datastore, Dataset
from azure.ml.component import Component, dsl

# Load the workspace described by the config.json downloaded above.
# This may prompt you to log in to Azure.
workspace = Workspace.from_config()

# Display the workspace to confirm its name, subscription and resource group
workspace

Workspace.create(name='kaggle', subscription_id='dba5253c-7f60-45fe-86c5-a01af9f846f0', resource_group='kaggle-rg')

# Uploading a dataset to Azure Blob Storage

To upload your dataset to Azure Blob Storage:
* Go to the "Data" tab and click on the "Create" button.
  ![](images/register_dataset_1.png)

* Give your dataset a unique name, select the "Folder" type, and click on the "Next" button.
  ![](images/register_dataset_2.png)

* Select the "Upload files from local" option and click on the "Next" button.
  ![](images/register_dataset_3.png)

# Submit an experiment that uses your dataset

* First, select a base environment. It's easiest to start with one of the default Ubuntu environments officially managed by Microsoft, which come with PyTorch and other ML libraries pre-installed. For the list of curated environments in AzureML, see [Manage Azure Machine Learning environments with the CLI & SDK (v2)](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-environments-v2?view=azureml-api-2&viewFallbackFrom=azureml-api-1&tabs=cli#use-a-curated-environment)
* Then, select an experiment. In AzureML, experiments are like a folder that contains all the runs. You can create a new experiment, or select an existing one.
* Create a `ScriptRunConfig` that contains the information about the script to run, the compute target to run it on, and the environment to use.

In [51]:
from azureml.core import Environment

# Fetch the PyTorch 1.10 / CUDA 11 GPU environment (Ubuntu 18.04, Python 3.8)
# from the workspace by name
environment = Environment.get(
    workspace=workspace,
    name="AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu",
)

# Create/select an experiment

In [46]:
# An experiment acts as a folder for related runs; use one per project/model type.
# This creates the experiment, or selects it if it already exists.
from azureml.core import Experiment

experiment_name = 'sample_experiment'
experiment = Experiment(workspace, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
sample_experiment,kaggle,Link to Azure Machine Learning studio,Link to Documentation


# Choose your compute target

In the "Compute" tab, you can see the available compute targets. Click on one to see its price, GPU type, maximum cluster size, etc.
![](images/run_1.png)

In [47]:
# Display the compute targets available in the workspace, keyed by name.
# One of these names is what gets passed as `compute_target` when configuring the run.
workspace.compute_targets

{'gpu-p100-x1': AmlCompute(workspace=Workspace.create(name='kaggle', subscription_id='dba5253c-7f60-45fe-86c5-a01af9f846f0', resource_group='kaggle-rg'), name=gpu-p100-x1, id=/subscriptions/dba5253c-7f60-45fe-86c5-a01af9f846f0/resourceGroups/kaggle-rg/providers/Microsoft.MachineLearningServices/workspaces/kaggle/computes/gpu-p100-x1, type=AmlCompute, provisioning_state=Succeeded, location=westus2, tags={}),
 'gpu-v100-x1': AmlCompute(workspace=Workspace.create(name='kaggle', subscription_id='dba5253c-7f60-45fe-86c5-a01af9f846f0', resource_group='kaggle-rg'), name=gpu-v100-x1, id=/subscriptions/dba5253c-7f60-45fe-86c5-a01af9f846f0/resourceGroups/kaggle-rg/providers/Microsoft.MachineLearningServices/workspaces/kaggle/computes/gpu-v100-x1, type=AmlCompute, provisioning_state=Succeeded, location=westus2, tags={}),
 'gpu-t4-lp': AmlCompute(workspace=Workspace.create(name='kaggle', subscription_id='dba5253c-7f60-45fe-86c5-a01af9f846f0', resource_group='kaggle-rg'), name=gpu-t4-lp, id=/subscr

In [48]:
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

# Configure the run: use ./sample_code/ as the source directory and execute
# main.sh on the 'gpu-t4-lp' compute target inside the chosen environment.
# `script=` expects a Python entry point, so a shell script must be launched
# through `command=` instead.
# NOTE(review): no dataset is attached to this config; if main.sh needs the
# uploaded dataset it still has to be passed in explicitly. Confirm.
src = ScriptRunConfig(source_directory="./sample_code/",
                      command=['bash', 'main.sh'],
                      compute_target='gpu-t4-lp',
                      environment=environment)

# Finally, submit the experiment
Don't worry if your run contains bugs! You can always cancel them :)

In [49]:
# Submit the configured script run to the experiment.
# The returned object is kept in `run` so the submitted run can be referred to later.
run = experiment.submit(config=src)