In [1]:
from azureml.core import Workspace, Experiment, ScriptRunConfig, Environment, Dataset, Datastore, ComputeTarget, ScriptRunConfig
import os
import azureml.core
from azureml.pipeline.steps import PythonScriptStep,EstimatorStep
from azureml.pipeline.core import Pipeline
from azureml.data import OutputFileDatasetConfig
import pandas as pd
from azureml.data.datapath import DataPath
import azureml.mlflow
import mlflow
from azureml.train.dnn import PyTorch
# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.20.0


In [2]:
# get workspace
workspace = Workspace.from_config()
workspace

Workspace.create(name='hal', subscription_id='91d27443-f037-45d9-bb0c-428256992df6', resource_group='robots')

In [3]:
# get compute target
compute_target = workspace.compute_targets['gandalf']

In [4]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.8
- pip:
  - azureml-core==1.18.0.post1
  - azureml-defaults==1.18.0
  - azureml-telemetry==1.18.0
  - azureml-train-restclients-hyperdrive==1.18.0
  - azureml-train-core==1.18.0
  - cmake
  - torch
  - mkl
  - future
  - numpy
  - scikit-learn
  - pandas
  - matplotlib
  - torchtext
  - azureml-mlflow
  - mlflow
  - azureml-contrib-fairness
  - fairlearn

Overwriting conda_dependencies.yml


In [5]:
# From a Conda specification file
env = Environment.from_conda_specification(name = "PyTorch-NLP-GPU-V1",
                                             file_path = "conda_dependencies.yml")

In [6]:
# get reviews datastore (where all the output datasets will live)
datastore = Datastore.get(workspace, 'tacoreviews')
datastore

{
  "name": "tacoreviews",
  "container_name": "tacoreviews",
  "account_name": "haldatasets",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [7]:
prepared_reviews_ds = Dataset.get_by_name(workspace, name='tacoreviewsprep', version="9")
prepared_reviews_ds

{
  "source": [
    "('tacoreviews', 'prep/c5a19496-0bf7-485e-845e-5b42921a4448')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "786710d3-d1aa-4ce9-aa12-01b8540593d8",
    "name": "tacoreviewsprep",
    "version": 9,
    "workspace": "Workspace.create(name='hal', subscription_id='91d27443-f037-45d9-bb0c-428256992df6', resource_group='robots')"
  }
}

In [8]:
# Output Dataset
train_ds = OutputFileDatasetConfig(destination=(datastore, 'train/{run-id}')).register_on_complete(name='tacoreviewstrain')
train_ds

<azureml.data.output_dataset_config.OutputFileDatasetConfig at 0x7f639200a8d0>

In [55]:
experiment_name = 'nlp-sentiment-reviews-train'
src = ScriptRunConfig(
    source_directory='.',
    script='train.py',
    arguments=[
        "--source_path", prepared_reviews_ds.as_mount(),
        "--target_path", train_ds,
        "--epochs", 50,
        "--experiment_name", experiment_name ],
    compute_target=compute_target,
    environment=env
)

In [56]:
exp = Experiment(workspace, name=experiment_name)
run = exp.submit(src)
run.tag("optimizer", "SGD")
run.tag("loss", "CrossEntropyLoss")
run.tag("lr", 4)

Converting non-string tag to string: (lr: 4)
