# Работа с Azure ML Environments

## Соединение с Azure ML Workspace

Импорт необходимых модулей и проверка версии AzureML SDK:

In [1]:
import os

import azureml.core
from azureml.core import Workspace, Environment, Experiment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.estimator import Estimator
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

# Report which release of the core Azure ML SDK is loaded in this kernel
sdk_banner = f'SDK version: {azureml.core.VERSION}'
print(sdk_banner)

SDK version: 1.12.0


Устанавливаем соединение с Рабочей областью Azure ML:

In [2]:
# Connect to the Azure ML workspace via Workspace.from_config()
ws = Workspace.from_config()

# Header followed by the workspace name, resource group and region, one per indented line
workspace_summary = ('Connected Workspace:', ws.name, ws.resource_group, ws.location)
print(*workspace_summary, sep='\n\t')

Connected Workspace:
	ai-in-cloud-workspace
	ai-in-cloud-workshop-rg
	westeurope


## Получение списка доступных Azure ML Environments

Azure ML Environments делятся на:

- встроенные среды (начинаются с `AzureML`);
- собственные среды (не рекомендуется давать им имена, которые начинаются с `AzureML`).

Просмотрим список встроенных Сред (список пакетов, установленных в конкретной Среде, можно получить через `envs[<имя>].python.conda_dependencies.serialize_to_string()`):

In [6]:
# Environments visible in the workspace, keyed by environment name
envs = Environment.list(workspace=ws)

print('Environments:')
# Keep only the built-in environments, whose names carry the 'AzureML' prefix
curated_env_names = [env_name for env_name in envs if env_name.startswith('AzureML')]
for env_name in curated_env_names:
    print(f'\t{env_name}')

Environments:
	AzureML-AutoML
	AzureML-PyTorch-1.0-GPU
	AzureML-Scikit-learn-0.20.3
	AzureML-TensorFlow-1.12-CPU
	AzureML-PyTorch-1.2-GPU
	AzureML-TensorFlow-2.0-GPU
	AzureML-TensorFlow-2.0-CPU
	AzureML-Chainer-5.1.0-GPU
	AzureML-TensorFlow-1.13-CPU
	AzureML-Minimal
	AzureML-Chainer-5.1.0-CPU
	AzureML-PyTorch-1.4-GPU
	AzureML-PySpark-MmlSpark-0.15
	AzureML-PyTorch-1.3-CPU
	AzureML-PyTorch-1.1-GPU
	AzureML-TensorFlow-1.10-GPU
	AzureML-PyTorch-1.2-CPU
	AzureML-TensorFlow-1.13-GPU
	AzureML-Hyperdrive-ForecastDNN
	AzureML-TensorFlow-1.10-CPU
	AzureML-PyTorch-1.3-GPU
	AzureML-PyTorch-1.4-CPU
	AzureML-Tutorial
	AzureML-PyTorch-1.0-CPU
	AzureML-PyTorch-1.1-CPU
	AzureML-TensorFlow-1.12-GPU
	AzureML-VowpalWabbit-8.8.0
	AzureML-AutoML-DNN-GPU
	AzureML-AutoML-GPU
	AzureML-Designer-VowpalWabbit
	AzureML-TensorFlow-2.2-GPU
	AzureML-TensorFlow-2.2-CPU
	AzureML-PyTorch-1.6-CPU
	AzureML-PyTorch-1.6-GPU
	AzureML-Sidecar
	AzureML-Dask-CPU
	AzureML-Dask-GPU
	AzureML-TensorFlow-2.1-GPU
	AzureML-PyTorch-1.

## Создание собственного Azure ML Environment

Определим список необходимых пакетов и ML-фреймворков (зависимости) и способ их установки (`conda` и `pip`):

In [8]:
# Packages to install through conda and through pip, respectively
conda_package_names = ['scikit-learn', 'ipykernel', 'matplotlib', 'pandas']
pip_package_names = ['azureml-sdk', 'pyarrow']

# Combined dependency specification used by the custom environment
env_packages = CondaDependencies.create(
    conda_packages=conda_package_names,
    pip_packages=pip_package_names,
)

Создадим собственную Среду в Azure ML с необходимыми зависимостями:

In [9]:
# Name given to the custom environment
new_env_name = 'diabetes-experiment-env'

# Build the environment definition for the experiment
new_env = Environment(new_env_name)

# Attach the conda/pip dependency specification
new_env.python.conda_dependencies = env_packages

# Azure ML, not the user, manages installation of the dependencies
new_env.python.user_managed_dependencies = False

# Run inside a Docker container
new_env.docker.enabled = True

definition_message = f'Environment {new_env.name} was defined successfully.'
print(definition_message)

Environment diabetes-experiment-env was defined successfully.


Зарегистрируем Azure ML Environment:

In [10]:
# Register the environment in the workspace and echo the returned object as the cell output
registration_result = new_env.register(workspace=ws)
registration_result

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200723.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "diabetes-experiment-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "co

Просмотрим зарегистрированную Среду:

In [12]:
# Fetch the environment from the workspace instead of indexing `envs`:
# that dictionary was captured before the environment was registered, so on a
# fresh "Restart & Run All" it would be stale or would not contain this name at all.
registered_env_view = Environment.get(workspace=ws, name=new_env_name)

# Name and version of the registered environment
print(registered_env_view)

# Conda specification (channels and dependencies) rendered as text
print(registered_env_view.python.conda_dependencies.serialize_to_string())

Environment(Name: diabetes-experiment-env,
Version: 1)
channels:
- anaconda
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-sdk
  - pyarrow
- scikit-learn
- ipykernel
- matplotlib
- pandas
name: azureml_5d419b151e9c1ce888d07b7a6d7737fd



## Использование созданного Environment

Подготовим входные данные и исполняемый скрипт:

In [13]:
# Experiment name and the local folder that holds the training script
experiment_name = 'new_env_demo_exp'
experiment_dir = 'new_env_demo'
os.makedirs(experiment_dir, exist_ok=True)

# Look up the input dataset among the datasets exposed by the workspace
data_ds = ws.datasets.get('diabetes_db')
dataset_message = f'Used dataset {data_ds.name}: {data_ds.description}'
print(dataset_message)

# Retrieve the environment by name from the workspace
registered_env = Environment.get(ws, new_env_name)
reuse_message = f'Environment {registered_env.name} will be reused.'
print(reuse_message)

Used dataset diabetes_db: Diabetes Disease Database
Environment diabetes-experiment-env will be reused.


In [None]:
# Optional, disabled by default: download the training script into the new_env_demo folder.
# NOTE(review): the remote file is named diabetes-train-model.py, while the estimator cell
# uses entry_script='train-model.py' - presumably the file must be renamed after download
# (or saved under that name directly); confirm before enabling.
# !wget -P new_env_demo https://raw.githubusercontent.com/codez0mb1e/AI-in-cloud-workshop/azure-ml-ru/module-4/diabetes-train-model-experiment/diabetes-train-model.py 

In [14]:
# Configure a scikit-learn training run: script folder, named dataset input,
# entry script with its arguments, local compute and the registered environment
estimator = SKLearn(source_directory=experiment_dir,
                    inputs=[data_ds.as_named_input('diabetes')],
                    entry_script='train-model.py',
                    script_params={'--reg_rate': 0.1},
                    compute_target='local',
                    environment_definition=registered_env  # set environment here
                    )

# Create the experiment and submit the run
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=estimator)

# Show the monitoring widget before blocking on completion, so it can follow
# the run while it executes instead of appearing only after it has finished
RunDetails(run).show()

# Block until the run finishes, streaming its log; the trailing semicolon
# suppresses the cell echoing the call's return value
run.wait_for_completion(show_output=True);



RunId: new_env_demo_exp_1599494769_5a39b844
Web View: https://ml.azure.com/experiments/new_env_demo_exp/runs/new_env_demo_exp_1599494769_5a39b844?wsid=/subscriptions/9aef4ce1-e591-4870-9443-0b0eb98df2aa/resourcegroups/ai-in-cloud-workshop-rg/workspaces/ai-in-cloud-workspace

Streaming azureml-logs/70_driver_log.txt

[2020-09-07T16:06:14.439366] Entering context manager injector.
[context_manager_injector.py] Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError'], invocation=['train-model.py', '--reg_rate', '0.1'])
Starting the daemon thread to refresh tokens in background for process with pid = 8
Entering Run History Context Manager.
Current directory:  /azureml-run
Preparing to call script [ train-model.py ] with arguments: ['--reg_rate', '0.1']
After variable expansion, calling script [ train-model.py ] with arguments: ['--reg_rate', '0.1']

Script type

{'runId': 'new_env_demo_exp_1599494769_5a39b844',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-09-07T16:06:13.582148Z',
 'endTimeUtc': '2020-09-07T16:06:41.862204Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '9e32d060-4a48-49bc-bb49-ed2a304a43a3',
  'azureml.git.repository_uri': 'https://github.com/codez0mb1e/AI-in-cloud-workshop.git',
  'mlflow.source.git.repoURL': 'https://github.com/codez0mb1e/AI-in-cloud-workshop.git',
  'azureml.git.branch': 'azure-ml-ru',
  'mlflow.source.git.branch': 'azure-ml-ru',
  'azureml.git.commit': 'b6243cf4701110271eafb577dc4dcc38b8104349',
  'mlflow.source.git.commit': 'b6243cf4701110271eafb577dc4dcc38b8104349',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': 'cb5c8811-b586-488e-a01b-0cb8c00be54e'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'diabetes', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'train-model.py',
  'scriptType'

Просмотрим результаты обучения модели:

In [15]:
# Metrics logged by the run, printed as "name value" pairs
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)

# Blank separator, then every file the run produced
print('\n')
for file_name in run.get_file_names():
    print(file_name)

Regularization Rate 0.1
Accuracy 0.7788888888888889
AUC 0.846851712258014
ROC aml://artifactId/ExperimentRun/dcid.new_env_demo_exp_1599494769_5a39b844/ROC_1599494783.png


ROC_1599494783.png
azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
logs/azureml/dataprep/backgroundProcess.log
logs/azureml/dataprep/backgroundProcess_Telemetry.log
logs/azureml/dataprep/engine_spans_759936b0-accf-4206-8242-6b2c73d35a33.jsonl
logs/azureml/dataprep/python_span_759936b0-accf-4206-8242-6b2c73d35a33.jsonl
outputs/diabetes_model.pkl
