<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/top.png?raw=true'>

# Configuration (à lancer avant tous les notebooks)

In [1]:
# Show the version of the Python interpreter running this kernel
import platform
platform.python_version()

'3.7.8'

In [2]:
# List the installed packages (lets you check that the Azure dependencies are present)
!conda list

# packages in environment at /home/lab/anaconda3/envs/azure:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                 conda_forge    conda-forge
_openmp_mutex             4.5                       1_gnu    conda-forge
_py-xgboost-mutex         2.0                       cpu_0    conda-forge
adal                      1.2.4                    pypi_0    pypi
applicationinsights       0.11.9                   pypi_0    pypi
argon2-cffi               20.1.0           py37h8f50634_1    conda-forge
async_generator           1.10                       py_0    conda-forge
attrs                     20.2.0             pyh9f0ad1d_0    conda-forge
azure-common              1.1.25                   pypi_0    pypi
azure-core                1.8.1                    pypi_0    pypi
azure-graphrbac           0.61.1                   pypi_0    pypi
azure-identity            1.4.0                    pypi_0    pypi
azure-mgmt-authorizat

In [3]:
# Print the version of the Azure ML SDK
import azureml.core
print("Ready to use Azure ML", azureml.core.VERSION)

Ready to use Azure ML 1.14.0


Si le notebook est exécuté en dehors d'Azure, il faut télécharger le fichier config.json depuis le portail https://portal.azure.com/ et le placer dans le dossier qui contient le notebook.

Si le notebook est exécuté directement depuis le workspace Azure, le fichier de config devrait déjà être là.

In [4]:
# Connect to the workspace.
# `ws` is reused by the later cells of this notebook.
from azureml.core import Workspace

ws = Workspace.from_config()
print(ws.name, "loaded")

jt-dp100 loaded


# Envoyer les données sur la plateforme

In [9]:
from azureml.core import Dataset

# Registration metadata for the Titanic training data
dataset_name = 'titanic train dataset'
description='titanic training data'
target_path='titanic-data/'

# Single source of truth for the file being uploaded: the local path and the
# datastore path are both derived from it, so changing `target_path` or the
# file name cannot leave the dataset pointing at a stale location.
file_name = 'titanic.csv'
local_file = 'data/' + file_name
datastore_file = target_path.rstrip('/') + '/' + file_name

default_ds = ws.get_default_datastore()

# Only upload and register when no dataset with this name exists in the workspace
if dataset_name not in ws.datasets:
    default_ds.upload_files(files=[local_file], target_path=target_path, overwrite=True, show_progress=True)

    # Create a tabular dataset from the path on the datastore (this may take a short while)
    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, datastore_file))

    # Register the tabular dataset; a failure is reported but does not stop the notebook
    try:
        tab_data_set.register(workspace=ws,
                              name=dataset_name,
                              description=description,
                              tags={'format': 'CSV'},
                              create_new_version=True)

        print('Dataset registered.')
    except Exception as ex:
        print(ex)
else:
    print('Dataset already registered.')

Uploading an estimated of 1 files
Uploading data/titanic.csv
Uploaded data/titanic.csv, 1 files out of an estimated total of 1
Uploaded 1 files
Dataset registered.


In [10]:
# Look the dataset up by its registered name, load it as a pandas DataFrame,
# and display the first rows
titanic_ds = ws.datasets.get("titanic train dataset")
titanic_df = titanic_ds.to_pandas_dataframe()
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# Exécutez le script dans une expérience

In [13]:
from azureml.train.sklearn import SKLearn
from azureml.core import Experiment
from azureml.widgets import RunDetails

# Training dataset, looked up by its registered name
titanic_ds = ws.datasets.get("titanic train dataset")

# The dataset is handed to the entry script as a named input called 'titanic'
training_inputs = [titanic_ds.as_named_input('titanic')]

# Estimator: runs script/titanic_training.py on the 'local' compute target
estimator = SKLearn(
    source_directory='script',
    entry_script='titanic_training.py',
    compute_target='local',
    inputs=training_inputs,
    pip_packages=[],  # extra needed packages
)

# Experiment the run is submitted to
experiment_name = 'titanic-training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Submit the run, show the run-details widget, then wait for completion
run = experiment.submit(config=estimator)
RunDetails(run).show()
run.wait_for_completion()



_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'titanic-training_1604063710_13494b5d',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2020-10-30T13:15:15.348997Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': 'fd001427-f51b-4cd7-a236-26d6b59e684a',
  'azureml.git.repository_uri': 'https://github.com/jtobelem-simplon/dp100-brief-titanic.git',
  'mlflow.source.git.repoURL': 'https://github.com/jtobelem-simplon/dp100-brief-titanic.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': 'f5771dca70a5d065a52e485ab19e116fd3f55cc3',
  'mlflow.source.git.commit': 'f5771dca70a5d065a52e485ab19e116fd3f55cc3',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': '06df9fc4-3e40-483d-adc4-c5b344cc5566'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'titanic', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'titanic_training.py',
  'command': [],
  'useAbsolutePath': False,
  'argumen

# Ne pas oublier à la fin de l'expérience!!
(si votre travail a utilisé une instance de calcul)

<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/images/down.png?raw=true'>



## Stopper une machine à partir de son nom

In [None]:
# Name of the compute to stop (replace the placeholder with a real name)
compute_name = "XXXX"

if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]

    # Use the object looked up above; this cell must not depend on a
    # `compute` variable left over from another cell.
    print('try to stop compute', compute_target.name)
    compute_target.stop(show_output=True)

else:
    # No compute object exists in this branch, so report the requested name
    print('compute target not found', compute_name)

## Lister toutes les instances de calcul pour vérifier qu'elles sont éteintes

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute, ComputeInstance

# Print the name and status of every compute instance in the workspace,
# to check that they are all stopped
for compute in ComputeTarget.list(ws):
    # isinstance is the idiomatic type check (and also accepts subclasses)
    if isinstance(compute, ComputeInstance):
        print(compute.name, compute.get_status())


ds11-v2-jt {
  "errors": [],
  "creationTime": "2020-10-27T10:22:13.480810+00:00",
  "createdBy": {
    "userObjectId": "c72f668f-a536-4f37-8b23-8d0859ac17f8",
    "userTenantId": "0840dabf-0881-4071-9392-f25b2728592f",
    "userName": "jtobelem"
  },
  "modifiedTime": "2020-10-27T10:32:19.280185+00:00",
  "state": "Stopped",
  "vmSize": "STANDARD_DS11_V2"
}


# Ressources

[doc SDK azure](https://docs.microsoft.com/en-us/azure/developer/python/azure-sdk-overview)

[doc SDK azureML](https://docs.microsoft.com/fr-fr/python/api/overview/azure/ml/install?view=azure-ml-py)

[api azure](https://docs.microsoft.com/en-us/python/api/azureml-core)

[parcours d'apprentissage microsoft](https://docs.microsoft.com/fr-fr/learn/paths/build-ai-solutions-with-azure-ml-service/)

[le repository microsoft](https://github.com/MicrosoftDocs/mslearn-aml-labs.git)