# Création et gestion de ressources

## Créer workspace

In [None]:
from azureml.core import Workspace

# Provision a new Azure ML workspace. create_resource_group=True asks the SDK
# to create the resource group as well. The subscription id is a placeholder.
ws = Workspace.create(
    name='aml-workspace',
    subscription_id='123456-abc-123...',
    resource_group='aml-resources',
    create_resource_group=True,
    location='eastus',
)

## Réutiliser un workspace déjà créé

In [None]:
import azureml.core
from azureml.core import Workspace

# Attach to an existing workspace identified by its name, subscription and
# resource group.
# NOTE(review): the subscription id is hard-coded here; consider loading it
# from a config file (see below) before sharing this notebook.
ws = Workspace.get(name="Preparation-Flo-AI102-Clermont",
                   subscription_id= "0252a218-2d27-4d77-a4f1-b638272c95e0",
                   resource_group="cloud-shell-storage-westeurope")

# To write these settings to a config file:
#   ws.write_config(path="./file-path", file_name="ws_config.json")
# The config file can also be retrieved from the Azure portal (from the
# workspace page).
#
# The config file is a JSON document of the form:
#   {
#       "subscription_id": "<subscription-id>",
#       "resource_group": "<resource-group>",
#       "workspace_name": "<workspace-name>"
#   }
# (Kept as comments: a bare string literal at the end of a cell is echoed as
# the cell's output.)

Si utilisation d'un fichier config pour charger le workspace

In [3]:
# Load the workspace from a saved config file and confirm which one was loaded.
ws = Workspace.from_config()
print(ws.name, "loaded")

Preparation-AI102-Florian loaded


In [None]:
import pandas as pd  # needed by pd.read_csv below; it was never imported before this cell
from azureml.core import Experiment

# Experiment handle named "my-experiment" in the workspace
experiment = Experiment(workspace = ws, name = "my-experiment")

# Start an interactive run against which metrics are logged
run = experiment.start_logging()

# --- experiment code goes here ---
# Load the dataset and count the rows
data = pd.read_csv('data.csv')
row_count = (len(data))
# Log the row count as a named metric
run.log('observations', row_count)

# End the experiment run
run.complete()

On peut utiliser:

    log : enregistrez une seule valeur nommée.
    log_list : enregistrez une liste nommée de valeurs.
    log_row : enregistrez une ligne avec plusieurs colonnes.
    log_table : enregistrez un dictionnaire sous forme de table.
    log_image : enregistrez un fichier image ou un tracé.



Pour journaliser davantage d'éléments (entraînement de modèles, etc.), utiliser MLflow : [mlflow for azure](https://learn.microsoft.com/fr-fr/azure/machine-learning/how-to-log-view-metrics?tabs=interactive)

In [None]:
import json

from azureml.widgets import RunDetails

# Display the logged metrics of the run in the notebook widget
RunDetails(run).show()

# Retrieve the metrics and print them as indented JSON
metrics = run.get_metrics()
print(json.dumps(metrics, indent=2))

# Transfer a file from the machine where the experiment runs to the run's
# outputs folder, which is independent of that machine
run.upload_file(
    name='outputs/sample.csv',
    path_or_stream='./sample.csv',
)

Un "script d’expérience" = un **objet Run** + context dans un fichier .py

In [None]:
# Experiment script, saved as experiment.py

from azureml.core import Run
import pandas as pd
import matplotlib.pyplot as plt
import os

# Get the experiment run context
run = Run.get_context()

# Load the dataset
data = pd.read_csv('data.csv')

# Count the rows and log the result
row_count = (len(data))
run.log('observations', row_count)

# Save a sample of at most 100 rows. DataFrame.sample raises ValueError when
# asked for more rows than the frame holds, so the size is capped at row_count.
os.makedirs('outputs', exist_ok=True)
data.sample(min(100, row_count)).to_csv("outputs/sample.csv", index=False, header=True)

# Complete the run
run.complete()

On execute un script (.py contenant l'objet run) dans une **Experience** 

In [None]:
from azureml.core import Experiment, ScriptRunConfig

# Describe which script to run and from which folder.
# NOTE(review): experiment_folder is only assigned in a later cell of this
# notebook; it must be defined before this cell is executed.
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='experiment.py',
)

# Submit the script as a run of the experiment and block until it finishes,
# streaming its output
experiment = Experiment(workspace = ws, name = 'my-experiment')
run = experiment.submit(config=script_config)
run.wait_for_completion(show_output=True)

In [None]:
# With name='outputs/*' the items are stored in the run's outputs folder.
# NOTE(review): `fig` is not created in this cell; it is expected to be an
# existing figure object.

run.log_image(name='outputs/label distribution', plot=fig) # fig is the image to log
run.upload_file(name='outputs/sample.csv', path_or_stream='./sample.csv')

## Trois manieres de voir les details de l'experiment

Avec un widget dans le notebook

In [None]:
# Show the run in the notebook widget.
from azureml.widgets import RunDetails
RunDetails(run).show()

En regardant les metriques et les fichiers enregistrés dans le dossier outputs pendant le run

(On pourrait aussi écrire les métriques dans un fichier JSON si on le voulait.)

In [None]:
import json

# Print every logged metric as "name : value"
print("Metrics:")
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, ":", metric_value)

# Print the names of the files attached to the run
print("\nFiles:")
files = run.get_file_names()
for file in files:
    print(file)

En telechargeant le dossier outputs

In [None]:
import os

download_folder = 'downloaded-files'

# Download the run files whose names start with "outputs"
run.download_files(prefix='outputs', output_directory=download_folder)

# Walk the download folder and print the path of each file found
for folder, _subfolders, names in os.walk(download_folder):
    for name in names:
        print(os.path.join(folder, name))

On peut aussi télécharger tous les logs

In [None]:
import os
# Local folder that receives the log files
log_folder = 'downloaded-logs'
# Download all log files of the run
run.get_all_logs(destination=log_folder)

On peut aussi avoir des details sur l'experiment 

In [None]:
# Display the run details together with its logs (shown as the cell output).
run.get_details_with_logs()

Jusque-là, on a exécuté des experiments depuis notre notebook.  
Mais on peut aussi écrire des experiments dans des fichiers .py et les appeler.  
Ici, on a écrit un script et les données qui vont avec, et on les a placés dans le dossier `experiment_folder` :

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.widgets import RunDetails

# Folder holding the script and its data (placeholder path)
experiment_folder='chemin vers le dossier'

# Python environment for the experiment, built from a conda .yml file
env = Environment.from_conda_specification("experiment_env", "environment.yml")

# Script configuration: script, environment and Docker setting
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='diabetes_experiment.py',
    environment=env,
    docker_runtime_config=DockerConfiguration(use_docker=True),
)

# Submit the experiment, show the widget, then wait for the run to finish
experiment = Experiment(workspace=ws, name='mslearn-diabetes')
run = experiment.submit(config=script_config)
RunDetails(run).show()
run.wait_for_completion()

Pour plus de details voir 
[Notebook4 officiel azure](./mslearn-dp100/04%20-%20Run%20Experiments.ipynb)

Pour utiliser MLflow, vérifier d'abord que le package `azureml-mlflow` est installé :  
`pip show azureml-mlflow`

In [None]:
import mlflow
import pandas as pd
from azureml.core import Experiment

# Point MLflow tracking at the workspace
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# Azure ML experiment, also selected as the active MLflow experiment
experiment = Experiment(workspace=ws, name='mslearn-diabetes-mlflow')
mlflow.set_experiment(experiment.name)

# Everything inside the with-block is tracked as one MLflow run
with mlflow.start_run():
    print("Starting experiment:", experiment.name)

    # Load the data, count the rows and log the count as a metric
    data = pd.read_csv('data/diabetes.csv')
    row_count = (len(data))
    mlflow.log_metric('observations', row_count)

    print("Run complete")

Pour plus de details voir 
[Notebook4 officiel azure](./mslearn-dp100/04%20-%20Run%20Experiments.ipynb)

**Entrainer un model**  
Les étapes sont les suivantes :

    -Etape 1 : faire un script d'entrainement
    -Etape 2 : soumettre le  script d'entrainement
    -Etape 3 : Enregistrer/inscrire(=versionner le model) dans le workspace
Pour plus de details voir 
[Notebook5 officiel azure](./mslearn-dp100/05%20-%20Train%20Models.ipynb)

Telecharger localement un model du run

In [None]:
# `run` refers to a completed experiment run.

# Print the name of each file generated by the experiment
run_files = run.get_file_names()
for file in run_files:
    print(file)

# Download one named file from the run to a local path
run.download_file(
    name='outputs/model.pkl',
    output_file_path='model.pkl',
)

In [None]:
from azureml.core import Model

# Case 1: register a model file that is stored locally
model = Model.register(
    workspace=ws,
    model_name='classification_model',
    model_path='model.pkl',  # local path
    description='A classification model',
    tags={'data-format': 'CSV'},
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version='0.20.3',
)

# Case 2: register a model file that was saved during a run
run.register_model(
    model_name='classification_model',
    model_path='outputs/model.pkl',  # path inside the run outputs
    description='A classification model',
    tags={'data-format': 'CSV'},
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version='0.20.3',
)

In [None]:
# List the registered models of the workspace.
# Note: the loop variable reuses the cell-level name `model`.
from azureml.core import Model
for model in Model.list(ws):
    # Print the model name and its auto-generated version
    print(model.name, 'version:', model.version)

# Data

In [None]:
from azureml.core import Workspace, Datastore
# Get the default datastore - usually the blob storage
default_ds = ws.get_default_datastore()
# Print the name of every datastore of the workspace
for ds_name in ws.datastores:
    print(ds_name)

In [None]:
from azureml.core import Workspace, Datastore

ws = Workspace.from_config()

# Register a new datastore backed by an Azure blob container.
# NOTE(review): account_key is a placeholder; a real key should not be
# hard-coded in the notebook.
blob_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='blob_data',
    container_name='data_container',
    account_name='az_store_acct',
    account_key='123456abcde789…',
)

# Retrieve the datastore by name
blob_store = Datastore.get(ws, datastore_name='blob_data')

# Make it the default datastore of the workspace
ws.set_default_datastore('blob_data')

Envoyer des données au storage (ici blob)

In [None]:
from azureml.core import Dataset
from azureml.data.datapath import DataPath

# Upload a local folder to the default datastore
default_ds = ws.get_default_datastore()
Dataset.File.upload_directory(src_dir='data',   # local path
                              target=DataPath(default_ds, 'diabetes-data/'))  # path on the datastore (closing parenthesis was missing)

**Créer et inscrire un jeu de données**

    - Tabulaire

In [None]:
# Build a tabular dataset from the CSV files on the datastore
# (this may take a short while)
csv_path = (default_ds, 'diabetes-data/*.csv')
tab_data_set = Dataset.Tabular.from_delimited_files(path=csv_path)

# Minimal registration: name only
tab_data_set = tab_data_set.register(workspace=ws, name='csv_table')

# Registration with description, tags and a new version
tab_data_set = tab_data_set.register(
    workspace=ws,
    name='diabetes dataset',
    description='diabetes data',
    tags = {'format':'CSV'},
    create_new_version=True,
)


    - De Fichiers

In [None]:
# Build a file dataset from the CSV files on the datastore
csv_files = (default_ds, 'diabetes-data/*.csv')
file_data_set = Dataset.File.from_files(path=csv_files)

# Minimal registration: name only
file_data_set = file_data_set.register(workspace=ws, name='csv_file')

# Registration with description, tags and a new version
file_data_set = file_data_set.register(
    workspace=ws,
    name='diabetes file dataset',
    description='diabetes files',
    tags = {'format':'CSV'},
    create_new_version=True,
)

Retrieve datasets

In [None]:
# Retrieve a registered dataset by name. The version defaults to the latest;
# here version 2 is requested explicitly.
img_ds = Dataset.get_by_name(workspace=ws,name='img_files', version=2)

## Passer des données tabulaires à un script d'entrainement
Il y a deux manieres de faire:

    - Utiliser un argument de script  

In [None]:
# ScriptRunConfig: pass a tabular dataset to the script as a script argument.
# Imports added so the cell does not rely on names imported by other cells
# (CondaDependencies is not imported anywhere before this point).
from azureml.core import Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

# Environment with the packages needed to read the dataset as pandas
env = Environment('my_env')
packages = CondaDependencies.create(conda_packages=['pip'],
                                    pip_packages=['azureml-defaults',
                                                  'azureml-dataprep[pandas]'])
env.python.conda_dependencies = packages

# NOTE(review): tab_ds is not defined in the visible cells (the tabular
# dataset registered earlier is named tab_data_set) - confirm the name.
script_config = ScriptRunConfig(source_directory='my_dir',
                                script='script.py',
                                arguments=['--ds', tab_ds],
                                environment=env)

In [None]:
# script.py
import argparse

from azureml.core import Run, Dataset

# Parse the --ds argument; the parser was used without ever being created
parser = argparse.ArgumentParser()
parser.add_argument('--ds', type=str, dest='dataset_id')
args = parser.parse_args()

# Look the dataset up by id in the workspace of the current run
run = Run.get_context()
ws = run.experiment.workspace
dataset = Dataset.get_by_id(ws, id=args.dataset_id)
data = dataset.to_pandas_dataframe()

# Cette façon retrouve le dataset a partir de son ID

    - Utiliser une entrée nommée
(le nom du jeu de données qui sera recuperé grace au contexte ws)


In [None]:
# ScriptRunConfig: pass a tabular dataset to the script as a named input.
# Imports added so the cell does not rely on names imported by other cells
# (CondaDependencies is not imported anywhere before this point).
from azureml.core import Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

env = Environment('my_env')
packages = CondaDependencies.create(conda_packages=['pip'],
                                    pip_packages=['azureml-defaults',
                                                  'azureml-dataprep[pandas]'])
env.python.conda_dependencies = packages

# NOTE(review): tab_ds is not defined in the visible cells (the tabular
# dataset registered earlier is named tab_data_set) - confirm the name.
script_config = ScriptRunConfig(source_directory='my_dir',
                                script='script.py',
                                arguments=['--ds', tab_ds.as_named_input('my_dataset')],
                                environment=env)

In [None]:
# script.py
import argparse

from azureml.core import Run

# Parse the --ds argument; the parser was used without ever being created
parser = argparse.ArgumentParser()
parser.add_argument('--ds', type=str, dest='ds_id')
args = parser.parse_args()

# Fetch the dataset through its input name from the run context
run = Run.get_context()
dataset = run.input_datasets['my_dataset']
data = dataset.to_pandas_dataframe()


## Passer des fichiers à un script d'entrainement
On doit spécifier le mode de transmission des fichiers :  
     - **as_download()**
telecharge les données sur la cible de calcul  
     - **as_mount()**
stream les données du storage vers la cible de calcul  


Il y a deux manieres de faire:

    - Avec un argument de script  

In [None]:
#Scriptrunconfig
env = Environment('my_env')
packages = CondaDependencies.create(conda_packages=['pip'],
                                    pip_packages=['azureml-defaults',
                                                  'azureml-dataprep[pandas]'])
env.python.conda_dependencies = packages

script_config = ScriptRunConfig(source_directory='my_dir',
                                script='script.py',
                                arguments=['--ds', file_ds.as_download()],
                                environment=env) 

In [None]:
# script.py
import argparse
import glob

from azureml.core import Run

# Parse the --ds argument; the parser was used without ever being created
parser = argparse.ArgumentParser()
parser.add_argument('--ds', type=str, dest='ds_ref')
args = parser.parse_args()
run = Run.get_context()

# The argument value is used as a folder path: collect the .jpg files in it
imgs = glob.glob(args.ds_ref + "/*.jpg")

    - Utiliser une entrée nommée
(le nom du jeu de données qui sera recuperé grace au contexte ws)


In [None]:
#Scriptrunconfig
env = Environment('my_env')
packages = CondaDependencies.create(conda_packages=['pip'],
                                    pip_packages=['azureml-defaults',
                                                  'azureml-dataprep[pandas]'])
env.python.conda_dependencies = packages

script_config = ScriptRunConfig(source_directory='my_dir',
                                script='script.py',
                                arguments=['--ds', file_ds.as_named_input('my_ds').as_download()],
                                environment=env)

In [None]:
# script.py
import argparse
import glob

from azureml.core import Run

# Parse the --ds argument; the parser was used without ever being created
parser = argparse.ArgumentParser()
parser.add_argument('--ds', type=str, dest='ds_ref')
args = parser.parse_args()
run = Run.get_context()

# The named input is used as a folder path: collect the .jpg files in it
dataset = run.input_datasets['my_ds']
imgs= glob.glob(dataset + "/*.jpg")

## Environnements virtuels

On crée un fichier .yml :

     - exemple d'un fichier conda.yml:

In [None]:
# Example content of a conda.yml file, kept as a string literal for reference.
"""
name: py_env
dependencies:
  - numpy
  - pandas
  - scikit-learn
  - pip:
    - azureml-defaults
    """

Creation de l'environnement **sur base du fichier .yml**

In [None]:
from azureml.core import Environment

# Build the environment from the conda specification file
env = Environment.from_conda_specification(
    name='training_environment',
    file_path='./conda.yml',
)

Creation de l'environnement **en specifiant les packages**

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Empty environment, then attach the conda and pip packages it needs
env = Environment('training_environment')
deps = CondaDependencies.create(
    conda_packages=['scikit-learn','pandas','numpy'],
    pip_packages=['azureml-defaults'],
)
env.python.conda_dependencies = deps

On utilise un environnement déjà créé.  
Par défaut, **cet environnement sera exécuté dans un conteneur Docker**,  
mais on peut passer `use_docker=False` :  
l'environnement sera alors créé directement sur la cible de calcul.

In [None]:
from azureml.core import Experiment, ScriptRunConfig
from azureml.core.runconfig import DockerConfiguration

# Docker setting for the run, passed to the script configuration below
docker_config = DockerConfiguration(use_docker=True)

script_config = ScriptRunConfig(
    source_directory='my_folder',
    script='my_script.py',
    environment=env,
    docker_runtime_config=docker_config,
)

In [None]:
# The base Docker image can be changed
env.docker.base_image='my-base-image'
env.docker.base_image_registry='myregistry.azurecr.io/myimage'  # closing quote was missing

# Or the image can be built on demand from a Dockerfile
env.docker.base_image = None
env.docker.base_dockerfile = './Dockerfile'

# The run can also be forced to use a Python installation, with its
# libraries, that is already present in the container

env.python.user_managed_dependencies=True
env.python.interpreter_path = '/opt/miniconda/bin/python'

Enregistrer un environnement pour le réutiliser

In [None]:
from azureml.core import Environment

# Register the environment in the workspace
env.register(workspace=ws)

# Print the name of every environment found in the workspace
env_names = Environment.list(workspace=ws)
for env_name in env_names:
    print('Name:', env_name)

In [None]:
# Retrieve a registered environment and reuse it in a script configuration
from azureml.core import Environment, ScriptRunConfig
training_env = Environment.get(workspace=ws, name='training_environment')
script_config = ScriptRunConfig(source_directory='my_folder',
                                script='my_script.py',
                                environment=training_env)  # closing parenthesis was missing

## Créer Cible de calcul

**cluster de calcul**

In [None]:
# Create a compute cluster
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute

# Workspace loaded from the saved config file
ws = Workspace.from_config()

# Name of the compute (unique within the workspace)
compute_name = 'aml-cluster'

# Compute configuration: VM size, node range and priority
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_DS11_V2',
    min_nodes=0,
    max_nodes=4,
    vm_priority='dedicated',
)

# Create the compute and wait for completion, showing the output
aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)
aml_cluster.wait_for_completion(show_output=True)

In [None]:
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, DatabricksCompute

# Workspace loaded from the saved config file
ws = Workspace.from_config()

# Name of the compute (unique within the workspace)
compute_name = 'db_cluster'

# Settings of the existing Azure Databricks cluster.
# NOTE(review): the access token is a placeholder; a real token should not be
# hard-coded in the notebook.
db_workspace_name = 'db_workspace'
db_resource_group = 'db_resource_group'
db_access_token = '1234-abc-5678-defg-90...'
db_config = DatabricksCompute.attach_configuration(
    resource_group=db_resource_group,
    workspace_name=db_workspace_name,
    access_token=db_access_token,
)

# Attach the compute target and wait for completion
databricks_compute = ComputeTarget.attach(ws, compute_name, db_config)
databricks_compute.wait_for_completion(True)

In [None]:
# Recap: a compute target is either created ...
# NOTE(review): these are live calls, not comments; running this cell issues
# both requests again with the current value of compute_name - confirm this
# is intended.
ComputeTarget.create(ws, compute_name, compute_config)
# ... or attached
ComputeTarget.attach(ws, compute_name, db_config)

In [None]:
# Reuse the cluster when it already exists, otherwise create it.
# The original cell stopped at an empty `except` (a SyntaxError) and never
# imported ComputeTargetException.
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_name = 'aml-cluster'

try:
    aml_cluster = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing cluster.')
except ComputeTargetException:
    # No compute target with that name: provision a new cluster
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2',
                                                           min_nodes=0, max_nodes=4,
                                                           vm_priority='dedicated')
    aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)
    aml_cluster.wait_for_completion(show_output=True)

## Choix de la cible de calcul

In [None]:
from azureml.core import Environment, ScriptRunConfig

compute_name = 'aml-cluster'
training_env = Environment.get(workspace=ws, name='training_environment')

# Option 1: pass the NAME of the compute target
script_config = ScriptRunConfig(
    source_directory='my_dir',
    script='script.py',
    environment=training_env,
    compute_target=compute_name,
)

# Option 2: pass a ComputeTarget object (this replaces the script_config above)
compute_name = "aml-cluster"
training_cluster = ComputeTarget(workspace=ws, name=compute_name)
script_config = ScriptRunConfig(
    source_directory='my_dir',
    script='script.py',
    environment=training_env,
    compute_target=training_cluster,
)

## Pipeline

     - PythonScriptStep : Exécute un script Python spécifié.
     - DataTransferStep : Utilise Azure Data Factory pour copier des données entre des magasins de données.
     - DatabricksStep : Exécute un notebook, un script ou un fichier JAR compilé sur un cluster Databricks.
     - AdlaStep : Exécute une tâche U-SQL dans Azure Data Lake Analytics.
     - ParallelRunStep : Exécute un script Python comme tâche distribuée sur plusieurs nœuds de calcul.


Transmission des données entre les étapes du pipeline

In [None]:
from azureml.core import Experiment
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep

# Dataset holding the initial data
raw_ds = Dataset.get_by_name(ws, 'raw_dataset')

# Object used to pass data between steps: it names the temporary folder
# through which the data is handed over
data_store = ws.get_default_datastore()
prepped_data = OutputFileDatasetConfig('prepped')

# Step 1: data preparation script. It receives the raw dataset as a named
# input and the hand-over object as its output folder argument.
step1 = PythonScriptStep(
    name = 'prepare data',
    source_directory = 'scripts',
    script_name = 'data_prep.py',
    compute_target = 'aml-cluster',
    arguments = [
        '--raw-ds', raw_ds.as_named_input('raw_data'),
        '--out_folder', prepped_data,
    ],
)

# Step 2: training script. It receives the output of step 1 as an input
# through a script argument.
step2 = PythonScriptStep(
    name = 'train model',
    source_directory = 'scripts',
    script_name = 'train_model.py',
    compute_target = 'aml-cluster',
    arguments=['--training-data', prepped_data.as_input()],
)

# Construct the pipeline from the two steps
train_pipeline = Pipeline(workspace = ws, steps = [step1,step2])

# Create an experiment and run the pipeline
experiment = Experiment(workspace = ws, name = 'training-pipeline')
pipeline_run = experiment.submit(train_pipeline)

In [None]:
# Code of data_prep.py (one of the two pipeline scripts)
import argparse
import os

from azureml.core import Run

# Experiment run context
run = Run.get_context()

# Script arguments. --out_folder carries the reference to the
# OutputFileDatasetConfig, which the script uses as a local folder.
parser = argparse.ArgumentParser()
parser.add_argument('--raw-ds', type=str, dest='raw_dataset_id')
parser.add_argument('--out_folder', type=str, dest='folder')
args = parser.parse_args()
output_folder = args.folder

# Read the named input dataset as a dataframe
raw_df = run.input_datasets['raw_data'].to_pandas_dataframe()

# Data preparation (here: just keep specific columns)
kept_columns = ['col1', 'col2', 'col3']
prepped_df = raw_df[kept_columns]

# Write the prepared data into the output folder
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, 'prepped_data.csv')
prepped_df.to_csv(output_path)

## long long pipelines and re-use of intermediary step

In [None]:
# NOTE(review): run_config is not defined in the visible cells - it must
# exist before this cell is executed.
step1 = PythonScriptStep(name = 'prepare data',
                         source_directory = 'scripts',
                         script_name = 'data_prep.py',
                         compute_target = 'aml-cluster',
                         runconfig = run_config,
                         inputs=[raw_ds.as_named_input('raw_data')],
                         outputs=[prepped_data],
                         # data_prep.py declares '--out_folder'; the previous
                         # '--folder' is not an option of its argument parser
                         arguments = ['--out_folder', prepped_data],
                         # Step reuse: True to reuse the step, False to disable reuse
                         allow_reuse = True)

# Choose true to re-use
# Attention reuse => resultats obsolete si modif des données pas pris en compte


In [None]:
# Force all steps to run (ignoring the reuse configuration)
pipeline_run = experiment.submit(train_pipeline, regenerate_outputs=True)

After you have created a pipeline, you can **publish** it to create a REST endpoint through which **the pipeline can be run on demand**.

In [None]:
# Publish the pipeline built above. The Pipeline object of this notebook is
# named train_pipeline; `pipeline` is not defined in the visible cells.
published_pipeline = train_pipeline.publish(name='training_pipeline',
                                            description='Model training pipeline',
                                            version='1.0')

Alternatively, you can **call** the publish method **on a successful run of the pipeline**:

In [None]:
# Take the first run returned for the experiment (the most recent one).
# Note: this rebinds the cell-level name `run`.
pipeline_experiment = ws.experiments.get('training-pipeline')
run = list(pipeline_experiment.get_runs())[0]

# Publish the pipeline from that run
published_pipeline = run.publish_pipeline(
    name='training_pipeline',
    description='Model training pipeline',
    version='1.0',
)

You can also determine the URI of its **endpoint** like this

In [None]:
# URI of the REST endpoint of the published pipeline
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

To initiate a published endpoint, you make an **HTTP request to its REST endpoint**,  
 passing an **authorization header** with a token for a service principal with permission to run the pipeline, **and a JSON payload** specifying the experiment name.  
 The pipeline is run asynchronously, so the response from a successful REST call includes the run ID. You can use this to track the run in Azure Machine Learning studio.

In [None]:
# Send a REST request that runs the published pipeline, then display the
# returned run ID.
# NOTE(review): auth_header is not defined in the visible cells; it must hold
# the authorization header before this cell is executed.
import requests

payload = {"ExperimentName": "run_training_pipeline"}
response = requests.post(rest_endpoint, headers=auth_header, json=payload)
run_id = response.json()["Id"]
print(run_id)

## Defining parameters for a pipeline
     - You must define parameters for a pipeline before publishing it.

In [None]:
from azureml.pipeline.core.graph import PipelineParameter

# Pipeline parameter named 'reg_rate' with a default value
reg_param = PipelineParameter(name='reg_rate', default_value=0.01)
...
# The parameter is handed to the step as a script argument.
# NOTE(review): this 'train model' step points at data_prep.py while the
# earlier 'train model' step used train_model.py - confirm the script name.
step2 = PythonScriptStep(
    name = 'train model',
    source_directory = 'scripts',
    script_name = 'data_prep.py',
    compute_target = 'aml-cluster',
    arguments=[
        '--in_folder', prepped_data,
        '--reg', reg_param,
    ],
    inputs=[prepped_data],
)

In [None]:
# After you publish a parameterized pipeline, you can pass parameter values in the JSON payload 
response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "run_training_pipeline",
                               "ParameterAssignments": {"reg_rate": 0.1}})

## Scheduling a pipeline for periodic interval

In [None]:
from azureml.pipeline.core import ScheduleRecurrence, Schedule

# Recurrence: frequency 'Day' with interval 1
daily = ScheduleRecurrence(frequency='Day', interval=1)

# Schedule for the published pipeline, driven by that recurrence
pipeline_schedule = Schedule.create(
    ws,
    name='Daily Training',
    description='trains model every day',
    pipeline_id=published_pipeline.id,
    experiment_name='Training_Pipeline',
    recurrence=daily,
)

## Triggering a pipeline run on data changes

In [None]:
from azureml.core import Datastore
from azureml.pipeline.core import Schedule

# Datastore and path passed to the schedule as the location to react to
training_datastore = Datastore(workspace=ws, name='blob_data')

pipeline_schedule = Schedule.create(
    ws,
    name='Reactive Training',
    description='trains model on data change',
    pipeline_id=published_pipeline.id,
    experiment_name='Training_Pipeline',
    datastore=training_datastore,
    path_on_datastore='data/training',
)

# Un Autre Pipeline de A à Z

**Load workspace**

In [None]:
# Load the workspace from the saved config file.
from azureml.core import Workspace
ws = Workspace.from_config()

**Import / Prepare Data**

In [None]:
from azureml.core import Dataset
from azureml.data.datapath import DataPath

# Datastore to use
default_ds = ws.get_default_datastore()

# Upload the local files to the datastore
Dataset.File.upload_directory(
    src_dir='data',  # local folder
    target=DataPath(default_ds, 'diabetes-data/'),
)

# Create a tabular dataset from the uploaded files
uploaded_csv = (default_ds, 'diabetes-data/*.csv')
tab_data_set = Dataset.Tabular.from_delimited_files(path=uploaded_csv)

# Register the tabular dataset
tab_data_set = tab_data_set.register(
    workspace=ws,
    name='diabetes dataset',
    description='diabetes data',
    tags = {'format':'CSV'},
    create_new_version=True,
)
print('Dataset registered.')

**Scripts (un pour chaque étape du pipeline)**

**Script 1 - Preparation des données**

In [None]:
%%writefile $experiment_folder/prep_diabetes.py
# Data preparation step: reads the raw dataset from the run inputs, drops
# rows with nulls, min-max scales the numeric columns and writes data.csv
# into the output folder.
# Import libraries
import os
import argparse
import pandas as pd
from azureml.core import Run
from sklearn.preprocessing import MinMaxScaler

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--input-data", type=str, dest='raw_dataset_id', help='raw dataset')
parser.add_argument('--prepped-data', type=str, dest='prepped_data', default='prepped_data', help='Folder for results')
args = parser.parse_args()
# Folder that receives the prepared data
save_folder = args.prepped_data

# Get the experiment run context
run = Run.get_context()

# Load the data (passed as the input dataset named 'raw_data')
print("Loading Data...")
diabetes = run.input_datasets['raw_data'].to_pandas_dataframe()

# Log raw row count
row_count = (len(diabetes))
run.log('raw_rows', row_count)

# Remove rows containing nulls
diabetes = diabetes.dropna()

# Normalize the numeric columns listed below with a min-max scaler
scaler = MinMaxScaler()
num_cols = ['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree']
diabetes[num_cols] = scaler.fit_transform(diabetes[num_cols])

# Log the row count after processing
row_count = (len(diabetes))
run.log('processed_rows', row_count)

# Save the prepped data as data.csv in the output folder
print("Saving Data...")
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder,'data.csv')
diabetes.to_csv(save_path, index=False, header=True)

# End the run
run.complete()

**Script 2 - Training**

In [None]:
%%writefile $experiment_folder/train_diabetes.py
# Pipeline step 2: train a decision tree on the prepared data, log its metrics and register the model.
from azureml.core import Run, Model
import argparse
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Get parameters: --training-data is the folder that contains data.csv
parser = argparse.ArgumentParser()
parser.add_argument("--training-data", type=str, dest='training_data', help='training data')
args = parser.parse_args()
training_data = args.training_data

# Get the experiment run context
run = Run.get_context()

# Load the prepared data file from the training folder
print("Loading Data...")
file_path = os.path.join(training_data,'data.csv')
diabetes = pd.read_csv(file_path)

# Separate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

# Split data into training set (70%) and test set (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train a decision tree model
print('Training a decision tree model...')
model = DecisionTreeClassifier().fit(X_train, y_train)

# Calculate accuracy.
# The builtin float() replaces np.float, an alias that was removed in NumPy 1.24.
y_hat = model.predict(X_test)
acc = float(np.average(y_hat == y_test))
print('Accuracy:', acc)
run.log('Accuracy', acc)

# Calculate AUC from the probability of the positive class
y_scores = model.predict_proba(X_test)
auc = float(roc_auc_score(y_test,y_scores[:,1]))
print('AUC: ' + str(auc))
run.log('AUC', auc)

# Plot the ROC curve and attach it to the run
fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])
fig = plt.figure(figsize=(6, 4))
# Plot the diagonal 50% line
plt.plot([0, 1], [0, 1], 'k--')
# Plot the FPR and TPR achieved by our model
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
run.log_image(name = "ROC", plot = fig)
plt.show()

# Save the trained model in the outputs folder
print("Saving model...")
os.makedirs('outputs', exist_ok=True)
model_file = os.path.join('outputs', 'diabetes_model.pkl')
joblib.dump(value=model, filename=model_file)

# Register the model in the workspace, storing the metrics as properties
print('Registering model...')
Model.register(workspace=run.experiment.workspace,
               model_path = model_file,
               model_name = 'diabetes_model',
               tags={'Training context':'Pipeline'},
               properties={'AUC': auc, 'Accuracy': acc})


run.complete()

**Create/Get Compute target**

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cluster_name = "your-compute-cluster"
try:
    # Reuse the compute cluster when it already exists in the workspace
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # Otherwise create it (the original cell always created it, even right after fetching it)
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
pipeline_cluster.wait_for_completion(show_output=True)

**Write .yaml**

In [None]:
%%writefile $experiment_folder/experiment_env.yml
# Conda specification for the environment used by the pipeline run configuration
name: experiment_env
dependencies:
# Conda packages
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
# Packages installed through pip
- pip:
  - azureml-defaults
  - pyarrow

**Create/Get env + RunConfig**

In [None]:
from azureml.core.runconfig import RunConfiguration
# Create the environment from the conda specification file (.yml)
experiment_env = Environment.from_conda_specification("experiment_env", experiment_folder + "/experiment_env.yml")
# Register the environment in the workspace, then fetch it back by name
experiment_env.register(workspace=ws)
registered_env = Environment.get(ws, 'experiment_env')
# Create a run configuration to share across the pipeline steps
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster
pipeline_run_config.environment = registered_env


**Create the Pipeline (steps)**

In [None]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

# Registered dataset read by the first step
diabetes_ds = ws.datasets.get("diabetes dataset")
# Intermediate output that carries the prepared data from step 1 to step 2
prepped_data = OutputFileDatasetConfig("prepped_data")

# Settings that are identical for both steps
shared_step_settings = dict(source_directory=experiment_folder,
                            compute_target=pipeline_cluster,
                            runconfig=pipeline_run_config,
                            allow_reuse=True)

# Step 1: data preparation script
prep_arguments = ['--input-data', diabetes_ds.as_named_input('raw_data'),
                  '--prepped-data', prepped_data]
prep_step = PythonScriptStep(name="Prepare Data",
                             script_name="prep_diabetes.py",
                             arguments=prep_arguments,
                             **shared_step_settings)

# Step 2: training script, fed with the output of step 1
train_arguments = ['--training-data', prepped_data.as_input()]
train_step = PythonScriptStep(name="Train and Register Model",
                              script_name="train_diabetes.py",
                              arguments=train_arguments,
                              **shared_step_settings)

**Build the Pipeline**

In [None]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails
# Construct the pipeline from the two steps defined above (prepare, then train)
pipeline_steps = [prep_step, train_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps,)
print("Pipeline is built.")
# Create an experiment and submit the pipeline to it
experiment = Experiment(workspace=ws, name = 'mslearn-diabetes-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
# Display the run widget in the notebook, then wait for the run to finish
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

**Retrieve scores and metrics**

In [None]:
# Print every metric logged by each child run (one per step) of the pipeline run
for run in pipeline_run.get_children():
    print(run.name, ':')
    metrics = run.get_metrics()
    for metric_name, metric_value in metrics.items():
        print('\t',metric_name, ":", metric_value)

Pipeline exécuté avec succès => le modèle est enregistré dans le workspace à la fin du script d'entraînement (l'enregistrement est codé dans le script)  
**Check Registered  model**

In [None]:
from azureml.core import Model

# Show name, version, tags and properties of every model registered in the workspace
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag_value in model.tags.items():
        print ('\t',tag_name, ':', tag_value)
    for prop_name, prop_value in model.properties.items():
        print ('\t',prop_name, ':', prop_value)
    print('\n')

**Publier le Pipeline**

In [None]:
# Publish the pipeline from its run as a REST service
published_pipeline = pipeline_run.publish_pipeline(          
    name="diabetes-training-pipeline", description="Trains diabetes model", version="1.0")

# Retrieve the endpoint of the published pipeline
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

**Appeler l'endpoint du pipeline**

In [None]:
# Authentication: build the HTTP header from an interactive login
from azureml.core.authentication import InteractiveLoginAuthentication
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("Authentication header ready.")

# Call the published pipeline endpoint to start a new run
import requests
experiment_name = 'mslearn-diabetes-pipeline'
rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": experiment_name})
# Fail with the HTTP error instead of an obscure KeyError on "Id" when the call is rejected
response.raise_for_status()
run_id = response.json()["Id"]
# A bare `run_id` in the middle of a cell displays nothing, so print it explicitly
print("Pipeline run id:", run_id)

# With run_id we can follow the progress of the pipeline run
from azureml.pipeline.core.run import PipelineRun
published_pipeline_run = PipelineRun(ws.experiments[experiment_name], run_id)
published_pipeline_run.wait_for_completion(show_output=True)

**Schedule Pipeline**

In [None]:
from azureml.pipeline.core import ScheduleRecurrence, Schedule
# Submit the Pipeline every Monday at 00:00 UTC
recurrence = ScheduleRecurrence(frequency="Week", interval=1, week_days=["Monday"], time_of_day="00:00")
# Attach the recurrence to the published pipeline
weekly_schedule = Schedule.create(ws, name="weekly-diabetes-training", 
                                  description="Based on time",
                                  pipeline_id=published_pipeline.id, 
                                  experiment_name='mslearn-diabetes-pipeline', 
                                  recurrence=recurrence)
# List the schedules defined in the workspace
schedules = Schedule.list(ws)

# Check latest run: takes the first run returned for the experiment
# NOTE(review): assumes get_runs() yields the most recent run first - confirm
pipeline_experiment = ws.experiments.get('mslearn-diabetes-pipeline')
latest_run = list(pipeline_experiment.get_runs())[0]
latest_run.get_details()

**Relance déclenchée par un changement de données**

In [None]:
from azureml.core import Datastore
from azureml.pipeline.core import Schedule
# Datastore that holds the monitored training data
training_datastore = Datastore(workspace=ws, name='blob_data')
# Schedule bound to a datastore path instead of a recurrence
pipeline_schedule = Schedule.create(ws, name='Reactive Training',
                                    description='trains model on data change',
                                    pipeline_id=published_pipeline.id,
                                    experiment_name='Training_Pipeline',
                                    datastore=training_datastore,
                                    path_on_datastore='data/training')   # Folder monitored to trigger the re-run

# Deployer Pipelines - Temps Réel

Sur:
- calcul local
- une instance de calcul Azure Machine Learning,
- une instance ACI (Azure Container Instance),
- cluster AKS (Azure Kubernetes Services),
- une fonction Azure Functions
- module Internet des objets (IoT)

En utilisant un **conteneur Docker**

**1 - Inscrire Model**

In [None]:
from azureml.core import Model
# Register a model from a file on the local disk
classification_model = Model.register(workspace=ws,
                       model_name='classification_model',
                       model_path='model.pkl', # local path
                       description='A classification model')

# Register a model from the outputs of a (training) run
# NOTE(review): `run` must reference the training run that produced outputs/model.pkl - confirm
run.register_model( model_name='classification_model',
                    model_path='outputs/model.pkl', # run outputs path
                    description='A classification model')

**2 - Script d'entrée**

    init() : appelée lorsque le service est initialisé.
    run(raw_data) : appelée lorsque de nouvelles données sont envoyées au service.


In [None]:
# dans un .py
import json
import joblib
import numpy as np
import os

# Called when the service is loaded
def init():
    """Load the registered model into the module-level ``model`` variable.

    The model file ``model.pkl`` is looked up in the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable.
    """
    global model
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'model.pkl'))

# Called when a request is received
def run(raw_data):
    """Score one request.

    Args:
        raw_data: JSON document (string) whose ``"data"`` entry holds the input rows.

    Returns:
        The model predictions converted to a plain, JSON-serializable list.
    """
    payload = json.loads(raw_data)
    features = np.array(payload['data'])
    return model.predict(features).tolist()

**3 - Créer un environnement**

In [None]:
from azureml.core import Environment

# Environment for the service, with each listed package added as a pip dependency
service_env = Environment(name='service-env')
python_packages = ['scikit-learn', 'numpy'] # whatever packages your entry script uses
for package in python_packages:
    service_env.python.conda_dependencies.add_pip_package(package)

**4 - Combiner en un Inference Config**

In [None]:
from azureml.core.model import InferenceConfig

# Combine the folder holding the entry script, the script name and the environment
classifier_inference_config = InferenceConfig(source_directory = 'service_files',
                                              entry_script="score.py",
                                              environment=service_env)

**5 - Config de Déploiement**

Si on veut utiliser AKS

In [None]:
from azureml.core.compute import ComputeTarget, AksCompute

# Provision the AKS cluster that will host the service
cluster_name = 'aks-cluster'
# Region name fixed: 'westeurop' is not a valid Azure location
compute_config = AksCompute.provisioning_configuration(location='westeurope')
production_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
production_cluster.wait_for_completion(show_output=True)


# Resources allocated to the web service on the cluster
from azureml.core.webservice import AksWebservice
classifier_deploy_config = AksWebservice.deploy_configuration(cpu_cores = 1,
                                                              memory_gb = 1)

Si on veut utiliser ACI

In [None]:
from azureml.core.webservice import AciWebservice
# Deployment configuration for an Azure Container Instance: 1 CPU core, 1 GB of memory
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
# For a local deployment, the class to use is the one below
# (this bare expression only references the class; it configures nothing)
azureml.core.webservice.LocalWebservice

**6 - Deployer**

In [None]:
from azureml.core.model import Model

# Deploy the registered model to the AKS cluster and wait for the service to be ready
model = ws.models['classification_model']
service = Model.deploy(workspace=ws,
                       name = 'classifier-service',
                       models = [model],
                       inference_config = classifier_inference_config,
                       deployment_config = classifier_deploy_config,
                       deployment_target = production_cluster)
service.wait_for_deployment(show_output = True)


# Alternative: deploy with the ACI `deployment_config` (no deployment target needed).
# `inference_config` was never defined; the inference configuration built above is
# `classifier_inference_config`.
service_name = "diabetes-service"
service = Model.deploy(ws, service_name, [model], classifier_inference_config, deployment_config, overwrite=True)
service.wait_for_deployment(show_output = True)

In [None]:
import json
# An array of new data cases
x_new = [[0.1,2.3,4.1,2.0],
         [0.2,1.8,3.9,2.1]]
# Convert the array to a serializable list in a JSON document
json_data = json.dumps({"data": x_new})
# Call the web service, passing the input data for inference
response = service.run(input_data = json_data)
# Get the predictions. The entry script may return either a JSON string or an
# already-decoded list; json.loads() raises TypeError on a list, so only decode strings.
predictions = json.loads(response) if isinstance(response, str) else response
# Print the predicted class for each case.
for i in range(len(x_new)):
    print (x_new[i], predictions[i])

**Authentification**
en production  
deux types d’authentification :

    - Clé : les demandes sont authentifiées en spécifiant la clé associée au service.
    - Jeton : les demandes sont authentifiées en fournissant un jeton JWT (JSON Web Token).


In [None]:
# URI of the deployed service's scoring endpoint, reused by the REST client cell
endpoint = service.scoring_uri
print(endpoint)

**Utiliser le service dans un autre programme**

In [None]:
import requests
import json
# An array of new data cases
x_new = [[0.1,2.3,4.1,2.0],
         [0.2,1.8,3.9,2.1]]
# Convert the array to a serializable list in a JSON document
json_data = json.dumps({"data": x_new})


# Set the content type in the request headers
request_headers = { 'Content-Type':'application/json' }
# Call the service
response = requests.post(url = endpoint,
                         data = json_data,
                         headers = request_headers)
# Surface HTTP errors (bad key, service down, ...) before trying to parse the body
response.raise_for_status()


# Get the predictions from the JSON response. response.json() already decodes the
# body; decode a second time only when the service returned a JSON-encoded string.
result = response.json()
predictions = json.loads(result) if isinstance(result, str) else result

# Print the predicted class for each case
# (the original print call had an unbalanced parenthesis and did not parse).
for i in range(len(x_new)):
    print (x_new[i], predictions[i])

In [None]:
# Print the current state of the service
print(service.state)

# Print the logs of the service
print(service.get_logs())

**Deployer sur un conteneur en local**

In [None]:
from azureml.core.webservice import LocalWebservice

# Deploy the same model as a local web service listening on port 8890
# NOTE(review): `inference_config` is not defined in the visible cells (the configuration
# built earlier is `classifier_inference_config`) - confirm which one is intended
deployment_config = LocalWebservice.deploy_configuration(port=8890)
service = Model.deploy(ws, 'test-svc', [model], inference_config, deployment_config)

**Pour debuger le service :**

In [None]:
# Test the service
print(service.run(input_data = json_data))

######
# Troubleshooting: reload the service, then test it again
###### 
service.reload()
print(service.run(input_data = json_data))

# Deployer Pipelines - Inference de lots

**1 - Inscrire un Model**

In [None]:
# Register a model from a file on the local disk
from azureml.core import Model
# `your_workspace` was an undefined placeholder; the workspace object used
# throughout this notebook is `ws`
classification_model = Model.register(workspace=ws,
                                      model_name='classification_model',
                                      model_path='model.pkl', # local path
                                      description='A classification model')

# Register a model trained during a run
run.register_model( model_name='classification_model',
                    model_path='outputs/model.pkl', # run outputs path
                    description='A classification model')

**2 - Créer un  script de scoring**  
Le service d’inférence de lot nécessite un script de scoring pour charger le modèle et l’utiliser pour prédire de nouvelles valeurs. Il doit inclure deux fonctions :

    - init() : appelée quand le pipeline est initialisé.
    - run(mini_batch) : appelée pour chaque lot de données à traiter

In [None]:
import os
import numpy as np
from azureml.core import Model
import joblib

def init():
    """Load the registered 'classification_model' into the module-level ``model`` variable."""
    global model

    # Resolve the path of the registered model, then deserialize it
    registered_path = Model.get_model_path('classification_model')
    model = joblib.load(registered_path)

def run(mini_batch):
    """Score every file of a mini batch.

    Args:
        mini_batch: iterable of paths to comma-delimited files, each file being
            read as one input row.

    Returns:
        A list of ``"<file name>: <prediction>"`` strings, one per file.
    """
    def _score_file(path):
        # Read the comma-delimited values and present them to the model as a single row
        values = np.genfromtxt(path, delimiter=',')
        predicted = model.predict(values.reshape(1, -1))
        return "{}: {}".format(os.path.basename(path), predicted[0])

    return [_score_file(path) for path in mini_batch]

**3 -  Créer un pipeline avec un ParallelRunStep et ParallelRunConfig**

In [None]:
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.core import Pipeline

# Get the batch dataset for input
batch_data_set = ws.datasets['batch-data']

# Set the output location
output_dir = OutputFileDatasetConfig(name='inferences')

# Define the parallel run step configuration
# NOTE(review): `batch_env` and `aml_cluster` are not defined in the visible cells - they must exist beforehand
parallel_run_config = ParallelRunConfig(
    source_directory='batch_scripts',
    entry_script="batch_scoring_script.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=batch_env,
    compute_target=aml_cluster,
    node_count=4)

# Create the parallel run step
parallelrun_step = ParallelRunStep(
    name='batch-score',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('batch_data')],
    output=output_dir,
    arguments=[],
    allow_reuse=True
)
# Create the pipeline with this single step
pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])

**4 - Exécuter le pipeline et récupérer la sortie de l’étape**

In [None]:
import os

import pandas as pd
from azureml.core import Experiment

# Run the pipeline as an experiment
pipeline_run = Experiment(ws, 'batch_prediction_pipeline').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

# Get the outputs from the first (and only) step
prediction_run = next(pipeline_run.get_children())
prediction_output = prediction_run.get_output_data('inferences')
prediction_output.download(local_path='results')

# Find the parallel_run_step.txt file (the last match wins, as before)
result_file = None
for root, dirs, files in os.walk('results'):
    for file in files:
        if file.endswith('parallel_run_step.txt'):
            result_file = os.path.join(root,file)

# Fail with a clear message instead of a NameError when nothing was downloaded
if result_file is None:
    raise FileNotFoundError("No parallel_run_step.txt file found under 'results'")

# Load and display the results
df = pd.read_csv(result_file, delimiter=":", header=None)
df.columns = ["File", "Prediction"]
print(df)

**5 - Publication du Pipeline**  


In [None]:
# Publish the batch pipeline from its run and keep its REST endpoint
published_pipeline = pipeline_run.publish_pipeline(name='Batch_Prediction_Pipeline',
                                                   description='Batch pipeline',
                                                   version='1.0')
rest_endpoint = published_pipeline.endpoint

In [None]:
# Start the published batch pipeline through its REST endpoint
# (`auth_header` comes from the authentication cell earlier in the notebook)
import requests
response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "Batch_Prediction"})
# Surface HTTP errors instead of failing later with a KeyError on "Id"
response.raise_for_status()
run_id = response.json()["Id"]

In [None]:
# Schedule the execution of the published pipeline with a weekly recurrence
from azureml.pipeline.core import ScheduleRecurrence, Schedule
weekly = ScheduleRecurrence(frequency='Week', interval=1)
pipeline_schedule = Schedule.create(ws, name='Weekly Predictions',
                                        description='batch inferencing',
                                        pipeline_id=published_pipeline.id,
                                        experiment_name='Batch_Prediction',
                                        recurrence=weekly)

##  Recherche d' hyperparamètres
Paramètre discret :
- qnormal
- quniform
- qlognormal
- qloguniform

Paramètre continu :
- normal
- uniform
- lognormal
- loguniform

**Espace de recherche**

In [None]:
from azureml.train.hyperdrive import choice, normal
# Search space: keys are the script arguments, values the expressions they are drawn from
param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }

**Échantillonnage par grille**  GridSearchCV

In [None]:
from azureml.train.hyperdrive import GridParameterSampling, choice
# Grid sampling: both parameters are declared with choice() expressions
param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': choice(0.01, 0.1, 1.0)
              }
              
param_sampling = GridParameterSampling(param_space)

**Échantillonnage aléatoire**  RandomSearchCV

In [None]:
from azureml.train.hyperdrive import RandomParameterSampling, choice, normal
# Random sampling over a choice() expression and a normal() expression
param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': normal(10, 3)
              }
              
param_sampling = RandomParameterSampling(param_space)

**Echantillonage Bayesien**

In [None]:
from azureml.train.hyperdrive import BayesianParameterSampling, choice, uniform
# Bayesian sampling over a choice() expression and a uniform() expression
param_space = {
                 '--batch_size': choice(16, 32, 64),
                 '--learning_rate': uniform(0.05, 0.1)
              }

param_sampling = BayesianParameterSampling(param_space)

**Early Stopping** Stratégie Bandit  
arrêter une exécution si la mesure de performance cible affiche des performances inférieures d’une marge spécifiée à celles de la meilleure exécution jusqu’à présent.


In [None]:
from azureml.train.hyperdrive import BanditPolicy
# Bandit policy: stop a run whose target metric falls behind the best run so far by the given margin
early_termination_policy = BanditPolicy(slack_amount = 0.2,
                                        evaluation_interval=1,
                                        delay_evaluation=5)

**Early Stopping** Stratégie Arrêt Médiane  
abandonne les exécutions où la mesure de performance cible est inférieure à la valeur médiane des moyennes en cours d’exécution pour toutes les exécutions.

In [None]:
from azureml.train.hyperdrive import MedianStoppingPolicy
# Median stopping policy: abandon runs whose target metric is below the median of the running averages
early_termination_policy = MedianStoppingPolicy(evaluation_interval=1,
                                                delay_evaluation=5)

**Early Stopping** Stratégie Sélection de Troncation  
annule les x % des exécutions les moins performantes à chaque intervalle d’évaluation en fonction de la valeur de truncation_percentage que vous spécifiez pour X.

In [None]:
from azureml.train.hyperdrive import TruncationSelectionPolicy
# Truncation selection policy: cancel the lowest-performing truncation_percentage % of runs at each evaluation interval
early_termination_policy = TruncationSelectionPolicy(truncation_percentage=10,
                                                     evaluation_interval=1,
                                                     delay_evaluation=5)

## Experiment Fine Tuning Hyperparametres

**Script de formation .py**

In [None]:
# Training script for hyperdrive: trains a logistic regression with the regularization
# rate received on the command line and logs the resulting accuracy.
import argparse
import os  # required by os.makedirs below (was missing, which raised NameError)
import joblib
from azureml.core import Run
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Get regularization hyperparameter
parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg_rate', default=0.01)
args = parser.parse_args()
reg = args.reg_rate

# Get the experiment run context
run = Run.get_context()

# Load the training dataset from the run's named input
data = run.input_datasets['training_data'].to_pandas_dataframe()

# Separate features and labels, and split for training/validation
X = data[['feature1','feature2','feature3','feature4']].values
y = data['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

# Train a logistic regression model; C is the inverse of the regularization rate
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# Calculate and log accuracy.
# The builtin float() replaces np.float, an alias that was removed in NumPy 1.24.
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
run.log('Accuracy', float(acc))

# Save the trained model
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/model.pkl')

run.complete()

**Configuration et exécution d’une expérience hyperdrive**

In [None]:
from azureml.core import Experiment
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

# Assumes ws, script_config and param_sampling are already defined

# Hyperdrive configuration: maximize the 'Accuracy' metric, with no early-termination
# policy, over at most 6 runs in total and at most 4 at a time
hyperdrive = HyperDriveConfig(run_config=script_config,
                              hyperparameter_sampling=param_sampling,
                              policy=None,
                              primary_metric_name='Accuracy',
                              primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                              max_total_runs=6,
                              max_concurrent_runs=4)

# Submit the hyperdrive configuration as an experiment
experiment = Experiment(workspace = ws, name = 'hyperdrive_training')
hyperdrive_run = experiment.submit(config=hyperdrive)

**Surveillance et examen des exécutions d’hyperdrive**

In [None]:
# Retrieve the metrics logged by each child run of the hyperdrive run
# (the original iterated over `run`, which is not the hyperdrive run submitted above)
for child_run in hyperdrive_run.get_children():
    print(child_run.id, child_run.get_metrics())

In [None]:
# List all child runs in descending order of performance
for child_run in hyperdrive_run.get_children_sorted_by_primary_metric():
    print(child_run)

In [None]:
# Retrieve the best-performing run
best_run = hyperdrive_run.get_best_run_by_primary_metric()