# __Automated M__L

In [13]:
import json
import azureml.core
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
import joblib 
from azureml.train.automl import AutoMLConfig
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice.aci import AciWebservice, Webservice

print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


## __Dataset__

### Overview

The tasks is to predict parkinsons based on speech. Below are the features of this dataset:

- MDVP:Fo(Hz) - Average vocal fundamental frequency
- MDVP:Fhi(Hz) - Maximum vocal fundamental frequency
- MDVP:Flo(Hz) - Minimum vocal fundamental frequency
- MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP - Several measures of variation in fundamental frequency
- MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA - Several measures of variation in amplitude
- NHR,HNR - Two measures of ratio of noise to tonal components in the voice
- RPDE,D2 - Two nonlinear dynamical complexity measures
- DFA - Signal fractal scaling exponent
- spread1,spread2,PPE - Three nonlinear measures of fundamental frequency variation
- status - Health status of the subject (one) - Parkinson's, (zero) - healthy

In [14]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

quick-starts-ws-138653
aml-quickstarts-138653
southcentralus
f9d5a085-54dc-4215-9ba6-dad5d86e60a0


In [15]:
ws = Workspace.from_config()
experiment_name = 'parkinsons-automl-experiment'
experiment = Experiment(ws, experiment_name)

In [16]:
for ds in ws.datasets:
    print(ds)

parkinsons


In [17]:
data = ws.datasets["parkinsons"]
data

{
  "source": [
    "('workspaceblobstore', 'UI/02-12-2021_022751_UTC/parkinsons.csv')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "f470273a-704d-46c1-9cfb-93567ebafa15",
    "name": "parkinsons",
    "version": 1,
    "workspace": "Workspace.create(name='quick-starts-ws-138653', subscription_id='f9d5a085-54dc-4215-9ba6-dad5d86e60a0', resource_group='aml-quickstarts-138653')"
  }
}

## __AutoML Configuration__

In [20]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
cluster_name = "udacity-project"
# verify that cluster does not exist already
try:
    compute_cluster = ComputeTarget(ws, cluster_name)
    print("Found new cluster")
except ComputeTargetException:
    print("Create new cluster")
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)

    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    print("Computer cluster created")
    
compute_cluster.wait_for_completion(show_output=True, min_node_count=2, timeout_in_minutes=10)

Found new cluster
Succeeded.....................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## __Exploring Dataset__

The task is to predict if Parkinson's Disease was detected or not, status 1 means Parkinson's or 0 means healthy as the classification target. This dataset contains 23 voice measurements to predict Parkinson's. 

In [30]:
key = "parkinsons"
dataset = ws.datasets[key]

In [31]:
dataset.take(10).to_pandas_dataframe()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE,status
0,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,...,0.06545,0.02211,21.033,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654,1
1,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,...,0.09403,0.01929,19.085,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674,1
2,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,0.482,...,0.0827,0.01309,20.651,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634,1
3,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,0.517,...,0.08771,0.01353,20.644,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975,1
4,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.584,...,0.1047,0.01767,19.649,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335,1
5,120.552,131.162,113.787,0.00968,8e-05,0.00463,0.0075,0.01388,0.04701,0.456,...,0.06985,0.01222,21.378,0.415564,0.825069,-4.242867,0.299111,2.18756,0.357775,1
6,120.267,137.244,114.82,0.00333,3e-05,0.00155,0.00202,0.00466,0.01608,0.14,...,0.02337,0.00607,24.886,0.59604,0.764112,-5.634322,0.257682,1.854785,0.211756,1
7,107.332,113.84,104.315,0.0029,3e-05,0.00144,0.00182,0.00431,0.01567,0.134,...,0.02487,0.00344,26.892,0.63742,0.763262,-6.167603,0.183721,2.064693,0.163755,1
8,95.73,132.068,91.754,0.00551,6e-05,0.00293,0.00332,0.0088,0.02093,0.191,...,0.03218,0.0107,21.812,0.615551,0.773587,-5.498678,0.327769,2.322511,0.231571,1
9,95.056,120.103,91.226,0.00532,6e-05,0.00268,0.00332,0.00803,0.02838,0.255,...,0.04324,0.01022,21.862,0.547037,0.798463,-5.011879,0.325996,2.432792,0.271362,1


## __AutoML Configuration__

In [42]:
from azureml.train.automl import AutoMLConfig

automl_settings = {
    "experiment_timeout_minutes": 30,
    "iterations": 40,
    "max_concurrent_iterations": 4,
    "n_cross_validations": 3,
    "primary_metric": "accuracy"
}

automl_config = AutoMLConfig(compute_target=compute_cluster,
                            task= "classification",
                            training_data=dataset,
                            label_column_name="status",
                            enable_early_stopping=True,
                            debug_log="automl_errors.log",
                            **automl_settings
                            )

In [43]:
remote_run = experiment.submit(automl_config, show_output=True)

Running on remote.
No run_configuration provided, running on udacity-project with default configuration
Running on remote compute: udacity-project
Parent Run ID: AutoML_7edc4a83-4a64-401d-bda3-b8f3eb703f63

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|10                               |
+---------------------------------+

****************************************************************************************************

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are b