### Author: Kubam Ivo
### Date: 7/18/2020

In this tutorial, run your scikit-learn training scripts at enterprise scale by using the Azure Machine Learning SKlearn estimator class.

The example scripts in this article are used to classify iris flower images to build a machine learning model based on scikit-learn's iris dataset.

In [24]:
# Importing packages

import os
import urllib
import shutil
import azureml

from azureml.core import Experiment
from azureml.core import Workspace, Run

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


In [25]:
# Initialising Workspace

ws = Workspace.from_config()

In [26]:
# Create a machine learning experiment

project_folder = './sklearn-iris'
os.makedirs(project_folder, exist_ok=True)

experiment = Experiment(workspace=ws,name='sklearn-iris')

In [27]:
# Importing script and dataset 
shutil.copy('./train_iris.py', project_folder)
shutil.copy('./iris',project_folder)

'./sklearn-iris/iris'

In [28]:
# Create or get a compute target
cluster_name = "mlcompute"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

Found existing compute target


In [29]:
# Create a scikit -learn estimator
from azureml.train.sklearn import SKLearn

script_params = {
    '--kernel': 'linear',
    '--penalty': 1.0,
}

estimator = SKLearn(source_directory=project_folder, 
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='train_iris.py',
                    pip_packages=['joblib==0.13.2']
                   )



In [30]:
# Submit a run
run = experiment.submit(estimator)
run.wait_for_completion(show_output=True)

RunId: sklearn-iris_1595146164_f6d591a1
Web View: https://ml.azure.com/experiments/sklearn-iris/runs/sklearn-iris_1595146164_f6d591a1?wsid=/subscriptions/a1839f8e-ad8f-4825-ab97-e9628255f2bb/resourcegroups/azureml/workspaces/ml_practice

Streaming azureml-logs/20_image_build_log.txt

2020/07/19 08:09:36 Downloading source code...
2020/07/19 08:09:37 Finished downloading source code
2020/07/19 08:09:37 Creating Docker network: acb_default_network, driver: 'bridge'
2020/07/19 08:09:38 Successfully set up Docker network: acb_default_network
2020/07/19 08:09:38 Setting up Docker configuration...
2020/07/19 08:09:38 Successfully set up Docker configuration
2020/07/19 08:09:38 Logging in to registry: mlpractice98dcf28c.azurecr.io
2020/07/19 08:09:39 Successfully logged into mlpractice98dcf28c.azurecr.io
2020/07/19 08:09:39 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2020/07/19 08:09:39 Scanning for dependencies...
2020/07/19 08:09:

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with NameError: name 'null' is not defined",
        "detailsUri": "https://aka.ms/azureml-known-errors",
        "details": [],
        "debugInfo": {
            "type": "NameError",
            "message": "name 'null' is not defined",
            "stackTrace": "  File \"/mnt/batch/tasks/shared/LS_root/jobs/ml_practice/azureml/sklearn-iris_1595146164_f6d591a1/mounts/workspaceblobstore/azureml/sklearn-iris_1595146164_f6d591a1/azureml-setup/context_manager_injector.py\", line 148, in execute_with_context\n    runpy.run_path(sys.argv[0], globals(), run_name=\"__main__\")\n  File \"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\", line 263, in run_path\n    pkg_name=pkg_name, script_name=fname)\n  File \"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\", line 96, in _run_module_code\n    mod_name, mod_spec, pkg_name, script_name)\n  File \"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\", line 85, in _run_code\n    exec(code, run_globals)\n  File \"train_iris.py\", line 1, in <module>\n    {\"cells\":[{\"cell_type\":\"code\",\"source\":[\"# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/\\r\\n\",\"\\r\\n\",\"import argparse\\r\\n\",\"import os\\r\\n\",\"from azureml.core import Dataset\\r\\n\",\"# importing necessary libraries\\r\\n\",\"import numpy as np\\r\\n\",\"\\r\\n\",\"from sklearn import datasets\\r\\n\",\"from sklearn.metrics import confusion_matrix\\r\\n\",\"from sklearn.model_selection import train_test_split\\r\\n\",\"\\r\\n\",\"import joblib\\r\\n\",\"\\r\\n\",\"from azureml.core.run import Run\\r\\n\",\"run = Run.get_context()\\r\\n\",\"\\r\\n\",\"\\r\\n\",\"def main():\\r\\n\",\"    parser = argparse.ArgumentParser()\\r\\n\",\"\\r\\n\",\"    parser.add_argument('--kernel', type=str, default='linear',\\r\\n\",\"                        help='Kernel type to be used in the algorithm')\\r\\n\",\"    parser.add_argument('--penalty', type=float, default=1.0,\\r\\n\",\"                        help='Penalty parameter of the error term')\\r\\n\",\"\\r\\n\",\"    args = parser.parse_args()\\r\\n\",\"    run.log('Kernel type', np.str(args.kernel))\\r\\n\",\"    run.log('Penalty', np.float(args.penalty))\\r\\n\",\"\\r\\n\",\"    # loading the iris dataset\\r\\n\",\"    # Getting a dataset\\r\\n\",\"\\r\\n\",\"    dataset_name = 'iris'\\r\\n\",\"\\r\\n\",\"    iris = Dataset.get_by_name(workspace= ws, name=dataset_name) \\r\\n\",\"\\r\\n\",\"    #load a tabularDataset by name\\r\\n\",\"    iris_df = iris.to_pandas_dataframe()\\r\\n\",\"\\r\\n\",\"    # X -> features, y -> label\\r\\n\",\"   \\r\\n\",\"    X = iris_df.iloc[:,:3]\\r\\n\",\"    y = iris_df.iloc[:,4]\\r\\n\",\"\\r\\n\",\"    # dividing X, y into train and test data\\r\\n\",\"    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\\r\\n\",\"\\r\\n\",\"    # training a linear SVM classifier\\r\\n\",\"    from sklearn.svm import SVC\\r\\n\",\"    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)\\r\\n\",\"    svm_predictions = svm_model_linear.predict(X_test)\\r\\n\",\"\\r\\n\",\"    # model accuracy for X_test\\r\\n\",\"    accuracy = svm_model_linear.score(X_test, y_test)\\r\\n\",\"    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))\\r\\n\",\"    run.log('Accuracy', np.float(accuracy))\\r\\n\",\"    # creating a confusion matrix\\r\\n\",\"    cm = confusion_matrix(y_test, svm_predictions)\\r\\n\",\"    print(cm)\\r\\n\",\"\\r\\n\",\"    os.makedirs('outputs', exist_ok=True)\\r\\n\",\"    # files saved in the \\\"outputs\\\" folder are automatically uploaded into run history\\r\\n\",\"    joblib.dump(svm_model_linear, 'outputs/model.joblib')\\r\\n\",\"\\r\\n\",\"\\r\\n\",\"if __name__ == '__main__':\\r\\n\",\"    main()\\r\\n\"],\"outputs\":[],\"execution_count\":null,\"metadata\":{}}],\"metadata\":{\"kernelspec\":{\"name\":\"python3-azureml\",\"language\":\"python\",\"display_name\":\"Python 3.6 - AzureML\"},\"language_info\":{\"name\":\"python\",\"version\":\"3.6.9\",\"mimetype\":\"text/x-python\",\"codemirror_mode\":{\"name\":\"ipython\",\"version\":3},\"pygments_lexer\":\"ipython3\",\"nbconvert_exporter\":\"python\",\"file_extension\":\".py\"},\"kernel_info\":{\"name\":\"python3-azureml\"},\"nteract\":{\"version\":\"nteract-front-end@1.0.0\"}},\"nbformat\":4,\"nbformat_minor\":2}\n"
        },
        "messageParameters": {}
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with NameError: name 'null' is not defined\",\n        \"detailsUri\": \"https://aka.ms/azureml-known-errors\",\n        \"details\": [],\n        \"debugInfo\": {\n            \"type\": \"NameError\",\n            \"message\": \"name 'null' is not defined\",\n            \"stackTrace\": \"  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/ml_practice/azureml/sklearn-iris_1595146164_f6d591a1/mounts/workspaceblobstore/azureml/sklearn-iris_1595146164_f6d591a1/azureml-setup/context_manager_injector.py\\\", line 148, in execute_with_context\\n    runpy.run_path(sys.argv[0], globals(), run_name=\\\"__main__\\\")\\n  File \\\"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\\\", line 263, in run_path\\n    pkg_name=pkg_name, script_name=fname)\\n  File \\\"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\\\", line 96, in _run_module_code\\n    mod_name, mod_spec, pkg_name, script_name)\\n  File \\\"/azureml-envs/azureml_618d79f5f9d88bbf887dccf51537da4b/lib/python3.6/runpy.py\\\", line 85, in _run_code\\n    exec(code, run_globals)\\n  File \\\"train_iris.py\\\", line 1, in <module>\\n    {\\\"cells\\\":[{\\\"cell_type\\\":\\\"code\\\",\\\"source\\\":[\\\"# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"import argparse\\\\r\\\\n\\\",\\\"import os\\\\r\\\\n\\\",\\\"from azureml.core import Dataset\\\\r\\\\n\\\",\\\"# importing necessary libraries\\\\r\\\\n\\\",\\\"import numpy as np\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"from sklearn import datasets\\\\r\\\\n\\\",\\\"from sklearn.metrics import confusion_matrix\\\\r\\\\n\\\",\\\"from sklearn.model_selection import train_test_split\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"import joblib\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"from azureml.core.run import Run\\\\r\\\\n\\\",\\\"run = Run.get_context()\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"def main():\\\\r\\\\n\\\",\\\"    parser = argparse.ArgumentParser()\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    parser.add_argument('--kernel', type=str, default='linear',\\\\r\\\\n\\\",\\\"                        help='Kernel type to be used in the algorithm')\\\\r\\\\n\\\",\\\"    parser.add_argument('--penalty', type=float, default=1.0,\\\\r\\\\n\\\",\\\"                        help='Penalty parameter of the error term')\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    args = parser.parse_args()\\\\r\\\\n\\\",\\\"    run.log('Kernel type', np.str(args.kernel))\\\\r\\\\n\\\",\\\"    run.log('Penalty', np.float(args.penalty))\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    # loading the iris dataset\\\\r\\\\n\\\",\\\"    # Getting a dataset\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    dataset_name = 'iris'\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    iris = Dataset.get_by_name(workspace= ws, name=dataset_name) \\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    #load a tabularDataset by name\\\\r\\\\n\\\",\\\"    iris_df = iris.to_pandas_dataframe()\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    # X -> features, y -> label\\\\r\\\\n\\\",\\\"   \\\\r\\\\n\\\",\\\"    X = iris_df.iloc[:,:3]\\\\r\\\\n\\\",\\\"    y = iris_df.iloc[:,4]\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    # dividing X, y into train and test data\\\\r\\\\n\\\",\\\"    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    # training a linear SVM classifier\\\\r\\\\n\\\",\\\"    from sklearn.svm import SVC\\\\r\\\\n\\\",\\\"    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)\\\\r\\\\n\\\",\\\"    svm_predictions = svm_model_linear.predict(X_test)\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    # model accuracy for X_test\\\\r\\\\n\\\",\\\"    accuracy = svm_model_linear.score(X_test, y_test)\\\\r\\\\n\\\",\\\"    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))\\\\r\\\\n\\\",\\\"    run.log('Accuracy', np.float(accuracy))\\\\r\\\\n\\\",\\\"    # creating a confusion matrix\\\\r\\\\n\\\",\\\"    cm = confusion_matrix(y_test, svm_predictions)\\\\r\\\\n\\\",\\\"    print(cm)\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"    os.makedirs('outputs', exist_ok=True)\\\\r\\\\n\\\",\\\"    # files saved in the \\\\\\\"outputs\\\\\\\" folder are automatically uploaded into run history\\\\r\\\\n\\\",\\\"    joblib.dump(svm_model_linear, 'outputs/model.joblib')\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"\\\\r\\\\n\\\",\\\"if __name__ == '__main__':\\\\r\\\\n\\\",\\\"    main()\\\\r\\\\n\\\"],\\\"outputs\\\":[],\\\"execution_count\\\":null,\\\"metadata\\\":{}}],\\\"metadata\\\":{\\\"kernelspec\\\":{\\\"name\\\":\\\"python3-azureml\\\",\\\"language\\\":\\\"python\\\",\\\"display_name\\\":\\\"Python 3.6 - AzureML\\\"},\\\"language_info\\\":{\\\"name\\\":\\\"python\\\",\\\"version\\\":\\\"3.6.9\\\",\\\"mimetype\\\":\\\"text/x-python\\\",\\\"codemirror_mode\\\":{\\\"name\\\":\\\"ipython\\\",\\\"version\\\":3},\\\"pygments_lexer\\\":\\\"ipython3\\\",\\\"nbconvert_exporter\\\":\\\"python\\\",\\\"file_extension\\\":\\\".py\\\"},\\\"kernel_info\\\":{\\\"name\\\":\\\"python3-azureml\\\"},\\\"nteract\\\":{\\\"version\\\":\\\"nteract-front-end@1.0.0\\\"}},\\\"nbformat\\\":4,\\\"nbformat_minor\\\":2}\\n\"\n        },\n        \"messageParameters\": {}\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}

In [None]:
## Tune Model Hyperparamenters

from azureml.train.hyperdrive.runconfig import HyperDriveRunConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

param_sampling = RandomParameterSampling({
    "--Kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
    "--penalty": choice(0.5,1, 1.5)
})

hyperdrive_run_config = HyperDriveRunConfig(estimator=estimator, 
                                            hyperparameter_sampling = param_sampling,
                                            primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,
                                            max_total_runs = 12,
                                            max_concurrent_runs = 4)

In [None]:
# Start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_run_config)