In [1]:
from azureml.core import Workspace, Experiment, Dataset, Environment
from azureml.train.estimator import Estimator, Mpi

# Workspace handle used by every later cell (environment, compute, datasets, experiment).
ws = Workspace.from_config()

In [2]:
# Name of the environment registered in the workspace that the training runs use.
ENVIRONMENT_NAME = 'lightgbm-cli'

# Look the registered environment up by name.
lgbm_env = Environment.get(workspace=ws, name=ENVIRONMENT_NAME)

In [3]:
# Registered datasets: training split first, then the split used for validation.
train_ds = ws.datasets['renewal_train_csv']
valid_ds = ws.datasets['renewal_test_csv']

# Compute target the estimator will be submitted to.
training_cluster = ws.compute_targets['training-cluster']

### 2. Creating Estimator object


In [17]:
# Number of nodes in the MPI job. The same value is forwarded to the training
# script as --num_machines so the two settings cannot drift apart.
n_nodes = 3

# Arguments passed to train.py on every run. The HyperDrive sweep defined later
# in the notebook adds its sampled parameters on top of these.
static_parameters = {
    '--train_data': train_ds.as_named_input('train').as_mount(),
    '--valid_data': valid_ds.as_named_input('valid').as_mount(),
    '--task': 'train',
    '--conf_file': 'train.conf',
    '--metric': 'auc,binary_logloss,binary_error,mean_absolute_error',
    '--num_machines': n_nodes,
    '--label_column': "name:i_year1_renewal_flag",
    '--num_iterations': 100,
    # LightGBM spells this parameter with an underscore (tree_learner), like
    # every other key in this dict.
    '--tree_learner': 'voting',
}

lgbm_est = Estimator(
    source_directory='../code/',
    entry_script='train.py',
    # Must reference the dict built above; there is no `parameters` variable.
    script_params=static_parameters,
    compute_target=training_cluster,
    environment_definition=lgbm_env,
    distributed_training=Mpi(),
    node_count=n_nodes,
)

### 3. Specifying Hyperdrive Settings
As with the previous step, the `train.py` script will accept [any parameter that LightGBM accepts](https://lightgbm.readthedocs.io/en/latest/Parameters.html). These parameters are passed to the command-line tool, and the runs execute in parallel.

In [28]:
from azureml.train.hyperdrive import HyperDriveConfig, choice, uniform, randint, RandomParameterSampling, BanditPolicy, PrimaryMetricGoal

# Random search space. Each key is passed to train.py as a command-line flag.
# NOTE(review): per the LightGBM docs, bagging only takes effect when
# bagging_fraction < 1.0 as well; bagging_fraction is not set in this notebook,
# so confirm train.conf provides it, otherwise sampling --bagging_freq is a no-op.
# NOTE(review): 'rf' boosting presumably needs bagging or feature sub-sampling
# enabled to run at all; verify those trials do not fail outright.
search_space = {
    '--learning_rate': uniform(0.01, 2),
    '--max_depth': choice(0, 200, 300, 400),
    '--boosting': choice('gbdt', 'rf', 'dart', 'goss'),
    '--bagging_freq': randint(10),
    '--lambda_l1': uniform(0.01, 2),
    '--lambda_l2': uniform(0.01, 2),
}
hyperdrive_params = RandomParameterSampling(search_space)

# Early-termination policy applied to the child runs.
etp = BanditPolicy(
    slack_factor=0.2,
    evaluation_interval=5,
    delay_evaluation=10,
)

# Sweep configuration: maximise 'validation_auc' over at most 20 runs,
# with no more than 2 running concurrently.
hd_runconfig = HyperDriveConfig(
    estimator=lgbm_est,
    hyperparameter_sampling=hyperdrive_params,
    policy=etp,
    primary_metric_name='validation_auc',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=2,
)

In [29]:
# Experiment under which the HyperDrive parent run is recorded.
experiment = Experiment(workspace=ws, name='lightgbm-parallel-hyperdrive')

# Submit the sweep; `run` is the handle used by the monitoring cells below.
run = experiment.submit(config=hd_runconfig)

In [30]:
from azureml.widgets import RunDetails
# Render the run-monitoring widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [15]:
# Wait for the run to finish while streaming its logs into the cell output.
# In the captured output below this call raised ActivityFailedException
# because the run failed.
run.wait_for_completion(show_output=True)

RunId: lightgbm-parallel_1586375317_8e03268c
Web View: https://ml.azure.com/experiments/lightgbm-parallel/runs/lightgbm-parallel_1586375317_8e03268c?wsid=/subscriptions/dcdc374c-3ce4-4e43-92ad-10070b3b2941/resourcegroups/smart-assistant-ds/workspaces/smart-assistant-ws

Streaming azureml-logs/55_azureml-execution-tvmps_3fa1cb7b522beebafb9441aac3630f0ec0d0d06b7e9a282df5733198e21affa8_d.txt

2020-04-08T19:49:27Z Starting output-watcher...
2020-04-08T19:49:27Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_c22c7c14d44029266de3fb209ead42a1
Digest: sha256:0ffad2f61a24208a04e8d41b26ca3161d6bd0817bbb4d95ed1371c35ebce3f1b
Status: Image is up to date for smartassistaa760ea9a.azurecr.io/azureml/azureml_c22c7c14d44029266de3fb209ead42a1:latest
e5499b62ea2192ae8850eb8dbd1d6a6f89ba633311129dd150354ad8b8804c70
2020/04/08 19:49:31 Version: 3.0.01172.0001 Branch: master Commit: d33e301a
2020/04/08 19:49:31 /de

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with RuntimeError: Lightgbm exited with exit code: -13",
        "detailsUri": "https://aka.ms/azureml-known-errors",
        "details": [],
        "debugInfo": {
            "type": "RuntimeError",
            "message": "Lightgbm exited with exit code: -13",
            "stackTrace": "  File \"/mnt/batch/tasks/shared/LS_root/jobs/smart-assistant-ws/azureml/lightgbm-parallel_1586375317_8e03268c/mounts/workspaceblobstore/azureml/lightgbm-parallel_1586375317_8e03268c/azureml-setup/context_manager_injector.py\", line 127, in execute_with_context\n    runpy.run_path(sys.argv[0], globals(), run_name=\"__main__\")\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\", line 263, in run_path\n    pkg_name=pkg_name, script_name=fname)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\", line 96, in _run_module_code\n    mod_name, mod_spec, pkg_name, script_name)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\", line 85, in _run_code\n    exec(code, run_globals)\n  File \"train.py\", line 172, in <module>\n    main()\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\", line 829, in __call__\n    return self.main(*args, **kwargs)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\", line 782, in main\n    rv = self.invoke(ctx)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\", line 1066, in invoke\n    return ctx.invoke(self.callback, **ctx.params)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\", line 610, in invoke\n    return callback(*args, **kwargs)\n  File \"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/decorators.py\", line 21, in new_func\n    return f(get_current_context(), *args, **kwargs)\n  File \"train.py\", line 64, in main\n    run_lgbm(command_line)\n  File \"train.py\", line 158, in run_lgbm\n    raise RuntimeError(f'Lightgbm exited with exit code: {s.returncode}')\n"
        },
        "messageParameters": {}
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with RuntimeError: Lightgbm exited with exit code: -13\",\n        \"detailsUri\": \"https://aka.ms/azureml-known-errors\",\n        \"details\": [],\n        \"debugInfo\": {\n            \"type\": \"RuntimeError\",\n            \"message\": \"Lightgbm exited with exit code: -13\",\n            \"stackTrace\": \"  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/smart-assistant-ws/azureml/lightgbm-parallel_1586375317_8e03268c/mounts/workspaceblobstore/azureml/lightgbm-parallel_1586375317_8e03268c/azureml-setup/context_manager_injector.py\\\", line 127, in execute_with_context\\n    runpy.run_path(sys.argv[0], globals(), run_name=\\\"__main__\\\")\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\\\", line 263, in run_path\\n    pkg_name=pkg_name, script_name=fname)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\\\", line 96, in _run_module_code\\n    mod_name, mod_spec, pkg_name, script_name)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/runpy.py\\\", line 85, in _run_code\\n    exec(code, run_globals)\\n  File \\\"train.py\\\", line 172, in <module>\\n    main()\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\\\", line 829, in __call__\\n    return self.main(*args, **kwargs)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\\\", line 782, in main\\n    rv = self.invoke(ctx)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\\\", line 1066, in invoke\\n    return ctx.invoke(self.callback, **ctx.params)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/core.py\\\", line 610, in invoke\\n 
   return callback(*args, **kwargs)\\n  File \\\"/azureml-envs/azureml_f93775a230cd65f7f04349c78d61d976/lib/python3.7/site-packages/click/decorators.py\\\", line 21, in new_func\\n    return f(get_current_context(), *args, **kwargs)\\n  File \\\"train.py\\\", line 64, in main\\n    run_lgbm(command_line)\\n  File \\\"train.py\\\", line 158, in run_lgbm\\n    raise RuntimeError(f'Lightgbm exited with exit code: {s.returncode}')\\n\"\n        },\n        \"messageParameters\": {}\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}