From 2dd5b9e3254ea4a771fe0cda7b65fd07e08d8765 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 10 Jul 2019 12:31:00 -0700 Subject: [PATCH] update API doc for single-node distributed training --- python/sparkdl/horovod/runner_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/sparkdl/horovod/runner_base.py b/python/sparkdl/horovod/runner_base.py index 7435706d..f668b1cb 100644 --- a/python/sparkdl/horovod/runner_base.py +++ b/python/sparkdl/horovod/runner_base.py @@ -47,7 +47,7 @@ def __init__(self, np): which maps to a GPU on a GPU cluster or a CPU core on a CPU cluster. Accepted values are: - - If -1, this will spawn a subprocess on the driver node to run the Horovod job locally. + - If <0, this will spawn -np subprocesses on the driver node to run Horovod locally. Training stdout and stderr messages go to the notebook cell output, and are also available in driver logs in case the cell output is truncated. This is useful for debugging and we recommend testing your code under this mode first. However, be @@ -63,8 +63,6 @@ def __init__(self, np): - If 0, this will use all task slots on the cluster to launch the job. """ self.num_processor = np - if self.num_processor < -1: - raise ValueError("Invalid number of processes: np = %s" % str(self.num_processor)) def run(self, main, **kwargs): """