diff --git a/src/torchrunx/integrations/cli.py b/src/torchrunx/integrations/cli.py index ac5a616c..37350b4d 100644 --- a/src/torchrunx/integrations/cli.py +++ b/src/torchrunx/integrations/cli.py @@ -55,8 +55,8 @@ def add_torchrunx_argument_group(parser: ArgumentParser) -> None: group.add_argument( "--agent-timeout", type=int, - default=180, - help="Agent communication timeout in seconds. Default: 180.", + default=60, + help="Agent communication timeout in seconds. Default: 60.", ) group.add_argument( diff --git a/src/torchrunx/launcher.py b/src/torchrunx/launcher.py index c4a96e30..7cf21a11 100644 --- a/src/torchrunx/launcher.py +++ b/src/torchrunx/launcher.py @@ -63,7 +63,7 @@ class Launcher: Use GLOO for CPU backend. ``None`` for no process group.""" worker_timeout: int = 600 """Worker process group timeout (seconds).""" - agent_timeout: int = 180 + agent_timeout: int = 60 """Agent communication timeout (seconds).""" copy_env_vars: tuple[str, ...] = DEFAULT_ENV_VARS_FOR_COPY """Environment variables to copy from the launcher process to workers.