From cb9112b57df5b5f0a47319ca2434aeff9eaa970c Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Tue, 23 Mar 2021 01:53:00 +0200 Subject: [PATCH 1/7] typings & few docs --- catalyst/core/engine.py | 44 ++++++++++++++++++++++++++------------- catalyst/engines/torch.py | 31 +++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/catalyst/core/engine.py b/catalyst/core/engine.py index 567cf70253..ea92d4e616 100644 --- a/catalyst/core/engine.py +++ b/catalyst/core/engine.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any, Dict, Generator from abc import ABC, abstractmethod from contextlib import contextmanager @@ -6,7 +6,7 @@ @contextmanager -def nullcontext(enter_result=None): +def nullcontext(enter_result: Any = None) -> Generator[Any]: """Context handler.""" yield enter_result @@ -37,8 +37,7 @@ def rank(self) -> int: @property @abstractmethod def world_size(self) -> int: - """Process world size for distributed training.""" - # only for ddp + """Process world size for distributed training.""" pass @property @@ -49,26 +48,28 @@ def is_ddp(self) -> bool: @property def is_master_process(self) -> bool: """Checks if a process is master process. - Should be implemented only for DDP setup in other cases should always return True. + Should be implemented only for distributed training (ddp). + For non distributed training should always return `True`. Returns: - `True` if current process is a master process, otherwise `False`. + `True` if current process is a master process in other cases return `False`. """ return True @property def is_worker_process(self) -> bool: """Checks if a process is worker process. - Should be implemented only for DDP setup in other cases should always return False. + Should be implemented only for distributed training (ddp). + For non distributed training should always return `False`. Returns: - `True` if current process is a worker process, otherwise `False`. + `True` if current process is a worker process in other cases return `False`. """ return False @abstractmethod def sync_device(self, tensor_or_module: Any) -> Any: - """Moves ``tensor_or_module`` to Engine's deivce. + """Moves ``tensor_or_module`` to Engine's device. Args: tensor_or_module: tensor to mode @@ -89,23 +90,35 @@ def init_components( @abstractmethod def deinit_components(self): - """Deinits the runs components.""" - # only for ddp + """Deinits the runs components. + In distributed mode should destroy process group. + """ pass @abstractmethod def zero_grad(self, loss, model, optimizer) -> None: - """Abstraction over ``model.zero_grad()`` step.""" + """Abstraction over ``model.zero_grad()`` step. + Should be overloaded in cases when required to set arguments + for ``model.zero_grad()`` like `set_to_none=True` or + you need to use custom scheme which replaces/improves + `.zero_grad()` method. + """ pass @abstractmethod def backward_loss(self, loss, model, optimizer) -> None: - """Abstraction over ``loss.backward()`` step.""" + """Abstraction over ``loss.backward()`` step. + Should be overloaded in cases when required loss scaling. + Examples - APEX and AMP. + """ pass @abstractmethod def optimizer_step(self, loss, model, optimizer) -> None: - """Abstraction over ``optimizer.step()`` step.""" + """Abstraction over ``optimizer.step()`` step. + Should be overloaded in cases when required gradient scaling. + Example - AMP. 
+ """ pass @abstractmethod @@ -174,7 +187,8 @@ def load_checkpoint(self, path: str) -> Dict: pass def autocast(self, *args, **kwargs): - """AMP scaling context. Default autocast context does not scale anything. + """AMP scaling context. + Default autocast context does not scale anything. Args: *args: some args diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index 0a8f135bcd..beb7939416 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -23,6 +23,37 @@ class DeviceEngine(IEngine): Args: device (str, optional): use device, default is `"cpu"`. + + Runner example: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DeviceEngine("cuda:1") + # ... + + Config example: + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DeviceEngine + device: cuda:1 + + stages: + ... + """ def __init__(self, device: str = None): From 3738a947ac07d6122da51422a021706364689d5c Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Tue, 23 Mar 2021 02:10:31 +0200 Subject: [PATCH 2/7] typing fix; disabled `dist.barrier()` in optimizer step for ddp --- catalyst/core/engine.py | 4 ++-- catalyst/engines/torch.py | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/catalyst/core/engine.py b/catalyst/core/engine.py index ea92d4e616..cf832c15a0 100644 --- a/catalyst/core/engine.py +++ b/catalyst/core/engine.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Generator +from typing import Any, Dict from abc import ABC, abstractmethod from contextlib import contextmanager @@ -6,7 +6,7 @@ @contextmanager -def nullcontext(enter_result: Any = None) -> Generator[Any]: +def nullcontext(enter_result: Any = None): """Context handler.""" yield enter_result diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index beb7939416..f885d78e58 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -71,7 +71,7 @@ def rank(self) -> int: @property def world_size(self) -> int: - """Process world size for distributed training.""" + """Process world size for distributed training.""" return 1 def sync_device( @@ -293,7 +293,14 @@ def is_worker_process(self) -> bool: return self._rank > 0 def setup_process(self, rank: int = -1, world_size: int = 1): - """Initialize DDP variables and processes.""" + """Initialize DDP variables and processes. + + Args: + rank: process rank. Default is `-1`. + world_size: number of devices in netwok to expect for train. + Default is `1`. 
+ + """ self._rank = rank self._world_size = world_size os.environ["MASTER_ADDR"] = str(self.address) @@ -367,7 +374,7 @@ def backward_loss(self, loss, model, optimizer) -> None: def optimizer_step(self, loss, model, optimizer) -> None: """Abstraction over ``optimizer.step()`` step.""" optimizer.step() - dist.barrier() + # dist.barrier() __all__ = ["DeviceEngine", "DataParallelEngine", "DistributedDataParallelEngine"] From cc212939ee9aaecc1dc3de0e88e5b178cbc7ff3f Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Fri, 26 Mar 2021 01:30:42 +0200 Subject: [PATCH 3/7] docs --- catalyst/engines/amp.py | 97 ++++++++++++++++++++++-- catalyst/engines/apex.py | 153 ++++++++++++++++++++++++++++++++------ catalyst/engines/torch.py | 72 ++++++++++++++++-- 3 files changed, 285 insertions(+), 37 deletions(-) diff --git a/catalyst/engines/amp.py b/catalyst/engines/amp.py index ac4e940c60..2330ad2493 100644 --- a/catalyst/engines/amp.py +++ b/catalyst/engines/amp.py @@ -10,6 +10,35 @@ class AMPEngine(DeviceEngine): Args: device: used device, default is `"cuda"`. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.AMPEngine("cuda:1") + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: AMPEngine + device: cuda:1 + + stages: + ... + """ def __init__(self, device: str = "cuda"): @@ -36,7 +65,36 @@ def autocast(self): class DataParallelAMPEngine(AMPEngine): - """AMP multi-gpu training device engine.""" + """AMP multi-gpu training device engine. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DataParallelAMPEngine() + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DataParallelAMPEngine + + stages: + ... + + """ def __init__(self): """Init.""" @@ -71,10 +129,39 @@ class DistributedDataParallelAMPEngine(DistributedDataParallelEngine): """Distributed AMP multi-gpu training device engine. Args: - address: process address to use (required for PyTorch backend), default is `"localhost"`. - port: process port to listen (required for PyTorch backend), default is `"12345"`. - backend: multiprocessing backend to use, default is `"nccl"`. - world_size: number of processes. + address (str): process address to use (required for PyTorch backend), default is `"localhost"`. + port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. + backend (str): multiprocessing backend to use, default is `"nccl"`. + world_size (int): number of processes. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DistributedDataParallelAMPEngine(port=12345) + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DistributedDataParallelAMPEngine + port: 12345 + + stages: + ... + """ def __init__( diff --git a/catalyst/engines/apex.py b/catalyst/engines/apex.py index 0edd6dcd4c..e9369effd8 100644 --- a/catalyst/engines/apex.py +++ b/catalyst/engines/apex.py @@ -125,24 +125,56 @@ class APEXEngine(DeviceEngine): """Apex single training device engine. Args: - device: use device, default is `"cuda"`. - opt_level: optimization level, should be one of "O0", "O1", "O2", "O3" or "O4". + device (str of int): use device, default is `"cuda"`. 
+ opt_level (str): optimization level, should be one of ``"O0"``, + ``"O1"``, ``"O2"`` or ``"O3"``. - - "O0" - no-op training - - "O1" - mixed precision (FP16) training (default) - - "O2" - "almost" mixed precision training - - "O3" - another implementation of mixed precision training + - ``"O0"`` - no-op training + - ``"O1"`` - mixed precision (FP16) training (default) + - ``"O2"`` - "almost" mixed precision training + - ``"O3"`` - another implementation of mixed precision training Details about levels can be found here: https://nvidia.github.io/apex/amp.html#opt-levels - keep_batchnorm_fp32: To enhance precision and enable cudnn batchnorm + keep_batchnorm_fp32 (bool): To enhance precision and enable CUDNN batchnorm (which improves performance), it’s often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16. - loss_scale: If loss_scale is a float value, - use this value as the static (fixed) loss scale. If loss_scale is the string "dynamic", + loss_scale (float or str): If loss_scale is a float value, + use this value as the static (fixed) loss scale + If loss_scale is the string "dynamic", adaptively adjust the loss scale over time. Dynamic loss scale adjustments are performed by Amp automatically. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.APEXEngine(opt_level="O1", keep_batchnorm_fp32=False) + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: APEXEngine + opt_level: O1 + keep_batchnorm_fp32: false + + stages: + ... + """ def __init__( @@ -264,7 +296,49 @@ def unpack_checkpoint( class DataParallelApexEngine(APEXEngine): - """Apex multi-gpu training device engine.""" + """Apex multi-gpu training device engine. + + Args: + opt_level (str): optimization level, should be one of ``"O0"``, + ``"O1"``, ``"O2"`` or ``"O3"``. + + - ``"O0"`` - no-op training + - ``"O1"`` - mixed precision (FP16) training (default) + - ``"O2"`` - "almost" mixed precision training + - ``"O3"`` - another implementation of mixed precision training + + Details about levels can be found here: + https://nvidia.github.io/apex/amp.html#opt-levels + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DataParallelApexEngine(opt_level="O1") + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DataParallelApexEngine + opt_level: O1 + + stages: + ... + + """ def __init__(self, opt_level: str = "O1"): """Init.""" @@ -303,29 +377,60 @@ class DistributedDataParallelApexEngine(DistributedDataParallelEngine): """Distributed Apex MultiGPU training device engine. Args: - address: process address to use (required for PyTorch backend), default is `"localhost"`. - port: process port to listen (required for PyTorch backend), default is `"12345"`. - backend: multiprocessing backend to use, default is `"nccl"`. - world_size: number of processes. - opt_level: optimization level, should be one of "O0", "O1", "O2", "O3" or "O4". - - - "O0" - no-op training - - "O1" - mixed precision (FP16) training (default) - - "O2" - "almost" mixed precision training - - "O3" - another implementation of mixed precision training + address (str): process address to use (required for PyTorch backend), default is `"localhost"`. + port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. 
+ backend (str): multiprocessing backend to use, default is `"nccl"`. + world_size (int): number of processes. + opt_level (str): optimization level, should be one of ``"O0"``, + ``"O1"``, ``"O2"`` or ``"O3"``. + + - ``"O0"`` - no-op training + - ``"O1"`` - mixed precision (FP16) training (default) + - ``"O2"`` - "almost" mixed precision training + - ``"O3"`` - another implementation of mixed precision training Details about levels can be found here: https://nvidia.github.io/apex/amp.html#opt-levels - keep_batchnorm_fp32: To enhance precision and enable cudnn batchnorm + keep_batchnorm_fp32 (bool): To enhance precision and enable cudnn batchnorm (which improves performance), it’s often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16. - loss_scale: If loss_scale is a float value, - use this value as the static (fixed) loss scale. If loss_scale is the string "dynamic", + loss_scale (float or str): If loss_scale is a float value, + use this value as the static (fixed) loss scale. + If loss_scale is the string "dynamic", adaptively adjust the loss scale over time. Dynamic loss scale adjustments are performed by Amp automatically. - delay_all_reduce: boolean flag for delayed all reduce + delay_all_reduce (bool): boolean flag for delayed all reduce, default is `True`. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DistributedDataParallelApexEngine(port=12345, opt_level="O1") + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DistributedDataParallelApexEngine + port: 12345 + opt_level: O1 + + stages: + ... """ def __init__( diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index f885d78e58..40149746b0 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -24,7 +24,7 @@ class DeviceEngine(IEngine): Args: device (str, optional): use device, default is `"cpu"`. - Runner example: + Examples: .. code-block:: python @@ -36,8 +36,6 @@ def get_engine(self): return dl.DeviceEngine("cuda:1") # ... - Config example: - .. code-block:: yaml args: @@ -198,7 +196,36 @@ def load_checkpoint(self, path: str): class DataParallelEngine(DeviceEngine): - """MultiGPU training device engine.""" + """MultiGPU training device engine. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... + def get_engine(self): + return dl.DataParallelEngine() + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DataParallelEngine + + stages: + ... + + """ def __init__(self): """Init""" @@ -233,10 +260,39 @@ class DistributedDataParallelEngine(DeviceEngine): """Distributed MultiGPU training device engine. Args: - address: process address to use (required for PyTorch backend), default is `"localhost"`. - port: process port to listen (required for PyTorch backend), default is `"12345"`. - backend: multiprocessing backend to use, default is `"nccl"`. - world_size: number of processes. + address (str): process address to use (required for PyTorch backend), default is `"localhost"`. + port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. + backend (str): multiprocessing backend to use, default is `"nccl"`. + world_size (int): number of processes. + + Examples: + + .. code-block:: python + + from catalyst import dl + + class MyRunner(dl.IRunner): + # ... 
+ def get_engine(self): + return dl.DistributedDataParallelEngine(port=12345) + # ... + + .. code-block:: yaml + + args: + logs: ... + + model: + _target_: ... + ... + + engine: + _target_: DistributedDataParallelEngine + port: 12345 + + stages: + ... + """ def __init__( From d701e9cc2e2f4ae7c903fb4a1a2b6f0148bdd120 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 28 Mar 2021 20:11:57 +0300 Subject: [PATCH 4/7] docs: fixed long lines with docs --- catalyst/engines/amp.py | 11 +++++++---- catalyst/engines/apex.py | 37 ++++++++++++++++++++++--------------- catalyst/engines/torch.py | 13 ++++++++----- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/catalyst/engines/amp.py b/catalyst/engines/amp.py index 52191ce334..1d0f850e3b 100644 --- a/catalyst/engines/amp.py +++ b/catalyst/engines/amp.py @@ -133,10 +133,13 @@ class DistributedDataParallelAMPEngine(DistributedDataParallelEngine): """Distributed AMP multi-gpu training device engine. Args: - address (str): process address to use (required for PyTorch backend), default is `"localhost"`. - port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. - backend (str): multiprocessing backend to use, default is `"nccl"`. - world_size (int): number of processes. + address: process address to use + (required for PyTorch backend), default is `"localhost"`. + port: process port to listen + (required for PyTorch backend), default is `"12345"`. + backend: multiprocessing backend to use, + default is `"nccl"`. + world_size: number of processes. Examples: diff --git a/catalyst/engines/apex.py b/catalyst/engines/apex.py index e09ab5e7e7..c46609afad 100644 --- a/catalyst/engines/apex.py +++ b/catalyst/engines/apex.py @@ -125,8 +125,8 @@ class APEXEngine(DeviceEngine): """Apex single training device engine. Args: - device (str of int): use device, default is `"cuda"`. - opt_level (str): optimization level, should be one of ``"O0"``, + device: use device, default is `"cuda"`. + opt_level: optimization level, should be one of ``"O0"``, ``"O1"``, ``"O2"`` or ``"O3"``. - ``"O0"`` - no-op training @@ -136,11 +136,11 @@ class APEXEngine(DeviceEngine): Details about levels can be found here: https://nvidia.github.io/apex/amp.html#opt-levels - keep_batchnorm_fp32 (bool): To enhance precision and enable CUDNN batchnorm + keep_batchnorm_fp32: To enhance precision and enable CUDNN batchnorm (which improves performance), it’s often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16. - loss_scale (float or str): If loss_scale is a float value, + loss_scale: If loss_scale is a float value, use this value as the static (fixed) loss scale If loss_scale is the string "dynamic", adaptively adjust the loss scale over time. @@ -299,7 +299,7 @@ class DataParallelApexEngine(APEXEngine): """Apex multi-gpu training device engine. Args: - opt_level (str): optimization level, should be one of ``"O0"``, + opt_level: optimization level, should be one of ``"O0"``, ``"O1"``, ``"O2"`` or ``"O3"``. - ``"O0"`` - no-op training @@ -377,11 +377,14 @@ class DistributedDataParallelApexEngine(DistributedDataParallelEngine): """Distributed Apex MultiGPU training device engine. Args: - address (str): process address to use (required for PyTorch backend), default is `"localhost"`. - port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. - backend (str): multiprocessing backend to use, default is `"nccl"`. - world_size (int): number of processes. 
- opt_level (str): optimization level, should be one of ``"O0"``, + address: process address to use + (required for PyTorch backend), default is `"localhost"`. + port: process port to listen + (required for PyTorch backend), default is `"12345"`. + backend: multiprocessing backend to use, + default is `"nccl"`. + world_size: number of processes. + opt_level: optimization level, should be one of ``"O0"``, ``"O1"``, ``"O2"`` or ``"O3"``. - ``"O0"`` - no-op training @@ -392,16 +395,17 @@ class DistributedDataParallelApexEngine(DistributedDataParallelEngine): Details about levels can be found here: https://nvidia.github.io/apex/amp.html#opt-levels - keep_batchnorm_fp32 (bool): To enhance precision and enable cudnn batchnorm - (which improves performance), + keep_batchnorm_fp32: To enhance precision and + enable CUDNN batchnorm (which improves performance), it’s often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16. - loss_scale (float or str): If loss_scale is a float value, + loss_scale: If loss_scale is a float value, use this value as the static (fixed) loss scale. If loss_scale is the string "dynamic", adaptively adjust the loss scale over time. Dynamic loss scale adjustments are performed by Amp automatically. - delay_all_reduce (bool): boolean flag for delayed all reduce, default is `True`. + delay_all_reduce (bool): boolean flag for delayed all reduce, + default is `True`. Examples: @@ -412,7 +416,10 @@ class DistributedDataParallelApexEngine(DistributedDataParallelEngine): class MyRunner(dl.IRunner): # ... def get_engine(self): - return dl.DistributedDataParallelApexEngine(port=12345, opt_level="O1") + return dl.DistributedDataParallelApexEngine( + port=12345, + opt_level="O1" + ) # ... .. code-block:: yaml diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index 2b7f48778e..fc34bb7e48 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -22,7 +22,7 @@ class DeviceEngine(IEngine): """Single training device engine. Args: - device (str, optional): use device, default is `"cpu"`. + device: use device, default is `"cpu"`. Examples: @@ -264,10 +264,13 @@ class DistributedDataParallelEngine(DeviceEngine): """Distributed MultiGPU training device engine. Args: - address (str): process address to use (required for PyTorch backend), default is `"localhost"`. - port (str or int): process port to listen (required for PyTorch backend), default is `"12345"`. - backend (str): multiprocessing backend to use, default is `"nccl"`. - world_size (int): number of processes. + address: process address to use + (required for PyTorch backend), default is `"localhost"`. + port: process port to listen + (required for PyTorch backend), default is `"12345"`. + backend: multiprocessing backend to use, + default is `"nccl"`. + world_size: number of processes. Examples: From 8d4a73d493042221e977556941642e40302ad8c0 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 28 Mar 2021 20:15:26 +0300 Subject: [PATCH 5/7] docs fixes --- catalyst/core/engine.py | 10 ++++++++++ catalyst/engines/torch.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/catalyst/core/engine.py b/catalyst/core/engine.py index cf832c15a0..9fb0241ecf 100644 --- a/catalyst/core/engine.py +++ b/catalyst/core/engine.py @@ -102,6 +102,11 @@ def zero_grad(self, loss, model, optimizer) -> None: for ``model.zero_grad()`` like `set_to_none=True` or you need to use custom scheme which replaces/improves `.zero_grad()` method. 
+ + Args: + loss: tensor with loss value. + model: model module. + optimizer: model optimizer. """ pass @@ -110,6 +115,11 @@ def backward_loss(self, loss, model, optimizer) -> None: """Abstraction over ``loss.backward()`` step. Should be overloaded in cases when required loss scaling. Examples - APEX and AMP. + + Args: + loss: tensor with loss value. + model: model module. + optimizer: model optimizer. """ pass diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index fc34bb7e48..d843665f4b 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -357,7 +357,7 @@ def is_worker_process(self) -> bool: def setup_process(self, rank: int = -1, world_size: int = 1): """Initialize DDP variables and processes. - + Args: rank: process rank. Default is `-1`. world_size: number of devices in netwok to expect for train. From fbfed7a72bfc180cc82c59c14908cf2ff33bd662 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 28 Mar 2021 20:19:58 +0300 Subject: [PATCH 6/7] optimizer args --- catalyst/core/engine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/catalyst/core/engine.py b/catalyst/core/engine.py index 9fb0241ecf..a59c6ba11a 100644 --- a/catalyst/core/engine.py +++ b/catalyst/core/engine.py @@ -128,6 +128,11 @@ def optimizer_step(self, loss, model, optimizer) -> None: """Abstraction over ``optimizer.step()`` step. Should be overloaded in cases when required gradient scaling. Example - AMP. + + Args: + loss: tensor with loss value. + model: model module. + optimizer: model optimizer. """ pass From 5991ce7642488f9f78c42c9aff3e285550e68f2c Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 28 Mar 2021 20:21:05 +0300 Subject: [PATCH 7/7] removed empty line --- catalyst/engines/torch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/catalyst/engines/torch.py b/catalyst/engines/torch.py index d843665f4b..176c9d1c34 100644 --- a/catalyst/engines/torch.py +++ b/catalyst/engines/torch.py @@ -362,7 +362,6 @@ def setup_process(self, rank: int = -1, world_size: int = 1): rank: process rank. Default is `-1`. world_size: number of devices in netwok to expect for train. Default is `1`. - """ self._rank = rank self._world_size = world_size
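
Editor's note on the ``zero_grad`` / ``backward_loss`` / ``optimizer_step`` abstractions documented in these patches: the added docstrings say the methods should be overloaded when loss or gradient scaling is required (APEX, AMP). As a hedged illustration only, here is a minimal sketch of how such an engine could look with ``torch.cuda.amp.GradScaler``, plus a hypothetical training step that composes the three hooks. The ``SketchAMPEngine`` class and ``train_step`` function are invented for this example and do not reproduce Catalyst's actual ``AMPEngine`` code.

.. code-block:: python

    from torch.cuda.amp import GradScaler, autocast


    class SketchAMPEngine:
        """Illustrative engine with loss/gradient scaling, as the docstrings describe."""

        def __init__(self):
            self.scaler = GradScaler()

        def zero_grad(self, loss, model, optimizer) -> None:
            # abstraction over ``model.zero_grad()``; passing set_to_none=True
            # is one of the variations the docstring mentions
            model.zero_grad()

        def backward_loss(self, loss, model, optimizer) -> None:
            # scaled backward pass instead of a plain ``loss.backward()``
            self.scaler.scale(loss).backward()

        def optimizer_step(self, loss, model, optimizer) -> None:
            # unscales gradients, skips the step on inf/NaN, then updates the scale
            self.scaler.step(optimizer)
            self.scaler.update()

        def autocast(self):
            # mixed-precision forward context; the base engine returns a no-op context
            return autocast()


    def train_step(engine, model, criterion, optimizer, batch):
        # hypothetical inner loop showing how a runner composes the three hooks
        features, targets = batch
        with engine.autocast():
            logits = model(features)
            loss = criterion(logits, targets)
        engine.zero_grad(loss, model, optimizer)
        engine.backward_loss(loss, model, optimizer)
        engine.optimizer_step(loss, model, optimizer)
        return loss.item()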
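
The ``setup_process`` docstring added in patch 2, together with the note that ``deinit_components`` should destroy the process group, covers the usual ``torch.distributed`` bootstrap. A sketch of what that setup and teardown typically look like follows; the free functions are illustrative, not the engine's actual methods, and the defaults simply mirror the documented ``address`` / ``port`` / ``backend`` / ``world_size`` arguments.

.. code-block:: python

    import os

    import torch.distributed as dist


    def setup_process(rank: int = -1, world_size: int = 1,
                      address: str = "localhost", port: str = "12345",
                      backend: str = "nccl") -> None:
        # the default env:// init method reads MASTER_ADDR / MASTER_PORT,
        # which is why the engine exports them before init_process_group
        os.environ["MASTER_ADDR"] = str(address)
        os.environ["MASTER_PORT"] = str(port)
        dist.init_process_group(backend, rank=rank, world_size=world_size)


    def cleanup_process() -> None:
        # what ``deinit_components`` is documented to do in distributed mode
        dist.destroy_process_group()

Each spawned worker would call ``setup_process`` with its own ``rank``; under that convention ``is_master_process`` reduces to ``rank == 0`` and ``is_worker_process`` to ``rank > 0``, which matches the properties shown in the diff.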
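
Patch 2 labels its first change a typing fix: the ``-> Generator[Any]`` annotation added in patch 1 is not a valid parameterization, because ``typing.Generator`` expects yield, send, and return types, and on the Python versions current when these patches were written subscripting it with a single argument raises ``TypeError`` as soon as the annotation is evaluated. An annotation that would have type-checked, shown purely as an illustration:

.. code-block:: python

    from typing import Any, Iterator
    from contextlib import contextmanager


    @contextmanager
    def nullcontext(enter_result: Any = None) -> Iterator[Any]:
        """No-op context handler, as defined in catalyst/core/engine.py."""
        # ``Generator[Any, None, None]`` is the fully spelled-out alternative;
        # dropping the annotation, as patch 2 does, also works.
        yield enter_result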