Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding flags to expose gradient clipping args in Trainer #361

Merged
merged 10 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"dataset_name": "CIFAR10",
"val_size": 0,
"class_groupings": ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)),
"num_outputs": 10,
}

for seed in range(10):
Expand Down
10 changes: 10 additions & 0 deletions src/renate/benchmark/experimentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ def execute_experiment_job(
accelerator: defaults.SUPPORTED_ACCELERATORS_TYPE = defaults.ACCELERATOR,
devices: int = defaults.DEVICES,
deterministic_trainer: bool = True,
gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
job_name: str = defaults.JOB_NAME,
strategy: str = defaults.DISTRIBUTED_STRATEGY,
precision: str = defaults.PRECISION,
Expand Down Expand Up @@ -216,6 +218,8 @@ def execute_experiment_job(
strategy=strategy,
precision=precision,
save_state=save_state,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
)
_execute_experiment_job_remotely(
job_name=job_name,
Expand All @@ -235,6 +239,8 @@ def execute_experiment_job(
accelerator=accelerator,
devices=devices,
deterministic_trainer=deterministic_trainer,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
seed=seed,
requirements_file=requirements_file,
role=role,
Expand Down Expand Up @@ -267,6 +273,8 @@ def _execute_experiment_job_locally(
strategy: str,
precision: str,
save_state: bool,
gradient_clip_val: Optional[float],
gradient_clip_algorithm: Optional[str],
) -> None:
"""Runs an experiment, combining hyperparameter tuning and model for multiple updates.

Expand Down Expand Up @@ -359,6 +367,8 @@ def _execute_experiment_job_locally(
precision=precision,
strategy=strategy,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)
move_to_uri(output_state_url, input_state_url)
if save_state:
Expand Down
13 changes: 13 additions & 0 deletions src/renate/cli/parsing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,19 @@ def _standard_arguments() -> Dict[str, Dict[str, Any]]:
"argument_group": OPTIONAL_ARGS_GROUP,
"true_type": bool,
},
"gradient_clip_val": {
"type": lambda x: None if x in ["None", None] else x,
"default": defaults.GRADIENT_CLIP_VAL,
"help": "The value at which to clip gradients. None disables clipping.",
"argument_group": OPTIONAL_ARGS_GROUP,
},
"gradient_clip_algorithm": {
# Maps the literal string "None" (or an actual None) to None; any other value
# is passed through unchanged as a string.
"type": lambda x: None if x in ["None", None] else x,
"default": defaults.GRADIENT_CLIP_ALGORITHM,
"help": "Gradient clipping algorithm to use.",
# None is listed as a choice so that the converted "None" value is accepted.
"choices": ["norm", "value", None],
"argument_group": OPTIONAL_ARGS_GROUP,
},
"prepare_data": {
"type": str,
"default": "True",
Expand Down
2 changes: 2 additions & 0 deletions src/renate/cli/run_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def run(self):
devices=args.devices,
precision=args.precision,
strategy=args.strategy,
gradient_clip_algorithm=args.gradient_clip_algorithm,
gradient_clip_val=args.gradient_clip_val,
early_stopping_enabled=args.early_stopping,
deterministic_trainer=args.deterministic_trainer,
loss_fn=loss_fn,
Expand Down
2 changes: 2 additions & 0 deletions src/renate/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
VOLUME_SIZE = 60
DISTRIBUTED_STRATEGY = "ddp"
PRECISION = "32"
GRADIENT_CLIP_VAL = None
GRADIENT_CLIP_ALGORITHM = None

LEARNER = "ER"
INSTANCE_COUNT = 1
Expand Down
10 changes: 10 additions & 0 deletions src/renate/training/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def run_training_job(
strategy: str = defaults.DISTRIBUTED_STRATEGY,
precision: str = defaults.PRECISION,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
job_name: str = defaults.JOB_NAME,
) -> Optional[Tuner]:
"""Starts updating the model including hyperparameter optimization.
Expand Down Expand Up @@ -179,6 +181,8 @@ def run_training_job(
devices=devices,
strategy=strategy,
precision=precision,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
deterministic_trainer=deterministic_trainer,
)
submit_remote_job(
Expand Down Expand Up @@ -213,6 +217,8 @@ def run_training_job(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
job_name=job_name,
)

Expand Down Expand Up @@ -527,6 +533,8 @@ def _execute_training_and_tuning_job_locally(
deterministic_trainer: bool,
strategy: str,
precision: str,
gradient_clip_algorithm: Optional[str],
gradient_clip_val: Optional[float],
):
"""Executes the training job locally.

Expand All @@ -547,6 +555,8 @@ def _execute_training_and_tuning_job_locally(
config_space["strategy"] = strategy
config_space["precision"] = precision
config_space["deterministic_trainer"] = deterministic_trainer
config_space["gradient_clip_val"] = gradient_clip_val
config_space["gradient_clip_algorithm"] = gradient_clip_algorithm
if input_state_url is not None:
config_space["input_state_url"] = input_state_url

Expand Down
18 changes: 17 additions & 1 deletion src/renate/updaters/avalanche/model_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
import logging
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Type
from typing import Any, Callable, Dict, List, Optional, Type, Union

import torch
import torchmetrics
Expand Down Expand Up @@ -274,6 +274,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"batch_size": batch_size,
Expand Down Expand Up @@ -306,6 +308,8 @@ def __init__(
devices=devices,
strategy=strategy,
precision=precision,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
)


Expand Down Expand Up @@ -338,6 +342,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"batch_size": batch_size,
Expand Down Expand Up @@ -369,6 +375,8 @@ def __init__(
devices=devices,
strategy=strategy,
precision=precision,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
)


Expand Down Expand Up @@ -402,6 +410,8 @@ def __init__(
strategy: Optional[str] = defaults.DISTRIBUTED_STRATEGY,
precision: str = defaults.PRECISION,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"batch_size": batch_size,
Expand Down Expand Up @@ -434,6 +444,8 @@ def __init__(
devices=devices,
strategy=strategy,
precision=precision,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
)


Expand Down Expand Up @@ -466,6 +478,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -497,4 +511,6 @@ def __init__(
devices=devices,
strategy=strategy,
precision=precision,
gradient_clip_val=gradient_clip_val,
gradient_clip_algorithm=gradient_clip_algorithm,
)
22 changes: 21 additions & 1 deletion src/renate/updaters/experimental/er.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
import abc
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
import torchmetrics
Expand Down Expand Up @@ -552,6 +552,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -590,6 +592,8 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)


Expand Down Expand Up @@ -629,6 +633,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it fair to assume this is Optional[float]?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed.

gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -668,6 +674,8 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)


Expand Down Expand Up @@ -708,6 +716,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -748,6 +758,8 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)


Expand Down Expand Up @@ -791,6 +803,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -834,6 +848,8 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)


Expand Down Expand Up @@ -883,6 +899,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -932,4 +950,6 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)
6 changes: 5 additions & 1 deletion src/renate/updaters/experimental/fine_tuning.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from functools import partial
from typing import Callable, Dict, List, Optional
from typing import Callable, Dict, List, Optional, Union

import torch
import torchmetrics
Expand Down Expand Up @@ -42,6 +42,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"batch_size": batch_size,
Expand Down Expand Up @@ -73,4 +75,6 @@ def __init__(
deterministic_trainer=deterministic_trainer,
strategy=strategy,
precision=precision,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)
6 changes: 5 additions & 1 deletion src/renate/updaters/experimental/gdumb.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
import torchmetrics
Expand Down Expand Up @@ -132,6 +132,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"memory_size": memory_size,
Expand Down Expand Up @@ -166,4 +168,6 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)
6 changes: 5 additions & 1 deletion src/renate/updaters/experimental/joint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
import os
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
import torchmetrics
Expand Down Expand Up @@ -121,6 +121,8 @@ def __init__(
precision: str = defaults.PRECISION,
seed: int = defaults.SEED,
deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
gradient_clip_val: Union[int, float, None] = defaults.GRADIENT_CLIP_VAL,
gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
):
learner_kwargs = {
"batch_size": batch_size,
Expand Down Expand Up @@ -151,4 +153,6 @@ def __init__(
strategy=strategy,
precision=precision,
deterministic_trainer=deterministic_trainer,
gradient_clip_algorithm=gradient_clip_algorithm,
gradient_clip_val=gradient_clip_val,
)
Loading
Loading