diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py
index 6f56e0b1..deb80e8f 100644
--- a/benchmarks/run_benchmark.py
+++ b/benchmarks/run_benchmark.py
@@ -89,11 +89,7 @@ def load_config(scenario_file, model_file, updater_file, dataset_file):
 for seed in range(args.num_repetitions):
     if args.backend == "local":
         experiment_outputs_url = (
-            Path("tmp")
-            / "renate-integration-tests"
-            / args.test_suite
-            / args.job_name
-            / str(seed)
+            Path("tmp") / "renate-integration-tests" / args.job_name / str(seed)
         )
         role = None
         working_directory = str(Path("tmp") / "renate_working_dir")
diff --git a/examples/benchmarking/class_incremental_learning_cifar10_der.py b/examples/benchmarking/class_incremental_learning_cifar10_der.py
index fc8938cc..74c182a7 100644
--- a/examples/benchmarking/class_incremental_learning_cifar10_der.py
+++ b/examples/benchmarking/class_incremental_learning_cifar10_der.py
@@ -22,6 +22,7 @@
     "dataset_name": "CIFAR10",
     "val_size": 0,
     "class_groupings": ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)),
+    "num_outputs": 10,
 }
 
 for seed in range(10):
diff --git a/src/renate/benchmark/experimentation.py b/src/renate/benchmark/experimentation.py
index b4a0d72b..31722498 100644
--- a/src/renate/benchmark/experimentation.py
+++ b/src/renate/benchmark/experimentation.py
@@ -146,6 +146,8 @@ def execute_experiment_job(
     accelerator: defaults.SUPPORTED_ACCELERATORS_TYPE = defaults.ACCELERATOR,
     devices: int = defaults.DEVICES,
     deterministic_trainer: bool = True,
+    gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+    gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     job_name: str = defaults.JOB_NAME,
     strategy: str = defaults.DISTRIBUTED_STRATEGY,
     precision: str = defaults.PRECISION,
@@ -216,6 +218,8 @@ def execute_experiment_job(
             strategy=strategy,
             precision=precision,
             save_state=save_state,
+            gradient_clip_val=gradient_clip_val,
+            gradient_clip_algorithm=gradient_clip_algorithm,
         )
     _execute_experiment_job_remotely(
         job_name=job_name,
@@ -235,6 +239,8 @@ def execute_experiment_job(
         accelerator=accelerator,
         devices=devices,
         deterministic_trainer=deterministic_trainer,
+        gradient_clip_val=gradient_clip_val,
+        gradient_clip_algorithm=gradient_clip_algorithm,
         seed=seed,
         requirements_file=requirements_file,
         role=role,
@@ -267,6 +273,8 @@ def _execute_experiment_job_locally(
     strategy: str,
     precision: str,
     save_state: bool,
+    gradient_clip_val: Optional[float],
+    gradient_clip_algorithm: Optional[str],
 ) -> None:
     """Runs an experiment, combining hyperparameter tuning and model for multiple updates.
 
@@ -359,6 +367,8 @@ def _execute_experiment_job_locally(
             precision=precision,
             strategy=strategy,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
         move_to_uri(output_state_url, input_state_url)
         if save_state:
diff --git a/src/renate/cli/parsing_functions.py b/src/renate/cli/parsing_functions.py
index 82d20612..995e7d22 100644
--- a/src/renate/cli/parsing_functions.py
+++ b/src/renate/cli/parsing_functions.py
@@ -311,6 +311,19 @@ def _standard_arguments() -> Dict[str, Dict[str, Any]]:
             "argument_group": OPTIONAL_ARGS_GROUP,
             "true_type": bool,
         },
+        "gradient_clip_val": {
+            "type": lambda x: None if x == "None" else float(x),
+            "default": defaults.GRADIENT_CLIP_VAL,
+            "help": "The value at which to clip gradients. None disables clipping.",
+            "argument_group": OPTIONAL_ARGS_GROUP,
+        },
+        "gradient_clip_algorithm": {
+            "type": lambda x: None if x == "None" else x,
+            "default": defaults.GRADIENT_CLIP_ALGORITHM,
+            "help": "Gradient clipping algorithm to use.",
+            "choices": ["norm", "value", None],
+            "argument_group": OPTIONAL_ARGS_GROUP,
+        },
         "prepare_data": {
             "type": str,
             "default": "True",
diff --git a/src/renate/cli/run_training.py b/src/renate/cli/run_training.py
index bb08f050..a2d18293 100644
--- a/src/renate/cli/run_training.py
+++ b/src/renate/cli/run_training.py
@@ -169,6 +169,8 @@ def run(self):
             devices=args.devices,
             precision=args.precision,
             strategy=args.strategy,
+            gradient_clip_algorithm=args.gradient_clip_algorithm,
+            gradient_clip_val=args.gradient_clip_val,
             early_stopping_enabled=args.early_stopping,
             deterministic_trainer=args.deterministic_trainer,
             loss_fn=loss_fn,
diff --git a/src/renate/defaults.py b/src/renate/defaults.py
index 27eccd04..9a53861d 100644
--- a/src/renate/defaults.py
+++ b/src/renate/defaults.py
@@ -33,6 +33,8 @@
 VOLUME_SIZE = 60
 DISTRIBUTED_STRATEGY = "ddp"
 PRECISION = "32"
+GRADIENT_CLIP_VAL = None
+GRADIENT_CLIP_ALGORITHM = None
 
 LEARNER = "ER"
 INSTANCE_COUNT = 1
diff --git a/src/renate/training/training.py b/src/renate/training/training.py
index b528fba7..89c0cbb8 100644
--- a/src/renate/training/training.py
+++ b/src/renate/training/training.py
@@ -93,6 +93,8 @@ def run_training_job(
     strategy: str = defaults.DISTRIBUTED_STRATEGY,
     precision: str = defaults.PRECISION,
     deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+    gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+    gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     job_name: str = defaults.JOB_NAME,
 ) -> Optional[Tuner]:
     """Starts updating the model including hyperparameter optimization.
@@ -179,6 +181,8 @@ def run_training_job(
             devices=devices,
             strategy=strategy,
             precision=precision,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
             deterministic_trainer=deterministic_trainer,
         )
         submit_remote_job(
@@ -213,6 +217,8 @@ def run_training_job(
         strategy=strategy,
         precision=precision,
         deterministic_trainer=deterministic_trainer,
+        gradient_clip_algorithm=gradient_clip_algorithm,
+        gradient_clip_val=gradient_clip_val,
         job_name=job_name,
     )
 
@@ -527,6 +533,8 @@ def _execute_training_and_tuning_job_locally(
     deterministic_trainer: bool,
     strategy: str,
     precision: str,
+    gradient_clip_algorithm: Optional[str],
+    gradient_clip_val: Optional[float],
 ):
     """Executes the training job locally.
 
@@ -547,6 +555,8 @@ def _execute_training_and_tuning_job_locally(
     config_space["strategy"] = strategy
     config_space["precision"] = precision
     config_space["deterministic_trainer"] = deterministic_trainer
+    config_space["gradient_clip_val"] = gradient_clip_val
+    config_space["gradient_clip_algorithm"] = gradient_clip_algorithm
     if input_state_url is not None:
         config_space["input_state_url"] = input_state_url
 
diff --git a/src/renate/updaters/avalanche/model_updater.py b/src/renate/updaters/avalanche/model_updater.py
index 6f2839a0..68438f61 100644
--- a/src/renate/updaters/avalanche/model_updater.py
+++ b/src/renate/updaters/avalanche/model_updater.py
@@ -274,6 +274,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "batch_size": batch_size,
@@ -306,6 +308,8 @@ def __init__(
             devices=devices,
             strategy=strategy,
             precision=precision,
+            gradient_clip_val=gradient_clip_val,
+            gradient_clip_algorithm=gradient_clip_algorithm,
         )
 
 
@@ -338,6 +342,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "batch_size": batch_size,
@@ -369,6 +375,8 @@ def __init__(
             devices=devices,
             strategy=strategy,
             precision=precision,
+            gradient_clip_val=gradient_clip_val,
+            gradient_clip_algorithm=gradient_clip_algorithm,
         )
 
 
@@ -402,6 +410,8 @@ def __init__(
         strategy: Optional[str] = defaults.DISTRIBUTED_STRATEGY,
         precision: str = defaults.PRECISION,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "batch_size": batch_size,
@@ -434,6 +444,8 @@ def __init__(
             devices=devices,
             strategy=strategy,
             precision=precision,
+            gradient_clip_val=gradient_clip_val,
+            gradient_clip_algorithm=gradient_clip_algorithm,
         )
 
 
@@ -466,6 +478,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -497,4 +511,6 @@ def __init__(
             devices=devices,
             strategy=strategy,
             precision=precision,
+            gradient_clip_val=gradient_clip_val,
+            gradient_clip_algorithm=gradient_clip_algorithm,
         )
diff --git a/src/renate/updaters/experimental/er.py b/src/renate/updaters/experimental/er.py
index a0d4e331..f9e6da71 100644
--- a/src/renate/updaters/experimental/er.py
+++ b/src/renate/updaters/experimental/er.py
@@ -552,6 +552,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -590,6 +592,8 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
 
 
@@ -629,6 +633,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -668,6 +674,8 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
 
 
@@ -708,6 +716,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -748,6 +758,8 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
 
 
@@ -791,6 +803,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -834,6 +848,8 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
 
 
@@ -883,6 +899,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -932,4 +950,6 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
diff --git a/src/renate/updaters/experimental/fine_tuning.py b/src/renate/updaters/experimental/fine_tuning.py
index d9139269..f31295dd 100644
--- a/src/renate/updaters/experimental/fine_tuning.py
+++ b/src/renate/updaters/experimental/fine_tuning.py
@@ -42,6 +42,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "batch_size": batch_size,
@@ -73,4 +75,6 @@ def __init__(
             deterministic_trainer=deterministic_trainer,
             strategy=strategy,
             precision=precision,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
diff --git a/src/renate/updaters/experimental/gdumb.py b/src/renate/updaters/experimental/gdumb.py
index 5c953706..d0d69656 100644
--- a/src/renate/updaters/experimental/gdumb.py
+++ b/src/renate/updaters/experimental/gdumb.py
@@ -132,6 +132,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -166,4 +168,6 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
diff --git a/src/renate/updaters/experimental/joint.py b/src/renate/updaters/experimental/joint.py
index 6ab5e52e..cf907560 100644
--- a/src/renate/updaters/experimental/joint.py
+++ b/src/renate/updaters/experimental/joint.py
@@ -121,6 +121,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "batch_size": batch_size,
@@ -151,4 +153,6 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
diff --git a/src/renate/updaters/experimental/offline_er.py b/src/renate/updaters/experimental/offline_er.py
index 2666bd7c..17c3e0e1 100644
--- a/src/renate/updaters/experimental/offline_er.py
+++ b/src/renate/updaters/experimental/offline_er.py
@@ -167,6 +167,8 @@ def __init__(
         precision: str = defaults.PRECISION,
         seed: int = defaults.SEED,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         learner_kwargs = {
             "memory_size": memory_size,
@@ -202,4 +204,6 @@ def __init__(
             strategy=strategy,
             precision=precision,
             deterministic_trainer=deterministic_trainer,
+            gradient_clip_algorithm=gradient_clip_algorithm,
+            gradient_clip_val=gradient_clip_val,
         )
diff --git a/src/renate/updaters/model_updater.py b/src/renate/updaters/model_updater.py
index 8f4db296..c340abde 100644
--- a/src/renate/updaters/model_updater.py
+++ b/src/renate/updaters/model_updater.py
@@ -238,6 +238,9 @@ class ModelUpdater(abc.ABC):
             The value is passed to the trainer as described
             `here `_.
+        gradient_clip_val: Gradient clipping value used in PyTorch Lightning. Defaults to not
+            clipping by using a value of None.
+        gradient_clip_algorithm: Method to clip gradients (norm or value) used in PyTorch Lightning.
     """
 
     def __init__(
@@ -268,6 +271,8 @@ def __init__(
         strategy: Optional[str] = defaults.DISTRIBUTED_STRATEGY,
         precision: str = defaults.PRECISION,
         deterministic_trainer: bool = defaults.DETERMINISTIC_TRAINER,
+        gradient_clip_val: Optional[float] = defaults.GRADIENT_CLIP_VAL,
+        gradient_clip_algorithm: Optional[str] = defaults.GRADIENT_CLIP_ALGORITHM,
     ):
         self._learner_kwargs = learner_kwargs or {}
         self._learner_kwargs["loss_fn"] = loss_fn
@@ -336,6 +341,8 @@ def __init__(
         self._logger = logger
         self._num_epochs_trained = 0
         self._deterministic_trainer = deterministic_trainer
+        self._gradient_clip_algorithm = gradient_clip_algorithm
+        self._gradient_clip_val = gradient_clip_val
 
     @abc.abstractmethod
     def update(
@@ -424,6 +431,8 @@ def _fit_learner(
             deterministic=self._deterministic_trainer,
             strategy=strategy,
             precision=self._precision,
+            gradient_clip_val=self._gradient_clip_val,
+            gradient_clip_algorithm=self._gradient_clip_algorithm,
         )
         trainer.fit(learner)
         self._num_epochs_trained = trainer.current_epoch
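
Usage note: both new arguments default to None, so existing callers see no behavior change. When set, they are forwarded unchanged from the public entry points, through config_space and the CLI arguments, to PyTorch Lightning's Trainer(gradient_clip_val=..., gradient_clip_algorithm=...) in ModelUpdater._fit_learner. A minimal sketch of a training job with clipping enabled follows; apart from the two new arguments, the config path and the other argument names are illustrative assumptions about the surrounding run_training_job API, not part of this change:

    from renate.training import run_training_job

    run_training_job(
        config_file="renate_config.py",  # hypothetical user config module
        config_space={"learning_rate": 0.05},  # assumed pre-existing search space
        mode="min",
        metric="val_loss",
        max_epochs=5,
        backend="local",
        # New in this change: clip the global gradient 2-norm at 1.0 before each
        # optimizer step. Leaving both arguments at None keeps the old behavior.
        gradient_clip_val=1.0,
        gradient_clip_algorithm="norm",
    )

From the CLI arguments wired up in parsing_functions.py, the equivalent is --gradient_clip_val 1.0 --gradient_clip_algorithm norm; the parsers map the literal string None back to Python None, so --gradient_clip_val None explicitly disables clipping.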