diff --git a/cooper/__init__.py b/cooper/__init__.py index 9828907d..1768af57 100644 --- a/cooper/__init__.py +++ b/cooper/__init__.py @@ -16,7 +16,7 @@ warnings.warn("Could not retrieve Cooper version!") from cooper.cmp import CMPState, ConstrainedMinimizationProblem, LagrangianStore -from cooper.constraints import ConstraintGroup, ConstraintState, ConstraintType +from cooper.constraints import Constraint, ConstraintState, ConstraintType from cooper.formulations import FormulationType from . import formulations, multipliers, optim, utils diff --git a/cooper/cmp.py b/cooper/cmp.py index 356fc056..187e9578 100644 --- a/cooper/cmp.py +++ b/cooper/cmp.py @@ -5,7 +5,7 @@ import torch -from cooper.constraints import ConstraintGroup, ConstraintMeasurement, ConstraintState +from cooper.constraints import Constraint, ConstraintMeasurement, ConstraintState # Formulation, and some other classes below, are inspired by the design of the # TensorFlow Constrained Optimization (TFCO) library: @@ -48,7 +48,7 @@ class CMPState: def __init__( self, loss: Optional[torch.Tensor] = None, - observed_constraints: Sequence[tuple[ConstraintGroup, ConstraintState]] = (), + observed_constraints: Sequence[tuple[Constraint, ConstraintState]] = (), misc: Optional[dict] = None, ): self.loss = loss @@ -87,13 +87,13 @@ def populate_primal_lagrangian(self) -> LagrangianStore: current_primal_lagrangian = 0.0 if self.loss is None else torch.clone(self.loss) current_primal_constraint_measurements = [] - for constraint_group, constraint_state in contributing_constraints: - primal_constraint_contrib, primal_measurement = constraint_group.compute_constraint_primal_contribution( + for constraint, constraint_state in contributing_constraints: + primal_lagrangian_contribution, primal_measurement = constraint.compute_constraint_primal_contribution( constraint_state ) current_primal_constraint_measurements.append(primal_measurement) - if primal_constraint_contrib is not None: - current_primal_lagrangian = 
current_primal_lagrangian + primal_constraint_contrib + if primal_lagrangian_contribution is not None: + current_primal_lagrangian = current_primal_lagrangian + primal_lagrangian_contribution # Modify "private" attributes to accumulate Lagrangian values over successive # calls to `populate_primal_lagrangian` @@ -136,19 +136,19 @@ def populate_dual_lagrangian(self) -> LagrangianStore: current_dual_lagrangian = 0.0 current_dual_constraint_measurements = [] - for constraint_group, constraint_state in contributing_constraints: - dual_lagrangian_contrib, dual_measurement = constraint_group.compute_constraint_dual_contribution( + for constraint, constraint_state in contributing_constraints: + dual_lagrangian_contribution, dual_measurement = constraint.compute_constraint_dual_contribution( constraint_state ) current_dual_constraint_measurements.append(dual_measurement) - if dual_lagrangian_contrib is not None: - current_dual_lagrangian = current_dual_lagrangian + dual_lagrangian_contrib + if dual_lagrangian_contribution is not None: + current_dual_lagrangian = current_dual_lagrangian + dual_lagrangian_contribution # Extracting the violation from the dual_constraint_measurement ensures that it is # the "strict" violation, if available. 
_, strict_constraint_features = constraint_state.extract_constraint_features() - constraint_group.update_strictly_feasible_indices_( - strict_violation=dual_lagrangian_contrib.violation, + constraint.update_strictly_feasible_indices_( + strict_violation=dual_measurement.violation, strict_constraint_features=strict_constraint_features, ) @@ -234,8 +234,8 @@ def backward(self) -> None: def __repr__(self) -> str: _string = f"CMPState(\n loss={self.loss},\n observed_constraints=[" - for constraint_group, constraint_state in self.observed_constraints: - _string += f"\n\t{constraint_group} -> {constraint_state}," + for constraint, constraint_state in self.observed_constraints: + _string += f"\n\t{constraint} -> {constraint_state}," _string += f"\n ]\n misc={self.misc}\n)" return _string diff --git a/cooper/constraints/__init__.py b/cooper/constraints/__init__.py index 18d3c759..925977e2 100644 --- a/cooper/constraints/__init__.py +++ b/cooper/constraints/__init__.py @@ -1,3 +1,3 @@ -from .constraint_group import ConstraintGroup +from .constraint import Constraint from .constraint_state import ConstraintMeasurement, ConstraintState, ConstraintType from .slacks import ConstantSlack, DenseSlack, ExplicitSlack, IndexedSlack, SlackVariable diff --git a/cooper/constraints/constraint_group.py b/cooper/constraints/constraint.py similarity index 74% rename from cooper/constraints/constraint_group.py rename to cooper/constraints/constraint.py index 99810d82..dfaf2edd 100644 --- a/cooper/constraints/constraint_group.py +++ b/cooper/constraints/constraint.py @@ -8,8 +8,8 @@ from cooper.multipliers import IndexedMultiplier, Multiplier, PenaltyCoefficient -class ConstraintGroup: - """Constraint Group.""" +class Constraint: + """Constraint.""" # TODO(gallego-posada): Add documentation @@ -50,7 +50,7 @@ def sanity_check_multiplier(self, multiplier: Multiplier, constraint_type: Const if multiplier.constraint_type != constraint_type: raise ValueError( f"Constraint type of provided 
multiplier is {multiplier.constraint_type} \ - which is inconsistent with {constraint_type} set for the constraint group." + which is inconsistent with {constraint_type} set for the constraint." ) def sanity_check_penalty_coefficient(self, penalty_coefficient: PenaltyCoefficient) -> None: @@ -58,9 +58,9 @@ def sanity_check_penalty_coefficient(self, penalty_coefficient: PenaltyCoefficie raise ValueError("All entries of the penalty coefficient must be non-negative.") def update_penalty_coefficient(self, constraint_state: ConstraintState) -> None: - """Update the penalty coefficient of the constraint group.""" + """Update the penalty coefficient of the constraint.""" if self.penalty_coefficient is None: - raise ValueError("Constraint group does not have a penalty coefficient.") + raise ValueError("Constraint does not have a penalty coefficient.") else: self.penalty_coefficient.update_value( constraint_state=constraint_state, @@ -112,32 +112,8 @@ def update_strictly_feasible_indices_( self.multiplier.strictly_feasible_indices = strictly_feasible_indices - def state_dict(self): - state_dict = {"constraint_type": self.constraint_type, "formulation": self.formulation.state_dict()} - for attr_name, attr in [("multiplier", self.multiplier), ("penalty_coefficient", self.penalty_coefficient)]: - state_dict[attr_name] = attr.state_dict() if attr is not None else None - return state_dict - - def load_state_dict(self, state_dict): - self.constraint_type = state_dict["constraint_type"] - self.formulation.load_state_dict(state_dict["formulation"]) - - if state_dict["multiplier"] is not None and self.multiplier is None: - raise ValueError("Cannot load multiplier state dict since existing multiplier is `None`.") - elif state_dict["multiplier"] is None and self.multiplier is not None: - raise ValueError("Multiplier exists but state dict is `None`.") - elif state_dict["multiplier"] is not None and self.multiplier is not None: - self.multiplier.load_state_dict(state_dict["multiplier"]) 
- - if state_dict["penalty_coefficient"] is not None and self.penalty_coefficient is None: - raise ValueError("Cannot load penalty_coefficient state dict since existing penalty_coefficient is `None`.") - elif state_dict["penalty_coefficient"] is None and self.penalty_coefficient is not None: - raise ValueError("Penalty coefficient exists but state dict is `None`.") - elif state_dict["penalty_coefficient"] is not None and self.penalty_coefficient is not None: - self.penalty_coefficient.load_state_dict(state_dict["penalty_coefficient"]) - def __repr__(self): - repr = f"ConstraintGroup(constraint_type={self.constraint_type}, formulation={self.formulation}" + repr = f"Constraint(constraint_type={self.constraint_type}, formulation={self.formulation}" if self.multiplier is not None: repr += f", multiplier={self.multiplier}" if self.penalty_coefficient is not None: diff --git a/cooper/constraints/constraint_state.py b/cooper/constraints/constraint_state.py index 04a1d7cb..b0801481 100644 --- a/cooper/constraints/constraint_state.py +++ b/cooper/constraints/constraint_state.py @@ -13,21 +13,21 @@ class ConstraintType(Enum): @dataclass class ConstraintState: - """State of a constraint group describing the current constraint violation. + """State of a constraint describing the current constraint violation. Args: violation: Measurement of the constraint violation at some value of the primal parameters. This is expected to be differentiable with respect to the primal parameters. constraint_features: The features of the (differentiable) constraint. This is - used to evaluate the Lagrange multiplier associated with a constraint group. + used to evaluate the Lagrange multiplier associated with a constraint. For example, an `IndexedMultiplier` expects the indices of the constraints whose Lagrange multipliers are to be retrieved; while an `ImplicitMultiplier` expects general tensor-valued features for the constraints. This field is not used for `DenseMultiplier`//s. 
This can be used in conjunction with an `IndexedMultiplier` to indicate the measurement of the violation for only a subset of the constraints within a - `ConstraintGroup`. + `Constraint`. strict_violation: Measurement of the constraint violation which may be non-differentiable with respect to the primal parameters. When provided, the (necessarily differentiable) `violation` is used to compute the gradient diff --git a/cooper/constraints/slacks.py b/cooper/constraints/slacks.py index f46c14d1..d616b4a5 100644 --- a/cooper/constraints/slacks.py +++ b/cooper/constraints/slacks.py @@ -60,7 +60,7 @@ class ExplicitSlack(SlackVariable): """ An explicit slack holds a :py:class:`~torch.nn.parameter.Parameter` which contains (explicitly) the value of the slack variable with a - :py:class:`~cooper.constraints.ConstraintGroup` in a + :py:class:`~cooper.constraints.Constraint` in a :py:class:`~cooper.cmp.ConstrainedMinimizationProblem`. Args: @@ -105,10 +105,10 @@ class DenseSlack(ExplicitSlack): """Simplest kind of trainable slack variable. :py:class:`~cooper.constraints.slacks.DenseSlack`\\s are suitable for low to - mid-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s for which all the + mid-scale :py:class:`~cooper.constraints.Constraint`\\s for which all the constraints in the group are measured constantly. - For large-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s (for example, + For large-scale :py:class:`~cooper.constraints.Constraint`\\s (for example, one constraint per training example) you may consider using an :py:class:`~cooper.constraints.slacks.IndexedSlack`. """ @@ -121,12 +121,12 @@ def forward(self): class IndexedSlack(ExplicitSlack): """Indexed slacks extend the functionality of :py:class:`~cooper.constraints.slacks.DenseSlack`\\s to cases where the number of - constraints in the :py:class:`~cooper.constraints.ConstraintGroup` is too large. + constraints in the :py:class:`~cooper.constraints.Constraint` is too large. 
This situation may arise, for example, when imposing point-wise constraints over all the training samples in a learning task. In such cases, it might be computationally prohibitive to measure the value for all - the constraints in the :py:class:`~cooper.constraints.ConstraintGroup` and one may + the constraints in the :py:class:`~cooper.constraints.Constraint` and one may typically resort to sampling. :py:class:`~cooper.constraints.slacks.IndexedSlack`\\s enable time-efficient retrieval of the slack variables for the sampled constraints only, and memory-efficient sparse gradients (on GPU). diff --git a/cooper/formulations/utils.py b/cooper/formulations/utils.py index 152a3a69..5b53e2e9 100644 --- a/cooper/formulations/utils.py +++ b/cooper/formulations/utils.py @@ -14,7 +14,7 @@ def compute_primal_weighted_violation( Args: constraint_factor_value: The value of the multiplier or penalty coefficient for the - constraint group. + constraint. violation: Tensor of constraint violations. """ @@ -57,7 +57,7 @@ def compute_dual_weighted_violation( Bertsekas (2016). Args: - multiplier_value: The value of the multiplier for the constraint group. + multiplier_value: The value of the multiplier for the constraint. violation: Tensor of constraint violations. penalty_coefficient_value: Tensor of penalty coefficient values. """ @@ -92,7 +92,7 @@ def compute_quadratic_augmented_contribution( constraint_type: ConstraintType, ) -> Optional[torch.Tensor]: r""" - Computes the quadratic penalty for a constraint group. + Computes the quadratic penalty for a constraint. When the constraint is an inequality constraint, the quadratic penalty is computed following Eq 17.65 in Numerical Optimization by Nocedal and Wright (2006). 
Denoting diff --git a/cooper/multipliers/__init__.py b/cooper/multipliers/__init__.py index 4e3aebed..8c7b8cda 100644 --- a/cooper/multipliers/__init__.py +++ b/cooper/multipliers/__init__.py @@ -13,7 +13,7 @@ def evaluate_constraint_factor( module: ConstraintFactor, constraint_features: torch.Tensor, violation: torch.Tensor ) -> torch.Tensor: """Evaluate the Lagrange multiplier or penalty coefficient associated with a - constraint group. + constraint. Args: module: Multiplier or penalty coefficient module. @@ -35,7 +35,7 @@ def evaluate_constraint_factor( if not value.requires_grad and value.numel() == 1 and violation.numel() > 1: # Expand the value of the penalty coefficient to match the shape of the violation. # This enables the use of a single penalty coefficient for all constraints in a - # constraint group. + # constraint. # We only do this for penalty coefficients an not multipliers because we expect # a one-to-one mapping between multiplier values and constraints. If multiplier # sharing is desired, this should be done explicitly by the user. diff --git a/cooper/multipliers/multipliers.py b/cooper/multipliers/multipliers.py index 06780003..28a070aa 100644 --- a/cooper/multipliers/multipliers.py +++ b/cooper/multipliers/multipliers.py @@ -28,12 +28,12 @@ class ExplicitMultiplier(Multiplier): """ An explicit multiplier holds a :py:class:`~torch.nn.parameter.Parameter` which contains (explicitly) the value of the Lagrange multipliers associated with a - :py:class:`~cooper.constraints.ConstraintGroup` in a + :py:class:`~cooper.constraints.Constraint` in a :py:class:`~cooper.cmp.ConstrainedMinimizationProblem`. .. 
warning:: When `restart_on_feasible=True`, the entries of the multiplier which correspond - to feasible constraints in the :py:class:`~cooper.constraints.ConstraintGroup` + to feasible constraints in the :py:class:`~cooper.constraints.Constraint` are reset to a default value (typically zero) by the :py:meth:`~cooper.multipliers.ExplicitMultiplier.post_step_` method. Note that we do **not** perform any modification to the dual optimizer associated with @@ -163,10 +163,10 @@ class DenseMultiplier(ExplicitMultiplier): """Simplest kind of trainable Lagrange multiplier. :py:class:`~cooper.multipliers.DenseMultiplier`\\s are suitable for low to mid-scale - :py:class:`~cooper.constraints.ConstraintGroup`\\s for which all the constraints + :py:class:`~cooper.constraints.Constraint`\\s for which all the constraints in the group are measured constantly. - For large-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s (for example, + For large-scale :py:class:`~cooper.constraints.Constraint`\\s (for example, one constraint per training example) you may consider using an :py:class:`~cooper.multipliers.IndexedMultiplier`. """ @@ -182,12 +182,12 @@ def __repr__(self): class IndexedMultiplier(ExplicitMultiplier): """Indexed multipliers extend the functionality of :py:class:`~cooper.multipliers.DenseMultiplier`\\s to cases where the number of - constraints in the :py:class:`~cooper.constraints.ConstraintGroup` is too large. + constraints in the :py:class:`~cooper.constraints.Constraint` is too large. This situation may arise, for example, when imposing point-wise constraints over all the training samples in a learning task. In such cases, it might be computationally prohibitive to measure the value for all - the constraints in the :py:class:`~cooper.constraints.ConstraintGroup` and one may + the constraints in the :py:class:`~cooper.constraints.Constraint` and one may typically resort to sampling. 
:py:class:`~cooper.multipliers.IndexedMultiplier`\\s enable time-efficient retrieval of the multipliers for the sampled constraints only, and memory-efficient sparse gradients (on GPU). @@ -244,7 +244,7 @@ def __repr__(self): class ImplicitMultiplier(Multiplier): """An implicit multiplier is a :py:class:`~torch.nn.Module` that computes the value of a Lagrange multiplier associated with a - :py:class:`~cooper.constraints.ConstraintGroup` based on "features" for each + :py:class:`~cooper.constraints.Constraint` based on "features" for each constraint. The multiplier is _implicitly_ represented by the features of its associated constraint as well as the computation that takes place in the :py:meth:`~cooper.multipliers.ImplicitMultiplier.forward` method. diff --git a/cooper/optim/constrained_optimizers/alternating_optimizer.py b/cooper/optim/constrained_optimizers/alternating_optimizer.py index 9bcafbd9..4a466aa7 100644 --- a/cooper/optim/constrained_optimizers/alternating_optimizer.py +++ b/cooper/optim/constrained_optimizers/alternating_optimizer.py @@ -64,17 +64,17 @@ def step(self): pass def update_penalty_coefficients(self, cmp_state: CMPState) -> None: - """Update the penalty coefficients of the constraint groups. Only the penalty + """Update the penalty coefficients of the constraints. Only the penalty coefficients associated with the ``FormulationType.AUGMENTED_LAGRANGIAN`` and constraints that ``contributes_to_dual_update`` are updated. """ - for constraint_group, constraint_state in cmp_state.observed_constraints: - if constraint_group.formulation_type == FormulationType.AUGMENTED_LAGRANGIAN: + for constraint, constraint_state in cmp_state.observed_constraints: + if constraint.formulation_type == FormulationType.AUGMENTED_LAGRANGIAN: # We might reach this point via an AugmetedLagrangianOptimizer acting # on some constraints that do not use an Augmented Lagrangian formulation, # so we do _not_ apply penalty coefficient updates to those. 
if constraint_state.contributes_to_dual_update: - constraint_group.update_penalty_coefficient(constraint_state=constraint_state) + constraint.update_penalty_coefficient(constraint_state=constraint_state) class AlternatingPrimalDualOptimizer(BaseAlternatingOptimizer): diff --git a/cooper/optim/constrained_optimizers/constrained_optimizer.py b/cooper/optim/constrained_optimizers/constrained_optimizer.py index c36c24e2..08caefec 100644 --- a/cooper/optim/constrained_optimizers/constrained_optimizer.py +++ b/cooper/optim/constrained_optimizers/constrained_optimizer.py @@ -43,7 +43,7 @@ class ConstrainedOptimizer: dual_optimizers: Optimizer(s) for the dual variables (e.g. the Lagrange multipliers associated with the constraints). An iterable of ``torch.optim.Optimizer``\\s can be passed to handle the case of several - ``~cooper.constraints.ConstraintGroup``\\s. If dealing with an unconstrained + ``~cooper.constraints.Constraint``\\s. If dealing with an unconstrained problem, please use a :py:class:`~cooper.optim.cooper_optimizer.UnconstrainedOptimizer` instead. diff --git a/cooper/optim/utils.py b/cooper/optim/utils.py index aa5a8b42..e04d9a6e 100644 --- a/cooper/optim/utils.py +++ b/cooper/optim/utils.py @@ -61,7 +61,7 @@ def load_cooper_optimizer_from_state_dict( ): """Creates a Cooper optimizer and loads the state_dicts contained in a :py:class:`~cooper.optim.CooperOptimizerState` onto instantiated primal and dual - optimizers and constraint groups or multipliers. + optimizers and constraints or multipliers. 
""" # Load primal optimizers @@ -104,7 +104,7 @@ def load_cooper_optimizer_from_state_dict( for multiplier, multiplier_state in zip(multipliers, multiplier_states): multiplier.load_state_dict(multiplier_state) - # Since we have extracted the multiplier information above, we discard the constraint_groups below + # Since we have extracted the multiplier information above, we discard the constraints below return create_optimizer_from_kwargs( primal_optimizers=primal_optimizers, extrapolation=cooper_optimizer_state.extrapolation, diff --git a/tests/formulations/test_augmented_lagrangian.py b/tests/formulations/test_augmented_lagrangian.py index 4fb8bb9b..d7727cbd 100644 --- a/tests/formulations/test_augmented_lagrangian.py +++ b/tests/formulations/test_augmented_lagrangian.py @@ -1,6 +1,3 @@ -import os -import tempfile - import cooper_test_utils import pytest import testing_utils @@ -221,67 +218,5 @@ def test_convergence_augmented_lagrangian( assert torch.allclose(param, exact_solution, atol=1e-3) -def test_save_and_load_state_dict(alternation_type, Toy2dCMP_params_init, device): - params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers( - use_multiple_primal_optimizers=False, params_init=Toy2dCMP_params_init - ) - - cmp, cooper_optimizer, penalty_coefficients, formulation_kwargs = setup_augmented_lagrangian_objects( - primal_optimizers=primal_optimizers, alternation_type=alternation_type, device=device - ) - - roll_kwargs = {"compute_cmp_state_fn": lambda: cmp.compute_cmp_state(params)} - - for _ in range(10): - cooper_optimizer.roll(**roll_kwargs) - - # Generate checkpoints after 10 steps of training - penalty_coefficient0_after10 = penalty_coefficients[0]().clone().detach() - penalty_coefficient1_after10 = penalty_coefficients[1]().clone().detach() - multiplier0_after10 = cmp.constraint_groups[0].multiplier().clone().detach() - multiplier1_after10 = cmp.constraint_groups[1].multiplier().clone().detach() - - with tempfile.TemporaryDirectory() 
as tmpdirname: - torch.save(cmp.constraint_groups[0].state_dict(), os.path.join(tmpdirname, "cg0.pt")) - torch.save(cmp.constraint_groups[1].state_dict(), os.path.join(tmpdirname, "cg1.pt")) - - cg0_state_dict = torch.load(os.path.join(tmpdirname, "cg0.pt")) - cg1_state_dict = torch.load(os.path.join(tmpdirname, "cg1.pt")) - - # Train for another 10 steps -- so a total of 20 steps - for _ in range(10): - cmp_state = cooper_optimizer.roll(**roll_kwargs) # noqa: F841 - - # Reload from checkpoint at 10 steps - new_penalty_coefficient0 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device)) - new_penalty_coefficient1 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device)) - new_cmp = cooper_test_utils.Toy2dCMP( - use_ineq_constraints=True, - formulation_type=cooper.FormulationType.AUGMENTED_LAGRANGIAN, - penalty_coefficients=(new_penalty_coefficient0, new_penalty_coefficient1), - device=device, - ) - new_cmp.constraint_groups[0].load_state_dict(cg0_state_dict) - new_cmp.constraint_groups[1].load_state_dict(cg1_state_dict) - - # Ensure loaded value of the penalty coefficient matches that observe when creating - # the checkpoint - new_penalty_coefficient0_value = new_penalty_coefficient0().clone().detach() - new_penalty_coefficient1_value = new_penalty_coefficient1().clone().detach() - assert torch.allclose(new_penalty_coefficient0_value, penalty_coefficient0_after10) - assert torch.allclose(new_penalty_coefficient1_value, penalty_coefficient1_after10) - - # Ensure loaded value of the multipliers matches that observe when creating the - # checkpoint - new_multiplier0_value = new_cmp.constraint_groups[0].multiplier().clone().detach() - new_multiplier1_value = new_cmp.constraint_groups[1].multiplier().clone().detach() - if new_multiplier0_value != 0: - assert not torch.allclose(new_multiplier0_value, cmp.constraint_groups[0].multiplier()) - if new_multiplier1_value != 0: - assert not torch.allclose(new_multiplier1_value, 
cmp.constraint_groups[1].multiplier()) - assert torch.allclose(new_multiplier0_value, multiplier0_after10) - assert torch.allclose(new_multiplier1_value, multiplier1_after10) - - # TODO(gallego-posada): Add a test to ensure IndexedPenaltyCoefficient works as expected # when used in an Augmented Lagrangian formulation. diff --git a/tests/helpers/cooper_test_utils.py b/tests/helpers/cooper_test_utils.py index 775e4d77..f3580792 100644 --- a/tests/helpers/cooper_test_utils.py +++ b/tests/helpers/cooper_test_utils.py @@ -66,7 +66,7 @@ def __init__( self.slack_variables = slack_variables - self.constraint_groups = [] + self.constraints = [] if self.use_ineq_constraints: for ix in range(2): @@ -77,16 +77,16 @@ def __init__( ) penalty_coefficient = penalty_coefficients[ix] if penalty_coefficients is not None else None - constraint_group = cooper.ConstraintGroup( + constraint = cooper.Constraint( constraint_type=constraint_type, formulation_type=formulation_type, multiplier=multiplier, penalty_coefficient=penalty_coefficient, formulation_kwargs=formulation_kwargs, ) - self.constraint_groups.append(constraint_group) + self.constraints.append(constraint) - self.multipliers = [cg.multiplier for cg in self.constraint_groups if cg.multiplier is not None] + self.multipliers = [cg.multiplier for cg in self.constraints if cg.multiplier is not None] def analytical_gradients(self, params): """Returns the analytical gradients of the loss and constraints for a given @@ -147,7 +147,7 @@ def compute_violations(self, params) -> cooper.CMPState: cg0_state = cooper.ConstraintState(violation=cg0_violation) cg1_state = cooper.ConstraintState(violation=cg1_violation) - observed_constraints = [(self.constraint_groups[0], cg0_state), (self.constraint_groups[1], cg1_state)] + observed_constraints = [(self.constraints[0], cg0_state), (self.constraints[1], cg1_state)] return cooper.CMPState(loss=None, observed_constraints=observed_constraints) diff --git a/tests/test_checkpoint.py 
b/tests/test_checkpoint.py index d610aa88..911ad649 100644 --- a/tests/test_checkpoint.py +++ b/tests/test_checkpoint.py @@ -7,7 +7,6 @@ # Import basic closure example from helpers import cooper_test_utils -import pytest import torch import cooper @@ -106,124 +105,3 @@ def test_checkpoint(Toy2dCMP_problem_properties, Toy2dCMP_params_init, use_multi assert cooper.utils.validate_state_dicts(loaded_model.state_dict(), model_state_dict_200) # These are ConstrainedOptimizerState objects and not dicts assert loaded_constrained_optimizer.state_dict() == constrained_optimizer_state_dict_200 - - -@pytest.mark.parametrize( - "formulation_type", - [ - cooper.FormulationType.PENALTY, - cooper.FormulationType.QUADRATIC_PENALTY, - cooper.FormulationType.LAGRANGIAN, - cooper.FormulationType.AUGMENTED_LAGRANGIAN, - ], -) -def test_formulation_checkpoint(formulation_type, Toy2dCMP_params_init, device): - formulation_class = formulation_type.value - - if formulation_type == cooper.FormulationType.PENALTY: - constraint_type = cooper.ConstraintType.PENALTY - else: - constraint_type = cooper.ConstraintType.INEQUALITY - - has_multiplier = formulation_class.expects_multiplier - has_penalties = formulation_class.expects_penalty_coefficient - - params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers( - use_multiple_primal_optimizers=False, params_init=Toy2dCMP_params_init - ) - - def make_fresh_penalty_coefficients(has_penalties): - if has_penalties: - penalty_coefficient0 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device)) - penalty_coefficient1 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device)) - return [penalty_coefficient0, penalty_coefficient1] - else: - return None - - penalty_coefficients = make_fresh_penalty_coefficients(has_penalties=has_penalties) - cmp = cooper_test_utils.Toy2dCMP( - use_ineq_constraints=True, - formulation_type=formulation_type, - penalty_coefficients=penalty_coefficients, - 
constraint_type=constraint_type, - device=device, - ) - - cooper_optimizer = cooper_test_utils.build_cooper_optimizer_for_Toy2dCMP( - primal_optimizers=primal_optimizers, - multipliers=cmp.multipliers, - extrapolation=False, - alternation_type=cooper.optim.AlternationType.FALSE, - ) - - roll_kwargs = {"compute_cmp_state_fn": lambda: cmp.compute_cmp_state(params)} - - for _ in range(10): - cooper_optimizer.roll(**roll_kwargs) - if has_penalties: - # Multiply the penalty coefficients by 1.01 - for _penalty_coefficient in penalty_coefficients: - _penalty_coefficient.value = _penalty_coefficient() * 1.01 - - # Generate checkpoints after 10 steps of training - if has_penalties: - penalty_coefficient0_after10 = penalty_coefficients[0]().clone().detach() - penalty_coefficient1_after10 = penalty_coefficients[1]().clone().detach() - - if has_multiplier: - multiplier0_after10 = cmp.constraint_groups[0].multiplier().clone().detach() - multiplier1_after10 = cmp.constraint_groups[1].multiplier().clone().detach() - - with tempfile.TemporaryDirectory() as tmpdirname: - torch.save(cmp.constraint_groups[0].state_dict(), os.path.join(tmpdirname, "cg0.pt")) - torch.save(cmp.constraint_groups[1].state_dict(), os.path.join(tmpdirname, "cg1.pt")) - - cg0_state_dict = torch.load(os.path.join(tmpdirname, "cg0.pt")) - cg1_state_dict = torch.load(os.path.join(tmpdirname, "cg1.pt")) - - # Train for another 10 steps - for _ in range(10): - cooper_optimizer.roll(**roll_kwargs) - if has_penalties: - # Multiply the penalty coefficients by 1.01 - for penalty_coefficient in penalty_coefficients: - penalty_coefficient.value = penalty_coefficient() * 1.01 - - # Reload from checkpoint - new_penalty_coefficients = make_fresh_penalty_coefficients(has_penalties=has_penalties) - - new_cmp = cooper_test_utils.Toy2dCMP( - use_ineq_constraints=True, - formulation_type=formulation_type, - penalty_coefficients=new_penalty_coefficients, - constraint_type=constraint_type, - device=device, - ) - 
new_cmp.constraint_groups[0].load_state_dict(cg0_state_dict) - new_cmp.constraint_groups[1].load_state_dict(cg1_state_dict) - - if has_penalties: - # The loaded penalty coefficients come from 10 steps of training, so they should be - # different from the current ones - new_penalty_coefficient0_value = new_penalty_coefficients[0]().clone().detach() - new_penalty_coefficient1_value = new_penalty_coefficients[1]().clone().detach() - assert not torch.allclose(new_penalty_coefficient0_value, penalty_coefficients[0]()) - assert not torch.allclose(new_penalty_coefficient1_value, penalty_coefficients[1]()) - - # They should, however, be the same as the ones recorded before the checkpoint - assert torch.allclose(new_penalty_coefficient0_value, penalty_coefficient0_after10) - assert torch.allclose(new_penalty_coefficient1_value, penalty_coefficient1_after10) - - if has_multiplier: - # Similar story for the multipliers - new_multiplier0_value = new_cmp.constraint_groups[0].multiplier().clone().detach() - new_multiplier1_value = new_cmp.constraint_groups[1].multiplier().clone().detach() - - # Ignoring the case where the multiplier is 0 as both may match simply because - # the run is feasible - if new_multiplier0_value != 0: - assert not torch.allclose(new_multiplier0_value, cmp.constraint_groups[0].multiplier()) - if new_multiplier1_value != 0: - assert not torch.allclose(new_multiplier1_value, cmp.constraint_groups[1].multiplier()) - assert torch.allclose(new_multiplier0_value, multiplier0_after10) - assert torch.allclose(new_multiplier1_value, multiplier1_after10) diff --git a/tests/test_cmp.py b/tests/test_cmp.py index 9958c81c..934fb931 100644 --- a/tests/test_cmp.py +++ b/tests/test_cmp.py @@ -65,12 +65,12 @@ def evaluate_constraints(params) -> list[cooper.ConstraintState]: multiplier0 = cooper.multipliers.DenseMultiplier( constraint_type=constraint_type, num_constraints=1, device=device ) - cg0 = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=multiplier0) + cg0 = 
cooper.Constraint(**default_cg_kwargs, multiplier=multiplier0) multiplier1 = cooper.multipliers.DenseMultiplier( constraint_type=constraint_type, num_constraints=1, device=device ) - cg1 = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=multiplier1) + cg1 = cooper.Constraint(**default_cg_kwargs, multiplier=multiplier1) multipliers = [multiplier0, multiplier1] else: diff --git a/tests/test_extrapolation.py b/tests/test_extrapolation.py index 12c357d7..01577360 100644 --- a/tests/test_extrapolation.py +++ b/tests/test_extrapolation.py @@ -77,7 +77,7 @@ def test_manual_extrapolation(Toy2dCMP_problem_properties, Toy2dCMP_params_init, cooper_optimizer.roll(compute_cmp_state_fn) assert torch.allclose(params, mktensor([5.8428e-04, -9.2410e-01])) - multiplier_values = [constraint.multiplier() for constraint in cmp.constraint_groups] + multiplier_values = [constraint.multiplier() for constraint in cmp.constraints] for multiplier, target_value in zip(multiplier_values, [0.0388, 0.0]): if not torch.allclose(multiplier, mktensor([target_value]), atol=1e-4): breakpoint() diff --git a/tests/test_held_out_constraints.py b/tests/test_held_out_constraints.py index 0d838620..727fa04c 100644 --- a/tests/test_held_out_constraints.py +++ b/tests/test_held_out_constraints.py @@ -37,7 +37,7 @@ def __init__(self, use_constraint_surrogate=False, device=None, observe_probabil self.multiplier = cooper.multipliers.IndexedMultiplier( constraint_type=cooper.ConstraintType.INEQUALITY, num_constraints=2, device=device ) - self.constraint_group = cooper.ConstraintGroup( + self.constraint = cooper.Constraint( constraint_type=cooper.ConstraintType.INEQUALITY, formulation_type=cooper.FormulationType.LAGRANGIAN, multiplier=self.multiplier, @@ -112,7 +112,7 @@ def compute_cmp_state(self, params): strict_constraint_features=strict_constraint_features, ) - return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint_group, constraint_state)]) + return cooper.CMPState(loss=loss, 
observed_constraints=[(self.constraint, constraint_state)]) @pytest.fixture(params=[0.1, 0.5, 0.7, 1.0]) @@ -134,7 +134,7 @@ def test_manual_heldout_constraints(Toy2dCMP_problem_properties, Toy2dCMP_params cmp = RandomConstraintsToy2dCMP( use_constraint_surrogate=True, device=device, observe_probability=observe_probability ) - multipliers = cmp.constraint_group.multiplier + multipliers = cmp.constraint.multiplier primal_lr, dual_lr = 1e-2, 1e-2 params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers( diff --git a/tutorials/scripts/plot_gaussian_mixture.py b/tutorials/scripts/plot_gaussian_mixture.py index e6472056..a1f9bd0d 100644 --- a/tutorials/scripts/plot_gaussian_mixture.py +++ b/tutorials/scripts/plot_gaussian_mixture.py @@ -109,7 +109,7 @@ def __init__(self, use_strict_constraints: bool = False, constraint_level: float constraint_type = constraint_type = cooper.ConstraintType.INEQUALITY self.multiplier = cooper.multipliers.DenseMultiplier(constraint_type=constraint_type, num_constraints=1) - self.constraint_group = cooper.ConstraintGroup( + self.constraint = cooper.Constraint( constraint_type=constraint_type, formulation_type=cooper.FormulationType.LAGRANGIAN, multiplier=self.multiplier, @@ -139,7 +139,7 @@ def compute_cmp_state(self, model, inputs, targets): strict_violation = self.constraint_level - prop_0 constraint_state = cooper.ConstraintState(violation=differentiable_violation, strict_violation=strict_violation) - return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint_group, constraint_state)]) + return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint, constraint_state)]) def train(problem_name, inputs, targets, num_iters=5000, lr=1e-2, constraint_level=0.7): diff --git a/tutorials/scripts/plot_infrequent_true_constraint.py b/tutorials/scripts/plot_infrequent_true_constraint.py index 8a2f6f20..82a4e017 100644 --- a/tutorials/scripts/plot_infrequent_true_constraint.py +++ 
b/tutorials/scripts/plot_infrequent_true_constraint.py @@ -89,12 +89,12 @@ def __init__(self, y: torch.Tensor, z: torch.Tensor, constraint_level: float = 1 self.r = min(y.shape[0], z.shape[0]) self.constraint_level = constraint_level - # Creating a constraint group with a single constraint + # Creating a constraint with a single constraint constraint_type = cooper.ConstraintType.EQUALITY self.multiplier = cooper.multipliers.DenseMultiplier( constraint_type=constraint_type, num_constraints=1, device=DEVICE ) - self.constraint = cooper.ConstraintGroup( + self.constraint = cooper.Constraint( constraint_type=constraint_type, formulation_type=cooper.FormulationType.LAGRANGIAN, multiplier=self.multiplier, diff --git a/tutorials/scripts/plot_max_entropy.py b/tutorials/scripts/plot_max_entropy.py index 256c64aa..d1d985a9 100644 --- a/tutorials/scripts/plot_max_entropy.py +++ b/tutorials/scripts/plot_max_entropy.py @@ -39,8 +39,8 @@ def __init__(self, target_mean: float) -> None: } mean_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1) sum_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1) - self.mean_constraint = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=mean_multiplier) - self.sum_constraint = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=sum_multiplier) + self.mean_constraint = cooper.Constraint(**default_cg_kwargs, multiplier=mean_multiplier) + self.sum_constraint = cooper.Constraint(**default_cg_kwargs, multiplier=sum_multiplier) self.multipliers = {"mean": mean_multiplier, "sum": sum_multiplier} self.all_constraints = [self.sum_constraint, self.mean_constraint] @@ -66,7 +66,7 @@ def compute_cmp_state(self, log_probs: torch.Tensor) -> cooper.CMPState: return cooper.CMPState(loss=-entropy, observed_constraints=observed_constraints) -# Define the problem with the constraint groups +# Define the problem with the constraints cmp = 
MaximumEntropy(target_mean=4.5) # Define the primal parameters and optimizer diff --git a/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py b/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py index 0b97c5e8..d513fab4 100644 --- a/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py +++ b/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py @@ -39,14 +39,14 @@ def __init__(self, target_mean: float) -> None: mean_penalty_coefficient = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=DEVICE)) sum_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1) - self.mean_constraint = cooper.ConstraintGroup( + self.mean_constraint = cooper.Constraint( constraint_type=cooper.ConstraintType.EQUALITY, formulation_type=cooper.FormulationType.AUGMENTED_LAGRANGIAN, multiplier=mean_multiplier, penalty_coefficient=mean_penalty_coefficient, formulation_kwargs={"penalty_growth_factor": 1.001}, ) - self.sum_constraint = cooper.ConstraintGroup( + self.sum_constraint = cooper.Constraint( constraint_type=cooper.ConstraintType.EQUALITY, formulation_type=cooper.FormulationType.LAGRANGIAN, multiplier=sum_multiplier, @@ -77,7 +77,7 @@ def compute_cmp_state(self, log_probs: torch.Tensor) -> cooper.CMPState: return cooper.CMPState(loss=-entropy, observed_constraints=observed_constraints) -# Define the problem with the constraint groups +# Define the problem with the constraints cmp = MaximumEntropy(target_mean=4.5) # Define the primal parameters and optimizer diff --git a/tutorials/scripts/plot_min_norm.py b/tutorials/scripts/plot_min_norm.py index 5073c0b8..b04c0704 100644 --- a/tutorials/scripts/plot_min_norm.py +++ b/tutorials/scripts/plot_min_norm.py @@ -39,7 +39,7 @@ from torch.utils.data.sampler import BatchSampler, RandomSampler import cooper -from cooper import CMPState, ConstraintGroup, ConstraintState, ConstraintType, FormulationType +from cooper import CMPState, Constraint, ConstraintState, 
ConstraintType, FormulationType style_utils.set_plot_style() DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -112,14 +112,14 @@ class MinNormWithLinearConstraints(cooper.ConstrainedMinimizationProblem): """Min-norm problem with linear equality constraints.""" def __init__(self, num_equations: int) -> None: - # Create a constraint group for the equality constraints. We use a sparse constraint + # Create a constraint for the equality constraints. We use a sparse constraint # to be able to update the multipliers only with the observed constraints (i.e. the # ones that are active in the current batch) constraint_type = ConstraintType.EQUALITY self.multiplier = cooper.multipliers.IndexedMultiplier( constraint_type=constraint_type, num_constraints=num_equations, device=DEVICE ) - self.eq_constraint = ConstraintGroup( + self.eq_constraint = Constraint( constraint_type=constraint_type, formulation_type=FormulationType.LAGRANGIAN, multiplier=self.multiplier ) super().__init__() @@ -145,7 +145,7 @@ def run_experiment( linear_system_dataset = LinearConstraintDataset(A, b) constraint_loader = instantiate_dataloader(dataset=linear_system_dataset, batch_size=batch_size, seed=exp_seed) - # Define the problem with the constraint group + # Define the problem with the constraint cmp = MinNormWithLinearConstraints(num_equations=num_equations) # Randomly initialize the primal variable and instantiate the optimizers diff --git a/tutorials/scripts/plot_mnist_logistic_regression.py b/tutorials/scripts/plot_mnist_logistic_regression.py index 895e5783..5b67d4a9 100644 --- a/tutorials/scripts/plot_mnist_logistic_regression.py +++ b/tutorials/scripts/plot_mnist_logistic_regression.py @@ -43,11 +43,11 @@ primal_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True) -# Define the constraint group for the norm constraint +# Define the constraint for the norm constraint multiplier = cooper.multipliers.DenseMultiplier( 
constraint_type=cooper.ConstraintType.INEQUALITY, num_constraints=1, device=DEVICE ) -norm_constraint = cooper.ConstraintGroup( +norm_constraint = cooper.Constraint( constraint_type=cooper.ConstraintType.INEQUALITY, formulation_type=cooper.FormulationType.LAGRANGIAN, multiplier=multiplier,