diff --git a/cooper/__init__.py b/cooper/__init__.py
index 9828907d..1768af57 100644
--- a/cooper/__init__.py
+++ b/cooper/__init__.py
@@ -16,7 +16,7 @@
     warnings.warn("Could not retrieve Cooper version!")
 
 from cooper.cmp import CMPState, ConstrainedMinimizationProblem, LagrangianStore
-from cooper.constraints import ConstraintGroup, ConstraintState, ConstraintType
+from cooper.constraints import Constraint, ConstraintState, ConstraintType
 from cooper.formulations import FormulationType
 
 from . import formulations, multipliers, optim, utils
diff --git a/cooper/cmp.py b/cooper/cmp.py
index 356fc056..187e9578 100644
--- a/cooper/cmp.py
+++ b/cooper/cmp.py
@@ -5,7 +5,7 @@
 
 import torch
 
-from cooper.constraints import ConstraintGroup, ConstraintMeasurement, ConstraintState
+from cooper.constraints import Constraint, ConstraintMeasurement, ConstraintState
 
 # Formulation, and some other classes below, are inspired by the design of the
 # TensorFlow Constrained Optimization (TFCO) library:
@@ -48,7 +48,7 @@ class CMPState:
     def __init__(
         self,
         loss: Optional[torch.Tensor] = None,
-        observed_constraints: Sequence[tuple[ConstraintGroup, ConstraintState]] = (),
+        observed_constraints: Sequence[tuple[Constraint, ConstraintState]] = (),
         misc: Optional[dict] = None,
     ):
         self.loss = loss
@@ -87,13 +87,13 @@ def populate_primal_lagrangian(self) -> LagrangianStore:
         current_primal_lagrangian = 0.0 if self.loss is None else torch.clone(self.loss)
 
         current_primal_constraint_measurements = []
-        for constraint_group, constraint_state in contributing_constraints:
-            primal_constraint_contrib, primal_measurement = constraint_group.compute_constraint_primal_contribution(
+        for constraint, constraint_state in contributing_constraints:
+            primal_lagrangian_contribution, primal_measurement = constraint.compute_constraint_primal_contribution(
                 constraint_state
             )
             current_primal_constraint_measurements.append(primal_measurement)
-            if primal_constraint_contrib is not None:
-                current_primal_lagrangian = current_primal_lagrangian + primal_constraint_contrib
+            if primal_lagrangian_contribution is not None:
+                current_primal_lagrangian = current_primal_lagrangian + primal_lagrangian_contribution
 
         # Modify "private" attributes to accumulate Lagrangian values over successive
         # calls to `populate_primal_lagrangian`
@@ -136,19 +136,19 @@ def populate_dual_lagrangian(self) -> LagrangianStore:
         current_dual_lagrangian = 0.0
 
         current_dual_constraint_measurements = []
-        for constraint_group, constraint_state in contributing_constraints:
-            dual_lagrangian_contrib, dual_measurement = constraint_group.compute_constraint_dual_contribution(
+        for constraint, constraint_state in contributing_constraints:
+            dual_lagrangian_contribution, dual_measurement = constraint.compute_constraint_dual_contribution(
                 constraint_state
             )
             current_dual_constraint_measurements.append(dual_measurement)
-            if dual_lagrangian_contrib is not None:
-                current_dual_lagrangian = current_dual_lagrangian + dual_lagrangian_contrib
+            if dual_lagrangian_contribution is not None:
+                current_dual_lagrangian = current_dual_lagrangian + dual_lagrangian_contribution
 
                 # Extracting the violation from the dual_constraint_measurement ensures that it is
                 # the "strict" violation, if available.
                 _, strict_constraint_features = constraint_state.extract_constraint_features()
-                constraint_group.update_strictly_feasible_indices_(
-                    strict_violation=dual_lagrangian_contrib.violation,
+                constraint.update_strictly_feasible_indices_(
+                    strict_violation=dual_measurement.violation,
                     strict_constraint_features=strict_constraint_features,
                 )
 
@@ -234,8 +234,8 @@ def backward(self) -> None:
 
     def __repr__(self) -> str:
         _string = f"CMPState(\n  loss={self.loss},\n  observed_constraints=["
-        for constraint_group, constraint_state in self.observed_constraints:
-            _string += f"\n\t{constraint_group} -> {constraint_state},"
+        for constraint, constraint_state in self.observed_constraints:
+            _string += f"\n\t{constraint} -> {constraint_state},"
         _string += f"\n  ]\n  misc={self.misc}\n)"
         return _string
 
diff --git a/cooper/constraints/__init__.py b/cooper/constraints/__init__.py
index 18d3c759..925977e2 100644
--- a/cooper/constraints/__init__.py
+++ b/cooper/constraints/__init__.py
@@ -1,3 +1,3 @@
-from .constraint_group import ConstraintGroup
+from .constraint import Constraint
 from .constraint_state import ConstraintMeasurement, ConstraintState, ConstraintType
 from .slacks import ConstantSlack, DenseSlack, ExplicitSlack, IndexedSlack, SlackVariable
diff --git a/cooper/constraints/constraint_group.py b/cooper/constraints/constraint.py
similarity index 74%
rename from cooper/constraints/constraint_group.py
rename to cooper/constraints/constraint.py
index 99810d82..dfaf2edd 100644
--- a/cooper/constraints/constraint_group.py
+++ b/cooper/constraints/constraint.py
@@ -8,8 +8,8 @@
 from cooper.multipliers import IndexedMultiplier, Multiplier, PenaltyCoefficient
 
 
-class ConstraintGroup:
-    """Constraint Group."""
+class Constraint:
+    """Constraint."""
 
     # TODO(gallego-posada): Add documentation
 
@@ -50,7 +50,7 @@ def sanity_check_multiplier(self, multiplier: Multiplier, constraint_type: Const
             if multiplier.constraint_type != constraint_type:
                 raise ValueError(
                     f"Constraint type of provided multiplier is {multiplier.constraint_type} \
-                    which is inconsistent with {constraint_type} set for the constraint group."
+                    which is inconsistent with {constraint_type} set for the constraint."
                 )
 
     def sanity_check_penalty_coefficient(self, penalty_coefficient: PenaltyCoefficient) -> None:
@@ -58,9 +58,9 @@ def sanity_check_penalty_coefficient(self, penalty_coefficient: PenaltyCoefficie
             raise ValueError("All entries of the penalty coefficient must be non-negative.")
 
     def update_penalty_coefficient(self, constraint_state: ConstraintState) -> None:
-        """Update the penalty coefficient of the constraint group."""
+        """Update the penalty coefficient of the constraint."""
         if self.penalty_coefficient is None:
-            raise ValueError("Constraint group does not have a penalty coefficient.")
+            raise ValueError("Constraint does not have a penalty coefficient.")
         else:
             self.penalty_coefficient.update_value(
                 constraint_state=constraint_state,
@@ -112,32 +112,8 @@ def update_strictly_feasible_indices_(
 
             self.multiplier.strictly_feasible_indices = strictly_feasible_indices
 
-    def state_dict(self):
-        state_dict = {"constraint_type": self.constraint_type, "formulation": self.formulation.state_dict()}
-        for attr_name, attr in [("multiplier", self.multiplier), ("penalty_coefficient", self.penalty_coefficient)]:
-            state_dict[attr_name] = attr.state_dict() if attr is not None else None
-        return state_dict
-
-    def load_state_dict(self, state_dict):
-        self.constraint_type = state_dict["constraint_type"]
-        self.formulation.load_state_dict(state_dict["formulation"])
-
-        if state_dict["multiplier"] is not None and self.multiplier is None:
-            raise ValueError("Cannot load multiplier state dict since existing multiplier is `None`.")
-        elif state_dict["multiplier"] is None and self.multiplier is not None:
-            raise ValueError("Multiplier exists but state dict is `None`.")
-        elif state_dict["multiplier"] is not None and self.multiplier is not None:
-            self.multiplier.load_state_dict(state_dict["multiplier"])
-
-        if state_dict["penalty_coefficient"] is not None and self.penalty_coefficient is None:
-            raise ValueError("Cannot load penalty_coefficient state dict since existing penalty_coefficient is `None`.")
-        elif state_dict["penalty_coefficient"] is None and self.penalty_coefficient is not None:
-            raise ValueError("Penalty coefficient exists but state dict is `None`.")
-        elif state_dict["penalty_coefficient"] is not None and self.penalty_coefficient is not None:
-            self.penalty_coefficient.load_state_dict(state_dict["penalty_coefficient"])
-
     def __repr__(self):
-        repr = f"ConstraintGroup(constraint_type={self.constraint_type}, formulation={self.formulation}"
+        repr = f"Constraint(constraint_type={self.constraint_type}, formulation={self.formulation}"
         if self.multiplier is not None:
             repr += f", multiplier={self.multiplier}"
         if self.penalty_coefficient is not None:
diff --git a/cooper/constraints/constraint_state.py b/cooper/constraints/constraint_state.py
index 04a1d7cb..b0801481 100644
--- a/cooper/constraints/constraint_state.py
+++ b/cooper/constraints/constraint_state.py
@@ -13,21 +13,21 @@ class ConstraintType(Enum):
 
 @dataclass
 class ConstraintState:
-    """State of a constraint group describing the current constraint violation.
+    """State of a constraint describing the current constraint violation.
 
     Args:
         violation: Measurement of the constraint violation at some value of the primal
             parameters. This is expected to be differentiable with respect to the
             primal parameters.
         constraint_features: The features of the (differentiable) constraint. This is
-            used to evaluate the Lagrange multiplier associated with a constraint group.
+            used to evaluate the Lagrange multiplier associated with a constraint.
             For example, an `IndexedMultiplier` expects the indices of the constraints
             whose Lagrange multipliers are to be retrieved; while an
             `ImplicitMultiplier` expects general tensor-valued features for the
             constraints. This field is not used for `DenseMultiplier`//s.
             This can be used in conjunction with an `IndexedMultiplier` to indicate the
             measurement of the violation for only a subset of the constraints within a
-            `ConstraintGroup`.
+            `Constraint`.
         strict_violation: Measurement of the constraint violation which may be
             non-differentiable with respect to the primal parameters. When provided,
             the (necessarily differentiable) `violation` is used to compute the gradient
diff --git a/cooper/constraints/slacks.py b/cooper/constraints/slacks.py
index f46c14d1..d616b4a5 100644
--- a/cooper/constraints/slacks.py
+++ b/cooper/constraints/slacks.py
@@ -60,7 +60,7 @@ class ExplicitSlack(SlackVariable):
     """
     An explicit slack holds a :py:class:`~torch.nn.parameter.Parameter` which contains
     (explicitly) the value of the slack variable with a
-    :py:class:`~cooper.constraints.ConstraintGroup` in a
+    :py:class:`~cooper.constraints.Constraint` in a
     :py:class:`~cooper.cmp.ConstrainedMinimizationProblem`.
 
     Args:
@@ -105,10 +105,10 @@ class DenseSlack(ExplicitSlack):
     """Simplest kind of trainable slack variable.
 
     :py:class:`~cooper.constraints.slacks.DenseSlack`\\s are suitable for low to
-    mid-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s for which all the
+    mid-scale :py:class:`~cooper.constraints.Constraint`\\s for which all the
     constraints in the group are measured constantly.
 
-    For large-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s (for example,
+    For large-scale :py:class:`~cooper.constraints.Constraint`\\s (for example,
     one constraint per training example) you may consider using an
     :py:class:`~cooper.constraints.slacks.IndexedSlack`.
     """
@@ -121,12 +121,12 @@ def forward(self):
 class IndexedSlack(ExplicitSlack):
     """Indexed slacks extend the functionality of
     :py:class:`~cooper.constraints.slacks.DenseSlack`\\s to cases where the number of
-    constraints in the :py:class:`~cooper.constraints.ConstraintGroup` is too large.
+    constraints in the :py:class:`~cooper.constraints.Constraint` is too large.
     This situation may arise, for example, when imposing point-wise constraints over all
     the training samples in a learning task.
 
     In such cases, it might be computationally prohibitive to measure the value for all
-    the constraints in the :py:class:`~cooper.constraints.ConstraintGroup` and one may
+    the constraints in the :py:class:`~cooper.constraints.Constraint` and one may
     typically resort to sampling. :py:class:`~cooper.constraints.slacks.IndexedSlack`\\s
     enable time-efficient retrieval of the slack variables for the sampled constraints
     only, and memory-efficient sparse gradients (on GPU).
diff --git a/cooper/formulations/utils.py b/cooper/formulations/utils.py
index 152a3a69..5b53e2e9 100644
--- a/cooper/formulations/utils.py
+++ b/cooper/formulations/utils.py
@@ -14,7 +14,7 @@ def compute_primal_weighted_violation(
 
     Args:
         constraint_factor_value: The value of the multiplier or penalty coefficient for the
-            constraint group.
+            constraint.
         violation: Tensor of constraint violations.
     """
 
@@ -57,7 +57,7 @@ def compute_dual_weighted_violation(
     Bertsekas (2016).
 
     Args:
-        multiplier_value: The value of the multiplier for the constraint group.
+        multiplier_value: The value of the multiplier for the constraint.
         violation: Tensor of constraint violations.
         penalty_coefficient_value: Tensor of penalty coefficient values.
     """
@@ -92,7 +92,7 @@ def compute_quadratic_augmented_contribution(
     constraint_type: ConstraintType,
 ) -> Optional[torch.Tensor]:
     r"""
-    Computes the quadratic penalty for a constraint group.
+    Computes the quadratic penalty for a constraint.
 
     When the constraint is an inequality constraint, the quadratic penalty is computed
     following Eq 17.65 in Numerical Optimization by Nocedal and Wright (2006). Denoting
diff --git a/cooper/multipliers/__init__.py b/cooper/multipliers/__init__.py
index 4e3aebed..8c7b8cda 100644
--- a/cooper/multipliers/__init__.py
+++ b/cooper/multipliers/__init__.py
@@ -13,7 +13,7 @@ def evaluate_constraint_factor(
     module: ConstraintFactor, constraint_features: torch.Tensor, violation: torch.Tensor
 ) -> torch.Tensor:
     """Evaluate the Lagrange multiplier or penalty coefficient associated with a
-    constraint group.
+    constraint.
 
     Args:
         module: Multiplier or penalty coefficient module.
@@ -35,7 +35,7 @@ def evaluate_constraint_factor(
     if not value.requires_grad and value.numel() == 1 and violation.numel() > 1:
         # Expand the value of the penalty coefficient to match the shape of the violation.
         # This enables the use of a single penalty coefficient for all constraints in a
-        # constraint group.
+        # `Constraint` object.
         # We only do this for penalty coefficients an not multipliers because we expect
         # a one-to-one mapping between multiplier values and constraints. If multiplier
         # sharing is desired, this should be done explicitly by the user.
diff --git a/cooper/multipliers/multipliers.py b/cooper/multipliers/multipliers.py
index 06780003..28a070aa 100644
--- a/cooper/multipliers/multipliers.py
+++ b/cooper/multipliers/multipliers.py
@@ -28,12 +28,12 @@ class ExplicitMultiplier(Multiplier):
     """
     An explicit multiplier holds a :py:class:`~torch.nn.parameter.Parameter` which
     contains (explicitly) the value of the Lagrange multipliers associated with a
-    :py:class:`~cooper.constraints.ConstraintGroup` in a
+    :py:class:`~cooper.constraints.Constraint` in a
     :py:class:`~cooper.cmp.ConstrainedMinimizationProblem`.
 
     .. warning::
         When `restart_on_feasible=True`, the entries of the multiplier which correspond
-        to feasible constraints in the :py:class:`~cooper.constraints.ConstraintGroup`
+        to feasible constraints in the :py:class:`~cooper.constraints.Constraint`
         are reset to a default value (typically zero) by the
         :py:meth:`~cooper.multipliers.ExplicitMultiplier.post_step_` method. Note that
         we do **not** perform any modification to the dual optimizer associated with
@@ -163,10 +163,10 @@ class DenseMultiplier(ExplicitMultiplier):
     """Simplest kind of trainable Lagrange multiplier.
 
     :py:class:`~cooper.multipliers.DenseMultiplier`\\s are suitable for low to mid-scale
-    :py:class:`~cooper.constraints.ConstraintGroup`\\s for which all the constraints
+    :py:class:`~cooper.constraints.Constraint`\\s for which all the constraints
     in the group are measured constantly.
 
-    For large-scale :py:class:`~cooper.constraints.ConstraintGroup`\\s (for example,
+    For large-scale :py:class:`~cooper.constraints.Constraint`\\s (for example,
     one constraint per training example) you may consider using an
     :py:class:`~cooper.multipliers.IndexedMultiplier`.
     """
@@ -182,12 +182,12 @@ def __repr__(self):
 class IndexedMultiplier(ExplicitMultiplier):
     """Indexed multipliers extend the functionality of
     :py:class:`~cooper.multipliers.DenseMultiplier`\\s to cases where the number of
-    constraints in the :py:class:`~cooper.constraints.ConstraintGroup` is too large.
+    constraints in the :py:class:`~cooper.constraints.Constraint` is too large.
     This situation may arise, for example, when imposing point-wise constraints over all
     the training samples in a learning task.
 
     In such cases, it might be computationally prohibitive to measure the value for all
-    the constraints in the :py:class:`~cooper.constraints.ConstraintGroup` and one may
+    the constraints in the :py:class:`~cooper.constraints.Constraint` and one may
     typically resort to sampling. :py:class:`~cooper.multipliers.IndexedMultiplier`\\s
     enable time-efficient retrieval of the multipliers for the sampled constraints only,
     and memory-efficient sparse gradients (on GPU).
@@ -244,7 +244,7 @@ def __repr__(self):
 class ImplicitMultiplier(Multiplier):
     """An implicit multiplier is a :py:class:`~torch.nn.Module` that computes the value
     of a Lagrange multiplier associated with a
-    :py:class:`~cooper.constraints.ConstraintGroup` based on "features" for each
+    :py:class:`~cooper.constraints.Constraint` based on "features" for each
     constraint. The multiplier is _implicitly_  represented by the features of its
     associated constraint as well as the computation that takes place in the
     :py:meth:`~cooper.multipliers.ImplicitMultiplier.forward` method.
diff --git a/cooper/optim/constrained_optimizers/alternating_optimizer.py b/cooper/optim/constrained_optimizers/alternating_optimizer.py
index 9bcafbd9..4a466aa7 100644
--- a/cooper/optim/constrained_optimizers/alternating_optimizer.py
+++ b/cooper/optim/constrained_optimizers/alternating_optimizer.py
@@ -64,17 +64,17 @@ def step(self):
         pass
 
     def update_penalty_coefficients(self, cmp_state: CMPState) -> None:
-        """Update the penalty coefficients of the constraint groups. Only the penalty
+        """Update the penalty coefficients of the constraints. Only the penalty
         coefficients associated with the ``FormulationType.AUGMENTED_LAGRANGIAN`` and
         constraints that ``contributes_to_dual_update`` are updated.
         """
-        for constraint_group, constraint_state in cmp_state.observed_constraints:
-            if constraint_group.formulation_type == FormulationType.AUGMENTED_LAGRANGIAN:
+        for constraint, constraint_state in cmp_state.observed_constraints:
+            if constraint.formulation_type == FormulationType.AUGMENTED_LAGRANGIAN:
                 # We might reach this point via an AugmetedLagrangianOptimizer acting
                 # on some constraints that do not use an Augmented Lagrangian formulation,
                 # so we do _not_ apply penalty coefficient updates to those.
                 if constraint_state.contributes_to_dual_update:
-                    constraint_group.update_penalty_coefficient(constraint_state=constraint_state)
+                    constraint.update_penalty_coefficient(constraint_state=constraint_state)
 
 
 class AlternatingPrimalDualOptimizer(BaseAlternatingOptimizer):
diff --git a/cooper/optim/constrained_optimizers/constrained_optimizer.py b/cooper/optim/constrained_optimizers/constrained_optimizer.py
index c36c24e2..08caefec 100644
--- a/cooper/optim/constrained_optimizers/constrained_optimizer.py
+++ b/cooper/optim/constrained_optimizers/constrained_optimizer.py
@@ -43,7 +43,7 @@ class ConstrainedOptimizer:
         dual_optimizers: Optimizer(s) for the dual variables (e.g. the Lagrange
             multipliers associated with the constraints). An iterable of
             ``torch.optim.Optimizer``\\s can be passed to handle the case of several
-            ``~cooper.constraints.ConstraintGroup``\\s. If dealing with an unconstrained
+            ``~cooper.constraints.Constraint``\\s. If dealing with an unconstrained
             problem, please use a
             :py:class:`~cooper.optim.cooper_optimizer.UnconstrainedOptimizer` instead.
 
diff --git a/cooper/optim/utils.py b/cooper/optim/utils.py
index aa5a8b42..e04d9a6e 100644
--- a/cooper/optim/utils.py
+++ b/cooper/optim/utils.py
@@ -61,7 +61,7 @@ def load_cooper_optimizer_from_state_dict(
 ):
     """Creates a Cooper optimizer and loads the state_dicts contained in a
     :py:class:`~cooper.optim.CooperOptimizerState` onto instantiated primal and dual
-    optimizers and constraint groups or multipliers.
+    optimizers and constraints or multipliers.
     """
 
     # Load primal optimizers
@@ -104,7 +104,7 @@ def load_cooper_optimizer_from_state_dict(
         for multiplier, multiplier_state in zip(multipliers, multiplier_states):
             multiplier.load_state_dict(multiplier_state)
 
-    # Since we have extracted the multiplier information above, we discard the constraint_groups below
+    # Since we have extracted the multiplier information above, we discard the constraints below
     return create_optimizer_from_kwargs(
         primal_optimizers=primal_optimizers,
         extrapolation=cooper_optimizer_state.extrapolation,
diff --git a/tests/formulations/test_augmented_lagrangian.py b/tests/formulations/test_augmented_lagrangian.py
index 4fb8bb9b..d7727cbd 100644
--- a/tests/formulations/test_augmented_lagrangian.py
+++ b/tests/formulations/test_augmented_lagrangian.py
@@ -1,6 +1,3 @@
-import os
-import tempfile
-
 import cooper_test_utils
 import pytest
 import testing_utils
@@ -221,67 +218,5 @@ def test_convergence_augmented_lagrangian(
         assert torch.allclose(param, exact_solution, atol=1e-3)
 
 
-def test_save_and_load_state_dict(alternation_type, Toy2dCMP_params_init, device):
-    params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers(
-        use_multiple_primal_optimizers=False, params_init=Toy2dCMP_params_init
-    )
-
-    cmp, cooper_optimizer, penalty_coefficients, formulation_kwargs = setup_augmented_lagrangian_objects(
-        primal_optimizers=primal_optimizers, alternation_type=alternation_type, device=device
-    )
-
-    roll_kwargs = {"compute_cmp_state_fn": lambda: cmp.compute_cmp_state(params)}
-
-    for _ in range(10):
-        cooper_optimizer.roll(**roll_kwargs)
-
-    # Generate checkpoints after 10 steps of training
-    penalty_coefficient0_after10 = penalty_coefficients[0]().clone().detach()
-    penalty_coefficient1_after10 = penalty_coefficients[1]().clone().detach()
-    multiplier0_after10 = cmp.constraint_groups[0].multiplier().clone().detach()
-    multiplier1_after10 = cmp.constraint_groups[1].multiplier().clone().detach()
-
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        torch.save(cmp.constraint_groups[0].state_dict(), os.path.join(tmpdirname, "cg0.pt"))
-        torch.save(cmp.constraint_groups[1].state_dict(), os.path.join(tmpdirname, "cg1.pt"))
-
-        cg0_state_dict = torch.load(os.path.join(tmpdirname, "cg0.pt"))
-        cg1_state_dict = torch.load(os.path.join(tmpdirname, "cg1.pt"))
-
-    # Train for another 10 steps -- so a total of 20 steps
-    for _ in range(10):
-        cmp_state = cooper_optimizer.roll(**roll_kwargs)  # noqa: F841
-
-    # Reload from checkpoint at 10 steps
-    new_penalty_coefficient0 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device))
-    new_penalty_coefficient1 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device))
-    new_cmp = cooper_test_utils.Toy2dCMP(
-        use_ineq_constraints=True,
-        formulation_type=cooper.FormulationType.AUGMENTED_LAGRANGIAN,
-        penalty_coefficients=(new_penalty_coefficient0, new_penalty_coefficient1),
-        device=device,
-    )
-    new_cmp.constraint_groups[0].load_state_dict(cg0_state_dict)
-    new_cmp.constraint_groups[1].load_state_dict(cg1_state_dict)
-
-    # Ensure loaded value of the penalty coefficient matches that observe when creating
-    # the checkpoint
-    new_penalty_coefficient0_value = new_penalty_coefficient0().clone().detach()
-    new_penalty_coefficient1_value = new_penalty_coefficient1().clone().detach()
-    assert torch.allclose(new_penalty_coefficient0_value, penalty_coefficient0_after10)
-    assert torch.allclose(new_penalty_coefficient1_value, penalty_coefficient1_after10)
-
-    # Ensure loaded value of the multipliers matches that observe when creating the
-    # checkpoint
-    new_multiplier0_value = new_cmp.constraint_groups[0].multiplier().clone().detach()
-    new_multiplier1_value = new_cmp.constraint_groups[1].multiplier().clone().detach()
-    if new_multiplier0_value != 0:
-        assert not torch.allclose(new_multiplier0_value, cmp.constraint_groups[0].multiplier())
-    if new_multiplier1_value != 0:
-        assert not torch.allclose(new_multiplier1_value, cmp.constraint_groups[1].multiplier())
-    assert torch.allclose(new_multiplier0_value, multiplier0_after10)
-    assert torch.allclose(new_multiplier1_value, multiplier1_after10)
-
-
 # TODO(gallego-posada): Add a test to ensure IndexedPenaltyCoefficient works as expected
 # when used in an Augmented Lagrangian formulation.
diff --git a/tests/helpers/cooper_test_utils.py b/tests/helpers/cooper_test_utils.py
index 775e4d77..f3580792 100644
--- a/tests/helpers/cooper_test_utils.py
+++ b/tests/helpers/cooper_test_utils.py
@@ -66,7 +66,7 @@ def __init__(
 
         self.slack_variables = slack_variables
 
-        self.constraint_groups = []
+        self.constraints = []
         if self.use_ineq_constraints:
             for ix in range(2):
 
@@ -77,16 +77,16 @@ def __init__(
                     )
 
                 penalty_coefficient = penalty_coefficients[ix] if penalty_coefficients is not None else None
-                constraint_group = cooper.ConstraintGroup(
+                constraint = cooper.Constraint(
                     constraint_type=constraint_type,
                     formulation_type=formulation_type,
                     multiplier=multiplier,
                     penalty_coefficient=penalty_coefficient,
                     formulation_kwargs=formulation_kwargs,
                 )
-                self.constraint_groups.append(constraint_group)
+                self.constraints.append(constraint)
 
-        self.multipliers = [cg.multiplier for cg in self.constraint_groups if cg.multiplier is not None]
+        self.multipliers = [cg.multiplier for cg in self.constraints if cg.multiplier is not None]
 
     def analytical_gradients(self, params):
         """Returns the analytical gradients of the loss and constraints for a given
@@ -147,7 +147,7 @@ def compute_violations(self, params) -> cooper.CMPState:
             cg0_state = cooper.ConstraintState(violation=cg0_violation)
             cg1_state = cooper.ConstraintState(violation=cg1_violation)
 
-        observed_constraints = [(self.constraint_groups[0], cg0_state), (self.constraint_groups[1], cg1_state)]
+        observed_constraints = [(self.constraints[0], cg0_state), (self.constraints[1], cg1_state)]
 
         return cooper.CMPState(loss=None, observed_constraints=observed_constraints)
 
diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py
index d610aa88..911ad649 100644
--- a/tests/test_checkpoint.py
+++ b/tests/test_checkpoint.py
@@ -7,7 +7,6 @@
 
 # Import basic closure example from helpers
 import cooper_test_utils
-import pytest
 import torch
 
 import cooper
@@ -106,124 +105,3 @@ def test_checkpoint(Toy2dCMP_problem_properties, Toy2dCMP_params_init, use_multi
     assert cooper.utils.validate_state_dicts(loaded_model.state_dict(), model_state_dict_200)
     # These are ConstrainedOptimizerState objects and not dicts
     assert loaded_constrained_optimizer.state_dict() == constrained_optimizer_state_dict_200
-
-
-@pytest.mark.parametrize(
-    "formulation_type",
-    [
-        cooper.FormulationType.PENALTY,
-        cooper.FormulationType.QUADRATIC_PENALTY,
-        cooper.FormulationType.LAGRANGIAN,
-        cooper.FormulationType.AUGMENTED_LAGRANGIAN,
-    ],
-)
-def test_formulation_checkpoint(formulation_type, Toy2dCMP_params_init, device):
-    formulation_class = formulation_type.value
-
-    if formulation_type == cooper.FormulationType.PENALTY:
-        constraint_type = cooper.ConstraintType.PENALTY
-    else:
-        constraint_type = cooper.ConstraintType.INEQUALITY
-
-    has_multiplier = formulation_class.expects_multiplier
-    has_penalties = formulation_class.expects_penalty_coefficient
-
-    params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers(
-        use_multiple_primal_optimizers=False, params_init=Toy2dCMP_params_init
-    )
-
-    def make_fresh_penalty_coefficients(has_penalties):
-        if has_penalties:
-            penalty_coefficient0 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device))
-            penalty_coefficient1 = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=device))
-            return [penalty_coefficient0, penalty_coefficient1]
-        else:
-            return None
-
-    penalty_coefficients = make_fresh_penalty_coefficients(has_penalties=has_penalties)
-    cmp = cooper_test_utils.Toy2dCMP(
-        use_ineq_constraints=True,
-        formulation_type=formulation_type,
-        penalty_coefficients=penalty_coefficients,
-        constraint_type=constraint_type,
-        device=device,
-    )
-
-    cooper_optimizer = cooper_test_utils.build_cooper_optimizer_for_Toy2dCMP(
-        primal_optimizers=primal_optimizers,
-        multipliers=cmp.multipliers,
-        extrapolation=False,
-        alternation_type=cooper.optim.AlternationType.FALSE,
-    )
-
-    roll_kwargs = {"compute_cmp_state_fn": lambda: cmp.compute_cmp_state(params)}
-
-    for _ in range(10):
-        cooper_optimizer.roll(**roll_kwargs)
-        if has_penalties:
-            # Multiply the penalty coefficients by 1.01
-            for _penalty_coefficient in penalty_coefficients:
-                _penalty_coefficient.value = _penalty_coefficient() * 1.01
-
-    # Generate checkpoints after 10 steps of training
-    if has_penalties:
-        penalty_coefficient0_after10 = penalty_coefficients[0]().clone().detach()
-        penalty_coefficient1_after10 = penalty_coefficients[1]().clone().detach()
-
-    if has_multiplier:
-        multiplier0_after10 = cmp.constraint_groups[0].multiplier().clone().detach()
-        multiplier1_after10 = cmp.constraint_groups[1].multiplier().clone().detach()
-
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        torch.save(cmp.constraint_groups[0].state_dict(), os.path.join(tmpdirname, "cg0.pt"))
-        torch.save(cmp.constraint_groups[1].state_dict(), os.path.join(tmpdirname, "cg1.pt"))
-
-        cg0_state_dict = torch.load(os.path.join(tmpdirname, "cg0.pt"))
-        cg1_state_dict = torch.load(os.path.join(tmpdirname, "cg1.pt"))
-
-    # Train for another 10 steps
-    for _ in range(10):
-        cooper_optimizer.roll(**roll_kwargs)
-        if has_penalties:
-            # Multiply the penalty coefficients by 1.01
-            for penalty_coefficient in penalty_coefficients:
-                penalty_coefficient.value = penalty_coefficient() * 1.01
-
-    # Reload from checkpoint
-    new_penalty_coefficients = make_fresh_penalty_coefficients(has_penalties=has_penalties)
-
-    new_cmp = cooper_test_utils.Toy2dCMP(
-        use_ineq_constraints=True,
-        formulation_type=formulation_type,
-        penalty_coefficients=new_penalty_coefficients,
-        constraint_type=constraint_type,
-        device=device,
-    )
-    new_cmp.constraint_groups[0].load_state_dict(cg0_state_dict)
-    new_cmp.constraint_groups[1].load_state_dict(cg1_state_dict)
-
-    if has_penalties:
-        # The loaded penalty coefficients come from 10 steps of training, so they should be
-        # different from the current ones
-        new_penalty_coefficient0_value = new_penalty_coefficients[0]().clone().detach()
-        new_penalty_coefficient1_value = new_penalty_coefficients[1]().clone().detach()
-        assert not torch.allclose(new_penalty_coefficient0_value, penalty_coefficients[0]())
-        assert not torch.allclose(new_penalty_coefficient1_value, penalty_coefficients[1]())
-
-        # They should, however, be the same as the ones recorded before the checkpoint
-        assert torch.allclose(new_penalty_coefficient0_value, penalty_coefficient0_after10)
-        assert torch.allclose(new_penalty_coefficient1_value, penalty_coefficient1_after10)
-
-    if has_multiplier:
-        # Similar story for the multipliers
-        new_multiplier0_value = new_cmp.constraint_groups[0].multiplier().clone().detach()
-        new_multiplier1_value = new_cmp.constraint_groups[1].multiplier().clone().detach()
-
-        # Ignoring the case where the multiplier is 0 as both may match simply because
-        # the run is feasible
-        if new_multiplier0_value != 0:
-            assert not torch.allclose(new_multiplier0_value, cmp.constraint_groups[0].multiplier())
-        if new_multiplier1_value != 0:
-            assert not torch.allclose(new_multiplier1_value, cmp.constraint_groups[1].multiplier())
-        assert torch.allclose(new_multiplier0_value, multiplier0_after10)
-        assert torch.allclose(new_multiplier1_value, multiplier1_after10)
diff --git a/tests/test_cmp.py b/tests/test_cmp.py
index 9958c81c..934fb931 100644
--- a/tests/test_cmp.py
+++ b/tests/test_cmp.py
@@ -65,12 +65,12 @@ def evaluate_constraints(params) -> list[cooper.ConstraintState]:
         multiplier0 = cooper.multipliers.DenseMultiplier(
             constraint_type=constraint_type, num_constraints=1, device=device
         )
-        cg0 = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=multiplier0)
+        cg0 = cooper.Constraint(**default_cg_kwargs, multiplier=multiplier0)
 
         multiplier1 = cooper.multipliers.DenseMultiplier(
             constraint_type=constraint_type, num_constraints=1, device=device
         )
-        cg1 = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=multiplier1)
+        cg1 = cooper.Constraint(**default_cg_kwargs, multiplier=multiplier1)
 
         multipliers = [multiplier0, multiplier1]
     else:
diff --git a/tests/test_extrapolation.py b/tests/test_extrapolation.py
index 12c357d7..01577360 100644
--- a/tests/test_extrapolation.py
+++ b/tests/test_extrapolation.py
@@ -77,7 +77,7 @@ def test_manual_extrapolation(Toy2dCMP_problem_properties, Toy2dCMP_params_init,
     cooper_optimizer.roll(compute_cmp_state_fn)
 
     assert torch.allclose(params, mktensor([5.8428e-04, -9.2410e-01]))
-    multiplier_values = [constraint.multiplier() for constraint in cmp.constraint_groups]
+    multiplier_values = [constraint.multiplier() for constraint in cmp.constraints]
     for multiplier, target_value in zip(multiplier_values, [0.0388, 0.0]):
         if not torch.allclose(multiplier, mktensor([target_value]), atol=1e-4):
             breakpoint()
diff --git a/tests/test_held_out_constraints.py b/tests/test_held_out_constraints.py
index 0d838620..727fa04c 100644
--- a/tests/test_held_out_constraints.py
+++ b/tests/test_held_out_constraints.py
@@ -37,7 +37,7 @@ def __init__(self, use_constraint_surrogate=False, device=None, observe_probabil
         self.multiplier = cooper.multipliers.IndexedMultiplier(
             constraint_type=cooper.ConstraintType.INEQUALITY, num_constraints=2, device=device
         )
-        self.constraint_group = cooper.ConstraintGroup(
+        self.constraint = cooper.Constraint(
             constraint_type=cooper.ConstraintType.INEQUALITY,
             formulation_type=cooper.FormulationType.LAGRANGIAN,
             multiplier=self.multiplier,
@@ -112,7 +112,7 @@ def compute_cmp_state(self, params):
             strict_constraint_features=strict_constraint_features,
         )
 
-        return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint_group, constraint_state)])
+        return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint, constraint_state)])
 
 
 @pytest.fixture(params=[0.1, 0.5, 0.7, 1.0])
@@ -134,7 +134,7 @@ def test_manual_heldout_constraints(Toy2dCMP_problem_properties, Toy2dCMP_params
     cmp = RandomConstraintsToy2dCMP(
         use_constraint_surrogate=True, device=device, observe_probability=observe_probability
     )
-    multipliers = cmp.constraint_group.multiplier
+    multipliers = cmp.constraint.multiplier
 
     primal_lr, dual_lr = 1e-2, 1e-2
     params, primal_optimizers = cooper_test_utils.build_params_and_primal_optimizers(
diff --git a/tutorials/scripts/plot_gaussian_mixture.py b/tutorials/scripts/plot_gaussian_mixture.py
index e6472056..a1f9bd0d 100644
--- a/tutorials/scripts/plot_gaussian_mixture.py
+++ b/tutorials/scripts/plot_gaussian_mixture.py
@@ -109,7 +109,7 @@ def __init__(self, use_strict_constraints: bool = False, constraint_level: float
 
         constraint_type = constraint_type = cooper.ConstraintType.INEQUALITY
         self.multiplier = cooper.multipliers.DenseMultiplier(constraint_type=constraint_type, num_constraints=1)
-        self.constraint_group = cooper.ConstraintGroup(
+        self.constraint = cooper.Constraint(
             constraint_type=constraint_type,
             formulation_type=cooper.FormulationType.LAGRANGIAN,
             multiplier=self.multiplier,
@@ -139,7 +139,7 @@ def compute_cmp_state(self, model, inputs, targets):
             strict_violation = self.constraint_level - prop_0
 
         constraint_state = cooper.ConstraintState(violation=differentiable_violation, strict_violation=strict_violation)
-        return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint_group, constraint_state)])
+        return cooper.CMPState(loss=loss, observed_constraints=[(self.constraint, constraint_state)])
 
 
 def train(problem_name, inputs, targets, num_iters=5000, lr=1e-2, constraint_level=0.7):
diff --git a/tutorials/scripts/plot_infrequent_true_constraint.py b/tutorials/scripts/plot_infrequent_true_constraint.py
index 8a2f6f20..82a4e017 100644
--- a/tutorials/scripts/plot_infrequent_true_constraint.py
+++ b/tutorials/scripts/plot_infrequent_true_constraint.py
@@ -89,12 +89,12 @@ def __init__(self, y: torch.Tensor, z: torch.Tensor, constraint_level: float = 1
         self.r = min(y.shape[0], z.shape[0])
         self.constraint_level = constraint_level
 
-        # Creating a constraint group with a single constraint
+        # Creating a Constraint object for a single equality constraint
         constraint_type = cooper.ConstraintType.EQUALITY
         self.multiplier = cooper.multipliers.DenseMultiplier(
             constraint_type=constraint_type, num_constraints=1, device=DEVICE
         )
-        self.constraint = cooper.ConstraintGroup(
+        self.constraint = cooper.Constraint(
             constraint_type=constraint_type,
             formulation_type=cooper.FormulationType.LAGRANGIAN,
             multiplier=self.multiplier,
diff --git a/tutorials/scripts/plot_max_entropy.py b/tutorials/scripts/plot_max_entropy.py
index 256c64aa..d1d985a9 100644
--- a/tutorials/scripts/plot_max_entropy.py
+++ b/tutorials/scripts/plot_max_entropy.py
@@ -39,8 +39,8 @@ def __init__(self, target_mean: float) -> None:
         }
         mean_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1)
         sum_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1)
-        self.mean_constraint = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=mean_multiplier)
-        self.sum_constraint = cooper.ConstraintGroup(**default_cg_kwargs, multiplier=sum_multiplier)
+        self.mean_constraint = cooper.Constraint(**default_cg_kwargs, multiplier=mean_multiplier)
+        self.sum_constraint = cooper.Constraint(**default_cg_kwargs, multiplier=sum_multiplier)
 
         self.multipliers = {"mean": mean_multiplier, "sum": sum_multiplier}
         self.all_constraints = [self.sum_constraint, self.mean_constraint]
@@ -66,7 +66,7 @@ def compute_cmp_state(self, log_probs: torch.Tensor) -> cooper.CMPState:
         return cooper.CMPState(loss=-entropy, observed_constraints=observed_constraints)
 
 
-# Define the problem with the constraint groups
+# Define the problem with the constraints
 cmp = MaximumEntropy(target_mean=4.5)
 
 # Define the primal parameters and optimizer
diff --git a/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py b/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py
index 0b97c5e8..d513fab4 100644
--- a/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py
+++ b/tutorials/scripts/plot_max_entropy_augmented_lagrangian.py
@@ -39,14 +39,14 @@ def __init__(self, target_mean: float) -> None:
         mean_penalty_coefficient = cooper.multipliers.DensePenaltyCoefficient(torch.tensor(1.0, device=DEVICE))
         sum_multiplier = cooper.multipliers.DenseMultiplier(**default_multiplier_kwargs, num_constraints=1)
 
-        self.mean_constraint = cooper.ConstraintGroup(
+        self.mean_constraint = cooper.Constraint(
             constraint_type=cooper.ConstraintType.EQUALITY,
             formulation_type=cooper.FormulationType.AUGMENTED_LAGRANGIAN,
             multiplier=mean_multiplier,
             penalty_coefficient=mean_penalty_coefficient,
             formulation_kwargs={"penalty_growth_factor": 1.001},
         )
-        self.sum_constraint = cooper.ConstraintGroup(
+        self.sum_constraint = cooper.Constraint(
             constraint_type=cooper.ConstraintType.EQUALITY,
             formulation_type=cooper.FormulationType.LAGRANGIAN,
             multiplier=sum_multiplier,
@@ -77,7 +77,7 @@ def compute_cmp_state(self, log_probs: torch.Tensor) -> cooper.CMPState:
         return cooper.CMPState(loss=-entropy, observed_constraints=observed_constraints)
 
 
-# Define the problem with the constraint groups
+# Define the problem with the constraints
 cmp = MaximumEntropy(target_mean=4.5)
 
 # Define the primal parameters and optimizer
diff --git a/tutorials/scripts/plot_min_norm.py b/tutorials/scripts/plot_min_norm.py
index 5073c0b8..b04c0704 100644
--- a/tutorials/scripts/plot_min_norm.py
+++ b/tutorials/scripts/plot_min_norm.py
@@ -39,7 +39,7 @@
 from torch.utils.data.sampler import BatchSampler, RandomSampler
 
 import cooper
-from cooper import CMPState, ConstraintGroup, ConstraintState, ConstraintType, FormulationType
+from cooper import CMPState, Constraint, ConstraintState, ConstraintType, FormulationType
 
 style_utils.set_plot_style()
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -112,14 +112,14 @@ class MinNormWithLinearConstraints(cooper.ConstrainedMinimizationProblem):
     """Min-norm problem with linear equality constraints."""
 
     def __init__(self, num_equations: int) -> None:
-        # Create a constraint group for the equality constraints. We use a sparse constraint
+        # Create a Constraint object for the equality constraints. We use a sparse constraint
         # to be able to update the multipliers only with the observed constraints (i.e. the
         # ones that are active in the current batch)
         constraint_type = ConstraintType.EQUALITY
         self.multiplier = cooper.multipliers.IndexedMultiplier(
             constraint_type=constraint_type, num_constraints=num_equations, device=DEVICE
         )
-        self.eq_constraint = ConstraintGroup(
+        self.eq_constraint = Constraint(
             constraint_type=constraint_type, formulation_type=FormulationType.LAGRANGIAN, multiplier=self.multiplier
         )
         super().__init__()
@@ -145,7 +145,7 @@ def run_experiment(
     linear_system_dataset = LinearConstraintDataset(A, b)
     constraint_loader = instantiate_dataloader(dataset=linear_system_dataset, batch_size=batch_size, seed=exp_seed)
 
-    # Define the problem with the constraint group
+    # Define the problem with the constraint
     cmp = MinNormWithLinearConstraints(num_equations=num_equations)
 
     # Randomly initialize the primal variable and instantiate the optimizers
diff --git a/tutorials/scripts/plot_mnist_logistic_regression.py b/tutorials/scripts/plot_mnist_logistic_regression.py
index 895e5783..5b67d4a9 100644
--- a/tutorials/scripts/plot_mnist_logistic_regression.py
+++ b/tutorials/scripts/plot_mnist_logistic_regression.py
@@ -43,11 +43,11 @@
 
 primal_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)
 
-# Define the constraint group for the norm constraint
+# Define the Constraint object for the norm constraint
 multiplier = cooper.multipliers.DenseMultiplier(
     constraint_type=cooper.ConstraintType.INEQUALITY, num_constraints=1, device=DEVICE
 )
-norm_constraint = cooper.ConstraintGroup(
+norm_constraint = cooper.Constraint(
     constraint_type=cooper.ConstraintType.INEQUALITY,
     formulation_type=cooper.FormulationType.LAGRANGIAN,
     multiplier=multiplier,