Replace Elite and EliteBatch with dicts (#397)

## Description  This PR removes the Elite and EliteBatch namedtuples from the public API; instead, each archive now returns plain dicts keyed by field name (`solution`, `objective`, `measures`, `index`, `metadata`). This allows us to support custom field names in the future. In creating this PR, I was considering whether to create a custom namedtuple for each archive (when the archive is constructed, similar to how pandas has itertuples), or to use a dict. These were the pros and cons I came up with of dicts over such namedtuples: Pros: - This is clearly backwards-incompatible, so users will know something has broken. The old tuple unpacking behavior will definitely not work here, and accessing the attributes also will not work. - Dicts are less finicky than namedtuples, in that there are no attributes to manage. - Dicts are already a common data structure; people already know how to get the keys, etc. - It is easier to handle retrieving just a couple of fields. In such a case, we can just add the required keys to the dict. In contrast, we would have to set some fields to None in a namedtuple - We no longer will have a name conflict with the index method of namedtuples Cons: - The old unpacking logic will no longer work - Getting attributes will no longer work - Harder to tell which things are batch because it’s not in the name, although I think it’s usually clear from the context ## TODO  - [x] Replace all usages - [x] Double check for usage of Elite and EliteBatch ## Questions  ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go
icaros-usc · Nov 3, 2023 · 22d40ce · 22d40ce
1 parent 9484717
commit 22d40ce
Show file tree

Hide file tree

Showing 18 changed files with 238 additions and 300 deletions.
diff --git a/HISTORY.md b/HISTORY.md
@@ -6,6 +6,8 @@
 
 #### API
 
+- **Backwards-incompatible:** Replace Elite and EliteBatch with dicts
+  ({pr}`397`)
 - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in
   `as_pandas` ({pr}`396`)
 - Add ArrayStore data structure ({pr}`395`)

diff --git a/ribs/archives/__init__.py b/ribs/archives/__init__.py
@@ -8,7 +8,7 @@
 The archives in this subpackage are arranged in a one-layer hierarchy, with all
 archives inheriting from :class:`~ribs.archives.ArchiveBase`. This subpackage
 also contains several utilities associated with the archives, such as
-:class:`~ribs.archives.Elite` and :class:`~ribs.archives.ArchiveDataFrame`.
+:class:`~ribs.archives.ArchiveDataFrame`.
 
 .. autosummary::
     :toctree:
@@ -19,8 +19,6 @@
     ribs.archives.ArchiveBase
     ribs.archives.ArrayStore
     ribs.archives.AddStatus
-    ribs.archives.Elite
-    ribs.archives.EliteBatch
     ribs.archives.ArchiveDataFrame
     ribs.archives.ArchiveStats
     ribs.archives.CQDScoreResult
@@ -32,7 +30,6 @@
 from ribs.archives._array_store import ArrayStore
 from ribs.archives._cqd_score_result import CQDScoreResult
 from ribs.archives._cvt_archive import CVTArchive
-from ribs.archives._elite import Elite, EliteBatch
 from ribs.archives._grid_archive import GridArchive
 from ribs.archives._sliding_boundaries_archive import SlidingBoundariesArchive
 
@@ -43,7 +40,6 @@
     "ArchiveBase",
     "ArrayStore",
     "AddStatus",
-    "Elite",
     "ArchiveDataFrame",
     "ArchiveStats",
     "CQDScoreResult",

diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py
@@ -11,7 +11,6 @@
 from ribs.archives._archive_data_frame import ArchiveDataFrame
 from ribs.archives._archive_stats import ArchiveStats
 from ribs.archives._cqd_score_result import CQDScoreResult
-from ribs.archives._elite import Elite, EliteBatch
 
 _ADD_WARNING = (" Note that starting in pyribs 0.5.0, add() takes in a "
                 "batch of solutions unlike in pyribs 0.4.0, where add() "
@@ -45,13 +44,13 @@ def __next__(self):
 
         idx = self.archive._occupied_indices[self.iter_idx]
         self.iter_idx += 1
-        return Elite(
-            self.archive._solution_arr[idx],
-            self.archive._objective_arr[idx],
-            self.archive._measures_arr[idx],
-            idx,
-            self.archive._metadata_arr[idx],
-        )
+        return {
+            "solution": self.archive._solution_arr[idx],
+            "objective": self.archive._objective_arr[idx],
+            "measures": self.archive._measures_arr[idx],
+            "index": idx,
+            "metadata": self.archive._metadata_arr[idx],
+        }
 
 
 class ArchiveBase(ABC):  # pylint: disable = too-many-instance-attributes
@@ -273,7 +272,7 @@ def stats(self):
 
     @property
     def best_elite(self):
-        """:class:`Elite`: The elite with the highest objective in the archive.
+        """dict: The elite with the highest objective in the archive.
 
         None if there are no elites in the archive.
 
@@ -299,15 +298,15 @@ def __len__(self):
         return self._num_occupied
 
     def __iter__(self):
-        """Creates an iterator over the :class:`Elite`'s in the archive.
+        """Creates an iterator over the elites in the archive.
 
         Example:
 
             ::
 
                 for elite in archive:
-                    elite.sol
-                    elite.obj
+                    elite["solution"]
+                    elite["objective"]
                     ...
         """
         return ArchiveIterator(self)
@@ -687,13 +686,13 @@ def add(self,
 
         if self._stats.obj_max is None or max_obj_insert > self._stats.obj_max:
             new_obj_max = max_obj_insert
-            self._best_elite = Elite(
-                readonly(np.copy(solution_batch_insert[max_idx])),
-                objective_batch_insert[max_idx],
-                readonly(np.copy(measures_batch_insert[max_idx])),
-                index_batch_insert[max_idx],
-                metadata_batch_insert[max_idx],
-            )
+            self._best_elite = {
+                "solution": readonly(np.copy(solution_batch_insert[max_idx])),
+                "objective": objective_batch_insert[max_idx],
+                "measures": readonly(np.copy(measures_batch_insert[max_idx])),
+                "index": index_batch_insert[max_idx],
+                "metadata": metadata_batch_insert[max_idx],
+            }
         else:
             new_obj_max = self._stats.obj_max
 
@@ -811,13 +810,13 @@ def add_single(self, solution, objective, measures, metadata=None):
 
             if self._stats.obj_max is None or objective > self._stats.obj_max:
                 new_obj_max = objective
-                self._best_elite = Elite(
-                    readonly(np.copy(self._solution_arr[index])),
-                    objective,
-                    readonly(np.copy(self._measures_arr[index])),
-                    index,
-                    metadata,
-                )
+                self._best_elite = {
+                    "solution": readonly(np.copy(self._solution_arr[index])),
+                    "objective": objective,
+                    "measures": readonly(np.copy(self._measures_arr[index])),
+                    "index": index,
+                    "metadata": metadata,
+                }
             else:
                 new_obj_max = self._stats.obj_max
 
@@ -836,40 +835,33 @@ def retrieve(self, measures_batch):
         """Retrieves the elites with measures in the same cells as the measures
         specified.
 
-        This method operates in batch, i.e. it takes in a batch of measures and
-        outputs an :namedtuple:`EliteBatch`. Since :namedtuple:`EliteBatch` is a
-        namedtuple, it can be unpacked::
-
-            solution_batch, objective_batch, measures_batch, \\
-                index_batch, metadata_batch = archive.retrieve(...)
+        This method operates in batch, i.e., it takes in a batch of measures and
+        outputs the batched data for the elites::
 
-        Or the fields may be accessed by name::
+            elites = archive.retrieve(...)
+            elites["solution"]  # Shape: (batch_size, solution_dim)
+            elites["objective"]
+            elites["measures"]
+            elites["index"]
+            elites["metadata"]
 
-            elite_batch = archive.retrieve(...)
-            elite_batch.solution_batch
-            elite_batch.objective_batch
-            elite_batch.measures_batch
-            elite_batch.index_batch
-            elite_batch.metadata_batch
-
-        If the cell associated with ``measures_batch[i]`` has an elite in it,
-        then ``elite_batch.solution_batch[i]``,
-        ``elite_batch.objective_batch[i]``, ``elite_batch.measures_batch[i]``,
-        ``elite_batch.index_batch[i]``, and ``elite_batch.metadata_batch[i]``
-        will be set to the properties of the elite. Note that
-        ``elite_batch.measures_batch[i]`` may not be equal to
-        ``measures_batch[i]`` since the measures only need to be in the same
-        archive cell.
+        If the cell associated with ``measures_batch[i]`` has an elite in it,
+        then ``elites["solution"][i]``, ``elites["objective"][i]``,
+        ``elites["measures"][i]``, ``elites["index"][i]``, and
+        ``elites["metadata"][i]`` will be set to the properties of the elite.
+        Note that ``elites["measures"][i]`` may not be equal to the
+        ``measures_batch[i]`` passed as an argument, since the measures only
+        need to be in the same archive cell.
 
         If the cell associated with ``measures_batch[i]`` *does not* have any
         elite in it, then the corresponding outputs are set to empty values --
         namely:
 
-        * ``elite_batch.solution_batch[i]`` will be an array of NaN
-        * ``elite_batch.objective_batch[i]`` will be NaN
-        * ``elite_batch.measures_batch[i]`` will be an array of NaN
-        * ``elite_batch.index_batch[i]`` will be -1
-        * ``elite_batch.metadata_batch[i]`` will be None
+        * ``elites["solution"][i]`` will be an array of NaN
+        * ``elites["objective"][i]`` will be NaN
+        * ``elites["measures"][i]`` will be an array of NaN
+        * ``elites["index"][i]`` will be -1
+        * ``elites["metadata"][i]`` will be None
 
         If you need to retrieve a *single* elite associated with some measures,
         consider using :meth:`retrieve_single`.
@@ -878,7 +870,7 @@ def retrieve(self, measures_batch):
             measures_batch (array-like): (batch_size, :attr:`measure_dim`)
                 array of coordinates in measure space.
         Returns:
-            EliteBatch: See above.
+            dict: See above.
         Raises:
             ValueError: ``measures_batch`` is not of shape (batch_size,
                 :attr:`measure_dim`).
@@ -893,63 +885,67 @@ def retrieve(self, measures_batch):
         occupied_batch = self._occupied_arr[index_batch]
         expanded_occupied_batch = occupied_batch[:, None]
 
-        return EliteBatch(
-            solution_batch=readonly(
-                # For each occupied_batch[i], this np.where selects
-                # self._solution_arr[index_batch][i] if occupied_batch[i] is
-                # True. Otherwise, it uses the alternate value (a solution
-                # array consisting of np.nan).
-                np.where(
-                    expanded_occupied_batch,
-                    self._solution_arr[index_batch],
-                    np.full(self._solution_dim, np.nan),
-                )),
-            objective_batch=readonly(
-                np.where(
-                    occupied_batch,
-                    self._objective_arr[index_batch],
-                    # Here the alternative is just a scalar np.nan.
-                    np.nan,
-                )),
-            measures_batch=readonly(
-                np.where(
-                    expanded_occupied_batch,
-                    self._measures_arr[index_batch],
-                    # And here it is a measures array of np.nan.
-                    np.full(self._measure_dim, np.nan),
-                )),
-            index_batch=readonly(
-                np.where(
-                    occupied_batch,
-                    index_batch,
-                    # Indices must be integers, so np.nan would not work, hence
-                    # we use -1.
-                    -1,
-                )),
-            metadata_batch=readonly(
-                np.where(
-                    occupied_batch,
-                    self._metadata_arr[index_batch],
-                    None,
-                )),
-        )
+        return {
+            "solution":
+                readonly(
+                    # For each occupied_batch[i], this np.where selects
+                    # self._solution_arr[index_batch][i] if occupied_batch[i] is
+                    # True. Otherwise, it uses the alternate value (a solution
+                    # array consisting of np.nan).
+                    np.where(
+                        expanded_occupied_batch,
+                        self._solution_arr[index_batch],
+                        np.full(self._solution_dim, np.nan),
+                    )),
+            "objective":
+                readonly(
+                    np.where(
+                        occupied_batch,
+                        self._objective_arr[index_batch],
+                        # Here the alternative is just a scalar np.nan.
+                        np.nan,
+                    )),
+            "measures":
+                readonly(
+                    np.where(
+                        expanded_occupied_batch,
+                        self._measures_arr[index_batch],
+                        # And here it is a measures array of np.nan.
+                        np.full(self._measure_dim, np.nan),
+                    )),
+            "index":
+                readonly(
+                    np.where(
+                        occupied_batch,
+                        index_batch,
+                        # Indices must be integers, so np.nan would not work,
+                        # hence we use -1.
+                        -1,
+                    )),
+            "metadata":
+                readonly(
+                    np.where(
+                        occupied_batch,
+                        self._metadata_arr[index_batch],
+                        None,
+                    )),
+        }
 
     def retrieve_single(self, measures):
         """Retrieves the elite with measures in the same cell as the measures
         specified.
 
         While :meth:`retrieve` takes in a *batch* of measures, this method takes
-        in the measures for only *one* solution and returns a single
-        :namedtuple:`Elite`.
+        in the measures for only *one* solution and returns a dict holding the
+        data of a single elite.
 
         Args:
             measures (array-like): (:attr:`measure_dim`,) array of measures.
         Returns:
             If there is an elite with measures in the same cell as the measures
-            specified, then this method returns an :namedtuple:`Elite` where all
-            the fields hold the info of that elite. Otherwise, this method
-            returns an :namedtuple:`Elite` filled with the same "empty" values
-            described in :meth:`retrieve`.
+            specified, then this method returns a dict where all the fields
+            hold the info of the elite. Otherwise, this method returns a dict
+            filled with the same "empty" values described in :meth:`retrieve`.
         Raises:
             ValueError: ``measures`` is not of shape (:attr:`measure_dim`,).
             ValueError: ``measures`` has non-finite values (inf or NaN).
@@ -958,14 +954,10 @@ def retrieve_single(self, measures):
         check_1d_shape(measures, "measures", self.measure_dim, "measure_dim")
         check_finite(measures, "measures")
 
-        elite_batch = self.retrieve(measures[None])
-        return Elite(
-            elite_batch.solution_batch[0],
-            elite_batch.objective_batch[0],
-            elite_batch.measures_batch[0],
-            elite_batch.index_batch[0],
-            elite_batch.metadata_batch[0],
-        )
+        return {
+            field: arr[0]
+            for field, arr in self.retrieve(measures[None]).items()
+        }
 
     def sample_elites(self, n):
         """Randomly samples elites from the archive.
@@ -974,23 +966,19 @@ def sample_elites(self, n):
         sample is done independently, so elites may be repeated in the sample.
         Additional sampling methods may be supported in the future.
 
-        Since :namedtuple:`EliteBatch` is a namedtuple, the result can be
-        unpacked (here we show how to ignore some of the fields)::
-
-            solution_batch, objective_batch, measures_batch, *_ = \\
-                archive.sample_elites(32)
+        Example:
 
-        Or the fields may be accessed by name::
+            ::
 
-            elite = archive.sample_elites(16)
-            elite.solution_batch
-            elite.objective_batch
-            ...
+                elites = archive.sample_elites(16)
+                elites["solution"]  # Shape: (16, solution_dim)
+                elites["objective"]
+                ...
 
         Args:
             n (int): Number of elites to sample.
         Returns:
-            EliteBatch: A batch of elites randomly selected from the archive.
+            dict: Holds a batch of elites randomly selected from the archive.
         Raises:
             IndexError: The archive is empty.
         """
@@ -1000,13 +988,13 @@ def sample_elites(self, n):
         random_indices = self._rng.integers(self._num_occupied, size=n)
         selected_indices = self._occupied_indices[random_indices]
 
-        return EliteBatch(
-            readonly(self._solution_arr[selected_indices]),
-            readonly(self._objective_arr[selected_indices]),
-            readonly(self._measures_arr[selected_indices]),
-            readonly(selected_indices),
-            readonly(self._metadata_arr[selected_indices]),
-        )
+        return {
+            "solution": readonly(self._solution_arr[selected_indices]),
+            "objective": readonly(self._objective_arr[selected_indices]),
+            "measures": readonly(self._measures_arr[selected_indices]),
+            "index": readonly(selected_indices),
+            "metadata": readonly(self._metadata_arr[selected_indices]),
+        }
 
     def as_pandas(self, include_solutions=True, include_metadata=False):
         """Converts the archive into an :class:`ArchiveDataFrame` (a child class