Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add statistics to archives #100

Merged
Merged 20 commits on Jul 6, 2021 (source and target branch names were not captured)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
118 changes: 95 additions & 23 deletions ribs/archives/_archive_base.py
Expand Up @@ -110,7 +110,7 @@ def __init__(self, archive):
self.state = archive._state.copy()

def __iter__(self):
"""This is the iterator, so return self."""
"""This is the iterator, so it returns itself."""
return self

def __next__(self):
Expand All @@ -135,7 +135,9 @@ def __next__(self):
)


class ArchiveBase(ABC): # pylint: disable = too-many-instance-attributes
class ArchiveBase(
ABC
): # pylint: disable = too-many-instance-attributes, too-many-public-methods
"""Base class for archives.

This class assumes all archives use a fixed-size container with bins that
Expand Down Expand Up @@ -261,6 +263,12 @@ def __init__(self, storage_dims, behavior_dim, seed=None, dtype=np.float64):
# Tracks archive modifications by counting calls to clear() and add().
self._state = None

# Stats.
self._coverage = None
self._qd_score = None
self._obj_max = None
self._obj_mean = None

self._dtype = self._parse_dtype(dtype)

@staticmethod
Expand All @@ -284,22 +292,24 @@ def _parse_dtype(dtype):

raise ValueError("Unsupported dtype. Must be np.float32 or np.float64")

## "Housekeeping" attributes ##

@property
def initialized(self):
    """Whether :meth:`initialize` has already been called on this
    archive."""
    return self._initialized

@property
def empty(self):
    """bool: Whether the archive currently has no occupied indices, i.e.
    holds no elites."""
    return not self._occupied_indices

@property
def bins(self):
    """int: How many bins the archive has in total."""
    return self._bins

@property
def empty(self):
    """bool: Whether the archive currently has no occupied indices, i.e.
    holds no elites."""
    return not self._occupied_indices

@property
def behavior_dim(self):
"""int: Dimensionality of the behavior space."""
Expand All @@ -311,6 +321,14 @@ def solution_dim(self):
"""int: Dimensionality of the solutions in the archive."""
return self._solution_dim

@property
def dtype(self):
    """data-type: Type shared by the archive's solutions, objective values,
    and behavior values."""
    return self._dtype

## Data attributes ##

@property
@require_init
def solutions(self):
Expand Down Expand Up @@ -374,11 +392,50 @@ def metadata(self):
return self._metadata_view.update(self._occupied_indices_cols,
self._state)

## Statistics attributes ##

@property
def dtype(self):
"""data-type: The dtype of the solutions, objective values, and behavior
values."""
return self._dtype
@require_init
def coverage(self):
""":attr:`dtype`: Proportion of bins in the archive that are currently
occupied.

This will be a value in the range :math:`[0,1]`
"""
return self._coverage

@property
@require_init
def qd_score(self):
    """:attr:`dtype`: The QD score -- the objective values of all elites in
    the archive added together.

    The score is only meaningful when no objective value is negative.
    """
    return self._qd_score

@property
@require_init
def obj_max(self):
    """:attr:`dtype`: Largest objective value among the elites currently in
    the archive.

    None when the archive holds no elites.
    """
    return self._obj_max

@property
@require_init
def obj_mean(self):
    """:attr:`dtype`: Mean objective value of the elites currently in the
    archive.

    None when the archive holds no elites.
    """
    return self._obj_mean

## Methods ##

def __len__(self):
"""Number of elites in the archive."""
Expand All @@ -400,6 +457,21 @@ def __iter__(self):
require_init_inline(self)
return ArchiveIterator(self)

def _stats_reset(self):
    """Puts the archive statistics back to their starting values.

    Coverage and QD score become zero scalars of :attr:`dtype`; the maximum
    and mean objective become None.
    """
    self._coverage, self._qd_score = self.dtype(0.0), self.dtype(0.0)
    self._obj_max = self._obj_mean = None

def _stats_update(self, old_obj, new_obj):
    """Refreshes the archive statistics after ``new_obj`` replaces
    ``old_obj``.

    Args:
        old_obj: Objective value that is being replaced.
        new_obj: Objective value that takes its place.
    """
    num_elites = len(self)

    self._coverage = self.dtype(num_elites / self.bins)
    # Apply only the difference so the running sum stays in step.
    self._qd_score += new_obj - old_obj

    if self._obj_max is None:
        self._obj_max = new_obj
    else:
        self._obj_max = max(self._obj_max, new_obj)

    self._obj_mean = self._qd_score / self.dtype(num_elites)

def initialize(self, solution_dim):
"""Initializes the archive by allocating storage space.

Expand Down Expand Up @@ -434,6 +506,8 @@ def initialize(self, solution_dim):
self._metadata_view = CachedView(self._metadata)
self._state = {"clear": 0, "add": 0}

self._stats_reset()

@require_init
def clear(self):
"""Removes all elites from the archive.
Expand All @@ -451,6 +525,8 @@ def clear(self):
self._state["clear"] += 1
self._state["add"] = 0

self._stats_reset()

@abstractmethod
def get_index(self, behavior_values):
"""Returns archive indices for the given behavior values.
Expand Down Expand Up @@ -549,6 +625,7 @@ def add(self, solution, objective_value, behavior_values, metadata=None):
self._state["add"] += 1
solution = np.asarray(solution)
behavior_values = np.asarray(behavior_values)
objective_value = self.dtype(objective_value)

index = self.get_index(behavior_values)
old_objective = self._objective_values[index]
Expand All @@ -563,13 +640,15 @@ def add(self, solution, objective_value, behavior_values, metadata=None):
self._add_occupied_index(index)
status = AddStatus.NEW
value = objective_value
self._stats_update(self.dtype(0.0), objective_value)
elif was_inserted and already_occupied:
status = AddStatus.IMPROVE_EXISTING
value = objective_value - old_objective
self._stats_update(old_objective, objective_value)
else:
status = AddStatus.NOT_ADDED
value = objective_value - old_objective
return status, self.dtype(value)
return status, value

@require_init
def elite_with_behavior(self, behavior_values):
Expand Down Expand Up @@ -689,22 +768,15 @@ def as_pandas(self, include_solutions=True, include_metadata=False):
data[f"index_{i}"] = np.asarray(self._occupied_indices_cols[i],
dtype=int)

behavior_values = self._behavior_values[self._occupied_indices_cols]
for i in range(self._behavior_dim):
data[f"behavior_{i}"] = np.asarray(behavior_values[:, i],
dtype=self.dtype)
data[f"behavior_{i}"] = self.behavior_values[:, i]

data["objective"] = np.asarray(
self._objective_values[self._occupied_indices_cols],
dtype=self.dtype)
data["objective"] = self.objective_values

if include_solutions:
solutions = self._solutions[self._occupied_indices_cols]
for i in range(self._solution_dim):
data[f"solution_{i}"] = np.asarray(solutions[:, i],
dtype=self.dtype)
data[f"solution_{i}"] = self.solutions[:, i]

if include_metadata:
metadata = self._metadata[self._occupied_indices_cols]
data["metadata"] = np.asarray(metadata, dtype=object)
data["metadata"] = self.metadata
return pd.DataFrame(data)
55 changes: 55 additions & 0 deletions tests/archives/archive_base_test.py
Expand Up @@ -146,6 +146,49 @@ def test_clear_and_add_during_iteration():
data.behavior_values)


#
# Statistics tests -- just GridArchive for simplicity.
#


@pytest.mark.parametrize("dtype", [np.float64, np.float32],
                         ids=["float64", "float32"])
def test_stats_dtype(dtype):
    """Each statistic of an archive holding one elite is an instance of the
    dtype the archive was created with."""
    archive = get_archive_data("GridArchive", dtype=dtype).archive_with_elite
    assert isinstance(archive.coverage, dtype)
    assert isinstance(archive.qd_score, dtype)
    assert isinstance(archive.obj_max, dtype)
    assert isinstance(archive.obj_mean, dtype)


def test_stats_multiple_add():
    """Checks the statistics after three add() calls with different behavior
    values and objectives 1.0, 2.0 and 3.0."""
    archive = GridArchive([10, 20], [(-1, 1), (-2, 2)])
    archive.initialize(3)

    additions = [
        (1.0, [0, 0]),
        (2.0, [0.25, 0.25]),
        (3.0, [-0.25, -0.25]),
    ]
    for objective, behavior in additions:
        archive.add([1, 2, 3], objective, behavior)

    assert np.isclose(archive.coverage, 3 / 200)
    assert np.isclose(archive.qd_score, 6.0)
    assert np.isclose(archive.obj_max, 3.0)
    assert np.isclose(archive.obj_mean, 2.0)


def test_stats_add_and_overwrite():
    """Checks the statistics when a fourth add() overwrites the elite that
    the second add() inserted."""
    archive = GridArchive([10, 20], [(-1, 1), (-2, 2)])
    archive.initialize(3)

    additions = [
        (1.0, [0, 0]),
        (2.0, [0.25, 0.25]),
        (3.0, [-0.25, -0.25]),
        (5.0, [0.25, 0.25]),  # Overwrites the second add().
    ]
    for objective, behavior in additions:
        archive.add([1, 2, 3], objective, behavior)

    assert np.isclose(archive.coverage, 3 / 200)
    assert np.isclose(archive.qd_score, 9.0)
    assert np.isclose(archive.obj_max, 5.0)
    assert np.isclose(archive.obj_mean, 3.0)


#
# General tests -- should work for all archive classes.
#
Expand Down Expand Up @@ -192,6 +235,18 @@ def test_solution_dim_correct(data):
assert data.archive.solution_dim == len(data.solution)


def test_basic_stats(data):
    """Checks the statistics of ``data.archive`` (expected to report zero
    coverage and score, with no max or mean) and of
    ``data.archive_with_elite`` (expected to reflect its single elite)."""
    no_elites = data.archive
    assert no_elites.coverage == 0.0
    assert no_elites.qd_score == 0.0
    assert no_elites.obj_max is None
    assert no_elites.obj_mean is None

    one_elite = data.archive_with_elite
    assert one_elite.coverage == 1 / data.bins
    assert one_elite.qd_score == data.objective_value
    assert one_elite.obj_max == data.objective_value
    assert one_elite.obj_mean == data.objective_value


def test_elite_with_behavior_gets_correct_elite(data):
elite = data.archive_with_elite.elite_with_behavior(data.behavior_values)
assert np.all(elite.sol == data.solution)
Expand Down