ENH: simulate micro data
- New method for simulating micro data underlying a dataset.
- Refactor the micro contribution calculation, splitting it into multiple Market methods.
- New unit tests verifying that true micro values are well-approximated by observed means computed from many simulated observations.
jeffgortmaker committed Mar 14, 2022
1 parent 0a39dc0 commit f30ea32
Showing 8 changed files with 295 additions and 89 deletions.
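Before the per-file diffs, a rough sketch of the property the new unit tests check (added here for illustration; this is not code from the commit). It assumes a solved `pyblp.Simulation` and a configured `pyblp.MicroMoment`; the helper name and the `seed` argument are illustrative:

```python
import pyblp

def check_simulated_micro_means(simulation_results, moment, seed=0):
    """Sketch: with many observations, means computed from micro data
    simulated by this commit's new method should be close to the true
    micro moment values implied by the simulation."""
    # the true value consistent with the simulation
    true_value = simulation_results.replace_micro_moment_values([moment])[0].value
    # micro data underlying the moment's dataset, drawn with a fixed seed
    micro_data = simulation_results.simulate_micro_data(moment.dataset, seed=seed)
    # the actual tests map the simulated indices in micro_data through the
    # moment's compute_values and compare the resulting mean to true_value
    return true_value, micro_data
```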
4 changes: 3 additions & 1 deletion docs/api.rst
@@ -299,11 +299,12 @@ It can also be converted into a :class:`Problem` with the following method.

SimulationResults.to_problem

The following method replaces micro moment values with those that are consistent with the simulation.
The following methods can either simulate micro data or replace micro moment values with those that are consistent with the simulation.

.. autosummary::
:toctree: _api

SimulationResults.simulate_micro_data
SimulationResults.replace_micro_moment_values


@@ -371,6 +372,7 @@ When errors occur, they will either be displayed as warnings or raised as exceptions.
exceptions.SyntheticSharesNumericalError
exceptions.SyntheticDeltaNumericalError
exceptions.SyntheticCostsNumericalError
exceptions.SyntheticMicroDataNumericalError
exceptions.SyntheticMicroMomentsNumericalError
exceptions.EquilibriumRealizationNumericalError
exceptions.JacobianRealizationNumericalError
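As a usage sketch of the two methods listed above (illustration only; the `seed` argument and the `pyblp.data_to_dict` conversion follow pyblp's general conventions and are assumptions, not guarantees of this commit's exact signatures):

```python
import pandas as pd
import pyblp

def inspect_simulated_micro_data(simulation_results, dataset, seed=0):
    """Simulate the micro data underlying a pyblp.MicroDataset and view
    the drawn observations as a DataFrame."""
    micro_data = simulation_results.simulate_micro_data(dataset, seed=seed)
    return pd.DataFrame(pyblp.data_to_dict(micro_data))
```

The companion `replace_micro_moment_values` remains the way to obtain micro moment values consistent with the same simulation.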
4 changes: 4 additions & 0 deletions pyblp/exceptions.py
@@ -157,6 +157,10 @@ class SyntheticCostsNumericalError(NumericalError):
"""


class SyntheticMicroDataNumericalError(NumericalError):
"""Encountered a numerical error when computing synthetic micro data."""


class SyntheticMicroMomentsNumericalError(NumericalError):
"""Encountered a numerical error when computing synthetic micro moments."""

151 changes: 87 additions & 64 deletions pyblp/markets/market.py
@@ -8,7 +8,7 @@
from .. import exceptions, options
from ..configurations.iteration import ContractionResults, Iteration
from ..economies.economy import Economy
from ..micro import MicroDataset, Moments
from ..micro import MicroDataset, MicroMoment, Moments
from ..parameters import BetaParameter, GammaParameter, NonlinearCoefficient, Parameter, Parameters, RhoParameter
from ..primitives import Container
from ..utilities.algebra import approximately_invert, approximately_solve
@@ -1222,22 +1222,60 @@ def compute_omega_by_theta_jacobian(

return omega_jacobian

def compute_micro_contributions(
self, moments: Moments, delta: Optional[Array] = None, probabilities: Optional[Array] = None,
probabilities_tangent_mapping: Optional[Dict[int, Array]] = None, compute_jacobians: bool = False,
compute_covariances: bool = False, keep_mappings: bool = False) -> (
Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array]]):
"""Compute contributions to micro moment values, Jacobians, and covariances. By default, use the mean utilities
with which this market was initialized and do not compute Jacobian and covariance contributions.
"""
def compute_micro_weights(self, dataset: MicroDataset) -> Array:
"""Compute and validate micro dataset weights."""
try:
weights = np.asarray(dataset.compute_weights(self.t, self.products, self.agents), options.dtype)
except Exception as exception:
message = f"Failed to compute weights for micro dataset '{dataset}' because of the above exception."
raise RuntimeError(message) from exception

shapes = [
(self.I, self.J), (self.I, 1 + self.J), (self.I, self.J, self.J), (self.I, 1 + self.J, self.J),
(self.I, self.J, 1 + self.J), (self.I, 1 + self.J, 1 + self.J)
]
if weights.shape not in shapes:
raise ValueError(
f"In market {self.t}, micro dataset '{dataset}' returned an array of shape {weights.shape}, which is "
f"not one of the acceptable shapes, {shapes}."
)

if np.any(weights < 0):
raise ValueError(
f"In market {self.t}, micro dataset '{dataset}' returned an array with at least one negative value."
)

return weights

def compute_micro_values(self, moment: MicroMoment, weights: Array) -> Array:
"""Compute and validate micro moment values."""
try:
values = np.asarray(moment.compute_values(self.t, self.products, self.agents), options.dtype)
except Exception as exception:
message = f"Failed to compute values for micro moment '{moment}' because of the above exception."
raise RuntimeError(message) from exception

if values.shape != weights.shape:
raise ValueError(
f"In market {self.t}, micro moment '{moment}' returned an array of shape {values.shape} from "
f"compute_values, which is not {weights.shape}, the shape of the array returned by its "
f"dataset's compute_weights."
)

return values

def compute_micro_dataset_contributions(
self, datasets: List[MicroDataset], delta: Optional[Array] = None, probabilities: Optional[Array] = None,
probabilities_tangent_mapping: Optional[Dict[int, Array]] = None, compute_jacobians: bool = False) -> (
Tuple[
Dict[MicroDataset, Array], Dict[MicroDataset, Array], Dict[Tuple[MicroDataset, int], Array],
Dict[Tuple[MicroDataset, int], Array]
]):
"""Compute contributions of micro datasets to micro moments."""
if delta is None:
assert self.delta is not None
delta = self.delta

# pre-compute probabilities if not already computed
if probabilities is None:
probabilities, _ = self.compute_probabilities(delta)

# pre-compute and validate micro dataset weights, multiplying these with probabilities and using these to
# compute micro value denominators
weights_mapping: Dict[MicroDataset, Array] = {}
@@ -1250,46 +1288,16 @@ def compute_micro_contributions(
eliminated_probabilities_tangent_mapping: Dict[int, Array] = {}
outside_eliminated_probabilities_tangent_mapping: Dict[int, Array] = {}
eliminated_outside_probabilities_tangent_mapping: Dict[int, Array] = {}
for moment in moments.micro_moments:
dataset = moment.dataset
for dataset in datasets:
if dataset in weights_mapping or (dataset.market_ids is not None and self.t not in dataset.market_ids):
continue

# compute and validate weights
try:
weights = np.asarray(dataset.compute_weights(self.t, self.products, self.agents), options.dtype)
except Exception as exception:
message = f"Failed to compute weights for micro dataset '{dataset}' because of the above exception."
raise RuntimeError(message) from exception
shapes = [
(self.I, self.J), (self.I, 1 + self.J), (self.I, self.J, self.J), (self.I, 1 + self.J, self.J),
(self.I, self.J, 1 + self.J), (self.I, 1 + self.J, 1 + self.J)
]
if weights.shape not in shapes:
raise ValueError(
f"In market {self.t}, micro dataset '{moment.dataset}' returned an array of shape "
f"{weights.shape}, which is not one of the acceptable shapes, {shapes}."
)
weights = self.compute_micro_weights(dataset)

# check whether the weights admit a denominator that does not depend on theta
constant_denominator = (weights == weights[0]).all()
if len(weights.shape) == 3 and weights.shape[2] == 1 + self.J:
constant_denominator &= (weights == weights[..., [0]]).all()

# if so, we can already compute the contribution to the denominator for micro values based on this dataset
if constant_denominator:
shares = self.products.shares.flatten()
if weights.shape[1] == 1 + self.J:
shares = np.r_[1 - shares.sum(), shares]
if len(weights.shape) == 2:
denominator_mapping[dataset] = shares @ weights[0]
else:
assert len(weights.shape) == 3
denominator_mapping[dataset] = shares @ weights[0, :, 0]

if compute_jacobians:
for p in range(self.parameters.P):
denominator_tangent_mapping[(dataset, p)] = 0
# pre-compute probabilities
if probabilities is None:
probabilities, _ = self.compute_probabilities(delta)

# pre-compute outside probabilities
if outside_probabilities is None and weights.shape[1] == 1 + self.J:
@@ -1415,14 +1423,39 @@

weights_tangent_mapping[(dataset, p)] = weights_tangent

# compute the contribution to the denominator for micro values based on this dataset if not already computed
if not constant_denominator:
denominator_mapping[dataset] = dataset_weights.sum()
# compute the contribution to the denominator for micro values based on this dataset
denominator_mapping[dataset] = dataset_weights.sum()

if compute_jacobians:
for p in range(self.parameters.P):
if compute_jacobians:
constant_denominator = (weights == weights[0]).all()
if len(weights.shape) == 3:
constant_denominator &= (weights.shape[2] == 1 + self.J) & (weights == weights[..., [0]]).all()

for p in range(self.parameters.P):
if constant_denominator:
denominator_tangent_mapping[(dataset, p)] = 0
else:
denominator_tangent_mapping[(dataset, p)] = weights_tangent_mapping[(dataset, p)].sum()

return weights_mapping, denominator_mapping, weights_tangent_mapping, denominator_tangent_mapping

def compute_micro_contributions(
self, moments: Moments, delta: Optional[Array] = None, probabilities: Optional[Array] = None,
probabilities_tangent_mapping: Optional[Dict[int, Array]] = None, compute_jacobians: bool = False,
compute_covariances: bool = False, keep_mappings: bool = False) -> (
Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array]]):
"""Compute contributions to micro moment values, Jacobians, and covariances. By default, use the mean utilities
with which this market was initialized and do not compute Jacobian and covariance contributions.
"""

# compute dataset contributions
datasets = [m.dataset for m in moments.micro_moments]
weights_mapping, denominator_mapping, weights_tangent_mapping, denominator_tangent_mapping = (
self.compute_micro_dataset_contributions(
datasets, delta, probabilities, probabilities_tangent_mapping, compute_jacobians
)
)

# compute this market's contribution to micro moment values' and covariances' numerators and denominators
values_mapping: Dict[int, Array] = {}
micro_numerator = np.zeros((moments.MM, 1), options.dtype)
Expand All @@ -1443,17 +1476,7 @@ def compute_micro_contributions(

# compute and validate moment values
weights = weights_mapping[dataset]
try:
values = np.asarray(moment.compute_values(self.t, self.products, self.agents), options.dtype)
except Exception as exception:
message = f"Failed to compute values for micro moment '{moment}' because of the above exception."
raise RuntimeError(message) from exception
if values.shape != weights.shape:
raise ValueError(
f"In market {self.t}, micro moment '{moment}' returned an array of shape {values.shape} from "
f"compute_values, which is not {weights.shape}, the shape of the array returned by its "
f"dataset's compute_weights."
)
values = self.compute_micro_values(moment, weights)

# cache values if necessary
if compute_covariances or keep_mappings:
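The shapes validated by the new `compute_micro_weights` method above correspond to conditioning on first choices only, first choices including the outside option, and second-choice data. A minimal sketch of a dataset returning the simplest `(I, J)` shape, following the `compute_weights(t, products, agents)` interface in the diff (the name and observation count are illustrative):

```python
import numpy as np
import pyblp

# A dataset whose compute_weights returns an (I, J) array: every agent i
# and inside-good choice j is sampled with equal weight. Returning
# (I, 1 + J) would add a leading column for the outside option, and the
# three-dimensional shapes cover second choices.
dataset = pyblp.MicroDataset(
    name='illustrative_survey',
    observations=10_000,
    compute_weights=lambda t, products, agents: np.ones((agents.size, products.size)),
)
```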
12 changes: 9 additions & 3 deletions pyblp/markets/simulation_results_market.py
@@ -4,16 +4,22 @@

from .market import Market
from .. import exceptions
from ..micro import Moments
from ..micro import MicroDataset, Moments
from ..utilities.basics import Array, Error, NumericalErrorHandler


class SimulationResultsMarket(Market):
"""A market in a solved simulation of synthetic BLP data."""

@NumericalErrorHandler(exceptions.SyntheticMicroDataNumericalError)
def safely_compute_micro_weights(self, dataset: MicroDataset) -> Tuple[Array, List[Error]]:
"""Compute probabilities needed for simulating micro data, handling any numerical errors."""
errors: List[Error] = []
weights_mapping, _, _, _ = self.compute_micro_dataset_contributions([dataset])
return weights_mapping[dataset], errors

@NumericalErrorHandler(exceptions.SyntheticMicroMomentsNumericalError)
def safely_compute_micro_contributions(
self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
def safely_compute_micro_contributions(self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
"""Compute micro moment value contributions, handling any numerical errors."""
errors: List[Error] = []
micro_numerator, micro_denominator, _, _, _, _, _ = self.compute_micro_contributions(moments)
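Conceptually, the weights returned by `safely_compute_micro_weights` are what micro observations are drawn from. A sketch of that sampling step (pyblp's internal implementation may differ; this just illustrates drawing observation indices in proportion to validated `(I, J)` weights):

```python
import numpy as np

def draw_micro_observations(weights, n, seed=0):
    """Draw n (agent, choice) index pairs with probability proportional to
    an (I, J) array of nonnegative micro weights."""
    pmf = weights / weights.sum()  # normalize weights into probabilities
    rng = np.random.default_rng(seed)
    flat = rng.choice(pmf.size, size=n, p=pmf.ravel())
    return np.unravel_index(flat, pmf.shape)  # (agent_indices, choice_indices)
```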
4 changes: 3 additions & 1 deletion pyblp/micro.py
@@ -98,6 +98,8 @@ def __init__(
if market_ids is None:
self.market_ids = None
else:
if isinstance(market_ids, set):
market_ids = list(market_ids)
market_ids = np.asarray(market_ids, np.object_)
unique, counts = np.unique(market_ids, return_counts=True)
duplicates = unique[counts > 1]
@@ -148,7 +150,7 @@ class MicroMoment(StringRepresentation):
The observed value :math:`\bar{v}_m` in :eq:`observed_micro_value`.
compute_values : `callable`
Function for computing micro values :math:`v_{mijt}` (or :math:`v_{mijkt}` if the dataset :math:`d_m` contains
micro data) in a market of the following form::
second choice data) in a market of the following form::
compute_values(t, products, agents) --> values
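Tying the docstring above to the dataset sketched earlier, a sketch of a micro moment whose `compute_values` returns an array matching the shape of its dataset's `compute_weights`. The constructor arguments follow this commit's documentation; the value, names, and use of the first demographic and first X2 characteristic are illustrative and depend on the simulation's formulations:

```python
import numpy as np
import pyblp

# Reusing the illustrative `dataset` from the earlier sketch: compute_values
# must return an array with the same shape as the dataset's compute_weights,
# here (I, J). This moment matches an assumed observed mean of 0.5 for a
# demographic-characteristic interaction among choosers.
moment = pyblp.MicroMoment(
    name='E[d_i * x_j | choice]',
    dataset=dataset,
    value=0.5,
    compute_values=lambda t, products, agents: (
        np.outer(agents.demographics[:, 0], products.X2[:, 0])
    ),
)
```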
