ENH: add micro score computation

- Add a version of micro data construction for true agents.
- Add score computation, both for standard micro data and for true agents.
- Add a unit test that checks that the scores look reasonable.
jeffgortmaker · Jun 16, 2022 · 6a7a2ea · 6a7a2ea
1 parent 1094b81
commit 6a7a2ea
Show file tree

Hide file tree

Showing 11 changed files with 606 additions and 159 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -305,7 +305,14 @@ The following methods can either replace micro moment values or simulate micro d
    :toctree: _api
 
    SimulationResults.replace_micro_moment_values
-   SimulationResults.simulate_micro_data
+   SimulationResults.build_micro_data
+
+Finally, the following method can compute scores for micro data.
+
+.. autosummary::
+   :toctree: _api
+
+   SimulationResults.compute_micro_scores
 
 
 Structured Data Classes
@@ -374,6 +381,7 @@ When errors occur, they will either be displayed as warnings or raised as except
    exceptions.SyntheticCostsNumericalError
    exceptions.SyntheticMicroDataNumericalError
    exceptions.SyntheticMicroMomentsNumericalError
+   exceptions.MicroScoresNumericalError
    exceptions.EquilibriumRealizationNumericalError
    exceptions.JacobianRealizationNumericalError
    exceptions.PostEstimationNumericalError

diff --git a/pyblp/economies/problem.py b/pyblp/economies/problem.py
@@ -503,7 +503,7 @@ def solve(
         costs_bounds = self._coerce_optional_bounds(costs_bounds, 'costs_bounds')
 
         # validate and structure micro moments before outputting related information
-        moments = Moments(self, micro_moments)
+        moments = Moments(micro_moments, self)
         micro_moment_covariances = None
         if moments.MM > 0:
             output("")

diff --git a/pyblp/exceptions.py b/pyblp/exceptions.py
@@ -165,6 +165,10 @@ class SyntheticMicroMomentsNumericalError(NumericalError):
     """Encountered a numerical error when computing synthetic micro moments."""
 
 
+class MicroScoresNumericalError(NumericalError):
+    """Encountered a numerical error when computing micro scores."""
+
+
 class EquilibriumRealizationNumericalError(NumericalError):
     """Encountered a numerical error when solving for a realization of equilibrium prices and shares."""
 

diff --git a/pyblp/markets/market.py b/pyblp/markets/market.py
@@ -48,14 +48,15 @@ class Market(Container):
     def __init__(
             self, economy: Economy, t: Any, parameters: Parameters, sigma: Array, pi: Array, rho: Array,
             beta: Optional[Array] = None, gamma: Optional[Array] = None, delta: Optional[Array] = None,
-            data_override: Optional[Dict[str, Array]] = None, agents_override: Optional[RecArray] = None) -> None:
+            data_override: Optional[Dict[str, Array]] = None, products_override: Optional[RecArray] = None,
+            agents_override: Optional[RecArray] = None) -> None:
         """Store or compute information about formulations, data, parameters, and utility."""
 
         # structure relevant data
         self.t = t
         super().__init__(
-            economy.products[economy._product_market_indices[t]],
-            economy.agents[economy._agent_market_indices[t]] if agents_override is None else agents_override
+            economy.products[economy._product_market_indices[t]] if products_override is None else products_override,
+            economy.agents[economy._agent_market_indices[t]] if agents_override is None else agents_override,
         )
 
         # membership matrices are computed on-demand

diff --git a/pyblp/markets/simulation_results_market.py b/pyblp/markets/simulation_results_market.py
@@ -1,9 +1,11 @@
 """Market level structuring of simulated synthetic BLP data."""
 
-from typing import List, Tuple
+from typing import Any, List, Optional, Tuple
+
+import numpy as np
 
 from .market import Market
-from .. import exceptions
+from .. import exceptions, options
 from ..micro import MicroDataset, Moments
 from ..utilities.basics import Array, Error, NumericalErrorHandler
 
@@ -24,3 +26,84 @@ def safely_compute_micro_contributions(self, moments: Moments) -> Tuple[Array, A
         errors: List[Error] = []
         micro_numerator, micro_denominator, _, _, _, _, _ = self.compute_micro_contributions(moments)
         return micro_numerator, micro_denominator, errors
+
+    @NumericalErrorHandler(exceptions.MicroScoresNumericalError)
+    def safely_compute_score_denominator_contributions(
+            self, dataset: MicroDataset) -> Tuple[Array, Array, Array, List[Error]]:
+        """Compute denominator contributions to micro scores, handling any numerical errors."""
+
+        # compute probabilities and their derivatives, then update the tangent mappings using the xi Jacobian
+        probabilities, conditionals = self.compute_probabilities()
+        probabilities_tangent_mapping, conditionals_tangent_mapping = (
+            self.compute_probabilities_by_parameter_tangent_mapping(probabilities, conditionals)
+        )
+        xi_jacobian, errors = self.compute_xi_by_theta_jacobian(  # these errors are returned unchanged below
+            probabilities, conditionals, probabilities_tangent_mapping
+        )
+        self.update_probabilities_by_parameter_tangent_mapping(
+            probabilities_tangent_mapping, conditionals_tangent_mapping, probabilities, conditionals, xi_jacobian
+        )
+
+        # compute contributions for this dataset alone; only the second and fourth returned mappings are used here
+        _, denominator_mapping, _, tangent_mapping = self.compute_micro_dataset_contributions(
+            [dataset], self.delta, probabilities, probabilities_tangent_mapping, compute_jacobians=True
+        )
+        if dataset in denominator_mapping:
+            denominator = denominator_mapping[dataset]
+            jacobian = np.array([tangent_mapping[(dataset, p)] for p in range(self.parameters.P)])  # P entries
+        else:  # the dataset is missing from the mapping, so contribute zeros
+            denominator = 0
+            jacobian = np.zeros(self.parameters.P, options.dtype)
+
+        return xi_jacobian, denominator, jacobian, errors
+
+    @NumericalErrorHandler(exceptions.MicroScoresNumericalError)
+    def safely_compute_score_numerator_contributions(
+            self, dataset: MicroDataset, j: Optional[Any], k: Optional[Any], xi_jacobian: Array) -> (
+            Tuple[Array, Array, List[Error]]):
+        """Compute numerator contributions to micro scores, handling any numerical errors."""
+        errors: List[Error] = []  # nothing below appends to this list, so it is returned empty
+
+        # compute probabilities and their derivatives, reusing the xi Jacobian supplied by the caller
+        probabilities, conditionals = self.compute_probabilities()
+        probabilities_tangent_mapping, conditionals_tangent_mapping = (
+            self.compute_probabilities_by_parameter_tangent_mapping(probabilities, conditionals)
+        )
+        self.update_probabilities_by_parameter_tangent_mapping(
+            probabilities_tangent_mapping, conditionals_tangent_mapping, probabilities, conditionals, xi_jacobian
+        )
+
+        # obtain weights and their derivatives, with parameters stacked along the last axis of the tangent
+        weights_mapping, _, tangent_mapping, _ = self.compute_micro_dataset_contributions(
+            [dataset], self.delta, probabilities, probabilities_tangent_mapping, compute_jacobians=True
+        )
+        if dataset in weights_mapping:
+            weights = weights_mapping[dataset]
+            tangent = np.stack([tangent_mapping[(dataset, p)] for p in range(self.parameters.P)], axis=-1)
+        else:  # the dataset is missing from the mapping, so use zeros shaped like its micro weights
+            weights = np.zeros_like(self.compute_micro_weights(dataset))
+            tangent = np.zeros(list(weights.shape) + [self.parameters.P], options.dtype)
+
+        # validate choices and select corresponding weights (second axis) if a choice index j is specified
+        if j is not None:
+            try:  # NOTE(review): an in-range negative j would not raise IndexError here -- confirm intended
+                weights = weights[:, j]
+                tangent = tangent[:, j]
+            except IndexError as exception:
+                message = f"In market '{self.t}', choice index '{j}' is not between 0 and {weights.shape[1] - 1}."
+                raise ValueError(message) from exception
+
+        # validate second choices and select corresponding weights if specified and there are second choices
+        if k is not None and len(weights.shape) == 1 + int(j is None) + 1:  # 2 axes if j was selected, else 3
+            try:
+                weights = weights[:, k] if j is not None else weights[:, :, k]
+                tangent = tangent[:, k] if j is not None else tangent[:, :, k]
+            except IndexError as exception:  # NOTE(review): the message below calls k a "choice index" too
+                message = f"In market '{self.t}', choice index '{k}' is not between 0 and {weights.shape[-1] - 1}."
+                raise ValueError(message) from exception
+
+        # integrate over agents (the first axis) to get the numerator contributions
+        numerator = weights.sum(axis=0)
+        jacobian = tangent.sum(axis=0)
+
+        return numerator, jacobian, errors
diff --git a/pyblp/micro.py b/pyblp/micro.py
@@ -207,28 +207,29 @@ class Moments(object):
     values: Array
     MM: int
 
-    def __init__(self, economy: 'Economy', micro_moments: Sequence[MicroMoment]) -> None:
+    def __init__(self, micro_moments: Sequence[MicroMoment], economy: Optional['Economy'] = None) -> None:
         """Validate and store information about a sequence of micro moment instances."""
-        if not isinstance(micro_moments, collections.abc.Sequence):
-            raise TypeError("micro_moments must be a sequence of micro moment instances.")
-        for m, moment in enumerate(micro_moments):
-            if not isinstance(moment, MicroMoment):
-                raise TypeError("micro_moments must consist only of micro moment instances.")
-            try:
-                moment.dataset._validate(economy)
-            except Exception as exception:
-                message = f"The micro dataset '{moment.dataset}' is invalid because of the above exception."
-                raise ValueError(message) from exception
-            for moment2 in micro_moments[:m]:
-                if moment == moment2:
-                    raise ValueError(f"There is more than one of the micro moment '{moment}'.")
-                if moment.name == moment2.name:
-                    raise ValueError(f"Micro moment '{moment}' has the same name as '{moment2}'.")
-                if moment.dataset != moment2.dataset and moment.dataset.name == moment2.dataset.name:
-                    raise ValueError(
-                        f"The dataset of '{moment}' is not the same instance as that of '{moment2}', but the two "
-                        f"datasets have the same name."
-                    )
+        if economy is not None:
+            if not isinstance(micro_moments, collections.abc.Sequence):
+                raise TypeError("micro_moments must be a sequence of micro moment instances.")
+            for m, moment in enumerate(micro_moments):
+                if not isinstance(moment, MicroMoment):
+                    raise TypeError("micro_moments must consist only of micro moment instances.")
+                try:
+                    moment.dataset._validate(economy)
+                except Exception as exception:
+                    message = f"The micro dataset '{moment.dataset}' is invalid because of the above exception."
+                    raise ValueError(message) from exception
+                for moment2 in micro_moments[:m]:
+                    if moment == moment2:
+                        raise ValueError(f"There is more than one of the micro moment '{moment}'.")
+                    if moment.name == moment2.name:
+                        raise ValueError(f"Micro moment '{moment}' has the same name as '{moment2}'.")
+                    if moment.dataset != moment2.dataset and moment.dataset.name == moment2.dataset.name:
+                        raise ValueError(
+                            f"The dataset of '{moment}' is not the same instance as that of '{moment2}', but the two "
+                            f"datasets have the same name."
+                        )
 
         self.micro_moments = micro_moments
         self.values = np.c_[[m.value for m in micro_moments]]

diff --git a/pyblp/parameters.py b/pyblp/parameters.py
@@ -174,11 +174,12 @@ def __init__(
             beta: Optional[Any] = None, gamma: Optional[Any] = None, sigma_bounds: Optional[Tuple[Any, Any]] = None,
             pi_bounds: Optional[Tuple[Any, Any]] = None, rho_bounds: Optional[Tuple[Any, Any]] = None,
             beta_bounds: Optional[Tuple[Any, Any]] = None, gamma_bounds: Optional[Tuple[Any, Any]] = None,
-            bounded: bool = False, allow_linear_nans: bool = False) -> None:
+            bounded: bool = False, allow_linear_nans: bool = False, check_alpha: bool = True) -> None:
         """Coerce parameters into usable formats before storing information about fixed (equal bounds) and unfixed
         (unequal bounds) elements of sigma, pi, rho, beta, and gamma. Also store information about eliminated
         (concentrated out) parameters in beta and gamma. If allow_linear_nans is True, allow null linear parameters in
-        order to denote those parameters that will be concentrated out.
+        order to denote those parameters that will be concentrated out. If check_alpha is True, check that alpha isn't
+        concentrated out when a supply side is included.
         """
 
         # store labels
@@ -236,7 +237,7 @@ def __init__(
             )
 
         # alpha cannot be concentrated out if there's a supply side
-        if economy.K3 > 0:
+        if check_alpha and economy.K3 > 0:
             for formulation, eliminated in zip(economy._X1_formulations, self.eliminated_beta_index.flatten()):
                 if 'prices' in formulation.names and eliminated:
                     raise ValueError(

diff --git a/pyblp/results/bootstrapped_results.py b/pyblp/results/bootstrapped_results.py
@@ -178,7 +178,7 @@ def market_factory(pair: Tuple[int, Hashable]) -> tuple:
                         args_cs.append(market_arg[c])
                     else:
                         args_cs.append(market_arg[c, self._economy._product_market_indices[s]])
-            return market_cs, *fixed_args, *args_cs
+            return (market_cs, *fixed_args, *args_cs)
 
         # construct a mapping from draws and market IDs to market-specific arrays and compute the full matrix size
         array_mapping: Dict[Tuple[int, Hashable], Array] = {}

diff --git a/pyblp/results/problem_results.py b/pyblp/results/problem_results.py
@@ -549,7 +549,7 @@ def market_factory(s: Hashable) -> tuple:
                 args_s = market_args
             else:
                 args_s = [None if a is None else a[indices_s] for a in market_args]
-            return market_s, *fixed_args, *args_s
+            return (market_s, *fixed_args, *args_s)
 
         # construct a mapping from market IDs to market-specific arrays
         array_mapping: Dict[Hashable, Array] = {}