ENH: detect micro moment collinearity
jeffgortmaker committed Feb 3, 2022
1 parent a9f011a commit 77f99fa
Showing 5 changed files with 91 additions and 36 deletions.
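Note: as the new warning text in this diff states, the check runs whenever either of the collinearity tolerances in pyblp.options is positive. A minimal usage sketch, assuming only the two option names that the warning message itself references:

import pyblp

# Zeroing both tolerances disables the new micro moment collinearity
# check; the check is active whenever either tolerance is positive.
pyblp.options.collinear_atol = pyblp.options.collinear_rtol = 0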
75 changes: 62 additions & 13 deletions pyblp/economies/problem.py
@@ -17,14 +17,14 @@
from ..configurations.iteration import Iteration
from ..configurations.optimization import ObjectiveResults, Optimization
from ..markets.problem_market import ProblemMarket
-from ..micro import MicroMoment, Moments
+from ..micro import MicroDataset, MicroMoment, Moments
from ..parameters import Parameters
from ..primitives import Agents, Products
from ..results.problem_results import ProblemResults
-from ..utilities.algebra import precisely_invert
+from ..utilities.algebra import precisely_invert, precisely_identify_collinearity
from ..utilities.basics import (
Array, Bounds, Error, RecArray, SolverStats, format_number, format_seconds, format_table, generate_items, output,
-update_matrices, compute_finite_differences
+update_matrices, compute_finite_differences, warn
)
from ..utilities.statistics import IV, compute_gmm_moments_mean, compute_gmm_moments_jacobian_mean

@@ -493,6 +493,9 @@ def solve(
self._require_psd(micro_moment_covariances, "micro_moment_covariances")
self._detect_singularity(micro_moment_covariances, "micro_moment_covariances")

+# determine whether to check micro moment collinearity
+detect_micro_collinearity = moments.MM > 0 and (options.collinear_atol > 0 or options.collinear_rtol > 0)
+
# choose whether to do an initial update
if initial_update is None:
initial_update = bool(moments.MM > 0 and W is None)
@@ -605,11 +608,11 @@
# define the objective function
def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveResults:
"""Compute and output progress associated with a single objective evaluation."""
-nonlocal iteration_stats, smallest_objective, progress
+nonlocal iteration_stats, smallest_objective, progress, detect_micro_collinearity
assert optimization is not None and shares_bounds is not None and costs_bounds is not None
progress = compute_step_progress(
new_theta, progress, optimization._compute_gradient, compute_hessian=False,
-compute_micro_covariances=False
+compute_micro_covariances=False, detect_micro_collinearity=detect_micro_collinearity
)
iteration_stats.append(progress.iteration_stats)
formatted_progress = progress.format(
@@ -618,6 +621,7 @@ def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveRes
if formatted_progress:
output(formatted_progress)
smallest_objective = min(smallest_objective, progress.objective)
+detect_micro_collinearity = False
return progress.objective, progress.gradient if optimization._compute_gradient else None

# optimize theta if there are parameters to optimize and this isn't the initial update step
@@ -654,10 +658,11 @@ def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveRes
else:
output("Estimating standard errors ...")
final_progress = compute_step_progress(
-theta, progress, compute_gradient, compute_hessian, compute_micro_covariances
+theta, progress, compute_gradient, compute_hessian, compute_micro_covariances, detect_micro_collinearity
)
iteration_stats.append(final_progress.iteration_stats)
optimization_stats.evaluations += 1
+detect_micro_collinearity = False
results = ProblemResults(
final_progress, last_results, step, last_step, step_start_time, optimization_start_time,
optimization_end_time, optimization_stats, iteration_stats, scale_objective, shares_bounds,
@@ -690,7 +695,7 @@ def _compute_progress(
error_behavior: str, error_punishment: float, delta_behavior: str, iteration: Iteration, fp_type: str,
shares_bounds: Bounds, costs_bounds: Bounds, finite_differences: bool, theta: Array,
progress: 'InitialProgress', compute_gradient: bool, compute_hessian: bool,
-compute_micro_covariances: bool) -> 'Progress':
+compute_micro_covariances: bool, detect_micro_collinearity: bool) -> 'Progress':
"""Compute demand- and supply-side contributions before recovering the linear parameters and structural error
terms. Then, form the GMM objective value and its gradient. Finally, handle any errors that were encountered
before structuring relevant progress information.
@@ -728,7 +733,9 @@ def _compute_progress(
else:
def market_factory(
s: Hashable) -> (
-Tuple[ProblemMarket, Array, Array, Array, Moments, Iteration, str, Bounds, Bounds, bool, bool]):
+Tuple[
+    ProblemMarket, Array, Array, Array, Moments, Iteration, str, Bounds, Bounds, bool, bool, bool
+]):
"""Build a market along with arguments used to compute delta, micro moment contributions, transformed
marginal costs, and Jacobians.
"""
@@ -738,22 +745,30 @@ def market_factory(
last_tilde_costs_s = progress.tilde_costs[self._product_market_indices[s]]
return (
market_s, delta_s, last_delta_s, last_tilde_costs_s, moments, iteration, fp_type, shares_bounds,
-costs_bounds, compute_jacobians, compute_micro_covariances
+costs_bounds, compute_jacobians, compute_micro_covariances, detect_micro_collinearity
)

+# if necessary, identify micro datasets in which there could possibly be collinearity issues
+micro_collinearity_candidates: Dict[MicroDataset, List[int]] = {}
+if detect_micro_collinearity:
+    for m, moment in enumerate(moments.micro_moments):
+        micro_collinearity_candidates.setdefault(moment.dataset, []).append(m)
+    micro_collinearity_candidates = {d: v for d, v in micro_collinearity_candidates.items() if len(v) > 1}
+
# compute delta, contributions to micro moment values, transformed marginal costs, Jacobians, and
# covariances market-by-market
micro_numerator_mapping: Dict[Hashable, Array] = {}
micro_denominator_mapping: Dict[Hashable, Array] = {}
micro_numerator_jacobian_mapping: Dict[Hashable, Array] = {}
micro_denominator_jacobian_mapping: Dict[Hashable, Array] = {}
micro_covariances_numerator_mapping: Dict[Hashable, Array] = {}
+micro_collinearity_candidate_values: Dict[Hashable, Dict[MicroDataset, Array]] = {}
generator = generate_items(self.unique_market_ids, market_factory, ProblemMarket.solve)
for t, generated_t in generator:
(
delta_t, xi_jacobian_t, micro_numerator_t, micro_denominator_t, micro_numerator_jacobian_t,
-micro_denominator_jacobian_t, micro_covariances_numerator_t, clipped_shares_t, iteration_stats_t,
-tilde_costs_t, omega_jacobian_t, clipped_costs_t, errors_t
+micro_denominator_jacobian_t, micro_covariances_numerator_t, weights_mapping_t, values_mapping_t,
+clipped_shares_t, iteration_stats_t, tilde_costs_t, omega_jacobian_t, clipped_costs_t, errors_t
) = generated_t

delta[self._product_market_indices[t]] = delta_t
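Aside: the candidate-grouping hunk above keeps only datasets with at least two micro moments, since a lone moment has no other moment in its dataset to be collinear with. A standalone sketch of the same setdefault pattern, with hypothetical dataset labels:

# Group moment indices by dataset label, then drop singleton groups,
# mirroring the micro_collinearity_candidates logic above.
moments_by_dataset = {}
for m, dataset in enumerate(["d1", "d1", "d2"]):
    moments_by_dataset.setdefault(dataset, []).append(m)
candidates = {d: v for d, v in moments_by_dataset.items() if len(v) > 1}
assert candidates == {"d1": [0, 1]}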
@@ -767,6 +782,14 @@ def market_factory(
micro_denominator_jacobian_mapping[t] = scipy.sparse.csr_matrix(micro_denominator_jacobian_t)
if compute_micro_covariances:
micro_covariances_numerator_mapping[t] = scipy.sparse.csr_matrix(micro_covariances_numerator_t)
+if detect_micro_collinearity:
+    micro_collinearity_candidate_values[t] = {}
+    for dataset, moment_indices in micro_collinearity_candidates.items():
+        if dataset in weights_mapping_t:
+            nonzero = np.nonzero(weights_mapping_t[dataset])
+            micro_collinearity_candidate_values[t][dataset] = np.column_stack(
+                [values_mapping_t[m][nonzero].flatten() for m in moment_indices]
+            )
if self.K3 > 0:
tilde_costs[self._product_market_indices[t]] = tilde_costs_t
clipped_costs[self._product_market_indices[t]] = clipped_costs_t
@@ -822,6 +845,30 @@ def market_factory(
# fill the lower triangle
lower_indices = np.tril_indices(moments.MM, -1)
micro_covariances[lower_indices] = micro_covariances.T[lower_indices]
+self._detect_singularity(micro_covariances, "the estimated covariance matrix of micro moments")
+
+if detect_micro_collinearity:
+    for dataset, moment_indices in micro_collinearity_candidates.items():
+        market_ids = self.unique_market_ids if dataset.market_ids is None else dataset.market_ids
+        values = np.row_stack([micro_collinearity_candidate_values[t][dataset] for t in market_ids])
+        collinear, successful = precisely_identify_collinearity(values)
+        common_message = (
+            "To disable collinearity checks, set "
+            "options.collinear_atol = options.collinear_rtol = 0."
+        )
+        if not successful:
+            warn(
+                f"Failed to compute the QR decomposition for micro dataset '{dataset.name}' while "
+                f"checking for collinearity issues. {common_message}"
+            )
+        if collinear.any():
+            labels = [moments.micro_moments[m].name for m in moment_indices]
+            collinear_labels = ", ".join(f"'{l}'" for l, c in zip(labels, collinear) if c)
+            warn(
+                f"Detected collinearity issues with the values of micro moments "
+                f"[{collinear_labels}] and at least one other micro moment in micro dataset "
+                f"'{dataset.name}'. {common_message}"
+            )

# replace invalid elements in delta, micro moments, and transformed marginal costs with their last values
bad_delta_index = ~np.isfinite(delta)
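Aside: precisely_identify_collinearity itself is not part of this diff. A minimal numpy sketch of QR-based collinearity detection under an atol/rtol convention like the one referenced above; the actual pyblp helper may differ in details:

import numpy as np

def identify_collinearity(x, atol=1e-10, rtol=1e-10):
    # Flag columns whose diagonal entry of R from a QR decomposition is
    # near zero, indicating near-collinearity with preceding columns.
    # Returns the boolean flags and whether the decomposition succeeded.
    collinear = np.zeros(x.shape[1], dtype=bool)
    try:
        r = np.linalg.qr(x, mode='r')
    except np.linalg.LinAlgError:
        return collinear, False
    diagonal = np.abs(np.diag(r))
    collinear[:diagonal.size] = diagonal < atol + rtol * diagonal.max()
    return collinear, True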
@@ -863,7 +910,8 @@ def compute_perturbed_stack(perturbed_theta: Array) -> Array:
perturbed_progress = self._compute_progress(
parameters, moments, iv, W, scale_objective, error_behavior, error_punishment, delta_behavior,
iteration, fp_type, shares_bounds, costs_bounds, finite_differences=False, theta=perturbed_theta,
-progress=progress, compute_gradient=False, compute_hessian=False, compute_micro_covariances=False
+progress=progress, compute_gradient=False, compute_hessian=False, compute_micro_covariances=False,
+detect_micro_collinearity=False
)
perturbed_stack = perturbed_progress.iv_delta
if moments.MM > 0:
@@ -972,7 +1020,8 @@ def compute_perturbed_gradient(perturbed_theta: Array) -> Array:
perturbed_progress = self._compute_progress(
parameters, moments, iv, W, scale_objective, error_behavior, error_punishment, delta_behavior,
iteration, fp_type, shares_bounds, costs_bounds, finite_differences, perturbed_theta, progress,
-compute_gradient=True, compute_hessian=False, compute_micro_covariances=False
+compute_gradient=True, compute_hessian=False, compute_micro_covariances=False,
+detect_micro_collinearity=False
)
return perturbed_progress.gradient

12 changes: 8 additions & 4 deletions pyblp/markets/market.py
@@ -1225,7 +1225,8 @@ def compute_omega_by_theta_jacobian(
def compute_micro_contributions(
self, moments: Moments, delta: Optional[Array] = None, probabilities: Optional[Array] = None,
probabilities_tangent_mapping: Optional[Dict[int, Array]] = None, compute_jacobians: bool = False,
-compute_covariances: bool = False) -> Tuple[Array, Array, Array, Array, Array]:
+compute_covariances: bool = False, keep_mappings: bool = False) -> (
+    Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array]]):
"""Compute contributions to micro moment values, Jacobians, and covariances. By default, use the mean utilities
with which this market was initialized and do not compute Jacobian and covariance contributions.
"""
@@ -1454,6 +1455,10 @@ def compute_micro_contributions(
f"dataset's compute_weights."
)

+# cache values if necessary
+if compute_covariances or keep_mappings:
+    values_mapping[m] = values
+
# compute the contribution to the numerator and denominator
weighted_values = weights * values
micro_numerator[m] = weighted_values.sum()
@@ -1463,14 +1468,13 @@
micro_numerator_jacobian[m, p] = (weights_tangent_mapping[(dataset, p)] * values).sum()
micro_denominator_jacobian[m, p] = denominator_tangent_mapping[(dataset, p)]

-# compute the contribution to the covariance numerator (cache values so they don't need to be re-computed)
+# compute the contribution to the covariance numerator
if compute_covariances:
-values_mapping[m] = values
for m2, moment2 in enumerate(moments.micro_moments):
if m2 <= m and moment2.dataset == moment.dataset:
micro_covariances_numerator[m2, m] = (weighted_values * values_mapping[m2]).sum()

return (
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator
+micro_covariances_numerator, weights_mapping, values_mapping
)
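Aside: each micro moment's simulated value is the ratio of summed market-level contributions, which the code above accumulates as micro_numerator and micro_denominator. A toy numpy illustration with hypothetical weights and values for a single market:

import numpy as np

# Hypothetical agent-by-choice weights and micro moment values for market t.
weights = np.array([[0.2, 0.1], [0.3, 0.4]])
values = np.array([[1.0, 0.0], [0.5, 1.0]])

weighted_values = weights * values
numerator_t = weighted_values.sum()    # this market's numerator contribution
denominator_t = weights.sum()          # this market's denominator contribution
# Across markets, the moment's value is sum_t numerator_t / sum_t denominator_t.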
29 changes: 17 additions & 12 deletions pyblp/markets/problem_market.py
@@ -7,7 +7,7 @@
from .market import Market
from .. import exceptions, options
from ..configurations.iteration import Iteration
-from ..micro import Moments
+from ..micro import MicroDataset, Moments
from ..utilities.basics import Array, Bounds, Error, SolverStats, NumericalErrorHandler


@@ -18,9 +18,10 @@ class ProblemMarket(Market):
def solve(
self, delta: Array, last_delta: Array, last_tilde_costs: Array, moments: Moments, iteration: Iteration,
fp_type: str, shares_bounds: Bounds, costs_bounds: Bounds, compute_jacobians: bool,
-compute_micro_covariances: bool) -> (
+compute_micro_covariances: bool, keep_micro_mappings: bool) -> (
Tuple[
-Array, Array, Array, Array, Array, Array, Array, Array, SolverStats, Array, Array, Array, List[Error]
+Array, Array, Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array], Array,
+SolverStats, Array, Array, Array, List[Error]
]):
"""Compute the mean utility for this market that equates market shares to observed values by solving a fixed
point problem. Then, if compute_jacobians is True, compute the Jacobian (holding beta fixed) of xi
@@ -81,15 +82,17 @@ def solve(
micro_numerator_jacobian = np.full((moments.MM, self.parameters.P), np.nan, options.dtype)
micro_denominator_jacobian = np.full((moments.MM, self.parameters.P), np.nan, options.dtype)
micro_covariances_numerator = np.full((moments.MM, moments.MM), np.nan, options.dtype)
+weights_mapping = {}
+values_mapping = {}
else:
assert probabilities is not None
(
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator, micro_errors
+micro_covariances_numerator, weights_mapping, values_mapping, micro_errors
) = (
self.safely_compute_micro_contributions(
moments, valid_delta, probabilities, probabilities_tangent_mapping, compute_jacobians,
-compute_micro_covariances
+compute_micro_covariances, keep_micro_mappings
)
)
errors.extend(micro_errors)
@@ -128,8 +131,8 @@ def solve(

return (
delta, xi_jacobian, micro_numerator, micro_denominator, micro_numerator_jacobian,
-micro_denominator_jacobian, micro_covariances_numerator, clipped_shares, stats, tilde_costs, omega_jacobian,
-clipped_costs, errors
+micro_denominator_jacobian, micro_covariances_numerator, weights_mapping, values_mapping, clipped_shares,
+stats, tilde_costs, omega_jacobian, clipped_costs, errors
)

@NumericalErrorHandler(exceptions.DeltaNumericalError)
@@ -156,21 +159,23 @@ def safely_compute_xi_by_theta_jacobian(
@NumericalErrorHandler(exceptions.MicroMomentsNumericalError)
def safely_compute_micro_contributions(
self, moments: Moments, delta: Array, probabilities: Optional[Array],
-probabilities_tangent_mapping: Dict[int, Array], compute_jacobians: bool, compute_covariances: bool) -> (
-Tuple[Array, Array, Array, Array, Array, List[Error]]):
+probabilities_tangent_mapping: Dict[int, Array], compute_jacobians: bool, compute_covariances: bool,
+keep_mappings: bool) -> (
+Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array], List[Error]]):
"""Compute micro moment value contributions, handling any numerical errors."""
errors: List[Error] = []
(
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator
+micro_covariances_numerator, weights_mapping, values_mapping
) = (
self.compute_micro_contributions(
-moments, delta, probabilities, probabilities_tangent_mapping, compute_jacobians, compute_covariances
+moments, delta, probabilities, probabilities_tangent_mapping, compute_jacobians, compute_covariances,
+keep_mappings
)
)
return (
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator, errors
+micro_covariances_numerator, weights_mapping, values_mapping, errors
)

@NumericalErrorHandler(exceptions.CostsNumericalError)
5 changes: 3 additions & 2 deletions pyblp/markets/simulation_results_market.py
@@ -12,8 +12,9 @@ class SimulationResultsMarket(Market):
"""A market in a solved simulation of synthetic BLP data."""

@NumericalErrorHandler(exceptions.SyntheticMicroMomentsNumericalError)
-def safely_compute_micro_contributions(self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
+def safely_compute_micro_contributions(
+    self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
"""Compute micro moment value contributions, handling any numerical errors."""
errors: List[Error] = []
-micro_numerator, micro_denominator, _, _, _ = self.compute_micro_contributions(moments)
+micro_numerator, micro_denominator, _, _, _, _, _ = self.compute_micro_contributions(moments)
return micro_numerator, micro_denominator, errors
