ENH: detect micro moment collinearity
jeffgortmaker committed Feb 3, 2022
1 parent a9f011a commit 77f99fa
Showing 5 changed files with 91 additions and 36 deletions.
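Note: as the new warning text in this diff states, the check runs whenever either of the collinearity tolerances in pyblp.options is positive. A minimal usage sketch, assuming only the two option names that the warning message itself references:

import pyblp

# Zeroing both tolerances disables the new micro moment collinearity
# check; the check is active whenever either tolerance is positive.
pyblp.options.collinear_atol = pyblp.options.collinear_rtol = 0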
75 changes: 62 additions & 13 deletions pyblp/economies/problem.py
@@ -17,14 +17,14 @@
from ..configurations.iteration import Iteration
from ..configurations.optimization import ObjectiveResults, Optimization
from ..markets.problem_market import ProblemMarket
-from ..micro import MicroMoment, Moments
+from ..micro import MicroDataset, MicroMoment, Moments
from ..parameters import Parameters
from ..primitives import Agents, Products
from ..results.problem_results import ProblemResults
-from ..utilities.algebra import precisely_invert
+from ..utilities.algebra import precisely_invert, precisely_identify_collinearity
from ..utilities.basics import (
Array, Bounds, Error, RecArray, SolverStats, format_number, format_seconds, format_table, generate_items, output,
-update_matrices, compute_finite_differences
+update_matrices, compute_finite_differences, warn
)
from ..utilities.statistics import IV, compute_gmm_moments_mean, compute_gmm_moments_jacobian_mean

@@ -493,6 +493,9 @@ def solve(
self._require_psd(micro_moment_covariances, "micro_moment_covariances")
self._detect_singularity(micro_moment_covariances, "micro_moment_covariances")

+# determine whether to check micro moment collinearity
+detect_micro_collinearity = moments.MM > 0 and (options.collinear_atol > 0 or options.collinear_rtol > 0)
+
# choose whether to do an initial update
if initial_update is None:
initial_update = bool(moments.MM > 0 and W is None)
@@ -605,11 +608,11 @@
# define the objective function
def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveResults:
"""Compute and output progress associated with a single objective evaluation."""
-nonlocal iteration_stats, smallest_objective, progress
+nonlocal iteration_stats, smallest_objective, progress, detect_micro_collinearity
assert optimization is not None and shares_bounds is not None and costs_bounds is not None
progress = compute_step_progress(
new_theta, progress, optimization._compute_gradient, compute_hessian=False,
-compute_micro_covariances=False
+compute_micro_covariances=False, detect_micro_collinearity=detect_micro_collinearity
)
iteration_stats.append(progress.iteration_stats)
formatted_progress = progress.format(
@@ -618,6 +621,7 @@ def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveRes
if formatted_progress:
output(formatted_progress)
smallest_objective = min(smallest_objective, progress.objective)
+detect_micro_collinearity = False
return progress.objective, progress.gradient if optimization._compute_gradient else None

# optimize theta if there are parameters to optimize and this isn't the initial update step
@@ -654,10 +658,11 @@ def wrapper(new_theta: Array, iterations: int, evaluations: int) -> ObjectiveRes
else:
output("Estimating standard errors ...")
final_progress = compute_step_progress(
-theta, progress, compute_gradient, compute_hessian, compute_micro_covariances
+theta, progress, compute_gradient, compute_hessian, compute_micro_covariances, detect_micro_collinearity
)
iteration_stats.append(final_progress.iteration_stats)
optimization_stats.evaluations += 1
+detect_micro_collinearity = False
results = ProblemResults(
final_progress, last_results, step, last_step, step_start_time, optimization_start_time,
optimization_end_time, optimization_stats, iteration_stats, scale_objective, shares_bounds,
@@ -690,7 +695,7 @@ def _compute_progress(
error_behavior: str, error_punishment: float, delta_behavior: str, iteration: Iteration, fp_type: str,
shares_bounds: Bounds, costs_bounds: Bounds, finite_differences: bool, theta: Array,
progress: 'InitialProgress', compute_gradient: bool, compute_hessian: bool,
-compute_micro_covariances: bool) -> 'Progress':
+compute_micro_covariances: bool, detect_micro_collinearity: bool) -> 'Progress':
"""Compute demand- and supply-side contributions before recovering the linear parameters and structural error
terms. Then, form the GMM objective value and its gradient. Finally, handle any errors that were encountered
before structuring relevant progress information.
@@ -728,7 +733,9 @@ def _compute_progress(
else:
def market_factory(
s: Hashable) -> (
-Tuple[ProblemMarket, Array, Array, Array, Moments, Iteration, str, Bounds, Bounds, bool, bool]):
+Tuple[
+    ProblemMarket, Array, Array, Array, Moments, Iteration, str, Bounds, Bounds, bool, bool, bool
+]):
"""Build a market along with arguments used to compute delta, micro moment contributions, transformed
marginal costs, and Jacobians.
"""
@@ -738,22 +745,30 @@ def market_factory(
last_tilde_costs_s = progress.tilde_costs[self._product_market_indices[s]]
return (
market_s, delta_s, last_delta_s, last_tilde_costs_s, moments, iteration, fp_type, shares_bounds,
-costs_bounds, compute_jacobians, compute_micro_covariances
+costs_bounds, compute_jacobians, compute_micro_covariances, detect_micro_collinearity
)

+# if necessary, identify micro datasets in which there could possibly be collinearity issues
+micro_collinearity_candidates: Dict[MicroDataset, List[int]] = {}
+if detect_micro_collinearity:
+    for m, moment in enumerate(moments.micro_moments):
+        micro_collinearity_candidates.setdefault(moment.dataset, []).append(m)
+    micro_collinearity_candidates = {d: v for d, v in micro_collinearity_candidates.items() if len(v) > 1}
+
# compute delta, contributions to micro moment values, transformed marginal costs, Jacobians, and
# covariances market-by-market
micro_numerator_mapping: Dict[Hashable, Array] = {}
micro_denominator_mapping: Dict[Hashable, Array] = {}
micro_numerator_jacobian_mapping: Dict[Hashable, Array] = {}
micro_denominator_jacobian_mapping: Dict[Hashable, Array] = {}
micro_covariances_numerator_mapping: Dict[Hashable, Array] = {}
+micro_collinearity_candidate_values: Dict[Hashable, Dict[MicroDataset, Array]] = {}
generator = generate_items(self.unique_market_ids, market_factory, ProblemMarket.solve)
for t, generated_t in generator:
(
delta_t, xi_jacobian_t, micro_numerator_t, micro_denominator_t, micro_numerator_jacobian_t,
-micro_denominator_jacobian_t, micro_covariances_numerator_t, clipped_shares_t, iteration_stats_t,
-tilde_costs_t, omega_jacobian_t, clipped_costs_t, errors_t
+micro_denominator_jacobian_t, micro_covariances_numerator_t, weights_mapping_t, values_mapping_t,
+clipped_shares_t, iteration_stats_t, tilde_costs_t, omega_jacobian_t, clipped_costs_t, errors_t
) = generated_t

delta[self._product_market_indices[t]] = delta_t
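Aside: the candidate-grouping hunk above keeps only datasets with at least two micro moments, since a lone moment has no other moment in its dataset to be collinear with. A standalone sketch of the same setdefault pattern, with hypothetical dataset labels:

# Group moment indices by dataset label, then drop singleton groups,
# mirroring the micro_collinearity_candidates logic above.
moments_by_dataset = {}
for m, dataset in enumerate(["d1", "d1", "d2"]):
    moments_by_dataset.setdefault(dataset, []).append(m)
candidates = {d: v for d, v in moments_by_dataset.items() if len(v) > 1}
assert candidates == {"d1": [0, 1]}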
@@ -767,6 +782,14 @@ def market_factory(
micro_denominator_jacobian_mapping[t] = scipy.sparse.csr_matrix(micro_denominator_jacobian_t)
if compute_micro_covariances:
micro_covariances_numerator_mapping[t] = scipy.sparse.csr_matrix(micro_covariances_numerator_t)
+if detect_micro_collinearity:
+    micro_collinearity_candidate_values[t] = {}
+    for dataset, moment_indices in micro_collinearity_candidates.items():
+        if dataset in weights_mapping_t:
+            nonzero = np.nonzero(weights_mapping_t[dataset])
+            micro_collinearity_candidate_values[t][dataset] = np.column_stack(
+                [values_mapping_t[m][nonzero].flatten() for m in moment_indices]
+            )
if self.K3 > 0:
tilde_costs[self._product_market_indices[t]] = tilde_costs_t
clipped_costs[self._product_market_indices[t]] = clipped_costs_t
@@ -822,6 +845,30 @@ def market_factory(
# fill the lower triangle
lower_indices = np.tril_indices(moments.MM, -1)
micro_covariances[lower_indices] = micro_covariances.T[lower_indices]
+self._detect_singularity(micro_covariances, "the estimated covariance matrix of micro moments")
+
+if detect_micro_collinearity:
+    for dataset, moment_indices in micro_collinearity_candidates.items():
+        market_ids = self.unique_market_ids if dataset.market_ids is None else dataset.market_ids
+        values = np.row_stack([micro_collinearity_candidate_values[t][dataset] for t in market_ids])
+        collinear, successful = precisely_identify_collinearity(values)
+        common_message = (
+            "To disable collinearity checks, set "
+            "options.collinear_atol = options.collinear_rtol = 0."
+        )
+        if not successful:
+            warn(
+                f"Failed to compute the QR decomposition for micro dataset '{dataset.name}' while "
+                f"checking for collinearity issues. {common_message}"
+            )
+        if collinear.any():
+            labels = [moments.micro_moments[m].name for m in moment_indices]
+            collinear_labels = ", ".join(f"'{l}'" for l, c in zip(labels, collinear) if c)
+            warn(
+                f"Detected collinearity issues with the values of micro moments "
+                f"[{collinear_labels}] and at least one other micro moment in micro dataset "
+                f"'{dataset.name}'. {common_message}"
+            )

# replace invalid elements in delta, micro moments, and transformed marginal costs with their last values
bad_delta_index = ~np.isfinite(delta)
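Aside: precisely_identify_collinearity itself is not part of this diff. A minimal numpy sketch of QR-based collinearity detection under an atol/rtol convention like the one referenced above; the actual pyblp helper may differ in details:

import numpy as np

def identify_collinearity(x, atol=1e-10, rtol=1e-10):
    # Flag columns whose diagonal entry of R from a QR decomposition is
    # near zero, indicating near-collinearity with preceding columns.
    # Returns the boolean flags and whether the decomposition succeeded.
    collinear = np.zeros(x.shape[1], dtype=bool)
    try:
        r = np.linalg.qr(x, mode='r')
    except np.linalg.LinAlgError:
        return collinear, False
    diagonal = np.abs(np.diag(r))
    collinear[:diagonal.size] = diagonal < atol + rtol * diagonal.max()
    return collinear, True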
@@ -863,7 +910,8 @@ def compute_perturbed_stack(perturbed_theta: Array) -> Array:
perturbed_progress = self._compute_progress(
parameters, moments, iv, W, scale_objective, error_behavior, error_punishment, delta_behavior,
iteration, fp_type, shares_bounds, costs_bounds, finite_differences=False, theta=perturbed_theta,
-progress=progress, compute_gradient=False, compute_hessian=False, compute_micro_covariances=False
+progress=progress, compute_gradient=False, compute_hessian=False, compute_micro_covariances=False,
+detect_micro_collinearity=False
)
perturbed_stack = perturbed_progress.iv_delta
if moments.MM > 0:
@@ -972,7 +1020,8 @@ def compute_perturbed_gradient(perturbed_theta: Array) -> Array:
perturbed_progress = self._compute_progress(
parameters, moments, iv, W, scale_objective, error_behavior, error_punishment, delta_behavior,
iteration, fp_type, shares_bounds, costs_bounds, finite_differences, perturbed_theta, progress,
-compute_gradient=True, compute_hessian=False, compute_micro_covariances=False
+compute_gradient=True, compute_hessian=False, compute_micro_covariances=False,
+detect_micro_collinearity=False
)
return perturbed_progress.gradient

12 changes: 8 additions & 4 deletions pyblp/markets/market.py
@@ -1225,7 +1225,8 @@ def compute_omega_by_theta_jacobian(
def compute_micro_contributions(
self, moments: Moments, delta: Optional[Array] = None, probabilities: Optional[Array] = None,
probabilities_tangent_mapping: Optional[Dict[int, Array]] = None, compute_jacobians: bool = False,
-compute_covariances: bool = False) -> Tuple[Array, Array, Array, Array, Array]:
+compute_covariances: bool = False, keep_mappings: bool = False) -> (
+    Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array]]):
"""Compute contributions to micro moment values, Jacobians, and covariances. By default, use the mean utilities
with which this market was initialized and do not compute Jacobian and covariance contributions.
"""
@@ -1454,6 +1455,10 @@ def compute_micro_contributions(
f"dataset's compute_weights."
)

+# cache values if necessary
+if compute_covariances or keep_mappings:
+    values_mapping[m] = values
+
# compute the contribution to the numerator and denominator
weighted_values = weights * values
micro_numerator[m] = weighted_values.sum()
@@ -1463,14 +1468,13 @@
micro_numerator_jacobian[m, p] = (weights_tangent_mapping[(dataset, p)] * values).sum()
micro_denominator_jacobian[m, p] = denominator_tangent_mapping[(dataset, p)]

-# compute the contribution to the covariance numerator (cache values so they don't need to be re-computed)
+# compute the contribution to the covariance numerator
if compute_covariances:
-values_mapping[m] = values
for m2, moment2 in enumerate(moments.micro_moments):
if m2 <= m and moment2.dataset == moment.dataset:
micro_covariances_numerator[m2, m] = (weighted_values * values_mapping[m2]).sum()

return (
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator
+micro_covariances_numerator, weights_mapping, values_mapping
)
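Aside: each micro moment's simulated value is the ratio of summed market-level contributions, which the code above accumulates as micro_numerator and micro_denominator. A toy numpy illustration with hypothetical weights and values for a single market:

import numpy as np

# Hypothetical agent-by-choice weights and micro moment values for market t.
weights = np.array([[0.2, 0.1], [0.3, 0.4]])
values = np.array([[1.0, 0.0], [0.5, 1.0]])

weighted_values = weights * values
numerator_t = weighted_values.sum()    # this market's numerator contribution
denominator_t = weights.sum()          # this market's denominator contribution
# Across markets, the moment's value is sum_t numerator_t / sum_t denominator_t.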
29 changes: 17 additions & 12 deletions pyblp/markets/problem_market.py
@@ -7,7 +7,7 @@
from .market import Market
from .. import exceptions, options
from ..configurations.iteration import Iteration
-from ..micro import Moments
+from ..micro import MicroDataset, Moments
from ..utilities.basics import Array, Bounds, Error, SolverStats, NumericalErrorHandler


@@ -18,9 +18,10 @@ class ProblemMarket(Market):
def solve(
self, delta: Array, last_delta: Array, last_tilde_costs: Array, moments: Moments, iteration: Iteration,
fp_type: str, shares_bounds: Bounds, costs_bounds: Bounds, compute_jacobians: bool,
-compute_micro_covariances: bool) -> (
+compute_micro_covariances: bool, keep_micro_mappings: bool) -> (
Tuple[
-Array, Array, Array, Array, Array, Array, Array, Array, SolverStats, Array, Array, Array, List[Error]
+Array, Array, Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array], Array,
+SolverStats, Array, Array, Array, List[Error]
]):
"""Compute the mean utility for this market that equates market shares to observed values by solving a fixed
point problem. Then, if compute_jacobians is True, compute the Jacobian (holding beta fixed) of xi
@@ -81,15 +82,17 @@ def solve(
micro_numerator_jacobian = np.full((moments.MM, self.parameters.P), np.nan, options.dtype)
micro_denominator_jacobian = np.full((moments.MM, self.parameters.P), np.nan, options.dtype)
micro_covariances_numerator = np.full((moments.MM, moments.MM), np.nan, options.dtype)
+weights_mapping = {}
+values_mapping = {}
else:
assert probabilities is not None
(
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator, micro_errors
+micro_covariances_numerator, weights_mapping, values_mapping, micro_errors
) = (
self.safely_compute_micro_contributions(
moments, valid_delta, probabilities, probabilities_tangent_mapping, compute_jacobians,
-compute_micro_covariances
+compute_micro_covariances, keep_micro_mappings
)
)
errors.extend(micro_errors)
@@ -128,8 +131,8 @@ def solve(

return (
delta, xi_jacobian, micro_numerator, micro_denominator, micro_numerator_jacobian,
-micro_denominator_jacobian, micro_covariances_numerator, clipped_shares, stats, tilde_costs, omega_jacobian,
-clipped_costs, errors
+micro_denominator_jacobian, micro_covariances_numerator, weights_mapping, values_mapping, clipped_shares,
+stats, tilde_costs, omega_jacobian, clipped_costs, errors
)

@NumericalErrorHandler(exceptions.DeltaNumericalError)
@@ -156,21 +159,23 @@ def safely_compute_xi_by_theta_jacobian(
@NumericalErrorHandler(exceptions.MicroMomentsNumericalError)
def safely_compute_micro_contributions(
self, moments: Moments, delta: Array, probabilities: Optional[Array],
-probabilities_tangent_mapping: Dict[int, Array], compute_jacobians: bool, compute_covariances: bool) -> (
-Tuple[Array, Array, Array, Array, Array, List[Error]]):
+probabilities_tangent_mapping: Dict[int, Array], compute_jacobians: bool, compute_covariances: bool,
+keep_mappings: bool) -> (
+Tuple[Array, Array, Array, Array, Array, Dict[MicroDataset, Array], Dict[int, Array], List[Error]]):
"""Compute micro moment value contributions, handling any numerical errors."""
errors: List[Error] = []
(
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator
+micro_covariances_numerator, weights_mapping, values_mapping
) = (
self.compute_micro_contributions(
-moments, delta, probabilities, probabilities_tangent_mapping, compute_jacobians, compute_covariances
+moments, delta, probabilities, probabilities_tangent_mapping, compute_jacobians, compute_covariances,
+keep_mappings
)
)
return (
micro_numerator, micro_denominator, micro_numerator_jacobian, micro_denominator_jacobian,
-micro_covariances_numerator, errors
+micro_covariances_numerator, weights_mapping, values_mapping, errors
)

@NumericalErrorHandler(exceptions.CostsNumericalError)
5 changes: 3 additions & 2 deletions pyblp/markets/simulation_results_market.py
@@ -12,8 +12,9 @@ class SimulationResultsMarket(Market):
"""A market in a solved simulation of synthetic BLP data."""

@NumericalErrorHandler(exceptions.SyntheticMicroMomentsNumericalError)
-def safely_compute_micro_contributions(self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
+def safely_compute_micro_contributions(
+    self, moments: Moments) -> Tuple[Array, Array, List[Error]]:
"""Compute micro moment value contributions, handling any numerical errors."""
errors: List[Error] = []
-micro_numerator, micro_denominator, _, _, _ = self.compute_micro_contributions(moments)
+micro_numerator, micro_denominator, _, _, _, _, _ = self.compute_micro_contributions(moments)
return micro_numerator, micro_denominator, errors
