Skip to content

Commit

Permalink
ENH: only build as many nodes as needed for score computation
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeffgortmaker committed Jun 19, 2022
1 parent 11c2ff9 commit 42930bf
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 14 deletions.
12 changes: 6 additions & 6 deletions pyblp/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from . import options
from .configurations.formulation import ColumnFormulation
from .primitives import Container
from .utilities.algebra import vech
from .utilities.basics import Array, Bounds, format_number, format_se, format_table

Expand All @@ -16,6 +15,7 @@
if TYPE_CHECKING:
from .economies.economy import Economy # noqa
from .markets.market import Market # noqa
from .primitives import Container


class Parameter(abc.ABC):
Expand All @@ -34,7 +34,7 @@ class Coefficient(Parameter):
"""Information about a single coefficient parameter in sigma, pi, beta, or gamma."""

@abc.abstractmethod
def get_product_formulation(self, container: Container) -> ColumnFormulation:
def get_product_formulation(self, container: 'Container') -> ColumnFormulation:
"""Get the product formulation associated with the parameter."""

@abc.abstractmethod
Expand All @@ -45,7 +45,7 @@ def get_product_characteristic(self, market: 'Market') -> Array:
class NonlinearCoefficient(Coefficient):
"""Information about a single nonlinear parameter in sigma or pi."""

def get_product_formulation(self, container: Container) -> ColumnFormulation:
def get_product_formulation(self, container: 'Container') -> ColumnFormulation:
"""Get the product formulation associated with the parameter."""
return container._X2_formulations[self.location[0]]

Expand Down Expand Up @@ -109,7 +109,7 @@ def get_group_associations(self, market: 'Market') -> Array:
class LinearCoefficient(Coefficient):
"""Information about a single linear parameter in beta or gamma."""

def get_product_formulation(self, container: Container) -> ColumnFormulation:
def get_product_formulation(self, container: 'Container') -> ColumnFormulation:
"""Get the product formulation associated with the parameter."""
return container._X2_formulations[self.location[0]]

Expand All @@ -122,15 +122,15 @@ def get_product_characteristic(self, market: 'Market') -> Array:
class BetaParameter(LinearCoefficient):
"""Information about a single linear parameter in beta."""

def get_product_formulation(self, container: Container) -> ColumnFormulation:
def get_product_formulation(self, container: 'Container') -> ColumnFormulation:
"""Get the product formulation associated with the parameter."""
return container._X1_formulations[self.location[0]]


class GammaParameter(LinearCoefficient):
"""Information about a single linear parameter in gamma."""

def get_product_formulation(self, container: Container) -> ColumnFormulation:
def get_product_formulation(self, container: 'Container') -> ColumnFormulation:
"""Get the product formulation associated with the parameter."""
return container._X3_formulations[self.location[0]]

Expand Down
7 changes: 5 additions & 2 deletions pyblp/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import options
from .configurations.formulation import ColumnFormulation, Formulation
from .configurations.integration import Integration
from .parameters import Parameters
from .utilities.basics import Array, Data, Groups, RecArray, extract_matrix, get_indices, structure_matrices, warn


Expand Down Expand Up @@ -372,7 +373,7 @@ class MicroAgents(object):
second_choice_indices: Array

def __new__(
cls, products: RecArray, micro_data: Mapping, demographics: Optional[Array] = None,
cls, products: RecArray, parameters: Parameters, micro_data: Mapping, demographics: Optional[Array] = None,
demographics_formulations: Sequence[ColumnFormulation] = (),
integration: Optional[Integration] = None) -> RecArray:
"""Structure agent data."""
Expand Down Expand Up @@ -442,7 +443,9 @@ def __new__(
raise ValueError("integration must be None or an Integration instance.")

# duplicate observations by as many rows as there are built nodes
micro_ids, nodes, weights = integration._build_many(K2, np.arange(market_ids.size))
micro_ids, nodes, weights = integration._build_many(
parameters.nonzero_sigma_index.sum(), np.arange(market_ids.size)
)
repeats = np.bincount(micro_ids)
duplicate = lambda x: np.repeat(x, repeats, axis=0) if x is not None else None
demographics = duplicate(demographics)
Expand Down
18 changes: 12 additions & 6 deletions pyblp/results/economy_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,8 +1300,9 @@ def compute_micro_scores(
If specified, each row of ``micro_data`` is treated as corresponding to a unique observation :math:`n`,
and will be duplicated by as many rows of nodes as are created by the :class:`Integration` configuration.
Specifically, :math:`K_2` columns of nodes (the number of demand-side nonlinear product characteristics)
will be built for each observation :math:`n`.
Specifically, up to :math:`K_2` columns of nodes will be built for each observation :math:`n`, where
:math:`K_2` is the number of demand-side nonlinear product characteristics. Nodes are not built for
characteristics with a zero on the diagonal of :math:`\Sigma`, which cuts down on memory usage.
Returns
-------
Expand All @@ -1328,7 +1329,7 @@ def compute_micro_scores(
self._economy.products, micro_data, self._economy.agent_formulation
)
micro_agents = MicroAgents(
self._economy.products, micro_data, demographics, demographics_formulations, integration
self._economy.products, self._parameters, micro_data, demographics, demographics_formulations, integration
)
if micro_agents.choice_indices.size == 0:
raise KeyError("micro_data must have choice_indices.")
Expand Down Expand Up @@ -1417,7 +1418,8 @@ def compute_agent_scores(
self._economy.products, micro_data, self._economy.agent_formulation
)
micro_agents = MicroAgents(
self._economy.products, micro_data, demographics, demographics_formulations, integration
self._economy.products, self._parameters, micro_data, demographics, demographics_formulations,
integration
)
else:
if dataset.market_ids is None:
Expand All @@ -1436,7 +1438,8 @@ def compute_agent_scores(
'weights': np.ones(agents.size),
}
micro_agents = MicroAgents(
self._economy.products, micro_data, demographics, demographics_formulations, integration
self._economy.products, self._parameters, micro_data, demographics, demographics_formulations,
integration
)

# compute the contributions
Expand Down Expand Up @@ -1486,7 +1489,10 @@ def _compute_scores(
micro_indices = get_indices(micro_agents.micro_ids)

# verify that the micro data only has market IDs supported by the dataset
if dataset.market_ids is not None and set(unique_market_ids) - dataset.market_ids:
dataset_market_ids = dataset.market_ids
if dataset_market_ids is None:
dataset_market_ids = set(self._economy.unique_market_ids)
if set(unique_market_ids) - dataset_market_ids:
raise ValueError("The market_ids field of micro_data must not have IDs not supported by the dataset.")

def denominator_market_factory(s: Hashable) -> Tuple[EconomyResultsMarket, MicroDataset]:
Expand Down

0 comments on commit 42930bf

Please sign in to comment.