From d65b489306a033fd12c3bfb4cc238b2d021565df Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 19 Dec 2018 20:47:10 +0100 Subject: [PATCH] ENH: Add wald_test to panel model results Add wald_test as a convenience function for panel models --- linearmodels/iv/results.py | 32 +++------ linearmodels/panel/results.py | 70 +++++++++++++++++++- linearmodels/tests/iv/test_postestimation.py | 6 +- linearmodels/tests/iv/test_results.py | 2 +- linearmodels/tests/panel/test_results.py | 24 +++++++ linearmodels/utility.py | 23 +++++++ 6 files changed, 132 insertions(+), 25 deletions(-) diff --git a/linearmodels/iv/results.py b/linearmodels/iv/results.py index 1858c5d551..27f4913b06 100644 --- a/linearmodels/iv/results.py +++ b/linearmodels/iv/results.py @@ -6,17 +6,16 @@ import scipy.stats as stats from cached_property import cached_property -from numpy import array, asarray, c_, diag, empty, log, ones, sqrt, zeros +from numpy import array, c_, diag, empty, log, ones, sqrt, zeros from numpy.linalg import inv, pinv from pandas import DataFrame, Series, concat, to_numeric -from patsy import DesignInfo from statsmodels.iolib.summary import SimpleTable, fmt_2cols, fmt_params from statsmodels.iolib.table import default_txt_fmt from linearmodels.compat.statsmodels import Summary from linearmodels.iv._utility import annihilate, proj from linearmodels.utility import (InvalidTestStatistic, WaldTestStatistic, _ModelComparison, - _SummaryStr, _str, pval_format) + _SummaryStr, _str, pval_format, quadratic_form_test) def stub_concat(lists, sep='='): @@ -411,7 +410,7 @@ def summary(self): return smry - def test_linear_constraint(self, restriction=None, value=None, *, formula=None): + def wald_test(self, restriction=None, value=None, *, formula=None): r""" Test linear equality constraints using a Wald test @@ -461,25 +460,14 @@ def test_linear_constraint(self, restriction=None, value=None, *, formula=None): >>> formula = 'exper = I(exper**2) = 0' >>> res.test_linear_constraint(formula=formula) """ - if formula is not None and restriction is not None: - raise ValueError('restriction and formula cannot be used' - 'simultaneously.') - if formula is not None: - di = DesignInfo(list(self.params.index)) - lc = di.linear_constraint(formula) - restriction, value = lc.coefs, lc.constants - restriction = asarray(restriction) - if value is None: - value = zeros(restriction.shape[0]) - value = asarray(value).ravel()[:, None] - diff = restriction @ self.params.values[:, None] - value - rcov = restriction @ self.cov @ restriction.T - stat = float(diff.T @ inv(rcov) @ diff) - df = restriction.shape[0] - null = 'Linear equality constraint is valid' - name = 'Linear Equality Hypothesis Test' + return quadratic_form_test(self._params, self.cov, restriction=restriction, + value=value, formula=formula) - return WaldTestStatistic(stat, null, df, name=name) + def test_linear_constraint(self, restriction=None, value=None, *, formula=None): + import warnings + warnings.warn('test_linear_constraint is deprecated. Use wald_test ' + 'instead.', DeprecationWarning) + return self.wald_test(restriction, value, formula=formula) class _CommonIVResults(OLSResults): diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 1e7ef89f9e..f619f42de4 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -8,7 +8,8 @@ from linearmodels.compat.statsmodels import Summary from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat -from linearmodels.utility import (_ModelComparison, _SummaryStr, _str, pval_format) +from linearmodels.utility import (_ModelComparison, _SummaryStr, _str, pval_format, + quadratic_form_test) __all__ = ['PanelResults', 'PanelEffectsResults', 'RandomEffectsResults'] @@ -513,6 +514,73 @@ def loglik(self): """Log-likelihood of model""" return self._loglik + def wald_test(self, restriction=None, value=None, *, formula=None): + r""" + Test linear equality constraints using a Wald test + + Parameters + ---------- + restriction : {ndarray, DataFrame}, optional + q by nvar array containing linear weights to apply to parameters + when forming the restrictions. It is not possible to use both + restriction and formula. + value : {ndarray, Series}, optional + q element array containing the restricted values. + formula : Union[str, list[str]], optional + patsy linear constrains. The simplest formats are one of: + + * A single comma-separated string such as 'x1=0, x2+x3=1' + * A list of strings where each element is a single constraint + such as ['x1=0', 'x2+x3=1'] + * A single string without commas to test simple constraints such + as 'x1=x2=x3=0' + + It is not possible to use both restriction and formula. + + Returns + ------- + t: WaldTestStatistic + Test statistic for null that restrictions are valid. + + Notes + ----- + Hypothesis test examines whether :math:`H_0:C\theta=v` where the + matrix C is ``restriction`` and v is ``value``. The test statistic + has a :math:`\chi^2_q` distribution where q is the number of rows in C. + + Examples + -------- + >>> from linearmodels.datasets import wage_panel + >>> import statsmodels.api as sm + >>> import numpy as np + >>> import pandas as pd + >>> data = wage_panel.load() + >>> year = pd.Categorical(data.year) + >>> data = data.set_index(['nr', 'year']) + >>> data['year'] = year + >>> from linearmodels.panel import PanelOLS + >>> exog_vars = ['expersq', 'union', 'married', 'year'] + >>> exog = sm.add_constant(data[exog_vars]) + + >>> mod = PanelOLS(data.lwage, exog, entity_effects=True) + >>> fe_res = mod.fit() + + Test the restriction that union and married have 0 coefficients + + >>> restriction = np.zeros((2, 11)) + >>> restriction[0, 2] = 1 + >>> restriction[1, 3] = 1 + >>> value = np.array([0, 0]) + >>> fe_res.wald_test(restriction, value) + + The same test using formulas + + >>> formula = 'union = married = 0' + >>> fe_res.wald_test(formula=formula) + """ + return quadratic_form_test(self.params, self.cov, restriction=restriction, + value=value, formula=formula) + class PanelEffectsResults(PanelResults): """ diff --git a/linearmodels/tests/iv/test_postestimation.py b/linearmodels/tests/iv/test_postestimation.py index 177d782dbf..47abaee56e 100644 --- a/linearmodels/tests/iv/test_postestimation.py +++ b/linearmodels/tests/iv/test_postestimation.py @@ -111,9 +111,13 @@ def test_linear_restriction(data): res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type='robust') nvar = len(res.params) q = np.eye(nvar) - ts = res.test_linear_constraint(q, np.zeros(nvar)) + ts = res.wald_test(q, np.zeros(nvar)) p = res.params.values[:, None] c = res.cov.values stat = float(p.T @ np.linalg.inv(c) @ p) assert_allclose(stat, ts.stat) assert ts.df == nvar + + formula = ' = '.join(res.params.index) + ' = 0' + ts2 = res.wald_test(formula=formula) + assert_allclose(ts.stat, ts2.stat) diff --git a/linearmodels/tests/iv/test_results.py b/linearmodels/tests/iv/test_results.py index 9e4dd0e713..70b6a29cb7 100644 --- a/linearmodels/tests/iv/test_results.py +++ b/linearmodels/tests/iv/test_results.py @@ -22,7 +22,7 @@ def model(request): def result_checker(res): for attr in dir(res): - if attr.startswith('_') or attr in ('test_linear_constraint',): + if attr.startswith('_') or attr in ('test_linear_constraint', 'wald_test'): continue if attr == 'first_stage': result_checker(getattr(res, attr)) diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index e59c265559..6fa2514608 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -1,6 +1,8 @@ from collections import OrderedDict from itertools import product +from numpy.testing import assert_allclose +import numpy as np import pytest import statsmodels.api as sm from pandas.testing import assert_series_equal @@ -137,3 +139,25 @@ def test_predict_no_selection(generated_data): res.predict(fitted=False) with pytest.raises(ValueError): res.predict(fitted=False, effects=False, idiosyncratic=False, missing=True) + + +def test_wald_test(data): + dependent = data.set_index(['nr', 'year']).lwage + exog = sm.add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']]) + res = PanelOLS(dependent, exog, entity_effects=True, time_effects=True).fit() + + restriction = np.zeros((2, 4)) + restriction[0, 2] = 1 + restriction[1, 3] = 1 + t1 = res.wald_test(restriction) + t2 = res.wald_test(restriction, np.zeros(2)) + formula = 'married = union = 0' + t3 = res.wald_test(formula=formula) + p = res.params.values[:, None] + c = np.asarray(res.cov) + c = c[-2:, -2:] + p = p[-2:] + direct = p.T @ np.linalg.inv(c) @ p + assert_allclose(direct, t1.stat) + assert_allclose(direct, t2.stat) + assert_allclose(direct, t3.stat) diff --git a/linearmodels/utility.py b/linearmodels/utility.py index 0c58bced9d..c0b4e89043 100644 --- a/linearmodels/utility.py +++ b/linearmodels/utility.py @@ -5,6 +5,7 @@ import numpy as np from pandas import DataFrame, Series, MultiIndex +from patsy.design_info import DesignInfo from scipy.stats import chi2, f from statsmodels.iolib.summary import SimpleTable, fmt_params @@ -575,3 +576,25 @@ def panel_to_frame(x, items, major_axis, minor_axis, swap=False): df.index.set_levels(final_levels, [0, 1], inplace=True) df.index.names = ['major', 'minor'] return df + + +def quadratic_form_test(params, cov, restriction=None, value=None, formula=None): + if formula is not None and restriction is not None: + raise ValueError('restriction and formula cannot be used' + 'simultaneously.') + if formula is not None: + di = DesignInfo(list(params.index)) + lc = di.linear_constraint(formula) + restriction, value = lc.coefs, lc.constants + restriction = np.asarray(restriction) + if value is None: + value = np.zeros(restriction.shape[0]) + value = np.asarray(value).ravel()[:, None] + diff = restriction @ params.values[:, None] - value + rcov = restriction @ cov @ restriction.T + stat = float(diff.T @ np.linalg.inv(rcov) @ diff) + df = restriction.shape[0] + null = 'Linear equality constraint is valid' + name = 'Linear Equality Hypothesis Test' + + return WaldTestStatistic(stat, null, df, name=name)