BUG: Fix t-test and f-test for multidim params

Fix t-test and f-test so that they also work with multidimensional parameters, as in MNLogit or VAR.
bashtage · Jul 25, 2019 · 471edba · 471edba
1 parent 2b28619
commit 471edba
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 16 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -29,22 +29,24 @@ env:
     - # Doctr deploy key for statsmodels/statsmodels.github.io
     - secure: "AzwB23FWdilHKVcEJnj57AsoY5yKTWT8cQKzsH2ih9i08wIXvZXP/Ui8XRDygV9tDKfqGVltC7HpBBDE3C4ngeMlis4uuKWlkp0O1757YQe+OdDnimuDZhrh3ILEk7xW3ab5YizjLeyv3iiBW7cNS5z8W3Yu8HeJPkr6Ck30gAA="
     - SM_CYTHON_COVERAGE=false # Run takes > 1 hour and so not feasible
+    - PYTEST_OPTIONS=--skip-slow  # skip slow on travis since tested on azure
 
 matrix:
   fast_finish: true
   include:
+    # Documentation build (on Python 3.7 + cutting edge packages). Slowest build
+    - python: 3.7
+      env:
+        - PYTHON=3.7
+        - DOCBUILD=true
     # Python 3.7 + fixed pandas
     - python: 3.7
       env:
         - PYTHON=3.7
         - PANDAS=0.24
         - LINT=true
         - COVERAGE=true
-    # Documentation build (on Python 3.7 + cutting edge packages)
-    - python: 3.7
-      env:
-        - PYTHON=3.7
-        - DOCBUILD=true
+        - PYTEST_OPTIONS=
     # Python 3.6 + legacy blas + older pandas
     - python: 3.6
       env:
@@ -54,6 +56,7 @@ matrix:
         - SCIPY=1.1
         - BLAS="nomkl blas=*=openblas"
         - COVERAGE=true
+        - PYTEST_OPTIONS=
     # Python 3.5 + oldest packages
     - python: 3.5
       env:
@@ -133,8 +136,8 @@ script:
     else
         export XDIST_OPTS=""
     fi
-  - echo pytest -r a ${COVERAGE_OPTS} statsmodels --skip-examples ${XDIST_OPTS}
-  - pytest -r a ${COVERAGE_OPTS} statsmodels --skip-examples ${XDIST_OPTS}
+  - echo pytest -r a ${COVERAGE_OPTS} statsmodels --skip-examples ${XDIST_OPTS} ${PYTEST_OPTIONS}
+  - pytest -r a ${COVERAGE_OPTS} statsmodels --skip-examples ${XDIST_OPTS} ${PYTEST_OPTIONS}
   - ./lint.sh
 
 after_success:

diff --git a/statsmodels/base/model.py b/statsmodels/base/model.py
@@ -1498,7 +1498,7 @@ def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
                           DeprecationWarning)
 
         from patsy import DesignInfo
-        names = self.model.data.param_names
+        names = self.model.data.cov_names
         LC = DesignInfo(names).linear_constraint(r_matrix)
         r_matrix, q_matrix = LC.coefs, LC.constants
         num_ttests = r_matrix.shape[0]
@@ -1508,7 +1508,8 @@ def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
                 not hasattr(self, 'cov_params_default')):
             raise ValueError('Need covariance of parameters for computing '
                              'T statistics')
-        if num_params != self.params.shape[0]:
+        params = self.params.ravel()
+        if num_params != params.shape[0]:
             raise ValueError('r_matrix and params are not aligned')
         if q_matrix is None:
             q_matrix = np.zeros(num_ttests)
@@ -1524,10 +1525,7 @@ def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
             # switch to use_t false if undefined
             use_t = (hasattr(self, 'use_t') and self.use_t)
 
-        _t = _sd = None
-
-        _effect = np.dot(r_matrix, self.params)
-        # nan_dot multiplies with the convention nan * 0 = 0
+        _effect = np.dot(r_matrix, params)
 
         # Perform the test
         if num_ttests > 1:
@@ -1718,7 +1716,8 @@ def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
             use_f = (hasattr(self, 'use_t') and self.use_t)
 
         from patsy import DesignInfo
-        names = self.model.data.param_names
+        names = self.model.data.cov_names
+        params = self.params.ravel()
         LC = DesignInfo(names).linear_constraint(r_matrix)
         r_matrix, q_matrix = LC.coefs, LC.constants
 
@@ -1727,7 +1726,7 @@ def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
             raise ValueError('need covariance of parameters for computing '
                              'F statistics')
 
-        cparams = np.dot(r_matrix, self.params[:, None])
+        cparams = np.dot(r_matrix, params[:, None])
         J = float(r_matrix.shape[0])  # number of restrictions
 
         if q_matrix is None:

diff --git a/statsmodels/discrete/tests/test_discrete.py b/statsmodels/discrete/tests/test_discrete.py
@@ -782,7 +782,7 @@ def test_t_test(self):
         assert_almost_equal(t_unreg.effect, t_reg.effect[:m], DECIMAL_3)
         assert_almost_equal(t_unreg.sd, t_reg.sd[:m], DECIMAL_3)
         assert_almost_equal(np.nan, t_reg.sd[m])
-        assert_almost_equal(t_unreg.tvalue, t_reg.tvalue[:m, :m], DECIMAL_3)
+        assert_almost_equal(t_unreg.tvalue, t_reg.tvalue[:m], DECIMAL_3)
 
     @pytest.mark.skip("Skipped test_f_test for MNLogit")
     def test_f_test(self):
@@ -2390,3 +2390,23 @@ def test_cov_confint_pandas():
     assert_index_equal(ci.index, cov.index)
     assert_index_equal(cov.index, cov.columns)
     assert isinstance(ci.index, pd.MultiIndex)
+
+
+def test_t_test():
+    # GH669: t_test/f_test on MNLogit agree for pandas and ndarray data
+    data = sm.datasets.anes96.load(as_pandas=True)
+    exog = sm.add_constant(data.exog, prepend=False)
+    res1 = sm.MNLogit(data.endog, exog).fit(disp=0)
+    r = np.ones(res1.cov_params().shape[0])
+    t1 = res1.t_test(r)
+    f1 = res1.f_test(r)
+
+    data = sm.datasets.anes96.load(as_pandas=True)
+    exog = sm.add_constant(data.exog, prepend=False)
+    endog, exog = np.asarray(data.endog), np.asarray(exog)
+    res2 = sm.MNLogit(endog, exog).fit(disp=0)
+    t2 = res2.t_test(r)
+    f2 = res2.f_test(r)
+
+    assert_allclose(t1.effect, t2.effect)
+    assert_allclose(f1.statistic, f2.statistic)