REF: Change of BIC formula

bashtage · Aug 3, 2020 · 0a5b1fb · 0a5b1fb
1 parent 48d77d6
commit 0a5b1fb
Show file tree

Hide file tree

Showing 6 changed files with 145 additions and 35 deletions.
diff --git a/setup.cfg b/setup.cfg
@@ -53,6 +53,7 @@ filterwarnings =
     error:unbiased is deprecated in factor of adjusted:FutureWarning
     error:categorical is deprecated:FutureWarning
     error:After 0.13 initialization:FutureWarning
+    error:The bic value:FutureWarning
 markers =
     example: mark a test that runs example code
     matplotlib: mark a test that requires matplotlib

diff --git a/statsmodels/base/tests/test_shrink_pickle.py b/statsmodels/base/tests/test_shrink_pickle.py
@@ -52,7 +52,7 @@ def test_remove_data_pickle(self):
         pred1 = results.predict(xf, **pred_kwds)
         # create some cached attributes
         results.summary()
-        res = results.summary2()  # SMOKE test also summary2
+        results.summary2()  # SMOKE test also summary2
 
         # uncomment the following to check whether tests run (7 failures now)
         # np.testing.assert_equal(res, 1)
@@ -237,8 +237,13 @@ def test_cached_data_removed(self):
         for name in names:
             assert name in res._cache
             assert res._cache[name] is not None
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", FutureWarning)
+            # FutureWarning for BIC changes
+            res.remove_data()
 
-        res.remove_data()
         for name in names:
             assert res._cache[name] is None
 
@@ -249,7 +254,7 @@ def test_cached_values_evaluated(self):
         assert res._cache == {}
         with pytest.warns(FutureWarning, match="Anscombe residuals"):
             res.remove_data()
-        assert 'bic' in res._cache
+        assert 'aic' in res._cache
 
 
 class TestRemoveDataPickleGLMConstrained(RemoveDataPickle):

diff --git a/statsmodels/discrete/tests/test_constrained.py b/statsmodels/discrete/tests/test_constrained.py
@@ -410,7 +410,12 @@ def test_glm(self):
         # see issue GH#1733
         assert_allclose(res1.aic, res2.infocrit[4], rtol=1e-10)
 
-        assert_allclose(res1.bic, res2.bic, rtol=1e-10)
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", FutureWarning)
+            # FutureWarning for BIC changes
+            assert_allclose(res1.bic, res2.bic, rtol=1e-10)
         # bic is deviance based
         # FIXME: dont leave commented-out
         #  assert_allclose(res1.bic, res2.infocrit[5], rtol=1e-10)
@@ -480,7 +485,13 @@ def test_summary(self):
     @pytest.mark.smoke
     def test_summary2(self):
         # trailing text in summary, assumes it's the first extra string
-        summ = self.res1m.summary2()
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", FutureWarning)
+            # FutureWarning for BIC changes
+            summ = self.res1m.summary2()
+
         assert_('linear equality constraints' in summ.extra_txt[0])
 
     def test_fit_constrained_wrap(self):

diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
@@ -30,7 +30,7 @@
 import statsmodels.regression.linear_model as lm
 import statsmodels.base.wrapper as wrap
 import statsmodels.regression._tools as reg_tools
-from warnings import warn
+import warnings
 
 from statsmodels.graphics._regressionplots_doc import (
     _plot_added_variable_doc,
@@ -55,6 +55,24 @@ def _check_convergence(criterion, iteration, atol, rtol):
                        atol=atol, rtol=rtol)
 
 
+# Remove after 0.13 when bic changes to bic llf
+class _ModuleVariable:
+    _value = None
+
+    @property
+    def use_bic_llf(self):
+        return self._value
+
+    def set_use_bic_llf(self, val):
+        if val not in (True, False, None):
+            raise ValueError("Must be True, False or None")
+        self._value = bool(val) if val is not None else val
+
+
+_use_bic_helper = _ModuleVariable()
+SET_USE_BIC_LLF = _use_bic_helper.set_use_bic_llf
+
+
 class GLM(base.LikelihoodModel):
     __doc__ = """
     Generalized Linear Models
@@ -272,10 +290,10 @@ def __init__(self, endog, exog, family=None, offset=None,
         if (family is not None) and not isinstance(family.link,
                                                    tuple(family.safe_links)):
 
-            warn(("The %s link function does not respect the domain "
-                  "of the %s family.") %
-                  (family.link.__class__.__name__,
-                  family.__class__.__name__), DomainWarning)
+            warnings.warn((f"The {type(family.link).__name__} link function "
+                           "does not respect the domain of the "
+                           f"{type(family).__name__} family."),
+                          DomainWarning)
 
         if exposure is not None:
             exposure = np.log(exposure)
@@ -1108,8 +1126,8 @@ def _fit_gradient(self, start_params=None, method="newton",
         try:
             cov_p = np.linalg.inv(-self.hessian(rslt.params, observed=oim)) / scale
         except LinAlgError:
-            warn('Inverting hessian failed, no bse or cov_params '
-                 'available', HessianInversionWarning)
+            warnings.warn('Inverting hessian failed, no bse or cov_params '
+                          'available', HessianInversionWarning)
             cov_p = None
 
         results_class = getattr(self, '_results_class', GLMResults)
@@ -1302,8 +1320,7 @@ def fit_regularized(self, method="elastic_net", alpha=0.,
         self.scale = self.estimate_scale(self.mu)
 
         if not result.converged:
-            msg = "Elastic net fitting did not converge"
-            warn(msg)
+            warnings.warn("Elastic net fitting did not converge")
 
         return result
 
@@ -1678,16 +1695,65 @@ def llf(self):
     def aic(self):
         """
         Akaike Information Criterion
-        -2 * `llf` + 2*(`df_model` + 1)
+        -2 * `llf` + 2 * (`df_model` + 1)
         """
         return -2 * self.llf + 2 * (self.df_model + 1)
 
-    @cached_value
+    @property
     def bic(self):
         """
         Bayes Information Criterion
-        Old: `deviance` - `df_resid` * log(`nobs`)
-        New: -2 * `llf` + (df_model+1)*log*(n)
+
+        `deviance` - `df_resid` * log(`nobs`)
+
+        .. warning::
+
+            The current definition is base don the deviance rather than the
+            log-likelihood. This is not consistent with the AIC definition,
+            and after 0.13 both will make use of the log-likelihood definition.
+
+        Notes
+        -----
+        The log-likelihood version is defined
+        -2 * `llf` + (`df_model` + 1)*log(n)
+        """
+        if _use_bic_helper.use_bic_llf not in (True, False):
+            warnings.warn(
+                "The bic value is computed using the deviance formula. After "
+                "0.13 this will change to the log-likelihood based formula. "
+                "This change has no impact on the relative rank of models "
+                "compared using BIC. You can directly access the "
+                "log-likelihood version using the `bic_llf` attribute. You "
+                "can suppress this message by calling "
+                "statsmodels.genmod.generalized_linear_model.SET_USE_BIC_LLF "
+                "with True to get the LLF-based version now or False to retain"
+                "the deviance version.",
+                FutureWarning
+            )
+        if bool(_use_bic_helper.use_bic_llf):
+            return self.bic_llf
+
+        return self.bic_deviance
+
+    @cached_value
+    def bic_deviance(self):
+        """
+        Bayes Information Criterion
+
+        Based on the deviance,
+        `deviance` - `df_resid` * log(`nobs`)
+        """
+        return (self.deviance -
+                (self.model.wnobs - self.df_model - 1) *
+                np.log(self.model.wnobs))
+
+    @cached_value
+    def bic_llf(self):
+        """
+        Bayes Information Criterion
+
+        Based on the log-likelihood,
+        -2 * `llf` + log(n) * (`df_model` + 1)
         """
         return -2*self.llf + (self.df_model+1)*np.log(
             self.df_model+self.df_resid+1
@@ -1920,8 +1986,10 @@ def summary2(self, yname=None, xname=None, title=None, alpha=.05,
         self.method = 'IRLS'
         from statsmodels.iolib import summary2
         smry = summary2.Summary()
-        smry.add_base(results=self, alpha=alpha, float_format=float_format,
-                      xname=xname, yname=yname, title=title)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", FutureWarning)
+            smry.add_base(results=self, alpha=alpha, float_format=float_format,
+                          xname=xname, yname=yname, title=title)
         if hasattr(self, 'constraints'):
             smry.add_text('Model has been estimated subject to linear '
                           'equality constraints.')

diff --git a/statsmodels/genmod/tests/test_constrained.py b/statsmodels/genmod/tests/test_constrained.py
@@ -213,10 +213,14 @@ def test_resid(self):
         assert_allclose(res1.resid_response, res2.resid_response, rtol=1e-8)
 
     def test_glm_attr(self):
-        for attr in ['llf', 'null_deviance', 'aic', 'bic', 'df_resid',
+        for attr in ['llf', 'null_deviance', 'aic', 'df_resid',
                      'df_model', 'pearson_chi2', 'scale']:
             assert_allclose(getattr(self.res1, attr),
                             getattr(self.res2, attr), rtol=1e-10)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", FutureWarning)
+            # FutureWarning to silence BIC warning
+            assert_allclose(self.res1.bic, self.res2.bic, rtol=1e-10)
 
     def test_wald(self):
         res1 = self.res1
@@ -246,8 +250,10 @@ def test_wald(self):
 
         # smoke
         with warnings.catch_warnings():
-            warnings.simplefilter('ignore', ValueWarning)
             # RuntimeWarnings because of truedivide and scipy distributions
+            # Future to silence BIC warning
+            warnings.simplefilter("ignore", FutureWarning)
+            warnings.simplefilter('ignore', ValueWarning)
             warnings.simplefilter('ignore', RuntimeWarning)
             self.res1.summary()
             self.res1.summary2()

diff --git a/statsmodels/genmod/tests/test_glm.py b/statsmodels/genmod/tests/test_glm.py
@@ -12,7 +12,7 @@
 from scipy import stats
 
 import statsmodels.api as sm
-from statsmodels.genmod.generalized_linear_model import GLM
+from statsmodels.genmod.generalized_linear_model import GLM, SET_USE_BIC_LLF
 from statsmodels.tools.tools import add_constant
 from statsmodels.tools.sm_exceptions import PerfectSeparationError
 from statsmodels.discrete import discrete_model as discrete
@@ -46,6 +46,13 @@ def teardown_module():
         pdf.close()
 
 
+@pytest.fixture(scope="module")
+def iris():
+    cur_dir = os.path.dirname(os.path.abspath(__file__))
+    return np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
+                         delimiter=",", skip_header=1)
+
+
 class CheckModelResultsMixin(object):
     '''
     res2 should be either the results from RModelWrap
@@ -150,8 +157,11 @@ def test_null_deviance(self):
 
     decimal_bic = DECIMAL_4
     def test_bic(self):
-        assert_almost_equal(self.res1.bic, self.res2.bic_Stata,
-                self.decimal_bic)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            assert_almost_equal(self.res1.bic,
+                                self.res2.bic_Stata,
+                                self.decimal_bic)
 
     def test_degrees(self):
         assert_equal(self.res1.model.df_resid,self.res2.df_resid)
@@ -845,10 +855,7 @@ def test_predict(self):
         assert_almost_equal(pred2, pred1+offset)
 
 
-def test_perfect_pred():
-    cur_dir = os.path.dirname(os.path.abspath(__file__))
-    iris = np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
-                         delimiter=",", skip_header=1)
+def test_perfect_pred(iris):
     y = iris[:, -1]
     X = iris[:, :-1]
     X = X[y != 2]
@@ -1403,7 +1410,9 @@ def test_null_deviance(self):
     decimal_bic = DECIMAL_4
 
     def test_bic(self):
-        assert_allclose(self.res1.bic, self.res2.bic,  atol=1e-6, rtol=1e-6)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            assert_allclose(self.res1.bic, self.res2.bic,  atol=1e-6, rtol=1e-6)
 
     decimal_fittedvalues = DECIMAL_4
 
@@ -2320,7 +2329,6 @@ def test_non_invertible_hessian_fails_summary():
         res.summary()
 
 
-
 def test_int_scale():
     # GH-6627, make sure it works with int scale
     data = longley.load(as_pandas=True)
@@ -2342,13 +2350,24 @@ def test_int_exog(dtype):
     assert isinstance(res.params, np.ndarray)
 
 
-def test_glm_bic():
-    cur_dir = os.path.dirname(os.path.abspath(__file__))
-    iris = np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
-                         delimiter=",", skip_header=1)
+def test_glm_bic(iris):
     X = np.c_[np.ones(100), iris[50:, :4]]
     y = np.array(iris)[50:, 4].astype(np.int32)
     y -= 1
+    SET_USE_BIC_LLF(True)
     model = GLM(y, X, family=sm.families.Binomial()).fit()
     # 34.9244 is what glm() of R yields
     assert_almost_equal(model.bic, 34.9244, decimal=3)
+    assert_almost_equal(model.bic_llf, 34.9244, decimal=3)
+    SET_USE_BIC_LLF(False)
+    assert_almost_equal(model.bic, model.bic_deviance, decimal=3)
+    SET_USE_BIC_LLF(None)
+
+
+def test_glm_bic_warning(iris):
+    X = np.c_[np.ones(100), iris[50:, :4]]
+    y = np.array(iris)[50:, 4].astype(np.int32)
+    y -= 1
+    model = GLM(y, X, family=sm.families.Binomial()).fit()
+    with pytest.warns(FutureWarning, match="The bic"):
+        assert isinstance(model.bic, float)