BUG/ENH: Improve RLM in the case of perfect fit
Ensure RLM doesn't crash on a perfect fit
Clean norms so that they don't crash either
Avoid calculations that lead to nan
Improve testing of RLM
Replace fabs with abs

closes statsmodels#5585
closes statsmodels#1341
closes statsmodels#5878
closes statsmodels#5356
closes statsmodels#3319
xref statsmodels#55
bashtage committed Jul 17, 2019
1 parent 4acd7a0 commit 5c3e3a7
Showing 4 changed files with 148 additions and 52 deletions.
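
The heart of the change: when the weighted data are fit exactly, the MAD of the residuals is 0.0, and the old IRLS loop divided by that zero scale. A minimal sketch of the scenario, assuming a statsmodels build that includes this commit (the data values are illustrative):

```python
# Endog constructed to lie exactly on the regression plane, so the
# residuals -- and hence the estimated scale -- are exactly zero.
import numpy as np
import statsmodels.api as sm

x = sm.add_constant(np.arange(10.0))
y = x @ np.array([1.0, 2.0])
res = sm.RLM(y, x, M=sm.robust.norms.HuberT()).fit()  # warns, does not crash
print(res.params)  # recovers [1., 2.]
print(res.sresid)  # all zeros, per the sresid change below
```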
72 changes: 47 additions & 25 deletions statsmodels/robust/norms.py
@@ -189,7 +189,7 @@ def _subset(self, z):
Huber's T is defined piecewise over the range for z
"""
z = np.asarray(z)
return np.less_equal(np.fabs(z), self.t)
return np.less_equal(np.abs(z), self.t)

def rho(self, z):
r"""
@@ -210,7 +210,7 @@ def rho(self, z):
z = np.asarray(z)
test = self._subset(z)
return (test * 0.5 * z**2 +
(1 - test) * (np.fabs(z) * self.t - 0.5 * self.t**2))
(1 - test) * (np.abs(z) * self.t - 0.5 * self.t**2))

def psi(self, z):
r"""
@@ -254,7 +254,7 @@ def weights(self, z):
"""
z = np.asarray(z)
test = self._subset(z)
absz = np.fabs(z)
absz = np.abs(z)
absz[test] = 1.0
return test + (1 - test) * self.t / absz

@@ -266,10 +266,10 @@ def psi_deriv(self, z):
-----
Used to estimate the robust covariance matrix.
"""
return np.less_equal(np.fabs(z), self.t)
return np.less_equal(np.abs(z), self.t)


#TODO: untested, but looks right. RamsayE not available in R or SAS?
# TODO: untested, but looks right. RamsayE not available in R or SAS?
class RamsayE(RobustNorm):
"""
Ramsay's Ea for M estimation.
@@ -303,8 +303,8 @@ def rho(self, z):
rho(z) = a**-2 * (1 - exp(-a*\|z\|)*(1 + a*\|z\|))
"""
z = np.asarray(z)
return (1 - np.exp(-self.a * np.fabs(z)) *
(1 + self.a * np.fabs(z))) / self.a**2
return (1 - np.exp(-self.a * np.abs(z)) *
(1 + self.a * np.abs(z))) / self.a**2

def psi(self, z):
r"""
@@ -323,7 +323,7 @@ def psi(self, z):
psi(z) = z*exp(-a*\|z\|)
"""
z = np.asarray(z)
return z * np.exp(-self.a * np.fabs(z))
return z * np.exp(-self.a * np.abs(z))

def weights(self, z):
r"""
@@ -343,7 +343,7 @@ def weights(self, z):
"""

z = np.asarray(z)
return np.exp(-self.a * np.fabs(z))
return np.exp(-self.a * np.abs(z))

def psi_deriv(self, z):
"""
@@ -353,9 +353,12 @@ def psi_deriv(self, z):
-----
Used to estimate the robust covariance matrix.
"""

return np.exp(-self.a * np.fabs(z)) + z**2*\
np.exp(-self.a*np.fabs(z))*-self.a/np.fabs(z)
a = self.a
x = np.exp(-a * np.abs(z))
dx = -a * x * np.sign(z)
y = z
dy = 1
return x * dy + y * dx
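
The rewritten psi_deriv applies the product rule to psi(z) = z * exp(-a*|z|), giving exp(-a*|z|) * (1 - a*|z|), which is finite at z = 0; the old expression divided by np.fabs(z) and returned nan there. A quick numeric sanity check, as a sketch (the value of `a` and the test points are arbitrary):

```python
import numpy as np

a = 0.3
z = np.array([-2.0, -1e-8, 0.0, 1e-8, 2.0])
analytic = np.exp(-a * np.abs(z)) * (1 - a * np.abs(z))

def psi(z):
    return z * np.exp(-a * np.abs(z))

# Central differences agree with the closed form, including near z = 0.
h = 1e-6
numeric = (psi(z + h) - psi(z - h)) / (2 * h)
assert np.allclose(analytic, numeric, atol=1e-5)
```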


class AndrewWave(RobustNorm):
@@ -381,7 +384,7 @@ def _subset(self, z):
Andrew's wave is defined piecewise over the range of z.
"""
z = np.asarray(z)
return np.less_equal(np.fabs(z), self.a * np.pi)
return np.less_equal(np.abs(z), self.a * np.pi)

def rho(self, z):
r"""
@@ -451,7 +454,16 @@ def weights(self, z):
a = self.a
z = np.asarray(z)
test = self._subset(z)
return test * np.sin(z / a) / (z / a)
ratio = z / a
small = np.abs(ratio) < np.finfo(np.double).eps
if np.any(small):
weights = np.ones_like(ratio)
large = ~small
ratio = ratio[large]
weights[large] = test[large] * np.sin(ratio) / ratio
else:
weights = test * np.sin(ratio) / ratio
return weights
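
The new branch special-cases the removable singularity of sin(r)/r at r = 0 (the limit is 1), so a zero residual now receives weight 1.0 instead of nan. The guard in isolation, as a standalone sketch:

```python
import numpy as np

r = np.array([0.0, 1e-20, 0.5, np.pi / 2])  # r = z / a
small = np.abs(r) < np.finfo(np.double).eps
w = np.ones_like(r)                # sin(r)/r -> 1 as r -> 0
w[~small] = np.sin(r[~small]) / r[~small]
print(w)  # [1.0, 1.0, ~0.9589, ~0.6366]
```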

def psi_deriv(self, z):
"""
@@ -491,7 +503,7 @@ def _subset(self, z):
"""

z = np.asarray(z)
return np.less_equal(np.fabs(z), self.c)
return np.less_equal(np.abs(z), self.c)

def rho(self, z):
r"""
@@ -599,7 +611,7 @@ def _subset(self, z):
"""
Hampel's function is defined piecewise over the range of z
"""
z = np.fabs(np.asarray(z))
z = np.abs(np.asarray(z))
t1 = np.less_equal(z, self.a)
t2 = np.less_equal(z, self.b) * np.greater(z, self.a)
t3 = np.less_equal(z, self.c) * np.greater(z, self.b)
@@ -626,7 +638,7 @@ def rho(self, z):
rho(z) = a*(b + c - a) for \|z\| > c
"""

z = np.fabs(z)
z = np.abs(z)
a = self.a
b = self.b
c = self.c
@@ -665,7 +677,7 @@ def psi(self, z):
c = self.c
t1, t2, t3 = self._subset(z)
s = np.sign(z)
z = np.fabs(z)
z = np.abs(z)
v = s * (t1 * z +
t2 * a*s +
t3 * a*s * (c - z) / (c - b))
@@ -699,15 +711,25 @@ def weights(self, z):
b = self.b
c = self.c
t1, t2, t3 = self._subset(z)
v = (t1 +
t2 * a/np.fabs(z) +
t3 * a*(c-np.fabs(z))/(np.fabs(z)*(c-b)))
v[np.where(np.isnan(v))]=1. # for some reason 0 returns a nan?

v = np.zeros_like(z)
v[t1] = 1.0
abs_z = np.abs(z)
v[t2] = a / abs_z[t2]
abs_zt3 = abs_z[t3]
v[t3] = a * (c - abs_zt3) / (abs_zt3 * (c - b))
v[np.where(np.isnan(v))] = 1. # for some reason 0 returns a nan?
return v
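
The rewrite fills each region of Hampel's weight function with a masked assignment, so a/|z| is only evaluated where |z| > a and z = 0 can no longer produce the 0/0 the old comment puzzled over. The same pattern on toy values (a, b, c arbitrary):

```python
import numpy as np

a, b, c = 2.0, 4.0, 8.0
z = np.array([0.0, 1.0, 3.0, 6.0, 10.0])
abs_z = np.abs(z)
t1 = abs_z <= a
t2 = (abs_z > a) & (abs_z <= b)
t3 = (abs_z > b) & (abs_z <= c)

v = np.zeros_like(z)               # beyond c the weight stays 0
v[t1] = 1.0                        # full weight in the center
v[t2] = a / abs_z[t2]              # division only where |z| > a
v[t3] = a * (c - abs_z[t3]) / (abs_z[t3] * (c - b))
print(v)  # [1.0, 1.0, ~0.667, ~0.167, 0.0]
```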

def psi_deriv(self, z):
t1, t2, t3 = self._subset(z)
return t1 + t3 * (self.a*np.sign(z)*z)/(np.fabs(z)*(self.c-self.b))
a, b, c = self.a, self.b, self.c
# default is t1
d = np.zeros_like(z)
d[t1] = 1.0
zt3 = z[t3]
d[t3] = (a * np.sign(zt3) * zt3) / (np.abs(zt3) * (c - b))
return d


class TukeyBiweight(RobustNorm):
Expand All @@ -733,7 +755,7 @@ def _subset(self, z):
"""
Tukey's biweight is defined piecewise over the range of z
"""
z = np.fabs(np.asarray(z))
z = np.abs(np.asarray(z))
return np.less_equal(z, self.c)

def rho(self, z):
@@ -856,7 +878,7 @@ def estimate_location(a, scale, norm=None, axis=0, initial=None,
for iter in range(maxiter):
W = norm.weights((a-mu)/scale)
nmu = np.sum(W*a, axis) / np.sum(W, axis)
if np.alltrue(np.less(np.fabs(mu - nmu), scale * tol)):
if np.alltrue(np.less(np.abs(mu - nmu), scale * tol)):
return nmu
else:
mu = nmu
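
For context, a hedged usage sketch of estimate_location (the data, norm, and scale choice are illustrative): the IRLS loop above downweights outliers, so the M-estimate stays near the bulk of the data while the plain mean is dragged toward the outlier.

```python
import numpy as np
from statsmodels.robust.norms import HuberT, estimate_location
from statsmodels.robust.scale import mad

a = np.array([1.0, 1.1, 0.9, 1.2, 0.8, 25.0])
loc = estimate_location(a, scale=mad(a), norm=HuberT())
print(loc, a.mean())  # M-estimate near 1.0; the mean is pulled to 5.0
```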
25 changes: 16 additions & 9 deletions statsmodels/robust/robust_linear_model.py
@@ -17,6 +17,7 @@
import scipy.stats as stats

from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.sm_exceptions import ConvergenceWarning
import statsmodels.regression.linear_model as lm
import statsmodels.regression._tools as reg_tools
import statsmodels.robust.norms as norms
@@ -28,7 +29,7 @@


def _check_convergence(criterion, iteration, tol, maxiter):
cond = np.fabs(criterion[iteration] - criterion[iteration - 1])
cond = np.abs(criterion[iteration] - criterion[iteration - 1])
return not (np.any(cond > tol) and iteration < maxiter)


@@ -140,18 +141,14 @@ def predict(self, params, exog=None):
Parameters
----------
params : array_like, optional after fit has been called
params : array_like
Parameters of a linear model
exog : array_like, optional.
Design / exogenous data. Model exog is used if None.
Returns
-------
An array of fitted values
Notes
-----
If the model as not yet been fit, params is not optional.
"""
# copied from linear_model
if exog is None:
@@ -240,8 +237,8 @@ def fit(self, maxiter=50, tol=1e-8, scale_est='mad', init=None, cov='H1',
Returns
-------
results : object
statsmodels.rlm.RLMresults
results : statsmodels.rlm.RLMresults
Results instance
"""
if cov.upper() not in ["H1", "H2", "H3"]:
raise ValueError("Covariance matrix %s not understood" % cov)
@@ -286,6 +283,12 @@ def fit(self, maxiter=50, tol=1e-8, scale_est='mad', init=None, cov='H1',
iteration = 1
converged = 0
while not converged:
if self.scale == 0.0:
import warnings
warnings.warn('Estimated scale is 0.0 indicating that the most'
' recent iteration produced a perfect fit of the '
'weighted data.', ConvergenceWarning)
break
self.weights = self.M.weights(wls_results.resid / self.scale)
wls_results = reg_tools._MinimalWLS(self.endog, self.exog,
weights=self.weights,
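
With this guard, a perfect fit now surfaces as a ConvergenceWarning rather than propagating a zero scale into the weight update. A sketch of exercising it (assumes a build containing this fix; the data are illustrative):

```python
import warnings

import numpy as np
import statsmodels.api as sm
from statsmodels.tools.sm_exceptions import ConvergenceWarning

x = sm.add_constant(np.linspace(0.0, 1.0, 20))
y = x @ np.array([0.5, -1.5])  # zero residuals -> estimated scale 0.0
with warnings.catch_warnings(record=True) as rec:
    warnings.simplefilter("always")
    sm.RLM(y, x).fit()
assert any(issubclass(w.category, ConvergenceWarning) for w in rec)
```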
@@ -347,7 +350,7 @@ class RLMResults(base.LikelihoodModelResults):
errors are taken from the robust covariance matrix specified in the
argument to fit.
chisq : array
An array of the chi-squared values of the paramter estimates.
An array of the chi-squared values of the parameter estimates.
df_model
See RLM.df_model
df_resid
@@ -419,6 +422,10 @@ def resid(self):

@cache_readonly
def sresid(self):
if self.scale == 0.0:
sresid = self.resid.copy()
sresid[:] = 0.0
return sresid
return self.resid / self.scale

@cache_readonly
45 changes: 27 additions & 18 deletions statsmodels/robust/scale.py
@@ -42,7 +42,7 @@ def mad(a, c=Gaussian.ppf(3/4.), axis=0, center=np.median):
a = np.asarray(a)
if callable(center):
center = np.apply_over_axes(center, a, axis)
return np.median((np.fabs(a-center))/c, axis=axis)
return np.median((np.abs(a-center))/c, axis=axis)
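
As a reminder of the convention: mad divides by c = Gaussian.ppf(3/4) ≈ 0.6745 so that, under normality, it consistently estimates the standard deviation. A quick sketch:

```python
import numpy as np
from statsmodels.robust.scale import mad

rng = np.random.default_rng(0)
x = rng.normal(scale=2.0, size=100_000)
print(mad(x))  # close to 2.0 for Gaussian data
```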


class Huber(object):
@@ -157,16 +157,18 @@ def _estimate_both(self, a, scale, mu, axis, est_mu, n):
nmu = mu.squeeze()
nmu = tools.unsqueeze(nmu, axis, a.shape)

subset = np.less_equal(np.fabs((a - mu)/scale), self.c)
subset = np.less_equal(np.abs((a - mu)/scale), self.c)
card = subset.sum(axis)

nscale = np.sqrt(np.sum(subset * (a - nmu)**2, axis) \
/ (n * self.gamma - (a.shape[axis] - card) * self.c**2))
nscale = np.sqrt(np.sum(subset * (a - nmu) ** 2, axis)
/ (n * self.gamma -
(a.shape[axis] - card) * self.c ** 2))
nscale = tools.unsqueeze(nscale, axis, a.shape)

test1 = np.alltrue(np.less_equal(np.fabs(scale - nscale),
nscale * self.tol))
test2 = np.alltrue(np.less_equal(np.fabs(mu - nmu), nscale*self.tol))
test1 = np.alltrue(np.less_equal(np.abs(scale - nscale),
nscale * self.tol))
test2 = np.alltrue(
np.less_equal(np.abs(mu - nmu), nscale * self.tol))
if not (test1 and test2):
mu = nmu
scale = nscale
@@ -219,21 +221,28 @@ def __init__(self, d=2.5, tol=1e-08, maxiter=30):
self.maxiter = maxiter

def __call__(self, df_resid, nobs, resid):
h = (df_resid)/nobs*(self.d**2 + (1-self.d**2)*\
Gaussian.cdf(self.d)-.5 - self.d/(np.sqrt(2*np.pi))*\
np.exp(-.5*self.d**2))
h = (df_resid) / nobs * (self.d ** 2 + (1 - self.d ** 2) *
Gaussian.cdf(self.d) - .5 - self.d / (
np.sqrt(2 * np.pi)) *
np.exp(-.5 * self.d ** 2))
s = mad(resid)
subset = lambda x: np.less(np.fabs(resid/x),self.d)
chi = lambda s: subset(s)*(resid/s)**2/2+(1-subset(s))*(self.d**2/2)
scalehist = [np.inf,s]

def subset(x):
return np.less(np.abs(resid / x), self.d)

def chi(s):
return subset(s) * (resid / s) ** 2 / 2 + (1 - subset(s)) * \
(self.d ** 2 / 2)

scalehist = [np.inf, s]
niter = 1
while (np.abs(scalehist[niter-1] - scalehist[niter])>self.tol \
and niter < self.maxiter):
nscale = np.sqrt(1/(nobs*h)*np.sum(chi(scalehist[-1]))*\
scalehist[-1]**2)
while (np.abs(scalehist[niter - 1] - scalehist[niter]) > self.tol
and niter < self.maxiter):
nscale = np.sqrt(1 / (nobs * h) * np.sum(chi(scalehist[-1])) *
scalehist[-1] ** 2)
scalehist.append(nscale)
niter += 1
#if niter == self.maxiter:
# if niter == self.maxiter:
# raise ValueError("Huber's scale failed to converge")
return scalehist[-1]
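
A hedged usage sketch of HuberScale (Huber's proposal 2): called with the residual degrees of freedom, the number of observations, and the residuals, it iterates the winsorized chi function above to a scale estimate. The inputs are illustrative; for standard-normal residuals the estimate should sit near 1.0.

```python
import numpy as np
from statsmodels.robust.scale import HuberScale

rng = np.random.default_rng(1)
resid = rng.normal(size=50)
s = HuberScale()(df_resid=48, nobs=50, resid=resid)
print(s)  # roughly 1.0 for standard-normal residuals
```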

