ENH: Add cdf and ppf for each distribution

Add cdf and ppf for each distribution to simplify some tasks closes #253
bashtage · Nov 12, 2018 · 8b3a141 · 8b3a141
1 parent 7131de2
commit 8b3a141
Show file tree

Hide file tree

Showing 3 changed files with 136 additions and 26 deletions.
diff --git a/arch/bootstrap/_samplers.pyx b/arch/bootstrap/_samplers.pyx
@@ -1,10 +1,10 @@
+#!python
+#cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True
+
 import numpy as np
 cimport numpy as np
 cimport cython
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
 def stationary_bootstrap_sample(np.int64_t[:] indices,
                                 double[:] u,
                                 double p):
@@ -34,4 +34,4 @@ def stationary_bootstrap_sample(np.int64_t[:] indices,
             if indices[i] == num_items:
                 indices[i] = 0
 
-    return indices
+    return indices
diff --git a/arch/tests/univariate/test_distribution.py b/arch/tests/univariate/test_distribution.py
@@ -76,10 +76,10 @@ def test_skewstudent(self):
 
         resids = self.resids / self.sigma2 ** .5
         pow = (-(eta + 1) / 2)
-        pdf = const_b * const_c / self.sigma2 ** .5 * \
-            (1 + 1 / (eta - 2) *
-             ((const_b * resids + const_a) /
-              (1 + np.sign(resids + const_a / const_b) * lam)) ** 2) ** pow
+        pdf = (const_b * const_c / self.sigma2 ** .5 *
+               (1 + 1 / (eta - 2) *
+                ((const_b * resids + const_a) /
+                 (1 + np.sign(resids + const_a / const_b) * lam)) ** 2) ** pow)
 
         ll2 = np.log(pdf).sum()
         assert_almost_equal(ll1, ll2)
@@ -144,3 +144,28 @@ def test_ged(self):
 def test_bad_input():
     with pytest.raises(TypeError):
         Normal(random_state='random_state')
+
+
+DISTRIBUTIONS = [(Normal, ()), (StudentsT, (8.0,)), (StudentsT, (3.0,)),
+                 (GeneralizedError, (1.5,)), (GeneralizedError, (2.1,)),
+                 (SkewStudent, (8.0, -0.5))]
+
+
+@pytest.mark.parametrize('distribution', DISTRIBUTIONS)
+def test_roundtrip_cdf_ppf(distribution):
+    pits = np.arange(1, 100.0) / 100.0
+    dist, param = distribution
+    dist = dist()
+    x = dist.ppf(pits, param)
+    p = dist.cdf(x, param)
+    assert_almost_equal(pits, p)
+
+
+def test_invalid_params():
+    pits = np.arange(1, 100.0) / 100.0
+    dist = Normal()
+    with pytest.raises(ValueError):
+        dist.ppf(pits, [1.0])
+    dist = StudentsT()
+    with pytest.raises(ValueError):
+        dist.ppf(pits, [1.0])
diff --git a/arch/univariate/distribution.py b/arch/univariate/distribution.py
@@ -36,6 +36,19 @@ def __init__(self, name, random_state=None):
         if not isinstance(self._random_state, RandomState):
             raise TypeError('random_state must by a NumPy RandomState instance')
 
+    def _check_constraints(self, params):
+        bounds = self.bounds(None)
+        if params is not None or len(bounds) > 0:
+            if len(params) != len(bounds):
+                raise ValueError('parameters must have {0} elements'.format(len(bounds)))
+        if params is None or len(bounds) == 0:
+            return
+        params = asarray(params)
+        for p, n, b in zip(params, self.name, bounds):
+            if not (b[0] <= p <= b[1]):
+                raise ValueError('{0} does not satisfy the bounds requirement '
+                                 'of ({1}, {2})'.format(n, *b))
+
     @property
     def random_state(self):
         """The NumPy RandomState attached to the distribution"""
@@ -165,6 +178,44 @@ def parameter_names(self):
         """
         pass
 
+    @abstractmethod
+    def ppf(self, pits, parameters=None):
+        """
+        Inverse cumulative density function (ICDF)
+
+        Parameters
+        ----------
+        pits : ndarray
+            Probability-integral-transformed values in the interval (0, 1).
+        parameters : ndarray, optional
+            Distribution parameters.
+
+        Returns
+        -------
+        i : ndarray
+            Inverse CDF values
+        """
+        pass
+
+    @abstractmethod
+    def cdf(self, resids, parameters=None):
+        """
+        Cumulative distribution function
+
+        Parameters
+        ----------
+        resids : ndarray
+            Values at which to evaluate the cdf
+        parameters : ndarray, optional
+            Distribution parameters.
+
+        Returns
+        -------
+        f : ndarray
+            CDF values
+        """
+        pass
+
     def __str__(self):
         return self._description()
 
@@ -240,6 +291,14 @@ def simulate(self, parameters):
     def parameter_names(self):
         return []
 
+    def cdf(self, resids, parameters=None):
+        self._check_constraints(parameters)
+        return stats.norm.cdf(resids)
+
+    def ppf(self, pits, parameters=None):
+        self._check_constraints(parameters)
+        return stats.norm.ppf(pits)
+
 
 class StudentsT(Distribution):
     """
@@ -340,6 +399,18 @@ def simulate(self, parameters):
     def parameter_names(self):
         return ['nu']
 
+    def cdf(self, resids, parameters=None):
+        self._check_constraints(parameters)
+        nu = parameters[0]
+        var = nu / (nu - 2)
+        return stats.t(nu, scale=1.0 / sqrt(var)).cdf(resids)
+
+    def ppf(self, pits, parameters=None):
+        self._check_constraints(parameters)
+        nu = parameters[0]
+        var = nu / (nu - 2)
+        return stats.t(nu, scale=1.0 / sqrt(var)).ppf(pits)
+
 
 class SkewStudent(Distribution):
     r"""
@@ -531,36 +602,38 @@ def __const_c(parameters):
         # return gamma((eta+1)/2) / ((pi*(eta-2))**.5 * gamma(eta/2))
         return gammaln((eta + 1) / 2) - gammaln(eta / 2) - log(pi * (eta - 2)) / 2
 
-    def ppf(self, arg, parameters):
-        """Inverse cumulative density function (ICDF).
+    def cdf(self, resids, parameters=None):
+        self._check_constraints(parameters)
+        eta, lam = parameters
 
-        Parameters
-        ----------
-        arg : ndarray
-            Grid of point to evaluate ICDF at. Must belong to (0, 1)
-        parameters : ndarray
-            Shape parameters of the skew-t distribution
+        a = self.__const_a(parameters)
+        b = self.__const_b(parameters)
 
-        Returns
-        -------
-        array
-            ICDF values. Same shape as the input.
+        var = eta / (eta - 2)
+        y1 = (b * resids + a) / (1 - lam) * sqrt(var)
+        y2 = (b * resids + a) / (1 + lam) * sqrt(var)
+        tcdf = stats.t(eta).cdf
+        p = (1 - lam) * tcdf(y1) * (resids < (-a / b))
+        p += (resids >= (-a / b)) * ((1 - lam) / 2 + (1 + lam) * (tcdf(y2) - 0.5))
 
-        """
+        return p
+
+    def ppf(self, resids, parameters=None):
+        self._check_constraints(parameters)
         eta, lam = parameters
 
         a = self.__const_a(parameters)
         b = self.__const_b(parameters)
 
-        cond = arg < (1 - lam) / 2
+        cond = resids < (1 - lam) / 2
 
-        icdf1 = stats.t.ppf(arg[cond] / (1 - lam), eta)
-        icdf2 = stats.t.ppf(.5 + (arg[~cond] - (1 - lam) / 2) / (1 + lam), eta)
-        icdf = -999.99 * ones_like(arg)
+        icdf1 = stats.t.ppf(resids[cond] / (1 - lam), eta)
+        icdf2 = stats.t.ppf(.5 + (resids[~cond] - (1 - lam) / 2) / (1 + lam), eta)
+        icdf = -999.99 * ones_like(resids)
         icdf[cond] = icdf1
         icdf[~cond] = icdf2
         icdf = (icdf *
-                (1 + sign(arg - (1 - lam) / 2) * lam) * (1 - 2 / eta) ** .5 -
+                (1 + sign(resids - (1 - lam) / 2) * lam) * (1 - 2 / eta) ** .5 -
                 a)
         icdf = icdf / b
 
@@ -670,3 +743,15 @@ def simulate(self, parameters):
 
     def parameter_names(self):
         return ['nu']
+
+    def ppf(self, pits, parameters=None):
+        self._check_constraints(parameters)
+        nu = parameters[0]
+        var = stats.gennorm(nu).var()
+        return stats.gennorm(nu, scale=1.0 / sqrt(var)).ppf(pits)
+
+    def cdf(self, resids, parameters=None):
+        self._check_constraints(parameters)
+        nu = parameters[0]
+        var = stats.gennorm(nu).var()
+        return stats.gennorm(nu, scale=1.0 / sqrt(var)).cdf(resids)