Skip to content

Commit

Permalink
ENH: Add cdf and ppf for each distribution
Browse files Browse the repository at this point in the history
Add cdf and ppf for each distribution to simplify some tasks

closes #253
  • Loading branch information
bashtage committed Nov 12, 2018
1 parent 7131de2 commit 8b3a141
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 26 deletions.
8 changes: 4 additions & 4 deletions arch/bootstrap/_samplers.pyx
@@ -1,10 +1,10 @@
#!python
#cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True

import numpy as np
cimport numpy as np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def stationary_bootstrap_sample(np.int64_t[:] indices,
double[:] u,
double p):
Expand Down Expand Up @@ -34,4 +34,4 @@ def stationary_bootstrap_sample(np.int64_t[:] indices,
if indices[i] == num_items:
indices[i] = 0

return indices
return indices
33 changes: 29 additions & 4 deletions arch/tests/univariate/test_distribution.py
Expand Up @@ -76,10 +76,10 @@ def test_skewstudent(self):

resids = self.resids / self.sigma2 ** .5
pow = (-(eta + 1) / 2)
pdf = const_b * const_c / self.sigma2 ** .5 * \
(1 + 1 / (eta - 2) *
((const_b * resids + const_a) /
(1 + np.sign(resids + const_a / const_b) * lam)) ** 2) ** pow
pdf = (const_b * const_c / self.sigma2 ** .5 *
(1 + 1 / (eta - 2) *
((const_b * resids + const_a) /
(1 + np.sign(resids + const_a / const_b) * lam)) ** 2) ** pow)

ll2 = np.log(pdf).sum()
assert_almost_equal(ll1, ll2)
Expand Down Expand Up @@ -144,3 +144,28 @@ def test_ged(self):
def test_bad_input():
with pytest.raises(TypeError):
Normal(random_state='random_state')


DISTRIBUTIONS = [(Normal, ()), (StudentsT, (8.0,)), (StudentsT, (3.0,)),
(GeneralizedError, (1.5,)), (GeneralizedError, (2.1,)),
(SkewStudent, (8.0, -0.5))]


@pytest.mark.parametrize('distribution', DISTRIBUTIONS)
def test_roundtrip_cdf_ppf(distribution):
pits = np.arange(1, 100.0) / 100.0
dist, param = distribution
dist = dist()
x = dist.ppf(pits, param)
p = dist.cdf(x, param)
assert_almost_equal(pits, p)


def test_invalid_params():
pits = np.arange(1, 100.0) / 100.0
dist = Normal()
with pytest.raises(ValueError):
dist.ppf(pits, [1.0])
dist = StudentsT()
with pytest.raises(ValueError):
dist.ppf(pits, [1.0])
121 changes: 103 additions & 18 deletions arch/univariate/distribution.py
Expand Up @@ -36,6 +36,19 @@ def __init__(self, name, random_state=None):
if not isinstance(self._random_state, RandomState):
raise TypeError('random_state must by a NumPy RandomState instance')

def _check_constraints(self, params):
bounds = self.bounds(None)
if params is not None or len(bounds) > 0:
if len(params) != len(bounds):
raise ValueError('parameters must have {0} elements'.format(len(bounds)))
if params is None or len(bounds) == 0:
return
params = asarray(params)
for p, n, b in zip(params, self.name, bounds):
if not (b[0] <= p <= b[1]):
raise ValueError('{0} does not satisfy the bounds requirement '
'of ({1}, {2})'.format(n, *b))

@property
def random_state(self):
"""The NumPy RandomState attached to the distribution"""
Expand Down Expand Up @@ -165,6 +178,44 @@ def parameter_names(self):
"""
pass

@abstractmethod
def ppf(self, pits, parameters=None):
"""
Inverse cumulative density function (ICDF)
Parameters
----------
pits : ndarray
Probability-integral-transformed values in the interval (0, 1).
parameters : ndarray, optional
Distribution parameters.
Returns
-------
i : ndarray
Inverse CDF values
"""
pass

@abstractmethod
def cdf(self, resids, parameters=None):
"""
Cumulative distribution function
Parameters
----------
resids : ndarray
Values at which to evaluate the cdf
parameters : ndarray, optional
Distribution parameters.
Returns
-------
f : ndarray
CDF values
"""
pass

def __str__(self):
return self._description()

Expand Down Expand Up @@ -240,6 +291,14 @@ def simulate(self, parameters):
def parameter_names(self):
return []

def cdf(self, resids, parameters=None):
self._check_constraints(parameters)
return stats.norm.cdf(resids)

def ppf(self, pits, parameters=None):
self._check_constraints(parameters)
return stats.norm.ppf(pits)


class StudentsT(Distribution):
"""
Expand Down Expand Up @@ -340,6 +399,18 @@ def simulate(self, parameters):
def parameter_names(self):
return ['nu']

def cdf(self, resids, parameters=None):
self._check_constraints(parameters)
nu = parameters[0]
var = nu / (nu - 2)
return stats.t(nu, scale=1.0 / sqrt(var)).cdf(resids)

def ppf(self, pits, parameters=None):
self._check_constraints(parameters)
nu = parameters[0]
var = nu / (nu - 2)
return stats.t(nu, scale=1.0 / sqrt(var)).ppf(pits)


class SkewStudent(Distribution):
r"""
Expand Down Expand Up @@ -531,36 +602,38 @@ def __const_c(parameters):
# return gamma((eta+1)/2) / ((pi*(eta-2))**.5 * gamma(eta/2))
return gammaln((eta + 1) / 2) - gammaln(eta / 2) - log(pi * (eta - 2)) / 2

def ppf(self, arg, parameters):
"""Inverse cumulative density function (ICDF).
def cdf(self, resids, parameters=None):
self._check_constraints(parameters)
eta, lam = parameters

Parameters
----------
arg : ndarray
Grid of point to evaluate ICDF at. Must belong to (0, 1)
parameters : ndarray
Shape parameters of the skew-t distribution
a = self.__const_a(parameters)
b = self.__const_b(parameters)

Returns
-------
array
ICDF values. Same shape as the input.
var = eta / (eta - 2)
y1 = (b * resids + a) / (1 - lam) * sqrt(var)
y2 = (b * resids + a) / (1 + lam) * sqrt(var)
tcdf = stats.t(eta).cdf
p = (1 - lam) * tcdf(y1) * (resids < (-a / b))
p += (resids >= (-a / b)) * ((1 - lam) / 2 + (1 + lam) * (tcdf(y2) - 0.5))

"""
return p

def ppf(self, resids, parameters=None):
self._check_constraints(parameters)
eta, lam = parameters

a = self.__const_a(parameters)
b = self.__const_b(parameters)

cond = arg < (1 - lam) / 2
cond = resids < (1 - lam) / 2

icdf1 = stats.t.ppf(arg[cond] / (1 - lam), eta)
icdf2 = stats.t.ppf(.5 + (arg[~cond] - (1 - lam) / 2) / (1 + lam), eta)
icdf = -999.99 * ones_like(arg)
icdf1 = stats.t.ppf(resids[cond] / (1 - lam), eta)
icdf2 = stats.t.ppf(.5 + (resids[~cond] - (1 - lam) / 2) / (1 + lam), eta)
icdf = -999.99 * ones_like(resids)
icdf[cond] = icdf1
icdf[~cond] = icdf2
icdf = (icdf *
(1 + sign(arg - (1 - lam) / 2) * lam) * (1 - 2 / eta) ** .5 -
(1 + sign(resids - (1 - lam) / 2) * lam) * (1 - 2 / eta) ** .5 -
a)
icdf = icdf / b

Expand Down Expand Up @@ -670,3 +743,15 @@ def simulate(self, parameters):

def parameter_names(self):
return ['nu']

def ppf(self, pits, parameters=None):
self._check_constraints(parameters)
nu = parameters[0]
var = stats.gennorm(nu).var()
return stats.gennorm(nu, scale=1.0 / sqrt(var)).ppf(pits)

def cdf(self, resids, parameters=None):
self._check_constraints(parameters)
nu = parameters[0]
var = stats.gennorm(nu).var()
return stats.gennorm(nu, scale=1.0 / sqrt(var)).cdf(resids)

0 comments on commit 8b3a141

Please sign in to comment.