From e82bb57aaf3ce1d5cb9530a64fde579d2ed0423f Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sun, 12 Oct 2014 17:36:04 -0400 Subject: [PATCH] ENH: Bootstrap framework Introduces a bootstrap framework for use by multiple comparison procedures --- README.md | 26 +- arch/bootstrap/__init__.py | 3 +- arch/bootstrap/_samplers.pyx | 37 + arch/bootstrap/_samplers_python.py | 31 + arch/bootstrap/base.py | 851 +++++++++++++++++- arch/bootstrap/tests/__init__.py | 0 arch/bootstrap/tests/test_bootstrap.py | 581 ++++++++++++ arch/compat/numba.py | 9 + arch/tests/test_deprecations.py | 3 +- arch/tests/test_utils.py | 12 - arch/univariate/mean.py | 28 +- arch/univariate/recursions_python.py | 10 +- arch/univariate/tests/test_mean.py | 10 +- arch/univariate/tests/test_volatility.py | 4 +- arch/univariate/volatility.py | 12 +- doc/source/1.0.rst | 6 - doc/source/bootstrap/background.rst | 17 + doc/source/bootstrap/bootstrap.rst | 19 + doc/source/bootstrap/confidence-intervals.rst | 247 +++++ doc/source/bootstrap/iid-bootstraps.rst | 9 + doc/source/bootstrap/low-level-interface.rst | 70 ++ .../parameter-covariance-estimation.rst | 2 + .../bootstrap/timeseries-bootstraps.rst | 29 + doc/source/changes.rst | 1 + doc/source/changes/1.0.txt | 11 + doc/source/conf.py | 27 + doc/source/index.rst | 12 +- doc/source/{ => univariate}/background.rst | 0 doc/source/{ => univariate}/distribution.rst | 0 doc/source/{ => univariate}/mean.rst | 0 doc/source/univariate/univariate.rst | 11 + doc/source/{ => univariate}/volatility.rst | 0 ...b => univariate_volatility_modeling.ipynb} | 0 setup.py | 54 +- 34 files changed, 1999 insertions(+), 133 deletions(-) create mode 100644 arch/bootstrap/_samplers.pyx create mode 100644 arch/bootstrap/_samplers_python.py create mode 100644 arch/bootstrap/tests/__init__.py create mode 100644 arch/bootstrap/tests/test_bootstrap.py create mode 100644 arch/compat/numba.py delete mode 100644 doc/source/1.0.rst create mode 100644 doc/source/bootstrap/background.rst create mode 100644 doc/source/bootstrap/bootstrap.rst create mode 100644 doc/source/bootstrap/confidence-intervals.rst create mode 100644 doc/source/bootstrap/iid-bootstraps.rst create mode 100644 doc/source/bootstrap/low-level-interface.rst create mode 100644 doc/source/bootstrap/parameter-covariance-estimation.rst create mode 100644 doc/source/bootstrap/timeseries-bootstraps.rst create mode 100644 doc/source/changes.rst create mode 100644 doc/source/changes/1.0.txt rename doc/source/{ => univariate}/background.rst (100%) rename doc/source/{ => univariate}/distribution.rst (100%) rename doc/source/{ => univariate}/mean.rst (100%) create mode 100644 doc/source/univariate/univariate.rst rename doc/source/{ => univariate}/volatility.rst (100%) rename examples/{examples.ipynb => univariate_volatility_modeling.ipynb} (100%) diff --git a/README.md b/README.md index bc8fac9eef..59848679ae 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Documentation Status](https://readthedocs.org/projects/arch/badge/?version=latest)](https://readthedocs.org/projects/arch/?badge=latest) +[![Documentation Status](https://readthedocs.org/projects/arch/badge/?version=latest)](http://arch.readthedocs.org/en/latest/) [![CI Status](https://travis-ci.org/bashtage/arch.svg?branch=master)](https://travis-ci.org/bashtage/arch) [![Coverage Status](https://coveralls.io/repos/bashtage/arch/badge.png?branch=master)](https://coveralls.io/r/bashtage/arch?branch=master) @@ -11,20 +11,20 @@ This is a work-in-progress for ARCH and related models, written in Python ## What is this repository for? * Mean models - * Constant mean - * Heterogeneous Autoregression (HAR) - * Autoregression (AR) - * Zero mean - * Models with and without exogensou regressors + * Constant mean + * Heterogeneous Autoregression (HAR) + * Autoregression (AR) + * Zero mean + * Models with and without exogenous regressors * Volatility models - * ARCH - * GARCH - * TARCH - * EGARCH - * EWMA/RiskMetrics + * ARCH + * GARCH + * TARCH + * EGARCH + * EWMA/RiskMetrics * Distributions - * Normal - * Student's T + * Normal + * Student's T ## Examples diff --git a/arch/bootstrap/__init__.py b/arch/bootstrap/__init__.py index 0afb285984..9fca291a50 100644 --- a/arch/bootstrap/__init__.py +++ b/arch/bootstrap/__init__.py @@ -1 +1,2 @@ -__author__ = 'kevin.sheppard' +from .base import IIDBootstrap, CircularBlockBootstrap, MovingBlockBootstrap, \ + StationaryBootstrap diff --git a/arch/bootstrap/_samplers.pyx b/arch/bootstrap/_samplers.pyx new file mode 100644 index 0000000000..cb7caa598e --- /dev/null +++ b/arch/bootstrap/_samplers.pyx @@ -0,0 +1,37 @@ +import numpy as np +cimport numpy as np +cimport cython + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def stationary_bootstrap_sample(int[:] indices, + double[:] u, + double p): + """ + Parameters + ------- + indices: 1-s arrah + Array containing draws from randint with the same size as the data in + the range of [0,nobs) + u : 1-d array + Array of standard uniforms + p : float + Probability that a new block is started in the stationary bootstrap. + The multiplicative reciprocal of the window length + + Returns + ------- + indices: 1-d array + Indices for an iteration of the stationary bootstrap + """ + cdef Py_ssize_t num_items, i + num_items = indices.shape[0] + + for i in range(1, num_items): + if u[i] > p: + indices[i] = indices[i - 1] + 1 + if indices[i] == num_items: + indices[i] = 0 + + return indices \ No newline at end of file diff --git a/arch/bootstrap/_samplers_python.py b/arch/bootstrap/_samplers_python.py new file mode 100644 index 0000000000..5051080cc5 --- /dev/null +++ b/arch/bootstrap/_samplers_python.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import, division +from ..compat.numba import jit + + +@jit +def stationary_bootstrap_sample(indices, u, p): + """ + Parameters + ------- + indices: 1-s arrah + Array containing draws from randint with the same size as the data in + the range of [0,nobs) + u : 1-d array + Array of standard uniforms + p : float + Probability that a new block is started in the stationary bootstrap. + The multiplicative reciprocal of the window length + + Returns + ------- + indices: 1-d array + Indices for an iteration of the stationary bootstrap + """ + num_items = indices.shape[0] + for i in range(1, num_items): + if u[i] > p: + indices[i] = indices[i - 1] + 1 + if indices[i] == num_items: + indices[i] = 0 + + return indices diff --git a/arch/bootstrap/base.py b/arch/bootstrap/base.py index dcd1a75fa1..bc8417fc9c 100644 --- a/arch/bootstrap/base.py +++ b/arch/bootstrap/base.py @@ -1,25 +1,139 @@ +from __future__ import absolute_import, division +import copy + import numpy as np from numpy.random import RandomState +import pandas as pd +import scipy.stats as stats -from arch.compat.python import iteritems, itervalues, add_metaclass +from arch.compat.python import iteritems, itervalues, add_metaclass, range from arch.utils import DocStringInheritor +try: + from ._samplers import stationary_bootstrap_sample +except ImportError: # pragma: no cover + from ._samplers_python import stationary_bootstrap_sample + + +def _loo_jackknife(func, nobs, args, kwargs): + """ + Leave one out jackknife estimation + + Parameters + ---------- + func : callable + Function that computes parameters. Called using func(*args, **kwargs) + nobs : int + Number of observation in the data + args : list + List of positional inputs + kwargs: dict + List of keyword inputs + + Returns + ------- + results : array + Array containing the jackknife results where row i corresponds to + leaving observation i out of the sample + """ + results = [] + for i in range(nobs): + items = np.r_[0:i, i + 1:nobs] + args_copy = [arg[items] for arg in args] + kwargs_copy = {k: v[items] for k, v in iteritems(kwargs)} + results.append(func(*args_copy, **kwargs_copy)) + return np.array(results) + + +def _add_extra_kwargs(kwargs, extra_kwargs=None): + """ + Safely add additional keyword arguments to an existing dictionary + + Parameters + ---------- + kwargs : dict + Keyword argument dictionary + extra_kwargs : dict, optional + Keyword argument dictionary to add + + Returns + ------- + augmented_kwargs : dict + Keyword dictionary with added keyword arguments + + Notes + ----- + There is no checking for duplicate keys + """ + if extra_kwargs is None: + return kwargs + else: + return dict(list(kwargs.items()) + list(extra_kwargs.items())) + + @add_metaclass(DocStringInheritor) class IIDBootstrap(object): + """ + Bootstrap using uniform resampling + + Parameters + ---------- + args + Positional arguments to bootstrap + kwargs + Keyword arguments to bootstrap + + Attributes + ---------- + index : array + The current index of the bootstrap + data : tuple + Two-element tuple with the pos_data in the first position and kw_data + in the second (pos_data, kw_data) + pos_data : tuple + Tuple containing the positional arguments (in the order entered) + kw_data : dict + Dictionary containing the keyword arguments + random_state : RandomState + RandomState instance used by bootstrap + + Notes + ----- + Supports numpy arrays and pandas Series and DataFrames. Data returned has + the same type as the input date. + + Data entered using keyword arguments is directly accessibly as an attribute. + + Examples + -------- + Data can be accessed in a number of ways. Positional data is retained in + the same order as it was entered when the bootstrap was initialized. + Keyword data is available both as an attribute or using a dictionary syntax + on kw_data. + + >>> from arch.bootstrap import IIDBootstrap + >>> from numpy.random import standard_normal + >>> y = standard_normal((500, 1)) + >>> x = standard_normal((500,2)) + >>> z = standard_normal(500) + >>> bs = IIDBootstrap(x, y=y, z=z) + >>> for data in bs.bootstrap(100): + ... bs_x = data[0][0] + ... bs_y = data[1]['y'] + ... bs_z = bs.z + """ def __init__(self, *args, **kwargs): - self.iterations = 0 - self._iter_count = 0 self.random_state = RandomState() - self.initial_state = self.random_state.get_state() - self.data = args + self._initial_state = self.random_state.get_state() self._args = args self._kwargs = kwargs if args: self._num_items = len(args[0]) elif kwargs: - self._num_items = len(kwargs[kwargs.keys()[0]]) + key = list(kwargs.keys())[0] + self._num_items = len(kwargs[key]) all_args = list(args) all_args.extend([v for v in itervalues(kwargs)]) @@ -28,77 +142,736 @@ def __init__(self, *args, **kwargs): if len(arg) != self._num_items: raise ValueError("All inputs must have the same number of " "elements in axis 0") + self._index = np.arange(self._num_items) + + self._parameters = [] self.pos_data = args - self.named_data = kwargs + self.kw_data = kwargs + self.data = (args, kwargs) + + self._base = None + self._results = None + self._studentized_results = None + self._last_func = None - self.indices = np.arange(self._num_items) for key, value in iteritems(kwargs): - self.__setattr__(key, value) + attr = getattr(self, key, None) + if attr is None: + self.__setattr__(key, value) + else: + raise ValueError(key + ' is a reserved name') + + @property + def index(self): + """ + Returns the current index of the bootstrap + """ + return self._index def get_state(self): + """ + Gets the state of the bootstrap's random number generator + + Returns + ------- + state : RandomState state vector + Array containing the state + """ return self.random_state.get_state() - def set_state(self): - return self.random_state.set_state() + def set_state(self, state): + """ + Sets the state of the bootstrap's random number generator + + Parameters + ---------- + state : RandomState state vector + Array containing the state + """ + + return self.random_state.set_state(state) def seed(self, value): + """ + Seeds the bootstrap's random number generator + + Parameters + ---------- + value : int + Integer to use as the seed + """ self.random_state.seed(value) return None - def reset(self): - self.random_state.set_state(self.initial_state) + def reset(self, seed=None): + """ + Resets the bootstrap to its initial condition + """ + self._index = np.arange(self._num_items) + self._resample() + self.random_state.set_state(self._initial_state) + if seed is not None: + self.seed(seed) + return None + + def bootstrap(self, reps): + """ + Iterator for use when bootstrapping + + Parameters + ---------- + reps : int + Number of bootstrap replications + + Example + ------- + The key steps are problem dependent and so this example shows the use + as an iterator that does not produce any output + + >>> from arch.bootstrap import IIDBootstrap + >>> import numpy as np + >>> bs = IIDBootstrap(np.arange(100), x=np.random.randn(100)) + >>> for posdata, kwdata in bs.bootstrap(1000): + ... # Do something with the positional data and/or keyword data + ... pass + + .. note:: + + Note this is a generic example and so the class used should be the + name of the required bootstrap + + Notes + ----- + The iterator returns a tuple containing the data entered in positional + arguments as a tuple and the data entered using keywords as a + dictionary + """ + for _ in range(reps): + indices = np.asarray(self.update_indices()) + self._index = indices + yield self._resample() + + def conf_int(self, func, reps=1000, method='basic', size=0.95, tail='two', + extra_kwargs=None, reuse=False, sampling='nonparametric', + std_err_func=None, studentize_reps=1000): + """ + Parameters + ---------- + func : callable + Function the computes parameter values. See Notes for requirements + reps : int, optional + Number of bootstrap replications + method : string, optional + One of 'basic', 'percentile', 'studentized', 'norm' (identical to + 'var', 'cov'), 'bc' (identical to 'debiased', 'bias-corrected'), or + 'bca' + size : float, optional + Coverage of confidence interval + tail : string, optional + One of 'two', 'upper' or 'lower'. + reuse : bool, optional + Flag indicating whether to reuse previously computed bootstrap + results. This allows alternative methods to be compared without + rerunning the bootstrap simulation. Reuse is ignored if reps is + not the same across multiple runs, func changes across calls, or + method is 'studentized'. + sampling : string, optional + Type of sampling to use: 'nonparametric', 'semi-parametric' (or + 'semi') or 'parametric'. The default is 'nonparametric'. See + notes about the changes to func required when using 'semi' or + 'parametric'. + extra_kwargs : dict, optional + Extra keyword arguments to use when calling func and std_err_func, + when appropriate + std_err_func : callable, optional + studentize_reps : int, optional + + Returns + ------- + intervals : 2-d array + Computed confidence interval. Row 0 contains the lower bounds, and + row 1 contains the upper bounds. Each column corresponds to a + parameter. When tail is 'lower', all upper bounds are inf. + Similarly, 'upper' sets all lower bounds to -inf. + + Examples + -------- + >>> import numpy as np + >>> def func(x): + ... return x.mean(0) + >>> y = np.random.randn(1000,2) + >>> from arch.bootstrap import IIDBootstrap + >>> bs = IIDBootstrap(y) + >>> ci = bs.conf_int(func, 1000) + + Notes + ----- + When there are no extra keyword arguments, the function is called + + .. code:: python + + func(params, *args, **kwargs) + + where args and kwargs are the bootstrap version of the data provided + when setting up the bootstrap. When extra keyword arguments are used, + these are appended to kwargs before calling func. + + The bootstraps are: + + * 'basic' - Basic confidence using the estimated parameter and + difference between the estimated parameter and the bootstrap parameters + * 'percentile' - Direct use of bootstrap percentiles + * 'norm' - Makes use of normal approximation and bootstrap covariance + estimator + * 'studentized' - Uses either a standard error function or a nested + bootstrap to estimate percentiles and the bootstrap covariance for scale + * 'bc' - Bias corrected using estimate bootstrap bias correction + * 'bca' - Bias corrected and accelerated, adding acceleration parameter + to 'bc' method + """ + studentized = 'studentized' + if not 0.0 < size < 1.0: + raise ValueError('size must be strictly between 0 and 1') + tail = tail.lower() + if tail not in ('two', 'lower', 'upper'): + raise ValueError('tail must be one of two-sided, lower or upper') + studentize_reps = studentize_reps if method == studentized else 0 + + _reuse = False + if reuse: + # check conditions for reuse + _reuse = (self._results is not None and len(self._results) == reps + and method != studentized and self._last_func is func) + + if not _reuse: + if reuse: + import warnings + + warn = 'The conditions to reuse the previous bootstrap has ' \ + 'not been satisfied. A new bootstrap will be constructed' + warnings.warn(warn, RuntimeWarning) + self._construct_bootstrap_estimates(func, reps, extra_kwargs, + std_err_func=std_err_func, + studentize_reps=studentize_reps, + sampling=sampling) + + base, results = self._base, self._results + studentized_results = self._studentized_results + + std_err = [] + if method in ('norm', 'var', 'cov', studentized): + errors = results - results.mean(axis=0) + std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps)) + + if tail == 'two': + alpha = (1.0 - size) / 2 + else: + alpha = (1.0 - size) + + percentiles = [alpha, 1.0 - alpha] + norm_quantiles = stats.norm.ppf(percentiles) + + if method in ('norm', 'var', 'cov'): + lower = base + norm_quantiles[0] * std_err + upper = base + norm_quantiles[1] * std_err + + elif method in ('percentile', 'basic', studentized, + 'debiased', 'bc', 'bias-corrected', 'bca'): + values = results + if method == studentized: + # studentized uses studentized parameter estimates + values = studentized_results + + if method in ('debiased', 'bc', 'bias-corrected', 'bca'): + # bias corrected uses modified percentiles, but is + # otherwise identical to the percentile method + p = (results < base).mean(axis=0) + b = stats.norm.ppf(p) + b = b[:, None] + if method == 'bca': + # TODO : Check dimensions of a + nobs = self._num_items + jk_params = _loo_jackknife(func, nobs, self._args, + self._kwargs) + u = (nobs - 1) * (jk_params - base) + numer = np.sum(u ** 3, 0) + denom = 6 * (np.sum(u ** 2, 0) ** (3.0 / 2.0)) + small = denom < (np.abs(numer) * np.finfo(np.float64).eps) + if small.any(): + message = 'Jackknife variance estimate {jk_var} is ' \ + 'too small to use BCa' + raise RuntimeError(message.format(jk_var=denom)) + a = numer / denom + a = a[:, None] + else: + a = 0.0 + + percentiles = stats.norm.cdf(b + (b + norm_quantiles) / + (1.0 - a * (b + norm_quantiles))) + percentiles = list(100 * percentiles) + else: + percentiles = [100 * p for p in percentiles] # Rescale + + if method not in ('bc', 'debiased', 'bias-corrected', 'bca'): + ci = np.asarray(np.percentile(values, percentiles, axis=0)) + lower = ci[0, :] + upper = ci[1, :] + else: + k = values.shape[1] + lower = np.zeros(k) + upper = np.zeros(k) + for i in range(k): + lower[i], upper[i] = np.percentile(values[:, i], + list(percentiles[i])) + + # Basic and studentized use the lower empirical quantile to + # compute upper and vice versa. Bias corrected and percentile use + # upper to estimate the upper, and lower to estimate the lower + if method == 'basic': + lower_copy = lower + 0.0 + lower = 2.0 * base - upper + upper = 2.0 * base - lower_copy + elif method == studentized: + lower_copy = lower + 0.0 + lower = base - upper * std_err + upper = base - lower_copy * std_err + + else: + raise ValueError('Unknown method') + + if tail == 'lower': + upper = np.zeros_like(base) + upper.fill(np.inf) + elif tail == 'upper': + lower = np.zeros_like(base) + lower.fill(-1 * np.inf) + + return np.vstack((lower, upper)) + + def apply(self, func, reps, extra_kwargs=None): + """ + Applies a function to bootstrap replicated data + + Paramters + --------- + func : callable + Function the computes parameter values. See Notes for requirements + reps : int + Number of bootstrap replications + extra_kwargs : dict, optional + Extra keyword arguments to use when calling func. Must not conflict + with keyword arguments used to initialize bootstrap + Returns + ------- + results : array + reps by nparam array of computed function values where each row + corresponds to a bootstrap iteration - def bootstrap(self, num): - self.iterations = num + Notes + ----- + When there are no extra keyword arguments, the function is called - def __iter__(self): - for i in range(self.num): - indices = self.update_indices() - yield self._resample(indices) + .. code:: python + + func(params, *args, **kwargs) + + where args and kwargs are the bootstrap version of the data provided + when setting up the bootstrap. When extra keyword arguments are used, + these are appended to kwargs before calling func + + Examples + -------- + >>> import numpy as np + >>> x = np.random.randn(1000,1) + >>> from arch.bootstrap import IIDBootstrap + >>> bs = IIDBootstrap(x) + >>> def func(y): + return y.mean() + >>> results = bs.apply(func, 100) + """ + kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs) + base = func(*self._args, **kwargs) + + results = np.zeros(reps, base.shape[0]) + count = 0 + for pos_data, kw_data in self.bootstrap(reps): + kwargs = _add_extra_kwargs(kw_data, extra_kwargs) + results[count] = func(*pos_data, **kwargs) + count += 1 + return results + + def _construct_bootstrap_estimates(self, func, reps, extra_kwargs=None, + std_err_func=None, studentize_reps=0, + sampling='nonparametric'): + # Private, more complicated version of apply + self._last_func = func + semi = parametric = False + if sampling == 'parametric': + parametric = True + elif sampling == 'semiparametric': + semi = True + + if extra_kwargs is not None: + if any(k in self._kwargs for k in extra_kwargs): + raise ValueError('extra_kwargs contains keys used for variable' + ' names in the bootstrap') + kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs) + base = func(*self._args, **kwargs) + + num_params = 1 if np.isscalar(base) else base.shape[0] + results = np.zeros((reps, num_params)) + studentized_results = np.zeros((reps, num_params)) + + count = 0 + for pos_data, kw_data in self.bootstrap(reps): + kwargs = _add_extra_kwargs(kw_data, extra_kwargs) + if parametric: + kwargs['state'] = self.random_state + kwargs['params'] = base + elif semi: + kwargs['params'] = base + results[count] = func(*pos_data, **kwargs) + if std_err_func is not None: + std_err = std_err_func(results[count], *pos_data, **kwargs) + studentized_results[count] = (results[count] - base) / std_err + elif studentize_reps > 0: + # Need new bootstrap of same type + pos_data_aug = copy.deepcopy(self._parameters) + pos_data_aug.extend(pos_data) + nested_bs = self.__class__(*pos_data_aug, **kw_data) + # Set the seed to ensure reproducability + seed = self.random_state.randint(2 ** 31 - 1) + nested_bs.seed(seed) + cov = nested_bs.cov(func, studentize_reps, + extra_kwargs=extra_kwargs) + std_err = np.sqrt(np.diag(cov)) + studentized_results[count] = (results[count] - base) / std_err + count += 1 + + self._base = np.asarray(base) + self._results = np.asarray(results) + self._studentized_results = np.asarray(studentized_results) + + def cov(self, func, reps=1000, recenter=True, extra_kwargs=None): + """ + Parameters + ---------- + func : callable + Callable function that returns the statistic of interest as a + 1-d array + reps : int, optional + Number of bootstrap replications + recenter : bool, optional + Whether to center the bootstrap variance estimator on the average + of the bootstrap samples (True) or to center on the original sample + estimate (False). Default is True. + extra_kwargs: dict, optional + Dictionary of extra keyword arguments to pass to func + + Returns + ------- + cov: array + Bootstrap covariance estimator + + Notes + ----- + func must have the signature + + .. code:: python + + func(params, *args, **kwargs) + + where params are a 1-dimensional array, and `*args` and `**kwargs` are + data used in the the bootstrap. The first argument, params, will be + none when called using the original data, and will contain the estimate + computed using the original data in bootstrap replications. This + parameter is passed to allow parametric bootstrap simulation. + + Example + ------- + Bootstrap covariance of the mean + + >>> from arch.bootstrap import IIDBootstrap + >>> import numpy as np + >>> def func(x): + ... return x.mean(axis=0) + >>> y = np.random.randn(1000,3) + >>> bs = IIDBootstrap(y) + >>> cov = bs.cov(func, 1000) + + Bootstrap covariance using a function that takes additional input + + >>> def func(x, stat='mean'): + ... if stat=='mean': + ... return x.mean(axis=0) + ... elif stat=='var': + ... return x.var(axis=0) + >>> cov = bs.cov(func, 1000, extra_kwargs={'stat':'var'}) + + .. note:: + + Note this is a generic example and so the class used should be the + name of the required bootstrap + + """ + self._construct_bootstrap_estimates(func, reps, extra_kwargs) + base, results = self._base, self._results + + if recenter: + errors = results - np.mean(results, 0) + else: + errors = results - base + + return errors.T.dot(errors) / reps def update_indices(self): - raise NotImplementedError("Subclasses must implement") + """ + Update indices for the next iteration of the bootstrap. This must + be overridden when creating new bootstraps. + """ + return self.random_state.randint(self._num_items, + size=self._num_items) + + def _resample(self): + """ + Resample all data using the values in _index + """ + indices = self._index + pos_data = [] + for values in self._args: + if isinstance(values, (pd.Series, pd.DataFrame)): + pos_data.append(values.iloc[indices]) + else: + pos_data.append(values[indices]) + named_data = {} + for key, values in iteritems(self._kwargs): + if isinstance(values, (pd.Series, pd.DataFrame)): + named_data[key] = values.iloc[indices] + else: + named_data[key] = values[indices] + setattr(self, key, named_data[key]) - def _resample(self, indices): - pos_data = [arg[indices] for arg in self._args] - named_data = {k: v[indices] for k,v in iteritems(self._kwargs)} self.pos_data = pos_data - self.named_data = named_data + self.kw_data = named_data self.data = (pos_data, named_data) return self.data + class CircularBlockBootstrap(IIDBootstrap): + """ + Bootstrap based on blocks of the same length with end-to-start wrap around + + Parameters + ---------- + block_size : int + Size of block to use + args + Positional arguments to bootstrap + kwargs + Keyword arguments to bootstrap + + Attributes + ---------- + index : array + The current index of the bootstrap + data : tuple + Two-element tuple with the pos_data in the first position and kw_data + in the second (pos_data, kw_data) + pos_data : tuple + Tuple containing the positional arguments (in the order entered) + kw_data : dict + Dictionary containing the keyword arguments + random_state : RandomState + RandomState instance used by bootstrap + + Notes + ----- + Supports numpy arrays and pandas Series and DataFrames. Data returned has + the same type as the input date. + + Data entered using keyword arguments is directly accessibly as an attribute. + + Examples + -------- + Data can be accessed in a number of ways. Positional data is retained in + the same order as it was entered when the bootstrap was initialized. + Keyword data is available both as an attribute or using a dictionary syntax + on kw_data. + + >>> from arch.bootstrap import CircularBlockBootstrap + >>> from numpy.random import standard_normal + >>> y = standard_normal((500, 1)) + >>> x = standard_normal((500,2)) + >>> z = standard_normal(500) + >>> bs = CircularBlockBootstrap(17, x, y=y, z=z) + >>> for data in bs.bootstrap(100): + ... bs_x = data[0][0] + ... bs_y = data[1]['y'] + ... bs_z = bs.z + """ + def __init__(self, block_size, *args, **kwargs): - super(IIDBootstrap, self).__init__(*args, **kwargs) self.block_size = block_size + self._parameters = [block_size] + super(CircularBlockBootstrap, self).__init__(*args, **kwargs) def update_indices(self): - raise NotImplementedError + num_blocks = self._num_items // self.block_size + if num_blocks * self.block_size < self._num_items: + num_blocks += 1 + indices = self.random_state.randint(self._num_items, size=num_blocks) + indices = indices[:, None] + np.arange(self.block_size) + indices = indices.flatten() + indices %= self._num_items + + if indices.shape[0] > self._num_items: + return indices[:self._num_items] + else: + return indices + + +class StationaryBootstrap(CircularBlockBootstrap): + """ + Politis and Romano (1994) bootstrap with expon. distributed block sizes + + Parameters + ---------- + block_size : int + Average size of block to use + args + Positional arguments to bootstrap + kwargs + Keyword arguments to bootstrap + + Attributes + ---------- + index : array + The current index of the bootstrap + data : tuple + Two-element tuple with the pos_data in the first position and kw_data + in the second (pos_data, kw_data) + pos_data : tuple + Tuple containing the positional arguments (in the order entered) + kw_data : dict + Dictionary containing the keyword arguments + random_state : RandomState + RandomState instance used by bootstrap + + Notes + ----- + Supports numpy arrays and pandas Series and DataFrames. Data returned has + the same type as the input date. + + Data entered using keyword arguments is directly accessibly as an attribute. + + Examples + -------- + Data can be accessed in a number of ways. Positional data is retained in + the same order as it was entered when the bootstrap was initialized. + Keyword data is available both as an attribute or using a dictionary syntax + on kw_data. + + >>> from arch.bootstrap import StationaryBootstrap + >>> from numpy.random import standard_normal + >>> y = standard_normal((500, 1)) + >>> x = standard_normal((500,2)) + >>> z = standard_normal(500) + >>> bs = StationaryBootstrap(12, x, y=y, z=z) + >>> for data in bs.bootstrap(100): + ... bs_x = data[0][0] + ... bs_y = data[1]['y'] + ... bs_z = bs.z + """ -class StationaryBootstrap(IIDBootstrap): def __init__(self, block_size, *args, **kwargs): - super(IIDBootstrap, self).__init__() - self.block_size = block_size + super(StationaryBootstrap, self).__init__(block_size, *args, **kwargs) self._p = 1.0 / block_size def update_indices(self): - raise NotImplementedError + indices = self.random_state.randint(self._num_items, + size=self._num_items) + indices = indices.astype(np.int32) + u = self.random_state.random_sample(self._num_items) + return stationary_bootstrap_sample(indices, u, self._p) + + +class MovingBlockBootstrap(CircularBlockBootstrap): + """ + Bootstrap based on blocks of the same length without wrap around + + Parameters + ---------- + block_size : int + Size of block to use + args + Positional arguments to bootstrap + kwargs + Keyword arguments to bootstrap + + Attributes + ---------- + index : array + The current index of the bootstrap + data : tuple + Two-element tuple with the pos_data in the first position and kw_data + in the second (pos_data, kw_data) + pos_data : tuple + Tuple containing the positional arguments (in the order entered) + kw_data : dict + Dictionary containing the keyword arguments + random_state : RandomState + RandomState instance used by bootstrap + + Notes + ----- + Supports numpy arrays and pandas Series and DataFrames. Data returned has + the same type as the input date. + + Data entered using keyword arguments is directly accessibly as an attribute. + + Examples + -------- + Data can be accessed in a number of ways. Positional data is retained in + the same order as it was entered when the bootstrap was initialized. + Keyword data is available both as an attribute or using a dictionary syntax + on kw_data. + + >>> from arch.bootstrap import MovingBlockBootstrap + >>> from numpy.random import standard_normal + >>> y = standard_normal((500, 1)) + >>> x = standard_normal((500,2)) + >>> z = standard_normal(500) + >>> bs = MovingBlockBootstrap(7, x, y=y, z=z) + >>> for data in bs.bootstrap(100): + ... bs_x = data[0][0] + ... bs_y = data[1]['y'] + ... bs_z = bs.z + """ -class MovingBlockBootstrap(IIDBootstrap): def __init__(self, block_size, *args, **kwargs): - super(IIDBootstrap, self).__init__(*args, **kwargs) - self.block_size = block_size + super(MovingBlockBootstrap, self).__init__(block_size, *args, **kwargs) def update_indices(self): - raise NotImplementedError + num_blocks = self._num_items // self.block_size + if num_blocks * self.block_size < self._num_items: + num_blocks += 1 + max_index = self._num_items - self.block_size + 1 + indices = self.random_state.randint(max_index, size=num_blocks) + indices = indices[:, None] + np.arange(self.block_size) + indices = indices.flatten() + if indices.shape[0] > self._num_items: + return indices[:self._num_items] + else: + return indices -class MOONBootstrap(IIDBootstrap): - def __init__(self, block_size, *args, **kwargs): - super(IIDBootstrap, self).__init__(*args, **kwargs) + +class MOONBootstrap(IIDBootstrap): # pragma: no cover + + def __init__(self, block_size, *args, **kwargs): # pragma: no cover + super(MOONBootstrap, self).__init__(*args, **kwargs) self.block_size = block_size - def update_indices(self): + def update_indices(self): # pragma: no cover raise NotImplementedError - diff --git a/arch/bootstrap/tests/__init__.py b/arch/bootstrap/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/arch/bootstrap/tests/test_bootstrap.py b/arch/bootstrap/tests/test_bootstrap.py new file mode 100644 index 0000000000..13dfd51c82 --- /dev/null +++ b/arch/bootstrap/tests/test_bootstrap.py @@ -0,0 +1,581 @@ +from __future__ import absolute_import, division +import warnings +from unittest import TestCase + +import numpy as np +from numpy.testing import assert_equal, assert_raises, assert_allclose +import scipy.stats as stats +import pandas as pd +from pandas.util.testing import assert_frame_equal, assert_series_equal + +from arch.bootstrap import IIDBootstrap, StationaryBootstrap, \ + MovingBlockBootstrap, CircularBlockBootstrap +from arch.bootstrap.base import _loo_jackknife + + +warnings.simplefilter("always", RuntimeWarning) +warnings.simplefilter("always") + + +class TestBootstrap(TestCase): + @classmethod + def setUpClass(cls): + warnings.simplefilter("always", RuntimeWarning) + warnings.simplefilter("always") + + np.random.seed(1234) + cls.y = np.random.randn(1000) + cls.x = np.random.randn(1000, 2) + cls.z = np.random.randn(1000, 1) + + cls.y_series = pd.Series(cls.y) + cls.z_df = pd.DataFrame(cls.z) + cls.x_df = pd.DataFrame(cls.x) + + def func(y): + return y.mean(axis=0) + + cls.func = func + + + def test_numpy(self): + x, y, z = self.x, self.y, self.z + bs = IIDBootstrap(y) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(kwdata.keys()), 0) + assert_equal(y[index], data[0]) + # Ensure no changes to original data + assert_equal(bs._args[0], y) + + bs = IIDBootstrap(y=y) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 0) + assert_equal(y[index], kwdata['y']) + assert_equal(y[index], bs.y) + # Ensure no changes to original data + assert_equal(bs._kwargs['y'], y) + + bs = IIDBootstrap(x, y, z) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 3) + assert_equal(len(kwdata.keys()), 0) + assert_equal(x[index], data[0]) + assert_equal(y[index], data[1]) + assert_equal(z[index], data[2]) + + bs = IIDBootstrap(x, y=y, z=z) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 1) + assert_equal(len(kwdata.keys()), 2) + assert_equal(x[index], data[0]) + assert_equal(y[index], kwdata['y']) + assert_equal(z[index], kwdata['z']) + assert_equal(y[index], bs.y) + assert_equal(z[index], bs.z) + + def test_pandas(self): + x, y, z = self.x_df, self.y_series, self.z_df + bs = IIDBootstrap(y) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(kwdata.keys()), 0) + assert_series_equal(y.iloc[index], data[0]) + # Ensure no changes to original data + assert_series_equal(bs._args[0], y) + + bs = IIDBootstrap(y=y) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 0) + assert_series_equal(y.iloc[index], kwdata['y']) + assert_series_equal(y.iloc[index], bs.y) + # Ensure no changes to original data + assert_series_equal(bs._kwargs['y'], y) + + bs = IIDBootstrap(x, y, z) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 3) + assert_equal(len(kwdata.keys()), 0) + assert_frame_equal(x.iloc[index], data[0]) + assert_series_equal(y.iloc[index], data[1]) + assert_frame_equal(z.iloc[index], data[2]) + + bs = IIDBootstrap(x, y=y, z=z) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 1) + assert_equal(len(kwdata.keys()), 2) + assert_frame_equal(x.iloc[index], data[0]) + assert_series_equal(y.iloc[index], kwdata['y']) + assert_frame_equal(z.iloc[index], kwdata['z']) + assert_series_equal(y.iloc[index], bs.y) + assert_frame_equal(z.iloc[index], bs.z) + + def test_mixed_types(self): + x, y, z = self.x_df, self.y_series, self.z + bs = IIDBootstrap(y, x=x, z=z) + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + index = bs.index + assert_equal(len(data), 1) + assert_equal(len(kwdata.keys()), 2) + assert_frame_equal(x.iloc[index], kwdata['x']) + assert_frame_equal(x.iloc[index], bs.x) + assert_series_equal(y.iloc[index], data[0]) + assert_equal(z[index], kwdata['z']) + assert_equal(z[index], bs.z) + + def test_state(self): + bs = IIDBootstrap(np.arange(100)) + bs.seed(23456) + state = bs.get_state() + for data, kwdata in bs.bootstrap(10): + final = data[0] + bs.seed(23456) + for data, kwdata in bs.bootstrap(10): + final_seed = data[0] + bs.set_state(state) + for data, kwdata in bs.bootstrap(10): + final_state = data[0] + assert_equal(final, final_seed) + assert_equal(final, final_state) + + def test_reset(self): + bs = IIDBootstrap(np.arange(100)) + state = bs.get_state() + for data, kwdata in bs.bootstrap(10): + final = data[0] + bs.reset() + state_reset = bs.get_state() + for data, kwdata in bs.bootstrap(10): + final_reset = data[0] + assert_equal(final, final_reset) + assert_equal(state, state_reset) + + def test_errors(self): + x = np.arange(10) + y = np.arange(100) + assert_raises(ValueError, IIDBootstrap, x, y) + assert_raises(ValueError, IIDBootstrap, index=x) + bs = IIDBootstrap(y) + + def func(y): + return y.mean(axis=0) + + assert_raises(ValueError, bs.conf_int, func, method='unknown') + assert_raises(ValueError, bs.conf_int, func, tail='dragon') + assert_raises(ValueError, bs.conf_int, func, size=95) + + def test_cov(self): + def func(y): + return y.mean(axis=0) + + bs = IIDBootstrap(self.x) + num_bootstrap = 10 + cov = bs.cov(func=func, reps=num_bootstrap, recenter=False) + bs.reset() + + results = np.zeros((num_bootstrap, 2)) + count = 0 + for data, kw in bs.bootstrap(num_bootstrap): + results[count] = data[0].mean(axis=0) + count += 1 + errors = results - self.x.mean(axis=0) + direct_cov = errors.T.dot(errors) / num_bootstrap + assert_allclose(cov, direct_cov) + + bs.reset() + cov = bs.cov(func=func, recenter=True, reps=num_bootstrap) + errors = results - results.mean(axis=0) + direct_cov = errors.T.dot(errors) / num_bootstrap + assert_allclose(cov, direct_cov) + + bs = IIDBootstrap(self.x_df) + cov = bs.cov(func=func, reps=num_bootstrap, recenter=False) + bs.reset() + results = np.zeros((num_bootstrap, 2)) + count = 0 + for data, kw in bs.bootstrap(num_bootstrap): + results[count] = data[0].mean(axis=0) + count += 1 + errors = results - self.x.mean(axis=0) + direct_cov = errors.T.dot(errors) / num_bootstrap + assert_allclose(cov, direct_cov) + + bs.reset() + cov = bs.cov(func=func, recenter=True, reps=num_bootstrap) + errors = results - results.mean(axis=0) + direct_cov = errors.T.dot(errors) / num_bootstrap + assert_allclose(cov, direct_cov) + + def test_smoke(self): + num_bootstrap = 20 + + def func(y): + return y.mean(axis=0) + + bs = StationaryBootstrap(13, self.y) + cov = bs.cov(func, reps=num_bootstrap) + bs = MovingBlockBootstrap(13, self.y) + cov = bs.cov(func, reps=num_bootstrap) + bs = CircularBlockBootstrap(13, self.y) + cov = bs.cov(func, reps=num_bootstrap) + bs = MovingBlockBootstrap(10, self.y) + cov = bs.cov(func, reps=num_bootstrap) + bs = CircularBlockBootstrap(10, self.y) + cov = bs.cov(func, reps=num_bootstrap) + + def test_conf_int_basic(self): + num_bootstrap = 200 + bs = IIDBootstrap(self.x) + + def func(y): + return y.mean(axis=0) + + ci = bs.conf_int(func, reps=num_bootstrap, size=0.90, method='basic') + bs.reset() + ci_u = bs.conf_int(func, tail='upper', reps=num_bootstrap, size=0.95, + method='basic') + bs.reset() + ci_l = bs.conf_int(func, tail='lower', reps=num_bootstrap, size=0.95, + method='basic') + bs.reset() + results = np.zeros((num_bootstrap, 2)) + count = 0 + for pos, kw in bs.bootstrap(num_bootstrap): + results[count] = func(*pos) + count += 1 + mu = func(self.x) + upper = mu + (mu - np.percentile(results, 5, axis=0)) + lower = mu + (mu - np.percentile(results, 95, axis=0)) + + assert_allclose(lower, ci[0, :]) + assert_allclose(upper, ci[1, :]) + + assert_allclose(ci[1, :], ci_u[1, :]) + assert_allclose(ci[0, :], ci_l[0, :]) + inf = np.empty_like(ci_l[0, :]) + inf.fill(np.inf) + assert_equal(inf, ci_l[1, :]) + assert_equal(-1 * inf, ci_u[0, :]) + + def test_conf_int_percentile(self): + num_bootstrap = 200 + bs = IIDBootstrap(self.x) + + def func(y): + return y.mean(axis=0) + + ci = bs.conf_int(func, reps=num_bootstrap, size=0.90, + method='percentile') + bs.reset() + ci_u = bs.conf_int(func, tail='upper', reps=num_bootstrap, size=0.95, + method='percentile') + bs.reset() + ci_l = bs.conf_int(func, tail='lower', reps=num_bootstrap, size=0.95, + method='percentile') + bs.reset() + results = np.zeros((num_bootstrap, 2)) + count = 0 + for pos, kw in bs.bootstrap(num_bootstrap): + results[count] = func(*pos) + count += 1 + + upper = np.percentile(results, 95, axis=0) + lower = np.percentile(results, 5, axis=0) + + assert_allclose(lower, ci[0, :]) + assert_allclose(upper, ci[1, :]) + + assert_allclose(ci[1, :], ci_u[1, :]) + assert_allclose(ci[0, :], ci_l[0, :]) + inf = np.empty_like(ci_l[0, :]) + inf.fill(np.inf) + assert_equal(inf, ci_l[1, :]) + assert_equal(-1 * inf, ci_u[0, :]) + + def test_conf_int_norm(self): + num_bootstrap = 200 + bs = IIDBootstrap(self.x) + + def func(y): + return y.mean(axis=0) + + ci = bs.conf_int(func, reps=num_bootstrap, size=0.90, + method='norm') + bs.reset() + ci_u = bs.conf_int(func, tail='upper', reps=num_bootstrap, size=0.95, + method='var') + bs.reset() + ci_l = bs.conf_int(func, tail='lower', reps=num_bootstrap, size=0.95, + method='cov') + bs.reset() + cov = bs.cov(func, reps=num_bootstrap) + mu = func(self.x) + std_err = np.sqrt(np.diag(cov)) + upper = mu + stats.norm.ppf(0.95) * std_err + lower = mu + stats.norm.ppf(0.05) * std_err + assert_allclose(lower, ci[0, :]) + assert_allclose(upper, ci[1, :]) + + assert_allclose(ci[1, :], ci_u[1, :]) + assert_allclose(ci[0, :], ci_l[0, :]) + inf = np.empty_like(ci_l[0, :]) + inf.fill(np.inf) + assert_equal(inf, ci_l[1, :]) + assert_equal(-1 * inf, ci_u[0, :]) + + def test_reuse(self): + num_bootstrap = 100 + bs = IIDBootstrap(self.x) + + def func(y): + return y.mean(axis=0) + + ci = bs.conf_int(func, reps=num_bootstrap) + old_results = bs._results.copy() + ci_reuse = bs.conf_int(func, reps=num_bootstrap, reuse=True) + results = bs._results + assert_equal(results, old_results) + assert_equal(ci, ci_reuse) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", RuntimeWarning) + warnings.simplefilter("always") + bs.conf_int(func, tail='lower', reps=num_bootstrap // 2, reuse=True) + assert_equal(len(w), 1) + + def test_studentized(self): + num_bootstrap = 20 + bs = IIDBootstrap(self.x) + bs.seed(23456) + + def func(y): + return y.mean(axis=0) + + def std_err_func(mu, y): + errors = y - mu + var = (errors ** 2.0).mean(axis=0) + return np.sqrt(var / y.shape[0]) + + ci = bs.conf_int(func, reps=num_bootstrap, method='studentized', + std_err_func=std_err_func) + bs.reset(seed=23456) + base = func(self.x) + results = np.zeros((num_bootstrap, 2)) + stud_results = np.zeros((num_bootstrap, 2)) + count = 0 + for pos, kwdata in bs.bootstrap(reps=num_bootstrap): + results[count] = func(*pos) + std_err = std_err_func(results[count], *pos) + stud_results[count] = (results[count] - base) / std_err + count += 1 + + assert_allclose(results, bs._results) + assert_allclose(stud_results, bs._studentized_results) + errors = results - results.mean(0) + std_err = np.sqrt(np.mean(errors ** 2.0, axis=0)) + ci_direct = np.zeros((2, 2)) + for i in range(2): + ci_direct[0, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 97.5) + ci_direct[1, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 2.5) + assert_allclose(ci, ci_direct) + + bs.reset(seed=23456) + ci = bs.conf_int(func, reps=num_bootstrap, method='studentized', + studentize_reps=50) + + bs.reset(seed=23456) + base = func(self.x) + results = np.zeros((num_bootstrap, 2)) + stud_results = np.zeros((num_bootstrap, 2)) + count = 0 + for pos, kwdata in bs.bootstrap(reps=num_bootstrap): + results[count] = func(*pos) + inner_bs = IIDBootstrap(*pos) + seed = bs.random_state.randint(2 ** 31 - 1) + inner_bs.seed(seed) + cov = inner_bs.cov(func, reps=50) + std_err = np.sqrt(np.diag(cov)) + stud_results[count] = (results[count] - base) / std_err + count += 1 + + assert_allclose(results, bs._results) + assert_allclose(stud_results, bs._studentized_results) + errors = results - results.mean(0) + std_err = np.sqrt(np.mean(errors ** 2.0, axis=0)) + + ci_direct = np.zeros((2, 2)) + for i in range(2): + ci_direct[0, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 97.5) + ci_direct[1, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 2.5) + assert_allclose(ci, ci_direct) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + bs.conf_int(func, reps=num_bootstrap, method='studentized', + std_err_func=std_err_func, reuse=True) + assert_equal(len(w), 1) + + def test_conf_int_bias_corrected(self): + num_bootstrap = 20 + bs = IIDBootstrap(self.x) + bs.seed(23456) + + def func(y): + return y.mean(axis=0) + + ci = bs.conf_int(func, reps=num_bootstrap, method='bc') + bs.reset(seed=23456) + ci_db = bs.conf_int(func, reps=num_bootstrap, method='debiased') + assert_equal(ci, ci_db) + base, results = bs._base, bs._results + p = np.zeros(2) + p[0] = np.mean(results[:, 0] < base[0]) + p[1] = np.mean(results[:, 1] < base[1]) + b = stats.norm.ppf(p) + q = stats.norm.ppf(np.array([0.025, 0.975])) + q = q[:, None] + percentiles = 100 * stats.norm.cdf(2 * b + q) + + ci = np.zeros((2, 2)) + for i in range(2): + ci[i] = np.percentile(results[:, i], list(percentiles[:, i])) + ci = ci.T + assert_allclose(ci_db, ci) + + def test_conf_int_parametric(self): + def param_func(x, params=None, state=None): + if state is not None: + mu = params + e = state.standard_normal(x.shape) + return (mu + e).mean(0) + else: + return x.mean(0) + + def semi_func(x, params=None): + if params is not None: + mu = params + e = x - mu + return (mu + e).mean(0) + else: + return x.mean(0) + + reps = 100 + bs = IIDBootstrap(self.x) + bs.seed(23456) + + ci = bs.conf_int(func=param_func, reps=reps, sampling='parametric') + bs.reset(seed=23456) + results = np.zeros((reps, 2)) + count = 0 + mu = self.x.mean(0) + for pos, kw in bs.bootstrap(100): + results[count] = param_func(*pos, params=mu, + state=bs.random_state) + count += 1 + assert_equal(bs._results, results) + + bs.reset(seed=23456) + ci = bs.conf_int(func=semi_func, reps=100, sampling='semi') + bs.reset(seed=23456) + results = np.zeros((reps, 2)) + count = 0 + for pos, kw in bs.bootstrap(100): + results[count] = semi_func(*pos, params=mu) + count += 1 + assert_allclose(bs._results, results) + + def test_extra_kwargs(self): + extra_kwargs = {'axis': 0} + bs = IIDBootstrap(self.x) + bs.seed(23456) + num_bootstrap = 100 + + def func(y, axis=0): + return y.mean(axis=axis) + + bs.cov(func, reps=num_bootstrap, extra_kwargs=extra_kwargs) + + bs = IIDBootstrap(axis=self.x) + bs.seed(23456) + assert_raises(ValueError, bs.cov, func, + reps=num_bootstrap, extra_kwargs=extra_kwargs) + + def test_jackknife(self): + def func(x): + return x.mean(axis=0) + + x = self.x + results = _loo_jackknife(func, len(x), (x,), {}) + + direct_results = np.zeros_like(x) + for i in range(len(x)): + if i == 0: + y = x[1:] + elif i == len(x - 1): + y = x[:-1] + else: + temp = list(x[:i]) + temp.extend(list(x[i + 1:])) + y = np.array(temp) + direct_results[i] = func(y) + assert_allclose(direct_results, results) + + def test_bca(self): + num_bootstrap = 20 + bs = IIDBootstrap(self.x) + bs.seed(23456) + + def func(y): + return y.mean(axis=0) + + ci_direct = bs.conf_int(func, reps=num_bootstrap, method='bca') + bs.reset(seed=23456) + base, results = bs._base, bs._results + p = np.zeros(2) + p[0] = np.mean(results[:, 0] < base[0]) + p[1] = np.mean(results[:, 1] < base[1]) + b = stats.norm.ppf(p) + b = b[:,None] + q = stats.norm.ppf(np.array([0.025, 0.975])) + + + base = func(self.x) + nobs = self.x.shape[0] + jk = _loo_jackknife(func, nobs, [self.x], {}) + u = (nobs - 1) * (jk - base) + u2 = np.sum(u * u, 0) + u3 = np.sum(u * u * u, 0) + a = u3 / (6.0 * (u2 ** 1.5)) + a = a[:, None] + percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q))) + + ci = np.zeros((2, 2)) + for i in range(2): + ci[i] = np.percentile(results[:, i], list(percentiles[i])) + ci = ci.T + assert_allclose(ci_direct, ci) + + def test_pandas_integer_index(self): + x = self.x + x_int = self.x_df.copy() + x_int.index = 10 + np.arange(x.shape[0]) + bs = IIDBootstrap(x,x_int) + bs.seed(23456) + for pdata, kwdata in bs.bootstrap(10): + assert_equal(pdata[0], pdata[1].values) + + diff --git a/arch/compat/numba.py b/arch/compat/numba.py new file mode 100644 index 0000000000..b7accb3757 --- /dev/null +++ b/arch/compat/numba.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import, division + +try: + from numba import jit +except ImportError: + def jit(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper \ No newline at end of file diff --git a/arch/tests/test_deprecations.py b/arch/tests/test_deprecations.py index 0b37786e2e..4dd190eb89 100644 --- a/arch/tests/test_deprecations.py +++ b/arch/tests/test_deprecations.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division + from unittest import TestCase import warnings @@ -35,4 +37,3 @@ def test_distribution(self): warnings.simplefilter("always") error_distribution = dist() assert_equal(len(w), 1) - \ No newline at end of file diff --git a/arch/tests/test_utils.py b/arch/tests/test_utils.py index ca83b3d6c3..36fb658de4 100644 --- a/arch/tests/test_utils.py +++ b/arch/tests/test_utils.py @@ -45,7 +45,6 @@ def test_ensure1d(self): y = DataFrame(np.reshape(np.arange(10), (5, 2))) assert_raises(ValueError, ensure1d, y, 'y') - def test_parse_dataframe(self): s = Series(np.arange(10.0), name='variable') out = parse_dataframe(s, 'y') @@ -92,7 +91,6 @@ def test_date_to_index(self): y = Series(np.arange(3000.0), index=dr) date_index = y.index import datetime as dt - # TODO: Finish Test index = date_to_index(date_index[0], date_index) assert_equal(index, 0) index = date_to_index(date_index[-1], date_index) @@ -113,13 +111,3 @@ def test_date_to_index(self): num_index = z.index assert_raises(ValueError, date_to_index, dt.datetime(2009, 8, 1), num_index) - - - - - - - - - - diff --git a/arch/univariate/mean.py b/arch/univariate/mean.py index db82d591df..b44a80ab28 100644 --- a/arch/univariate/mean.py +++ b/arch/univariate/mean.py @@ -68,18 +68,18 @@ class HARX(ARCHModel): Examples -------- >>> import numpy as np - >>> from arch.mean import HARX + >>> from arch.univariate import HARX >>> y = np.random.randn(100) >>> harx = HARX(y, lags=[1, 5, 22]) >>> res = harx.fit() >>> from pandas import Series, date_range >>> import datetime as dt - >>> index = date_range('2000-01-01', freq='M', periods=120) + >>> index = date_range('2000-01-01', freq='M', periods=y.shape[0]) >>> y = Series(y, name='y', index=index) >>> start = dt.datetime(2001, 1, 1) >>> end = dt.datetime(2008, 12, 31) - >>> HARX(y, lags=[1, 5, 22], hold_back=start, last_obs=end) + >>> har = HARX(y, lags=[1, 6], hold_back=start, last_obs=end) Notes ----- @@ -260,8 +260,7 @@ def simulate(self, params, nobs, burn=500, initial_value=None, x=None, Examples -------- >>> import numpy as np - >>> from arch.mean import HARX - >>> from arch.volatility import GARCH + >>> from arch.univariate import HARX, GARCH >>> harx = HARX(lags=[1, 5, 22]) >>> harx.volatility = GARCH() >>> harx_params = np.array([1, 0.2, 0.3, 0.4]) @@ -591,7 +590,7 @@ class ConstantMean(HARX): Examples -------- >>> import numpy as np - >>> from arch.mean import ConstantMean + >>> from arch.univariate import ConstantMean >>> y = np.random.randn(100) >>> cm = ConstantMean(y) >>> res = cm.fit() @@ -656,8 +655,7 @@ def simulate(self, params, nobs, burn=500, initial_value_vol=None): Basic data simulation with a constant mean and volatility >>> import numpy as np - >>> from arch.mean import ConstantMean - >>> from arch.volatility import GARCH + >>> from arch.univariate import ConstantMean, GARCH >>> cm = ConstantMean() >>> cm.volatility = GARCH() >>> cm_params = np.array([1]) @@ -712,7 +710,7 @@ class ZeroMean(HARX): Examples -------- >>> import numpy as np - >>> from arch.mean import ZeroMean + >>> from arch.univariate import ZeroMean >>> y = np.random.randn(100) >>> zm = ZeroMean(y) >>> res = zm.fit() @@ -777,13 +775,13 @@ def simulate(self, params, nobs, burn=500, initial_value_vol=None): -------- Basic data simulation with no mean and constant volatility - >>> from arch.mean import ZeroMean + >>> from arch.univariate import ZeroMean >>> zm = ZeroMean() >>> sim_data = zm.simulate([1.0], 1000) Simulating data with a non-trivial volatility process - >>> from arch.volatility import GARCH + >>> from arch.univariate import GARCH >>> zm.volatility = GARCH(p=1, o=1, q=1) >>> sim_data = zm.simulate([0.05, 0.1, 0.1, 0.8], 300) """ @@ -841,13 +839,13 @@ class ARX(HARX): Examples -------- >>> import numpy as np - >>> from arch.mean import ARX + >>> from arch.univariate import ARX >>> y = np.random.randn(100) >>> arx = ARX(y, lags=[1, 5, 22]) >>> res = arx.fit() Estimating an AR with GARCH(1,1) errors - >>> from arch.volatility import GARCH + >>> from arch.univariate import GARCH >>> arx.volatility = GARCH() >>> res = arx.fit(iter=0, disp='off') @@ -947,7 +945,7 @@ class LS(HARX): Examples -------- >>> import numpy as np - >>> from arch.mean import LS + >>> from arch.univariate import LS >>> y = np.random.randn(100) >>> x = np.random.randn(100,2) >>> ls = LS(y, x) @@ -1024,7 +1022,7 @@ def arch_model(y, x=None, mean='Constant', lags=0, vol='Garch', p=1, o=0, q=1, A basic GARCH(1,1) with a constant mean can be constructed using only the return data - >>> from arch.mean import arch_model + >>> from arch.univariate import arch_model >>> am = arch_model(returns) Alternative mean and volatility processes can be directly specified diff --git a/arch/univariate/recursions_python.py b/arch/univariate/recursions_python.py index 74a980efb9..09ac6efa70 100644 --- a/arch/univariate/recursions_python.py +++ b/arch/univariate/recursions_python.py @@ -8,15 +8,7 @@ import numpy as np from ..compat.python import range - -try: - from numba import jit -except ImportError: - def jit(func): - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - - return wrapper +from ..compat.numba import jit __all__ = ['harch_recursion', 'arch_recursion', 'garch_recursion', 'egarch_recursion'] diff --git a/arch/univariate/tests/test_mean.py b/arch/univariate/tests/test_mean.py index 5d33f56914..59b173edf6 100644 --- a/arch/univariate/tests/test_mean.py +++ b/arch/univariate/tests/test_mean.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division + import unittest import warnings @@ -371,16 +373,16 @@ def test_errors(self): def test_warnings(self): with warnings.catch_warnings(record=True) as w: ARX(self.y, lags=[1, 2, 3, 12], hold_back=5) - assert_equal(len(w), 1) + assert_equal(len(w), 1) with warnings.catch_warnings(record=True) as w: HARX(self.y, lags=[[1, 1, 1], [2, 5, 22]], use_rotated=True) - assert_equal(len(w), 1) + assert_equal(len(w), 1) har = HARX() with warnings.catch_warnings(record=True) as w: har.fit() - assert_equal(len(w), 1) + assert_equal(len(w), 1) def test_har_lag_specifications(self): """ Test equivalence of alternative lag specifications""" @@ -416,7 +418,7 @@ def test_starting_values(self): sv = np.array([1.0, 0.3, 0.8]) with warnings.catch_warnings(record=True) as w: am.fit(starting_values=sv, iter=0) - assert_equal(len(w), 1) + assert_equal(len(w), 1) def test_no_param_volatility(self): cm = ConstantMean(self.y) diff --git a/arch/univariate/tests/test_volatility.py b/arch/univariate/tests/test_volatility.py index 8d895771e1..ec1bcb365c 100644 --- a/arch/univariate/tests/test_volatility.py +++ b/arch/univariate/tests/test_volatility.py @@ -572,13 +572,13 @@ def test_warnings(self): studt = StudentsT() with warnings.catch_warnings(record=True) as w: garch.simulate(parameters, 1000, studt.simulate([4.0])) - assert_equal(len(w), 1) + assert_equal(len(w), 1) harch = HARCH(lags=[1, 5, 22]) parameters = np.array([0.1, 0.2, 0.4, 0.5]) with warnings.catch_warnings(record=True) as w: harch.simulate(parameters, 1000, studt.simulate([4.0])) - assert_equal(len(w), 1) + assert_equal(len(w), 1) def test_model_names(self): diff --git a/arch/univariate/volatility.py b/arch/univariate/volatility.py index 645ac4282b..35dd2a237e 100644 --- a/arch/univariate/volatility.py +++ b/arch/univariate/volatility.py @@ -325,7 +325,7 @@ class GARCH(VolatilityProcess): Examples -------- - >>> from arch.volatility import GARCH + >>> from arch.univariate import GARCH Standard GARCH(1,1) @@ -584,7 +584,7 @@ class HARCH(VolatilityProcess): Examples -------- - >>> from arch.volatility import HARCH + >>> from arch.univariate import HARCH Lag-1 HARCH, which is identical to an ARCH(1) >>> harch = HARCH() @@ -727,7 +727,7 @@ class ARCH(GARCH): -------- ARCH(1) process - >>> from arch.volatility import ARCH + >>> from arch.univariate import ARCH ARCH(5) process @@ -783,7 +783,7 @@ class EWMAVariance(VolatilityProcess): -------- Daily RiskMetrics EWMA process - >>> from arch.volatility import EWMAVariance + >>> from arch.univariate import EWMAVariance >>> rm = EWMAVariance(0.94) Notes @@ -872,7 +872,7 @@ class RiskMetrics2006(VolatilityProcess): -------- Daily RiskMetrics 2006 process - >>> from arch.volatility import RiskMetrics2006 + >>> from arch.univariate import RiskMetrics2006 >>> rm = RiskMetrics2006() Notes @@ -1031,7 +1031,7 @@ class EGARCH(VolatilityProcess): Examples -------- - >>> from arch.volatility import EGARCH + >>> from arch.univariate import EGARCH Symmetric EGARCH(1,1) diff --git a/doc/source/1.0.rst b/doc/source/1.0.rst deleted file mode 100644 index a141d5bfaa..0000000000 --- a/doc/source/1.0.rst +++ /dev/null @@ -1,6 +0,0 @@ -Changes since 1.0 ------------------ - -- Refactored to move the univariate routines to `arch.univariate` and added - deprecation warnings in the old locations -- Enable `numba` jit compilation in the python recursions diff --git a/doc/source/bootstrap/background.rst b/doc/source/bootstrap/background.rst new file mode 100644 index 0000000000..0a226f98fd --- /dev/null +++ b/doc/source/bootstrap/background.rst @@ -0,0 +1,17 @@ +References +---------- + +The bootstrap is a large area with a number of high-quality books. Leading +references include + +[1] Efron + +[2] ?? + +[3] Davidson Hinkley + +[4] Romano Wolf + +Articles used in the creation of this module include + +[4] Efron's BCA diff --git a/doc/source/bootstrap/bootstrap.rst b/doc/source/bootstrap/bootstrap.rst new file mode 100644 index 0000000000..5af12b324f --- /dev/null +++ b/doc/source/bootstrap/bootstrap.rst @@ -0,0 +1,19 @@ +Bootstrapping +------------- +The bootstrap module provides both high- and low-level interfaces for +bootstrapping data contained in NumPy arrays or pandas Series or DataFrames. + +All bootstraps have the same interfaces and only differ in their name, setup +parameters and the (internally generated) sampling scheme. + +.. toctree:: + :maxdepth: 1 + + Confidence Interval Construction + Parameter Covariance Estimation + Low-level Interface + Bootstraps for IID data + Bootstraps for Time-series Data + Background and References + + diff --git a/doc/source/bootstrap/confidence-intervals.rst b/doc/source/bootstrap/confidence-intervals.rst new file mode 100644 index 0000000000..0dbe5e3f14 --- /dev/null +++ b/doc/source/bootstrap/confidence-intervals.rst @@ -0,0 +1,247 @@ +Confidence Intervals +-------------------- +The confidence interval function allows three types of confidence intervals to +be constructed: + +* Nonparametric, which only resample the data +* Semi-parametric, which use resampled residuals +* Parametric, which simulate residuals + +Confidence intervals can then be computed using one of 5 methods: + +* Basic (`basic`) +* Percentile (`percentile`) +* Studentized (`studentized`) +* Bias-corrected (`bc`, `bias-corrected` or `debiased`) +* Bias-corrected and accelerated (`bca`) + +Finally, the studentized bootstrap can be conducted using either an analytic +parameter variance formula or using a nested-bootstrap. + +Setup +===== + +All examples will construct confidence intervals for the Sharp ratio of the +S&P 500, which is the ratio of the assumalized mean to the annualized standard +deviation. The parameters will be the annualized mean, the annualized standard +deviation and the Sharp ratio. + +The setup makes use of return data downloaded from Yahoo! + +.. ipython:: python + + import datetime as dt + import pandas.io.data as web + start = dt.datetime(1951,1,1) + end = dt.datetime(2014,1,1) + sp500 = web.get_data_yahoo('^GSPC', start=start, end=end) + start = sp500.index.min() + end = sp500.index.max() + monthly_dates = pd.date_range(start, end, freq='M') + monthly = sp500.reindex(monthly_dates, method='ffill') + returns = 100 * monthly['Adj Close'].pct_change().dropna() + +The main function used will return a 3-element array containing the parameters. + +.. ipython:: python + + def estimate_params(x) + mu, sigma = x.mean(), np.sqrt(12 * x.var()) + return mu, sigma, mu / sigma + +Confidence Interval Types +========================= + +Three types of confidence intervals can be computed. The simplest are +non-parametric which only make use of parameter estimates from both the original +data as well as the bootstrap resamples. Semi-parametric mix the original data +with a limited form or resamples, usually for residuals. Finally, parametric +bootstrap confidence intervals make use of a parametric distribution to +construct "as-if" exact confidence intervals. + +Nonparametric Confidence Intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Non-parametric sampling is the simplest method to construct confidence +intervals. + +This example makes use of the percentile bootstrap which is conceptually the +simplest method - it constructs many bootstrap replications and simply return +the required order statistics from these empirical distributions. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='percentile') + +.. note:: + + While returns have little serial correlation, squared returns are highly + persistent. The IID bootstrap is not a good choice here. Instead a + time-series bootstrap with an appropriately chosen block size should be + used. + +Semi-parametric Confidence Intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example will construct semi-parametric confidence intervals for the +parameters of a basic CAP-M style regression of the returns of IBM on the +S&P 500. + +.. ipython:: python + + +Parametric Confidence Intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Parametric confidence intervals require a complete model specified up to the +distribution of the errors. The parametric confidence interval example will +start with the same example as in the semi-parametric confidence interval, but +will also assume the the errors are homoskedastic and normally distributed. + + +.. ipython:: python + +Confidence Interval Methods +=========================== + +Basic (`basic`) +~~~~~~~~~~~~~~~ + +Basic confidence intervals construct many bootstrap replications +:math:`\hat{\theta}_b^\star` and then constructs the confidence interval as + +.. math:: + + \left[\hat{\theta} + \left(\hat{\theta} - \hat{\theta}^{\star}_{u} \right), + \hat{\theta} + \left(\hat{\theta} - \hat{\theta}^{\star}_{l} \right) \right] + +where :math:`\hat{\theta}^{\star}_{l}` and :math:`\hat{\theta}^{\star}_{u}` are +the :math:`\alpha/2` and :math:`1-\alpha/2` empirical quantiles of the bootstrap +distribution. When :math:`\theta` is a vector, the empirical quantiles are +computed element-by-element. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='basic') + + +Percentile (`percentile`) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Basic confidence intervals construct many bootstrap replications +:math:`\hat{\theta}_b^\star` and then constructs the confidence interval as + +.. math:: + + \left[\hat{\theta}^{\star}_{l}, \hat{\theta}^{\star}_{u} \right] + +where :math:`\hat{\theta}^{\star}_{l}` and :math:`\hat{\theta}^{\star}_{u}` are +the :math:`\alpha/2` and :math:`1-\alpha/2` empirical quantiles of the bootstrap +distribution. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='percentile') + +Asymptotic Normal Approximation (`norm`, `cov` or `var`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The asymptotic normal approximation methos simply estimates the covairance of +the parameters and then uses this with the usual quantiles from a normal +distribution. The confidence interval is then + +.. math:: + + \left[\hat{\theta} + \hat{\sigma}\Phi^{-1}\left(\alpha/2\right), + \hat{\theta} - \hat{\sigma}\Phi^{-1}\left(\alpha/2\right), \right] + +where :math:`\hat{\sigma}` is the bootstrap estimate of the parameter standard +error. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='norm') + +Studentized (`studentized`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The studentized bootstrap may be more accurate than some of the other methods. +The studentized bootstrap makes use of either a standard error function, when +parameter standard errors can be analytically computed, or a nested bootstrap, +to bootstrap studentized versions of the original statistic. This can produce +higher-order refinements in some circumstances. + +The confidence interval is then + +.. math:: + + \left[\hat{\theta} + \hat{\sigma}\hat{G}^{-1}\left(\alpha/2\right), + \hat{\theta} - \hat{\sigma}\hat{G}^{-1}\left(\alpha/2\right), \right] + +where :math:`\hat{G}` is the estimated quantile function for the studentized +data. + +The version that uses a nested bootstrap is simple to implement although it can +be slow since it requires :math:`B` inner bootstraps of each of the :math:`B` +outer bootstraps. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='studentized') + +Demonstrating the use of the standard error function is simpler in the CAP-M +example. Assuming the data are homoskedastic, the parameter standard errors +can be computed by + +.. ipython:: python + + def ols_se(params, y, x): + e = y - x.dot(params + nobs = y.shape[0] + sigma2 = e.dot(e) / (nobs - x.shape[1]) + xpx = x.T.dot(x) / nobs + vcv = sigma2 * np.inv(xpx) + return np.sqrt(np.diag(vcv)) + +.. note:: + + Standard error functions must return a 1-d array with the same number + of element as params + +.. note:: + + Standard error functions must match the patters + ``std_err_func(params, *args, **kwargs)`` where ``params`` is an array + of estimated parameters constructed using ``*args`` and ``**kwargs``. + +Bias-corrected (`bc`, `bias-corrected` or `debiased`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bias corrected bootstrap makes use of a bootstrap estimate of the bias to +improve confidence intervals. + + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='bc') + + +Bias-corrected and accelerated (`bca`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + ci = bs.conf_int(estimate_params, 1000, method='bca') diff --git a/doc/source/bootstrap/iid-bootstraps.rst b/doc/source/bootstrap/iid-bootstraps.rst new file mode 100644 index 0000000000..d0ed80cc56 --- /dev/null +++ b/doc/source/bootstrap/iid-bootstraps.rst @@ -0,0 +1,9 @@ +.. py:currentmodule:: arch.bootstrap + +Independent, Identical Distributed Data (i.i.d.) +------------------------------------------------ +`IIDBootstrap` is the standard bootstrap that is appropriate for data that is +either i.i.d. or at least not serially dependant. + +.. autoclass:: IIDBootstrap + :members: conf_int, cov, apply, bootstrap, reset, seed, set_state, get_state diff --git a/doc/source/bootstrap/low-level-interface.rst b/doc/source/bootstrap/low-level-interface.rst new file mode 100644 index 0000000000..b84382592e --- /dev/null +++ b/doc/source/bootstrap/low-level-interface.rst @@ -0,0 +1,70 @@ +Low-level Interfaces +-------------------- + +Constructing Parameter Estimates +================================ +The bootstrap method apply can be use to directly compute parameter estimates +from a function and the bootstrapped data. + +This example makes use of monthly S&P 500 data. + +.. ipython:: python + + import datetime as dt + import pandas.io.data as web + start = dt.datetime(1951,1,1) + end = dt.datetime(2014,1,1) + sp500 = web.get_data_yahoo('^GSPC', start=start, end=end) + start = sp500.index.min() + end = sp500.index.max() + monthly_dates = pd.date_range(start, end, freq='M') + monthly = sp500.reindex(monthly_dates, method='ffill') + returns = 100 * monthly['Adj Close'].pct_change().dropna() + +The function will compute the Sharp ratio -- the (annualized) mean divided by +the (annualized) standard deviation. + +.. ipython:: python + + import numpy as np + def sr(x): + return 12 * x.mean() / np.sqrt(12 * x.var()) + +The bootstrapped Sharp ratios can be directly computed using `apply`. + +.. ipython:: python + + from arch.bootstrap import IIDBootstrap + bs = IIDBootstrap(returns) + sharp_ratios = bs.apply(sr, 1000) + import seaborn + pd.DataFrame(sharp_ratios).hist(bins=20) + +.. image:: bootstrap_histogram.png + +The Bootstrap Iterator +====================== +The lowest-level method to use a boostrap is the iterator. This is used +internally in all higher-level methods that estimate a function using multiple +bootstrap replications. The iterator returns a two-element tuple where the +first element contains all positional arguments (in the order input) passed when +constructing the bootstrap instance, and the second contains the all keyword +arguments passed when constructing the instance. + +This example makes uses of simulated data to demonstrate how to use the +bootstrap iterator. + +.. ipython:: python + + import pandas as pd + import numpy as np + from arch.bootstrap import IIDBootstrap + x = np.random.randn(1000,2) + y = pd.DataFrame(np.random.randn(1000,3)) + z = np.random.rand(1000,10) + bs = IIDBootstrap(x, y=y, z=z) + + for pos, kw in bs.bootstrap() + xstar = pos[0] # pos is always a tuple, even when a singleton + ystar = kw['y'] # A dictionary + zstar = kw['z'] # A dictionary \ No newline at end of file diff --git a/doc/source/bootstrap/parameter-covariance-estimation.rst b/doc/source/bootstrap/parameter-covariance-estimation.rst new file mode 100644 index 0000000000..e2fa3dfd78 --- /dev/null +++ b/doc/source/bootstrap/parameter-covariance-estimation.rst @@ -0,0 +1,2 @@ +Covariance Estimation +===================== diff --git a/doc/source/bootstrap/timeseries-bootstraps.rst b/doc/source/bootstrap/timeseries-bootstraps.rst new file mode 100644 index 0000000000..221579b35a --- /dev/null +++ b/doc/source/bootstrap/timeseries-bootstraps.rst @@ -0,0 +1,29 @@ +.. py:currentmodule:: arch.bootstrap + +Time-series Bootstraps +---------------------- +Bootstraps for time-series data come in a variety of forms. The three contained +in this package are the stationary boostrap, which uses blocks with an +exponentially distributed lengths, the circular block bootstrap, which uses +fixed length blocks, and the moving block bootstrap which also uses fixed +length blocks. The moving block bootstrap does *not* wrap around and so +observations near the start or end of the series will be systematically +under-sampled. It is not recommended for this reason. + +The Stationary Bootstrap +======================== + +.. autoclass:: StationaryBootstrap + :members: conf_int, cov, apply, bootstrap, reset, seed, set_state, get_state + +The Circular Block Bootstrap +============================ + +.. autoclass:: CircularBlockBootstrap + :members: conf_int, cov, apply, bootstrap, reset, seed, set_state, get_state + +The Moving Block Bootstrap +========================== + +.. autoclass:: MovingBlockBootstrap + :members: conf_int, cov, apply, bootstrap, reset, seed, set_state, get_state diff --git a/doc/source/changes.rst b/doc/source/changes.rst new file mode 100644 index 0000000000..0dfe070c29 --- /dev/null +++ b/doc/source/changes.rst @@ -0,0 +1 @@ +.. include:: changes/1.0.txt diff --git a/doc/source/changes/1.0.txt b/doc/source/changes/1.0.txt new file mode 100644 index 0000000000..3088686182 --- /dev/null +++ b/doc/source/changes/1.0.txt @@ -0,0 +1,11 @@ +Changes since 1.0 +----------------- + +- Refactored to move the univariate routines to `arch.univariate` and added + deprecation warnings in the old locations +- Enable `numba` jit compilation in the python recursions +- Added a boostrap framework, which will be used in future versions. + The boostrap framework is general purpose and can be used via high-level + functions such as `conf_int` or `cov`, or as a low level iterator using + `bootstrap` + diff --git a/doc/source/conf.py b/doc/source/conf.py index dab562cf5a..9d41ecbc02 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -77,6 +77,14 @@ def __getattr__(cls, name): 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.mathjax', + 'ipython_sphinxext.ipython_directive', + 'ipython_sphinxext.ipython_console_highlighting', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.pngmath', + 'sphinx.ext.ifconfig', + 'matplotlib.sphinxext.only_directives', + 'matplotlib.sphinxext.plot_directive', ] # Add any paths that contain templates here, relative to this directory. @@ -315,3 +323,22 @@ def __getattr__(cls, name): # If true, do not generate a @detailmenu in the "Top" node's menu. #texinfo_no_detailmenu = False + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'statsmodels': ('http://statsmodels.sourceforge.net/devel/', None), + 'matplotlib': ('http://matplotlib.org/', None), + 'python': ('http://docs.python.org/', None), + 'numpy': ('http://docs.scipy.org/doc/numpy', None), + 'pandas': ('FIXME', None) +} + +ipython_exec_lines = [ + 'import numpy as np', + 'import pandas as pd', + # This ensures correct rendering on system with console encoding != utf8 + # (windows). It forces pandas to encode its output reprs using utf8 + # whereever the docs are built. The docs' target is the browser, not + # the console, so this is fine. + 'pd.options.display.encoding="utf8"' + ] \ No newline at end of file diff --git a/doc/source/index.rst b/doc/source/index.rst index 86009a479f..1df81bcdf9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,3 +1,8 @@ +ARCH +==== +The ARCH toolbox currently contains routines for univariate +volatility models and bootstrapping. + ARCH Models =========== ARCH models are a popular class of volatility models that use observed values @@ -54,10 +59,9 @@ In either case, model parameters are estimated using .. toctree:: :maxdepth: 1 - Examples - Mean Models - Volatility Processes - Distributions + Univariate Models + Bootstrapping + Change Log Model Constructor diff --git a/doc/source/background.rst b/doc/source/univariate/background.rst similarity index 100% rename from doc/source/background.rst rename to doc/source/univariate/background.rst diff --git a/doc/source/distribution.rst b/doc/source/univariate/distribution.rst similarity index 100% rename from doc/source/distribution.rst rename to doc/source/univariate/distribution.rst diff --git a/doc/source/mean.rst b/doc/source/univariate/mean.rst similarity index 100% rename from doc/source/mean.rst rename to doc/source/univariate/mean.rst diff --git a/doc/source/univariate/univariate.rst b/doc/source/univariate/univariate.rst new file mode 100644 index 0000000000..f618967b21 --- /dev/null +++ b/doc/source/univariate/univariate.rst @@ -0,0 +1,11 @@ +Univariate Volatility Models +---------------------------- + +.. toctree:: + :maxdepth: 1 + + Examples + Mean Models + Volatility Processes + Distributions + Background and References diff --git a/doc/source/volatility.rst b/doc/source/univariate/volatility.rst similarity index 100% rename from doc/source/volatility.rst rename to doc/source/univariate/volatility.rst diff --git a/examples/examples.ipynb b/examples/univariate_volatility_modeling.ipynb similarity index 100% rename from examples/examples.ipynb rename to examples/univariate_volatility_modeling.ipynb diff --git a/setup.py b/setup.py index 05142dc4aa..cea40ce6ef 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ import re from distutils.version import StrictVersion -from setuptools import setup, Extension +from setuptools import setup, Extension, find_packages from setuptools.dist import Distribution from Cython.Distutils import build_ext @@ -21,6 +21,8 @@ if not '--no-binary' in sys.argv: ext_modules.append(Extension("arch.univariate.recursions", ["./arch/univariate/recursions.pyx"])) + ext_modules.append(Extension("arch.bootstrap._samplers", + ["./arch/bootstrap/_samplers.pyx"])) else: sys.argv.remove('--no-binary') @@ -110,28 +112,39 @@ def strip_rc(version): try: from IPython.nbformat import current as nbformat from IPython.nbconvert import RSTExporter - - f = open(os.path.join(cwd, 'examples', 'examples.ipynb'), 'rt') - example_nb = f.read() - f.close() - - example_nb = nbformat.reads_json(example_nb) - rst_export = RSTExporter() - (body, resources) = rst_export.from_notebook_node(example_nb) - f = open(os.path.join(cwd, 'doc', 'source', 'examples.rst'), 'wt') - f.write(body) - f.close() - for key in resources['outputs'].keys(): - if key.endswith('.png'): - f = open(os.path.join(cwd, 'doc', 'source', key), 'wb') - f.write(resources['outputs'][key]) - f.close() + import glob + notebooks = glob.glob(os.path.join(cwd, 'examples', '*.ipynb')) + for notebook in notebooks: + f = open(notebook, 'rt') + example_nb = f.read() + f.close() + + example_nb = nbformat.reads_json(example_nb) + rst_export = RSTExporter() + (body, resources) = rst_export.from_notebook_node(example_nb) + rst_path = os.path.join(cwd, 'doc', 'source') + path_parts = os.path.split(notebook) + nb_filename = path_parts[-1] + nb_filename = nb_filename.split('.')[0] + source_dir = nb_filename.split('_')[0] + rst_filename = os.path.join(cwd, 'doc', 'source', + source_dir, nb_filename + '.rst') + f = open(rst_filename, 'wt') + f.write(body) + f.close() + for key in resources['outputs'].keys(): + if key.endswith('.png'): + resource_filename = os.path.join(cwd, 'doc', 'source', + source_dir, key) + f = open(resource_filename, 'wb') + f.write(resources['outputs'][key]) + f.close() except: import warnings warnings.warn('Unable to convert examples.ipynb to examples.rst. This only' - 'affects documentation generation and not operation of the ' - 'module.') + 'affects documentation generation and not the operation of the' + ' module.') # Read version information from plain VERSION file version = None @@ -153,8 +166,7 @@ def strip_rc(version): author='Kevin Sheppard', author_email='kevin.sheppard@economics.ox.ac.uk', url='http://github.com/bashtage/arch', - packages=['arch', 'arch.tests', 'arch.compat', - 'arch.univariate', 'arch.univariate.tests'], + packages=find_packages(), ext_modules=ext_modules, package_dir={'arch': './arch'}, cmdclass={'build_ext': build_ext},