BUG: Fix bug in multistep simulation forecast
Correct a bug when using exog variables in multistep simulation-based
forecasts by correcting the index location.

closes #551
bashtage committed Feb 4, 2022
1 parent 98ddb9a commit 4e25259
Showing 33 changed files with 248 additions and 151 deletions.
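
For orientation before the per-file diffs, here is a minimal sketch of the call pattern the fix targets: a simulation-based multistep forecast with exogenous regressors. The data and parameter values are illustrative, not taken from the commit.

import numpy as np
import pandas as pd
from arch import arch_model

rs = np.random.RandomState(0)
x = pd.DataFrame({"x0": rs.standard_normal(1000)})
y = 0.5 + 2.0 * x["x0"] + rs.standard_normal(1000)

mod = arch_model(y, x=x, mean="ARX", lags=0)
res = mod.fit(disp="off")

# Out-of-sample exog paths, one row per forecast origin: shape (nforecast, horizon).
# Before this fix, simulation-based forecasts could read the exog values from the
# wrong index location when start < nobs.
x_oos = {"x0": np.zeros((3, 5))}
fc = res.forecast(horizon=5, x=x_oos, start=997, method="simulation", reindex=False)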
16 changes: 8 additions & 8 deletions arch/bootstrap/base.py
@@ -118,10 +118,10 @@ def _single_optimal_block(x: Float64Array) -> Tuple[float, float]:
lam = 1 if k / m <= 1 / 2 else 2 * (1 - k / m)
g += 2 * lam * k * acv[k]
lr_acv += 2 * lam * acv[k]
- d_sb = 2 * lr_acv ** 2
- d_cb = 4 / 3 * lr_acv ** 2
- b_sb = ((2 * g ** 2) / d_sb) ** (1 / 3) * nobs ** (1 / 3)
- b_cb = ((2 * g ** 2) / d_cb) ** (1 / 3) * nobs ** (1 / 3)
+ d_sb = 2 * lr_acv**2
+ d_cb = 4 / 3 * lr_acv**2
+ b_sb = ((2 * g**2) / d_sb) ** (1 / 3) * nobs ** (1 / 3)
+ b_cb = ((2 * g**2) / d_cb) ** (1 / 3) * nobs ** (1 / 3)
b_sb = min(b_sb, b_max)
b_cb = min(b_cb, b_max)
return b_sb, b_cb
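
The exponent changes in this hunk appear to be pure Black-style reformatting. For context, the block sizes computed by _single_optimal_block back the public optimal_block_length helper; a minimal usage sketch:

import numpy as np
from arch.bootstrap import optimal_block_length

rs = np.random.RandomState(0)
x = rs.standard_normal(1000)
# One row per series, with "stationary" and "circular" block-length estimates
print(optimal_block_length(x))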
@@ -222,8 +222,8 @@ def _get_acceleration(jk_params: Float64Array) -> float:
Value of the acceleration parameter "a" used in the BCa bootstrap.
"""
u = jk_params.mean() - jk_params
- numer = np.sum(u ** 3, 0)
- denom = 6 * (np.sum(u ** 2, 0) ** (3.0 / 2.0))
+ numer = np.sum(u**3, 0)
+ denom = 6 * (np.sum(u**2, 0) ** (3.0 / 2.0))
small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
if small.any():
message = "Jackknife variance estimate {jk_var} is too small to use BCa"
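
_get_acceleration supplies the acceleration term for BCa confidence intervals. A minimal sketch of the public entry point, assuming IID data; names and values are illustrative:

import numpy as np
from arch.bootstrap import IIDBootstrap

x = np.random.RandomState(23456).standard_normal(500)
bs = IIDBootstrap(x, seed=0)
# method="bca" uses the bias and acceleration corrections computed above
ci = bs.conf_int(np.mean, reps=1000, method="bca")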
@@ -1100,7 +1100,7 @@ def _construct_bootstrap_estimates(
studentized_results[count] = (results[count] - base) / std_err
elif studentize_reps > 0:
# Set the seed to ensure reproducibility
- seed = _get_random_integers(self._generator, 2 ** 31 - 1, size=1)
+ seed = _get_random_integers(self._generator, 2**31 - 1, size=1)
# Need new bootstrap of same type
nested_bs = self.clone(*pos_data, **kw_data, seed=seed[0])
cov = nested_bs.cov(func, studentize_reps, extra_kwargs=extra_kwargs)
@@ -1275,7 +1275,7 @@ def var(
else:
errors = results - base

- return (errors ** 2).sum(0) / reps
+ return (errors**2).sum(0) / reps

def update_indices(self) -> BootstrapIndexT:
"""
6 changes: 3 additions & 3 deletions arch/bootstrap/multiple_comparison.py
@@ -223,7 +223,7 @@ def _compute_r(self) -> None:
bootstrapped_mean_losses[j] = mean_losses_star - mean_losses_star.T
# Recenter
bootstrapped_mean_losses -= loss_diffs
- variances = (bootstrapped_mean_losses ** 2).mean(0)
+ variances = (bootstrapped_mean_losses**2).mean(0)
variances += np.eye(self.k) # Prevent division by 0
self._variances = variances
# Standardize everything
@@ -283,7 +283,7 @@ def _compute_max(self) -> None:
incl_bs_grand_loss = incl_bs_avg_loss_err.mean(1)
# Reshape for broadcast
incl_bs_avg_loss_err -= incl_bs_grand_loss[:, None]
- std_devs = np.sqrt((incl_bs_avg_loss_err ** 2).mean(0))
+ std_devs = np.sqrt((incl_bs_avg_loss_err**2).mean(0))
simulated_test_stat = incl_bs_avg_loss_err / std_devs
simulated_test_stat = np.max(simulated_test_stat, 1)
loss_diffs = incl_losses.mean(0)
@@ -696,7 +696,7 @@ def _compute_variance(self) -> None:
else:
t = self.t
p = 1.0 / self.block_size
- variances = np.sum(demeaned ** 2, 0) / t
+ variances = np.sum(demeaned**2, 0) / t
for i in range(1, t):
kappa = ((1.0 - (i / t)) * ((1 - p) ** i)) + (
(i / t) * ((1 - p) ** (t - i))
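
The kernel-weighted variances in _compute_variance feed the SPA test statistic. A minimal usage sketch with synthetic loss series; all names are illustrative:

import numpy as np
from arch.bootstrap import SPA

rs = np.random.RandomState(0)
benchmark_losses = rs.standard_normal(1000) ** 2
model_losses = rs.standard_normal((1000, 5)) ** 2
spa = SPA(benchmark_losses, model_losses)
spa.compute()
# Lower, consistent, and upper p-values for "no model improves on the benchmark"
print(spa.pvalues)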
8 changes: 4 additions & 4 deletions arch/covariance/kernel.py
@@ -323,7 +323,7 @@ def _alpha_q(self) -> float:
sig_j = float(np.squeeze(v[j:].T @ v[: (nobs - j)])) / nobs
scale = 1 + (j != 0)
f_0s += scale * sig_j
- f_qs += scale * j ** q * sig_j
+ f_qs += scale * j**q * sig_j
return (f_qs / f_0s) ** 2

@cached_property
@@ -496,7 +496,7 @@ def rate(self) -> float:
def _weights(self) -> Float64Array:
bw = self.bandwidth
x = np.arange(int(bw + 1), dtype="double") / (bw + 1)
- return 1 - x ** 2
+ return 1 - x**2


_parzen_geometric_formula = """\
@@ -552,7 +552,7 @@ def rate(self) -> float:
def _weights(self) -> Float64Array:
bw = self.bandwidth
x = np.arange(int(bw + 1), dtype="double") / (bw + 1)
- return 1 / (1 + x ** 2)
+ return 1 / (1 + x**2)


_tukey_hamming_formula = """\
@@ -670,7 +670,7 @@ def _weights(self) -> Float64Array:
if bw > 0:
x = np.arange(1, nobs) / bw
z = 6 * np.pi * x / 5
- w[1:] = 3 / z ** 2 * (np.sin(z) / z - np.cos(z))
+ w[1:] = 3 / z**2 * (np.sin(z) / z - np.cos(z))
return w


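
The _weights methods above define each kernel's lag weights for long-run covariance estimation. A minimal sketch using one of the public estimators in arch.covariance.kernel, assuming automatic bandwidth selection:

import numpy as np
from arch.covariance.kernel import Parzen

rs = np.random.RandomState(0)
x = rs.standard_normal((500, 2))
est = Parzen(x)
# CovarianceEstimate with short-run and long-run (HAC) components
print(est.cov.long_run)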
12 changes: 6 additions & 6 deletions arch/tests/bootstrap/test_bootstrap.py
@@ -346,7 +346,7 @@ def test_studentized(bs_setup, seed):

def std_err_func(mu, y):
errors = y - mu
- var = (errors ** 2.0).mean(axis=0)
+ var = (errors**2.0).mean(axis=0)
return np.sqrt(var / y.shape[0])

ci = bs.conf_int(
@@ -369,7 +369,7 @@ def std_err_func(mu, y):
assert_allclose(results, bs._results)
assert_allclose(stud_results, bs._studentized_results)
errors = results - results.mean(0)
- std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
+ std_err = np.sqrt(np.mean(errors**2.0, axis=0))
ci_direct = np.zeros((2, 2))
for i in range(2):
ci_direct[0, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 97.5)
@@ -389,9 +389,9 @@ def std_err_func(mu, y):
for pos, _ in bs.bootstrap(reps=num_bootstrap):
results[count] = bs_setup.func(*pos)
if isinstance(bs._generator, RandomState):
- seed = bs._generator.randint(2 ** 31 - 1)
+ seed = bs._generator.randint(2**31 - 1)
else:
- seed = bs._generator.integers(2 ** 31 - 1)
+ seed = bs._generator.integers(2**31 - 1)
inner_bs = IIDBootstrap(*pos, seed=seed)
cov = inner_bs.cov(bs_setup.func, reps=50)
std_err = np.sqrt(np.diag(cov))
@@ -401,7 +401,7 @@ def std_err_func(mu, y):
assert_allclose(results, bs._results)
assert_allclose(stud_results, bs._studentized_results)
errors = results - results.mean(0)
- std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
+ std_err = np.sqrt(np.mean(errors**2.0, axis=0))

ci_direct = np.zeros((2, 2))
for i in range(2):
@@ -579,7 +579,7 @@ def test_bca(bs_setup):
u = jk.mean() - jk
u2 = np.sum(u * u, 0)
u3 = np.sum(u * u * u, 0)
- a = u3 / (6.0 * (u2 ** 1.5))
+ a = u3 / (6.0 * (u2**1.5))
a = a[:, None]
percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))

6 changes: 3 additions & 3 deletions arch/tests/bootstrap/test_multiple_comparison.py
@@ -74,7 +74,7 @@ def test_variances_and_selection(spa_data):
kernel_weights[i] = ((1.0 - (i / t)) * ((1 - p) ** i)) + (
(i / t) * ((1 - p) ** (t - i))
)
- direct_vars = (demeaned ** 2).sum(0) / t
+ direct_vars = (demeaned**2).sum(0) / t
for i in range(1, t):
direct_vars += (
2 * kernel_weights[i] * (demeaned[: t - i, :] * demeaned[i:, :]).sum(0) / t
@@ -344,7 +344,7 @@ def r_step(losses, indices):
for n in range(b):
# Compute bootstrapped versions
bs_diffs[n] = loss_diffs_vec[indices[n]].mean()
- variances[j, i] = variances[i, j] = (bs_diffs ** 2).mean()
+ variances[j, i] = variances[i, j] = (bs_diffs**2).mean()
std_diffs = np.abs(bs_diffs) / np.sqrt(variances[i, j])
stat_candidates.append(std_diffs)
stat_candidates = np.array(stat_candidates).T
@@ -388,7 +388,7 @@ def max_step(losses, indices):
# Compute bootstrapped versions
bs_loss_errors = loss_errors[indices[n]]
stats[n] = bs_loss_errors.mean(0) - bs_loss_errors.mean()
- variances = (stats ** 2).mean(0)
+ variances = (stats**2).mean(0)
std_devs = np.sqrt(variances)
stat_dist = np.max(stats / std_devs, 1)

2 changes: 1 addition & 1 deletion arch/tests/unitroot/test_dynamic_ols.py
@@ -180,7 +180,7 @@ def test_kernels_eviews(trivariate_data, config):
lrvar = config[2]
bw = 9 if kernel == "parzen" else 10
res = DynamicOLS(y, x).fit(kernel=kernel, bandwidth=bw, df_adjust=True)
- assert_allclose(res.residual_variance, ser ** 2.0, rtol=1e-5)
+ assert_allclose(res.residual_variance, ser**2.0, rtol=1e-5)
assert_allclose(res.long_run_variance, lrvar, rtol=1e-5)


93 changes: 91 additions & 2 deletions arch/tests/univariate/test_forecast.py
@@ -284,7 +284,7 @@ def test_ar2_forecast(self):
for i in range(5):
expected[i] = a[0, 0]
a = a.dot(comp)
- expected = res.params.iloc[-1] * np.cumsum(expected ** 2)
+ expected = res.params.iloc[-1] * np.cumsum(expected**2)
assert_allclose(fcast.variance.iloc[-1], expected)

expected = np.empty((1000, 5))
@@ -351,7 +351,7 @@ def test_har_forecast(self):
assert_allclose(fcast_66.residual_variance, expected[21:])

impulse = _ar_to_impulse(66, arp)
- expected = expected * np.cumsum(impulse ** 2)
+ expected = expected * np.cumsum(impulse**2)
assert_allclose(fcast_66.variance, expected[21:])

def test_forecast_start_alternatives(self):
@@ -912,3 +912,92 @@ def test_forecast_ar0(constant, lags):
    )
    assert forecasts.mean.shape == (1, 10)
    assert forecasts.simulations.values.shape == (1, 100, 10)


def test_simulation_exog():
    # GH 551
    burn = 250
    from arch.univariate import Normal

    rs = np.random.RandomState(3382983)
    normal = Normal(seed=rs)
    x_mod = ARX(None, lags=1, distribution=normal)
    x0 = x_mod.simulate([1, 0.8, 1], nobs=1000 + burn).data
    x1 = x_mod.simulate([2.5, 0.5, 1], nobs=1000 + burn).data

    rs = np.random.RandomState(33829831)
    normal = Normal(seed=rs)
    resid_mod = ZeroMean(volatility=GARCH(), distribution=normal)
    resids = resid_mod.simulate([0.1, 0.1, 0.8], nobs=1000 + burn).data

    phi1 = 0.7
    phi0 = 3
    y = 10 + resids.copy()
    for i in range(1, y.shape[0]):
        y[i] = phi0 + phi1 * y[i - 1] + 2 * x0[i] - 2 * x1[i] + resids[i]

    x0 = x0.iloc[-1000:]
    x1 = x1.iloc[-1000:]
    y = y.iloc[-1000:]
    y.index = x0.index = x1.index = np.arange(1000)

    x0_oos = np.empty((1000, 10))
    x1_oos = np.empty((1000, 10))
    for i in range(10):
        if i == 0:
            last = x0
        else:
            last = x0_oos[:, i - 1]
        x0_oos[:, i] = 1 + 0.8 * last
        if i == 0:
            last = x1
        else:
            last = x1_oos[:, i - 1]
        x1_oos[:, i] = 2.5 + 0.5 * last

    exog = pd.DataFrame({"x0": x0, "x1": x1})
    mod = arch_model(y, x=exog, mean="ARX", lags=0)
    res = mod.fit(disp="off")

    nforecast = 3

    # Forecast exog with shape (nforecast, horizon)
    exog_fcast = {
        "x0": np.zeros_like(x0_oos[-nforecast:]),
        "x1": np.zeros_like(x1_oos[-nforecast:]),
    }
    forecasts = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )

    # Equivalent forecast exog with shape (nobs, horizon)
    exog_fcast = {"x0": np.zeros_like(x0_oos), "x1": np.zeros_like(x0_oos)}
    forecasts_alt = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )
    assert_allclose(forecasts.mean, forecasts_alt.mean)

    exog_fcast = {
        "x0": 10 + np.zeros_like(x0_oos[-nforecast:]),
        "x1": 10 + np.zeros_like(x1_oos[-nforecast:]),
    }  # case with shape (nforecast, horizon)
    # exog_fcast = {"x0": x0_oos, "x1": x1_oos}  # case with shape (nobs, horizon)

    forecasts_10 = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )

    delta = forecasts_10.mean - forecasts.mean
    expected = (10 * res.params[["x0", "x1"]]).sum() + np.zeros_like(delta)
    assert_allclose(delta, expected)
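
The new test pins down the two accepted layouts for x in simulation forecasts: each value may have shape (nforecast, horizon), one row per forecast origin, or shape (nobs, horizon). A distilled sketch reusing res, nforecast, x0_oos, and x1_oos from the test above; the assert_allclose in the test verifies the two layouts agree:

exog_by_origin = {"x0": x0_oos[-nforecast:], "x1": x1_oos[-nforecast:]}
exog_full = {"x0": x0_oos, "x1": x1_oos}
# For start=1000 - nforecast, both layouts select the same exog rows
f1 = res.forecast(
    horizon=10, x=exog_by_origin, start=1000 - nforecast,
    method="simulation", reindex=False,
)
f2 = res.forecast(
    horizon=10, x=exog_full, start=1000 - nforecast,
    method="simulation", reindex=False,
)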
4 changes: 2 additions & 2 deletions arch/tests/univariate/test_mean.py
@@ -166,7 +166,7 @@ def test_zero_mean(self):
assert isinstance(zm.distribution, Normal)
assert zm.lags is None
res = zm.fit(disp=DISPLAY)
- assert_almost_equal(res.params, np.array([np.mean(self.y ** 2)]))
+ assert_almost_equal(res.params, np.array([np.mean(self.y**2)]))

forecasts = res.forecast(horizon=99, reindex=False)
direct = pd.DataFrame(
@@ -1131,7 +1131,7 @@ def test_arch_lm(simulated_data):
assert "H0: Standardized" not in wald.__repr__()
assert "heteroskedastic" in wald.__repr__()

- resids2 = pd.Series(res.resid ** 2)
+ resids2 = pd.Series(res.resid**2)
data = [resids2.shift(i) for i in range(df + 1)]
data = pd.concat(data, axis=1).dropna()
lhs = data.iloc[:, 0]
4 changes: 2 additions & 2 deletions arch/tests/univariate/test_moment.py
@@ -44,7 +44,7 @@ def test_moment(dist, params):
# verify moments that exist
def f(x, n):
sigma2 = ones_like(x)
- return (x ** n) * exp(dist.loglikelihood(params, x, sigma2, True))
+ return (x**n) * exp(dist.loglikelihood(params, x, sigma2, True))

for n in range(6): # moments 0-5

@@ -60,7 +60,7 @@ def f(x, n):
eta, lam = params
c = gammaln((eta + 1) / 2) - gammaln(eta / 2) - log(pi * (eta - 2)) / 2
a = 4 * lam * exp(c) * (eta - 2) / (eta - 1)
- b = (1 + 3 * lam ** 2 - a ** 2) ** 0.5
+ b = (1 + 3 * lam**2 - a**2) ** 0.5
loc = -a / b
if z < loc:
m_quad = quad(f, -inf, z, args=(n,))[0]
