BUG: Fix bug in multistep simulation forecast
Correct a bug when using exog variables in multistep simulation-based
forecasts by correcting the index location.

closes #551
bashtage committed Feb 4, 2022
1 parent 98ddb9a commit 4e25259
Showing 33 changed files with 248 additions and 151 deletions.
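
For orientation before the per-file diffs, here is a minimal sketch of the call pattern the fix targets: a simulation-based multistep forecast with exogenous regressors. The data and parameter values are illustrative, not taken from the commit.

import numpy as np
import pandas as pd
from arch import arch_model

rs = np.random.RandomState(0)
x = pd.DataFrame({"x0": rs.standard_normal(1000)})
y = 0.5 + 2.0 * x["x0"] + rs.standard_normal(1000)

mod = arch_model(y, x=x, mean="ARX", lags=0)
res = mod.fit(disp="off")

# Out-of-sample exog paths, one row per forecast origin: shape (nforecast, horizon).
# Before this fix, simulation-based forecasts could read the exog values from the
# wrong index location when start < nobs.
x_oos = {"x0": np.zeros((3, 5))}
fc = res.forecast(horizon=5, x=x_oos, start=997, method="simulation", reindex=False)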
16 changes: 8 additions & 8 deletions arch/bootstrap/base.py
@@ -118,10 +118,10 @@ def _single_optimal_block(x: Float64Array) -> Tuple[float, float]:
lam = 1 if k / m <= 1 / 2 else 2 * (1 - k / m)
g += 2 * lam * k * acv[k]
lr_acv += 2 * lam * acv[k]
- d_sb = 2 * lr_acv ** 2
- d_cb = 4 / 3 * lr_acv ** 2
- b_sb = ((2 * g ** 2) / d_sb) ** (1 / 3) * nobs ** (1 / 3)
- b_cb = ((2 * g ** 2) / d_cb) ** (1 / 3) * nobs ** (1 / 3)
+ d_sb = 2 * lr_acv**2
+ d_cb = 4 / 3 * lr_acv**2
+ b_sb = ((2 * g**2) / d_sb) ** (1 / 3) * nobs ** (1 / 3)
+ b_cb = ((2 * g**2) / d_cb) ** (1 / 3) * nobs ** (1 / 3)
b_sb = min(b_sb, b_max)
b_cb = min(b_cb, b_max)
return b_sb, b_cb
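
The exponent changes in this hunk appear to be pure Black-style reformatting. For context, the block sizes computed by _single_optimal_block back the public optimal_block_length helper; a minimal usage sketch:

import numpy as np
from arch.bootstrap import optimal_block_length

rs = np.random.RandomState(0)
x = rs.standard_normal(1000)
# One row per series, with "stationary" and "circular" block-length estimates
print(optimal_block_length(x))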
@@ -222,8 +222,8 @@ def _get_acceleration(jk_params: Float64Array) -> float:
Value of the acceleration parameter "a" used in the BCa bootstrap.
"""
u = jk_params.mean() - jk_params
- numer = np.sum(u ** 3, 0)
- denom = 6 * (np.sum(u ** 2, 0) ** (3.0 / 2.0))
+ numer = np.sum(u**3, 0)
+ denom = 6 * (np.sum(u**2, 0) ** (3.0 / 2.0))
small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
if small.any():
message = "Jackknife variance estimate {jk_var} is too small to use BCa"
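
_get_acceleration supplies the acceleration term for BCa confidence intervals. A minimal sketch of the public entry point, assuming IID data; names and values are illustrative:

import numpy as np
from arch.bootstrap import IIDBootstrap

x = np.random.RandomState(23456).standard_normal(500)
bs = IIDBootstrap(x, seed=0)
# method="bca" uses the bias and acceleration corrections computed above
ci = bs.conf_int(np.mean, reps=1000, method="bca")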
@@ -1100,7 +1100,7 @@ def _construct_bootstrap_estimates(
studentized_results[count] = (results[count] - base) / std_err
elif studentize_reps > 0:
# Set the seed to ensure reproducibility
- seed = _get_random_integers(self._generator, 2 ** 31 - 1, size=1)
+ seed = _get_random_integers(self._generator, 2**31 - 1, size=1)
# Need new bootstrap of same type
nested_bs = self.clone(*pos_data, **kw_data, seed=seed[0])
cov = nested_bs.cov(func, studentize_reps, extra_kwargs=extra_kwargs)
@@ -1275,7 +1275,7 @@ def var(
else:
errors = results - base

- return (errors ** 2).sum(0) / reps
+ return (errors**2).sum(0) / reps

def update_indices(self) -> BootstrapIndexT:
"""
6 changes: 3 additions & 3 deletions arch/bootstrap/multiple_comparison.py
@@ -223,7 +223,7 @@ def _compute_r(self) -> None:
bootstrapped_mean_losses[j] = mean_losses_star - mean_losses_star.T
# Recenter
bootstrapped_mean_losses -= loss_diffs
- variances = (bootstrapped_mean_losses ** 2).mean(0)
+ variances = (bootstrapped_mean_losses**2).mean(0)
variances += np.eye(self.k) # Prevent division by 0
self._variances = variances
# Standardize everything
@@ -283,7 +283,7 @@ def _compute_max(self) -> None:
incl_bs_grand_loss = incl_bs_avg_loss_err.mean(1)
# Reshape for broadcast
incl_bs_avg_loss_err -= incl_bs_grand_loss[:, None]
- std_devs = np.sqrt((incl_bs_avg_loss_err ** 2).mean(0))
+ std_devs = np.sqrt((incl_bs_avg_loss_err**2).mean(0))
simulated_test_stat = incl_bs_avg_loss_err / std_devs
simulated_test_stat = np.max(simulated_test_stat, 1)
loss_diffs = incl_losses.mean(0)
@@ -696,7 +696,7 @@ def _compute_variance(self) -> None:
else:
t = self.t
p = 1.0 / self.block_size
- variances = np.sum(demeaned ** 2, 0) / t
+ variances = np.sum(demeaned**2, 0) / t
for i in range(1, t):
kappa = ((1.0 - (i / t)) * ((1 - p) ** i)) + (
(i / t) * ((1 - p) ** (t - i))
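
The kernel-weighted variances in _compute_variance feed the SPA test statistic. A minimal usage sketch with synthetic loss series; all names are illustrative:

import numpy as np
from arch.bootstrap import SPA

rs = np.random.RandomState(0)
benchmark_losses = rs.standard_normal(1000) ** 2
model_losses = rs.standard_normal((1000, 5)) ** 2
spa = SPA(benchmark_losses, model_losses)
spa.compute()
# Lower, consistent, and upper p-values for "no model improves on the benchmark"
print(spa.pvalues)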
8 changes: 4 additions & 4 deletions arch/covariance/kernel.py
@@ -323,7 +323,7 @@ def _alpha_q(self) -> float:
sig_j = float(np.squeeze(v[j:].T @ v[: (nobs - j)])) / nobs
scale = 1 + (j != 0)
f_0s += scale * sig_j
- f_qs += scale * j ** q * sig_j
+ f_qs += scale * j**q * sig_j
return (f_qs / f_0s) ** 2

@cached_property
@@ -496,7 +496,7 @@ def rate(self) -> float:
def _weights(self) -> Float64Array:
bw = self.bandwidth
x = np.arange(int(bw + 1), dtype="double") / (bw + 1)
- return 1 - x ** 2
+ return 1 - x**2


_parzen_geometric_formula = """\
@@ -552,7 +552,7 @@ def rate(self) -> float:
def _weights(self) -> Float64Array:
bw = self.bandwidth
x = np.arange(int(bw + 1), dtype="double") / (bw + 1)
- return 1 / (1 + x ** 2)
+ return 1 / (1 + x**2)


_tukey_hamming_formula = """\
@@ -670,7 +670,7 @@ def _weights(self) -> Float64Array:
if bw > 0:
x = np.arange(1, nobs) / bw
z = 6 * np.pi * x / 5
- w[1:] = 3 / z ** 2 * (np.sin(z) / z - np.cos(z))
+ w[1:] = 3 / z**2 * (np.sin(z) / z - np.cos(z))
return w


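
The _weights methods above define each kernel's lag weights for long-run covariance estimation. A minimal sketch using one of the public estimators in arch.covariance.kernel, assuming automatic bandwidth selection:

import numpy as np
from arch.covariance.kernel import Parzen

rs = np.random.RandomState(0)
x = rs.standard_normal((500, 2))
est = Parzen(x)
# CovarianceEstimate with short-run and long-run (HAC) components
print(est.cov.long_run)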
12 changes: 6 additions & 6 deletions arch/tests/bootstrap/test_bootstrap.py
@@ -346,7 +346,7 @@ def test_studentized(bs_setup, seed):

def std_err_func(mu, y):
errors = y - mu
- var = (errors ** 2.0).mean(axis=0)
+ var = (errors**2.0).mean(axis=0)
return np.sqrt(var / y.shape[0])

ci = bs.conf_int(
@@ -369,7 +369,7 @@ def std_err_func(mu, y):
assert_allclose(results, bs._results)
assert_allclose(stud_results, bs._studentized_results)
errors = results - results.mean(0)
- std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
+ std_err = np.sqrt(np.mean(errors**2.0, axis=0))
ci_direct = np.zeros((2, 2))
for i in range(2):
ci_direct[0, i] = base[i] - std_err[i] * np.percentile(stud_results[:, i], 97.5)
@@ -389,9 +389,9 @@ def std_err_func(mu, y):
for pos, _ in bs.bootstrap(reps=num_bootstrap):
results[count] = bs_setup.func(*pos)
if isinstance(bs._generator, RandomState):
- seed = bs._generator.randint(2 ** 31 - 1)
+ seed = bs._generator.randint(2**31 - 1)
else:
- seed = bs._generator.integers(2 ** 31 - 1)
+ seed = bs._generator.integers(2**31 - 1)
inner_bs = IIDBootstrap(*pos, seed=seed)
cov = inner_bs.cov(bs_setup.func, reps=50)
std_err = np.sqrt(np.diag(cov))
@@ -401,7 +401,7 @@ def std_err_func(mu, y):
assert_allclose(results, bs._results)
assert_allclose(stud_results, bs._studentized_results)
errors = results - results.mean(0)
- std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
+ std_err = np.sqrt(np.mean(errors**2.0, axis=0))

ci_direct = np.zeros((2, 2))
for i in range(2):
@@ -579,7 +579,7 @@ def test_bca(bs_setup):
u = jk.mean() - jk
u2 = np.sum(u * u, 0)
u3 = np.sum(u * u * u, 0)
- a = u3 / (6.0 * (u2 ** 1.5))
+ a = u3 / (6.0 * (u2**1.5))
a = a[:, None]
percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))

6 changes: 3 additions & 3 deletions arch/tests/bootstrap/test_multiple_comparison.py
@@ -74,7 +74,7 @@ def test_variances_and_selection(spa_data):
kernel_weights[i] = ((1.0 - (i / t)) * ((1 - p) ** i)) + (
(i / t) * ((1 - p) ** (t - i))
)
- direct_vars = (demeaned ** 2).sum(0) / t
+ direct_vars = (demeaned**2).sum(0) / t
for i in range(1, t):
direct_vars += (
2 * kernel_weights[i] * (demeaned[: t - i, :] * demeaned[i:, :]).sum(0) / t
@@ -344,7 +344,7 @@ def r_step(losses, indices):
for n in range(b):
# Compute bootstrapped versions
bs_diffs[n] = loss_diffs_vec[indices[n]].mean()
- variances[j, i] = variances[i, j] = (bs_diffs ** 2).mean()
+ variances[j, i] = variances[i, j] = (bs_diffs**2).mean()
std_diffs = np.abs(bs_diffs) / np.sqrt(variances[i, j])
stat_candidates.append(std_diffs)
stat_candidates = np.array(stat_candidates).T
@@ -388,7 +388,7 @@ def max_step(losses, indices):
# Compute bootstrapped versions
bs_loss_errors = loss_errors[indices[n]]
stats[n] = bs_loss_errors.mean(0) - bs_loss_errors.mean()
- variances = (stats ** 2).mean(0)
+ variances = (stats**2).mean(0)
std_devs = np.sqrt(variances)
stat_dist = np.max(stats / std_devs, 1)

2 changes: 1 addition & 1 deletion arch/tests/unitroot/test_dynamic_ols.py
@@ -180,7 +180,7 @@ def test_kernels_eviews(trivariate_data, config):
lrvar = config[2]
bw = 9 if kernel == "parzen" else 10
res = DynamicOLS(y, x).fit(kernel=kernel, bandwidth=bw, df_adjust=True)
- assert_allclose(res.residual_variance, ser ** 2.0, rtol=1e-5)
+ assert_allclose(res.residual_variance, ser**2.0, rtol=1e-5)
assert_allclose(res.long_run_variance, lrvar, rtol=1e-5)


93 changes: 91 additions & 2 deletions arch/tests/univariate/test_forecast.py
@@ -284,7 +284,7 @@ def test_ar2_forecast(self):
for i in range(5):
expected[i] = a[0, 0]
a = a.dot(comp)
- expected = res.params.iloc[-1] * np.cumsum(expected ** 2)
+ expected = res.params.iloc[-1] * np.cumsum(expected**2)
assert_allclose(fcast.variance.iloc[-1], expected)

expected = np.empty((1000, 5))
@@ -351,7 +351,7 @@ def test_har_forecast(self):
assert_allclose(fcast_66.residual_variance, expected[21:])

impulse = _ar_to_impulse(66, arp)
- expected = expected * np.cumsum(impulse ** 2)
+ expected = expected * np.cumsum(impulse**2)
assert_allclose(fcast_66.variance, expected[21:])

def test_forecast_start_alternatives(self):
@@ -912,3 +912,92 @@ def test_forecast_ar0(constant, lags):
    )
    assert forecasts.mean.shape == (1, 10)
    assert forecasts.simulations.values.shape == (1, 100, 10)


def test_simulation_exog():
    # GH 551
    burn = 250
    from arch.univariate import Normal

    rs = np.random.RandomState(3382983)
    normal = Normal(seed=rs)
    x_mod = ARX(None, lags=1, distribution=normal)
    x0 = x_mod.simulate([1, 0.8, 1], nobs=1000 + burn).data
    x1 = x_mod.simulate([2.5, 0.5, 1], nobs=1000 + burn).data

    rs = np.random.RandomState(33829831)
    normal = Normal(seed=rs)
    resid_mod = ZeroMean(volatility=GARCH(), distribution=normal)
    resids = resid_mod.simulate([0.1, 0.1, 0.8], nobs=1000 + burn).data

    phi1 = 0.7
    phi0 = 3
    y = 10 + resids.copy()
    for i in range(1, y.shape[0]):
        y[i] = phi0 + phi1 * y[i - 1] + 2 * x0[i] - 2 * x1[i] + resids[i]

    x0 = x0.iloc[-1000:]
    x1 = x1.iloc[-1000:]
    y = y.iloc[-1000:]
    y.index = x0.index = x1.index = np.arange(1000)

    x0_oos = np.empty((1000, 10))
    x1_oos = np.empty((1000, 10))
    for i in range(10):
        if i == 0:
            last = x0
        else:
            last = x0_oos[:, i - 1]
        x0_oos[:, i] = 1 + 0.8 * last
        if i == 0:
            last = x1
        else:
            last = x1_oos[:, i - 1]
        x1_oos[:, i] = 2.5 + 0.5 * last

    exog = pd.DataFrame({"x0": x0, "x1": x1})
    mod = arch_model(y, x=exog, mean="ARX", lags=0)
    res = mod.fit(disp="off")

    nforecast = 3

    # Forecast exog with shape (nforecast, horizon)
    exog_fcast = {
        "x0": np.zeros_like(x0_oos[-nforecast:]),
        "x1": np.zeros_like(x1_oos[-nforecast:]),
    }
    forecasts = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )

    # Equivalent forecast exog with shape (nobs, horizon)
    exog_fcast = {"x0": np.zeros_like(x0_oos), "x1": np.zeros_like(x0_oos)}
    forecasts_alt = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )
    assert_allclose(forecasts.mean, forecasts_alt.mean)

    exog_fcast = {
        "x0": 10 + np.zeros_like(x0_oos[-nforecast:]),
        "x1": 10 + np.zeros_like(x1_oos[-nforecast:]),
    }  # case with shape (nforecast, horizon)
    # exog_fcast = {"x0": x0_oos, "x1": x1_oos}  # case with shape (nobs, horizon)

    forecasts_10 = res.forecast(
        horizon=10,
        x=exog_fcast,
        start=1000 - nforecast,
        method="simulation",
        reindex=False,
    )

    delta = forecasts_10.mean - forecasts.mean
    expected = (10 * res.params[["x0", "x1"]]).sum() + np.zeros_like(delta)
    assert_allclose(delta, expected)
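
The new test pins down the two accepted layouts for x in simulation forecasts: each value may have shape (nforecast, horizon), one row per forecast origin, or shape (nobs, horizon). A distilled sketch reusing res, nforecast, x0_oos, and x1_oos from the test above; the assert_allclose in the test verifies the two layouts agree:

exog_by_origin = {"x0": x0_oos[-nforecast:], "x1": x1_oos[-nforecast:]}
exog_full = {"x0": x0_oos, "x1": x1_oos}
# For start=1000 - nforecast, both layouts select the same exog rows
f1 = res.forecast(
    horizon=10, x=exog_by_origin, start=1000 - nforecast,
    method="simulation", reindex=False,
)
f2 = res.forecast(
    horizon=10, x=exog_full, start=1000 - nforecast,
    method="simulation", reindex=False,
)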
4 changes: 2 additions & 2 deletions arch/tests/univariate/test_mean.py
@@ -166,7 +166,7 @@ def test_zero_mean(self):
assert isinstance(zm.distribution, Normal)
assert zm.lags is None
res = zm.fit(disp=DISPLAY)
- assert_almost_equal(res.params, np.array([np.mean(self.y ** 2)]))
+ assert_almost_equal(res.params, np.array([np.mean(self.y**2)]))

forecasts = res.forecast(horizon=99, reindex=False)
direct = pd.DataFrame(
@@ -1131,7 +1131,7 @@ def test_arch_lm(simulated_data):
assert "H0: Standardized" not in wald.__repr__()
assert "heteroskedastic" in wald.__repr__()

- resids2 = pd.Series(res.resid ** 2)
+ resids2 = pd.Series(res.resid**2)
data = [resids2.shift(i) for i in range(df + 1)]
data = pd.concat(data, axis=1).dropna()
lhs = data.iloc[:, 0]
4 changes: 2 additions & 2 deletions arch/tests/univariate/test_moment.py
@@ -44,7 +44,7 @@ def test_moment(dist, params):
# verify moments that exist
def f(x, n):
sigma2 = ones_like(x)
- return (x ** n) * exp(dist.loglikelihood(params, x, sigma2, True))
+ return (x**n) * exp(dist.loglikelihood(params, x, sigma2, True))

for n in range(6): # moments 0-5

@@ -60,7 +60,7 @@ def f(x, n):
eta, lam = params
c = gammaln((eta + 1) / 2) - gammaln(eta / 2) - log(pi * (eta - 2)) / 2
a = 4 * lam * exp(c) * (eta - 2) / (eta - 1)
- b = (1 + 3 * lam ** 2 - a ** 2) ** 0.5
+ b = (1 + 3 * lam**2 - a**2) ** 0.5
loc = -a / b
if z < loc:
m_quad = quad(f, -inf, z, args=(n,))[0]
