diff --git a/arch/tests/unitroot/test_unitroot.py b/arch/tests/unitroot/test_unitroot.py
index 458c3c78e6..bafdf20e89 100644
--- a/arch/tests/unitroot/test_unitroot.py
+++ b/arch/tests/unitroot/test_unitroot.py
@@ -70,7 +70,7 @@ def test_adf_no_lags(self):
         assert_almost_equal(adf, -6.56880, DECIMAL_4)
 
     def test_adf_nc_no_lags(self):
-        adf = ADF(self.inflation, trend="nc", lags=0)
+        adf = ADF(self.inflation, trend="n", lags=0)
         assert_almost_equal(adf.stat, -3.88845, DECIMAL_4)
         # 16.239
 
@@ -117,9 +117,9 @@ def test_adf_auto_t_stat(self):
         adf.trend = "ctt"
         assert adf.stat != old_stat
         assert adf.trend == "ctt"
-        assert len(adf.valid_trends) == len(("nc", "c", "ct", "ctt"))
+        assert len(adf.valid_trends) == len(("n", "c", "ct", "ctt"))
         for d in adf.valid_trends:
-            assert d in ("nc", "c", "ct", "ctt")
+            assert d in ("n", "c", "ct", "ctt")
         assert adf.null_hypothesis == "The process contains a unit root."
         assert adf.alternative_hypothesis == "The process is weakly " "stationary."
 
@@ -187,7 +187,7 @@ def test_dfgls_auto(self):
     def test_dfgls_bad_trend(self):
         dfgls = DFGLS(self.inflation, trend="ct", method="BIC", max_lags=3)
         with pytest.raises(ValueError):
-            dfgls.trend = "nc"
+            dfgls.trend = "n"
 
         assert dfgls != 0.0
 
@@ -270,7 +270,7 @@ def test_variance_ratio_non_robust(self):
 
     def test_variance_ratio_no_constant(self):
         y = self.rng.standard_normal(100)
-        vr = VarianceRatio(y, trend="nc", debiased=False)
+        vr = VarianceRatio(y, trend="n", debiased=False)
         dy = np.diff(y)
         mu = 0.0
         dy2 = y[2:] - y[:-2]
@@ -390,7 +390,7 @@ def test_tstat_exogenous(self):
     assert np.max(np.argwhere(np.abs(direct[2:]) > crit)) == sel_lag
 
 
-@pytest.mark.parametrize("trend", ["nc", "c", "ct", "ctt"])
+@pytest.mark.parametrize("trend", ["n", "c", "ct", "ctt"])
 def test_trends_low_memory(trend):
     rnd = np.random.RandomState(12345)
     y = np.cumsum(rnd.standard_normal(250))
@@ -404,13 +404,13 @@ def test_trends_low_memory(trend):
     assert_equal(adf.max_lags, 1)
 
 
-@pytest.mark.parametrize("trend", ["nc", "c", "ct", "ctt"])
+@pytest.mark.parametrize("trend", ["n", "c", "ct", "ctt"])
 def test_representations(trend):
     rnd = np.random.RandomState(12345)
     y = np.cumsum(rnd.standard_normal(250))
     adf = ADF(y, trend=trend, max_lags=16)
     check = "Constant"
-    if trend == "nc":
+    if trend == "n":
         check = "No Trend"
     assert check in adf.__repr__()
     assert check in adf.__repr__()
diff --git a/arch/unitroot/cointegration.py b/arch/unitroot/cointegration.py
index 48c906eece..573231cafc 100644
--- a/arch/unitroot/cointegration.py
+++ b/arch/unitroot/cointegration.py
@@ -9,6 +9,7 @@
 from statsmodels.tsa.tsatools import add_trend
 
 from arch.typing import ArrayLike1D, ArrayLike2D
+from arch.unitroot.critical_values.engle_granger import EngleGrangerCV
 from arch.utility.array import ensure1d, ensure2d
 
 
@@ -137,8 +138,6 @@ def result(self):
 def _cross_section(y, x, trend):
     if trend not in ("n", "c", "ct", "t"):
         raise ValueError('trend must be one of "n", "c", "ct" or "t"')
-    y = np.asarray(ensure1d(y, "x", False))
-    x = np.asarray(ensure2d(x, "x"))
     x = add_trend(x, trend)
     res = OLS(y, x).fit()
     return res.resid
@@ -154,11 +153,17 @@ def engle_granger(
     method: str = "aic",
     df_adjust: Union[bool, int] = True,
 ):
+
+    y = np.asarray(ensure1d(y, "y", False))
+    x = np.asarray(ensure2d(x, "x"))
     resid = _cross_section(y, x, trend)
 
     from arch.unitroot.unitroot import ADF
 
-    ADF(resid, lags, trend="n", max_lags=max_lags, method=method)
-    pass
+    adf = ADF(resid, lags, trend="n", max_lags=max_lags, method=method)
+    # TODO: Improve the p-value and critical-value calculations
+    eg_cv = EngleGrangerCV()
+    cv = pd.Series({p: eg_cv[trend, p, x.shape[1] + 1] for p in (10, 5, 1)})
+    return CointegrationTestResult(adf.stat, adf.pvalue, cv, "Engle-Granger Test")
 
 
 def phillips_ouliaris(y, x, trend="c", lags=None, df_adjust=True):
@@ -177,25 +182,41 @@ def __init__(
         self._null = "No Cointegration"
         self._alternative = "Cointegration"
 
+    @property
     def stat(self) -> float:
         return self._stat
 
+    @property
     def pvalue(self) -> float:
         return self._pvalue
 
+    @property
     def crit_vals(self) -> pd.Series:
         return self._crit_vals
 
+    @property
     def null(self) -> str:
         return self._null
 
+    @property
     def alternative(self) -> str:
         return self._alternative
 
     def __str__(self) -> str:
-        out = f"{self._name}\nStatistic:{self._stat}\nP-value:{self.pvalue()}"
+        out = f"{self._name}\nStatistic:{self._stat}\nP-value:{self.pvalue}"
         out += f"\nNull: {self._null}, Alternative: {self._alternative}"
+        cv_str = ", ".join([f"{k}%: {v}" for k, v in self.crit_vals.items()])
+        out += f"\nCrit. Vals: {cv_str}"
         return out
 
     def __repr__(self):
         return self.__str__() + f"\nID: {hex(id(self))}"
+
+
+if __name__ == "__main__":
+    # Smoke test: a cointegrated pair should reject the null of no cointegration
+    g = np.random.default_rng(0)
+    y = g.standard_normal((500, 1))
+    y = np.cumsum(y, 0)
+    x = y + g.standard_normal((500, 1))
+    print(engle_granger(y, x))
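Note (not part of the patch): a minimal usage sketch of the `engle_granger` function added above. It assumes the patched `arch.unitroot.cointegration` is importable; per the TODO, the p-value still comes from the plain ADF tables, so the numbers are illustrative only.

```python
# Hedged usage sketch for the engle_granger function added in the diff above.
# Assumes the patched arch.unitroot.cointegration module is importable.
import numpy as np

from arch.unitroot.cointegration import engle_granger

rng = np.random.default_rng(12345)
y = np.cumsum(rng.standard_normal(500))  # driftless random walk
x = y + rng.standard_normal(500)  # cointegrated with y by construction
res = engle_granger(y, x)  # trend="c" by default
print(res.stat, res.pvalue)  # stat and pvalue are now properties, not methods
print(res.crit_vals)  # pd.Series of the 10%, 5%, and 1% critical values
```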
diff --git a/arch/unitroot/critical_values/dickey_fuller.py b/arch/unitroot/critical_values/dickey_fuller.py
index f25837944a..b63225f068 100644
--- a/arch/unitroot/critical_values/dickey_fuller.py
+++ b/arch/unitroot/critical_values/dickey_fuller.py
@@ -16,7 +16,7 @@
 tau_small_p = {}
 tau_large_p = {}
 
-tau_small_p["nc"] = [
+tau_small_p["n"] = [
     [0.6344, 1.2378, 3.2496],
     [1.9129, 1.3857, 3.5322],
     [2.7648, 1.4502, 3.4186],
@@ -24,7 +24,7 @@
     [4.0999, 1.5533, 3.5900],
     [4.5388, 1.5344, 2.9807],
 ]
-tau_small_p["nc"] = asarray(tau_small_p["nc"]) * small_scaling
+tau_small_p["n"] = asarray(tau_small_p["n"]) * small_scaling
 
 tau_small_p["c"] = [
     [2.1659, 1.4412, 3.8269],
@@ -57,7 +57,7 @@
 tau_small_p["ctt"] = asarray(tau_small_p["ctt"]) * small_scaling
 
 large_scaling = asarray([1, 1e-1, 1e-1, 1e-2])
-tau_large_p["nc"] = [
+tau_large_p["n"] = [
     [0.4797, 9.3557, -0.6999, 3.3066],
     [1.5578, 8.5580, -2.0830, -3.3549],
     [2.2268, 6.8093, -3.2362, -5.4448],
@@ -65,7 +65,7 @@
     [3.2684, 6.8051, -2.6778, -3.4972],
     [3.7268, 7.1670, -2.3648, -2.8288],
 ]
-tau_large_p["nc"] = asarray(tau_large_p["nc"]) * large_scaling
+tau_large_p["n"] = asarray(tau_large_p["n"]) * large_scaling
 
 tau_large_p["c"] = [
     [1.7339, 9.3202, -1.2745, -1.0368],
@@ -105,7 +105,7 @@
 
 # noinspection PyDictCreation
 tau_2010: Dict[str, NDArray] = {}
-tau_2010["nc"] = array(
+tau_2010["n"] = array(
     [
         [
             [-2.56574, -2.2358, -3.627, 0],  # N = 1
@@ -314,21 +314,21 @@
 # These are the cut-off values for the left-tail vs. the rest of the
 # tau distribution, for getting the p-values
 tau_star = {
-    "nc": [-1.04, -1.53, -2.68, -3.09, -3.07, -3.77],
+    "n": [-1.04, -1.53, -2.68, -3.09, -3.07, -3.77],
     "c": [-1.61, -2.62, -3.13, -3.47, -3.78, -3.93],
     "ct": [-2.89, -3.19, -3.50, -3.65, -3.80, -4.36],
     "ctt": [-3.21, -3.51, -3.81, -3.83, -4.12, -4.63],
 }
 tau_min = {
-    "nc": [-19.04, -19.62, -21.21, -23.25, -21.63, -25.74],
+    "n": [-19.04, -19.62, -21.21, -23.25, -21.63, -25.74],
     "c": [-18.83, -18.86, -23.48, -28.07, -25.96, -23.27],
     "ct": [-16.18, -21.15, -25.37, -26.63, -26.53, -26.18],
     "ctt": [-17.17, -21.1, -24.33, -24.03, -24.33, -28.22],
 }
 tau_max = {
-    "nc": [inf, 1.51, 0.86, 0.88, 1.05, 1.24],
+    "n": [inf, 1.51, 0.86, 0.88, 1.05, 1.24],
     "c": [2.74, 0.92, 0.55, 0.61, 0.79, 1],
     "ct": [0.7, 0.63, 0.71, 0.93, 1.19, 1.42],
     "ctt": [0.54, 0.79, 1.08, 1.43, 3.49, 1.92],
@@ -342,7 +342,7 @@
 adf_z_star = {"c": -7.96, "ct": -13.46, "ctt": -16.27}
 
 adf_z_small_p = {
-    "nc": [0.0342, -0.6376, 0, -0.03872],
+    "n": [0.0342, -0.6376, 0, -0.03872],
     "c": [2.2142, -1.7863, 0.3283, -0.07727],
     "ct": [4.6476, -2.8932, 0.5832, -0.09990],
     "ctt": [4.4599, -1.8635, 0.2126, -0.06070],
@@ -352,7 +352,7 @@
 # the approximation function is
 # p = norm.cdf(d_0 + d_1 * z + d_2*z**2 + d_3*z**3 + d_4*z**4)
 adf_z_large_p = {
-    "nc": [0.4927, 6.906, 13.2331, 12.099, 0],
+    "n": [0.4927, 6.906, 13.2331, 12.099, 0],
     "c": [1.7157, 0.5536, 4.5518, 2.2466, 4.2537],
     "ct": [2.7119, 0.4594, 2.3747, 0.7488, 0.9333],
     "ctt": [3.4216, 0.4170, 1.6939, 0.4203, 0.4153],
@@ -377,7 +377,7 @@
             [-11.25012522, 38.19512941, -111.45098086, 226.91220644],
         ]
     ),
-    "nc": asarray(
+    "n": asarray(
         [
             [-13.2907657, 323.225921, -8318.89213, 79857.3965],
             [-7.81966697, 217.366594, -5554.92749, 52618.7129],
diff --git a/arch/unitroot/critical_values/engle_granger.py b/arch/unitroot/critical_values/engle_granger.py
index 8a68777b54..06846af699 100644
--- a/arch/unitroot/critical_values/engle_granger.py
+++ b/arch/unitroot/critical_values/engle_granger.py
@@ -182,6 +182,7 @@
 
 
 class EngleGrangerCV(object):
+    # TODO: Improve to provide CVs directly as a function of T
     def __getitem__(self, item):
-        # item ['nc',10,3]
+        # item is (trend, percentile, number of variables), e.g. ("n", 10, 3)
         if (
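Note (not part of the patch): the TODO added to `EngleGrangerCV` points toward MacKinnon-style response surfaces, which give a critical value directly as a function of the sample size. A minimal sketch of that idea follows; the coefficients are placeholders, not values from the arch tables.

```python
# Sketch of a MacKinnon-style response surface cv(T) = b0 + b1/T + b2/T**2,
# one way to provide critical values directly as a function of sample size T.
# The coefficients below are placeholders, not values from the arch tables.
import numpy as np


def response_surface_cv(coeffs: np.ndarray, nobs: int) -> float:
    """Evaluate a response-surface critical value at sample size nobs."""
    tinv = 1.0 / nobs
    return float(coeffs[0] + coeffs[1] * tinv + coeffs[2] * tinv**2)


b = np.array([-3.37, -5.97, -8.98])  # hypothetical 5% coefficients
print(response_surface_cv(b, 250))  # approaches b[0] as nobs grows
```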
diff --git a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation.py b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation.py
index fe83a94706..e70b98df4b 100644
--- a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation.py
+++ b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation.py
@@ -61,7 +61,7 @@ def wrapper(n, trend, b, seed=0):
 dview["adf_simulation"] = adf_simulation
 lview = rc.load_balanced_view()
 
-trends = ("nc", "c", "ct", "ctt")
+trends = ("n", "c", "ct", "ctt")
 T = array(
     (
         20,
diff --git a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_joblib.py b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_joblib.py
index 4e5bfba6e5..5b07efbe00 100644
--- a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_joblib.py
+++ b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_joblib.py
@@ -51,7 +51,7 @@ def wrapper(n, trend, b, seed=0):
 
 
 if __name__ == "__main__":
-    trends = ("nc", "c", "ct", "ctt")
+    trends = ("n", "c", "ct", "ctt")
     T = array(
         (
             20,
diff --git a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_large_cluster.py b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_large_cluster.py
index 8816e2e7b3..fa8bcc86af 100644
--- a/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_large_cluster.py
+++ b/arch/unitroot/critical_values/simulation/adf_z_critical_values_simulation_large_cluster.py
@@ -79,7 +79,7 @@ def wrapper(n, trend, b, rng_seed=0):
 dview["adf_simulation"] = adf_simulation
 lview = rc.load_balanced_view()
 
-trends = ("nc", "c", "ct", "ctt")
+trends = ("n", "c", "ct", "ctt")
 T = array(
     (
         20,
diff --git a/arch/unitroot/critical_values/simulation/adf_z_simlation_process.py b/arch/unitroot/critical_values/simulation/adf_z_simlation_process.py
index 47f4907038..02a351881e 100644
--- a/arch/unitroot/critical_values/simulation/adf_z_simlation_process.py
+++ b/arch/unitroot/critical_values/simulation/adf_z_simlation_process.py
@@ -1,7 +1,7 @@
 import numpy as np
 from statsmodels.regression.linear_model import OLS
 
-trends = ("nc", "c", "ct", "ctt")
+trends = ("n", "c", "ct", "ctt")
 critical_values = (1.0, 5.0, 10.0)
 adf_z_cv_approx = {}
 for t in trends:
diff --git a/arch/unitroot/critical_values/simulation/engle_granger_simulation.py b/arch/unitroot/critical_values/simulation/engle_granger_simulation.py
index 3150ec57d5..0210eb662f 100644
--- a/arch/unitroot/critical_values/simulation/engle_granger_simulation.py
+++ b/arch/unitroot/critical_values/simulation/engle_granger_simulation.py
@@ -9,10 +9,11 @@
 from random import shuffle
 from typing import List
 
-from joblib import Parallel, cpu_count, delayed
 import numpy as np
 from numpy.random import PCG64, Generator, SeedSequence
 
+from joblib import Parallel, cpu_count, delayed
+
 ROOT = os.path.join(os.path.split(os.path.abspath(__file__))[0], "engle-granger")
 if not os.path.exists(ROOT):
     os.mkdir(ROOT)
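Note (not part of the patch): the simulation scripts above tabulate critical values of the ADF normalized-bias (z) statistic for each trend specification. A self-contained sketch of the quantity being simulated follows; it is not the project's `adf_simulation` function, whose exact form is defined elsewhere.

```python
# Minimal sketch of one draw of the ADF z (normalized-bias) statistic under a
# given deterministic trend; not the project's adf_simulation function.
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.tsatools import add_trend


def adf_z_stat(nobs: int, trend: str, rng: np.random.Generator) -> float:
    """One simulated z-statistic for a driftless random walk of length nobs."""
    y = np.cumsum(rng.standard_normal(nobs))
    rhs = y[:-1, None]  # lagged level
    if trend != "n":
        rhs = add_trend(rhs, trend)  # append deterministic terms
    rho = OLS(y[1:], rhs).fit().params[0]
    return nobs * (rho - 1.0)


rng = np.random.default_rng(0)
stats = [adf_z_stat(500, "c", rng) for _ in range(2000)]
print(np.percentile(stats, [1.0, 5.0, 10.0]))  # rough left-tail critical values
```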
diff --git a/arch/unitroot/unitroot.py b/arch/unitroot/unitroot.py
index 90a158257f..86183954ba 100644
--- a/arch/unitroot/unitroot.py
+++ b/arch/unitroot/unitroot.py
@@ -87,10 +87,10 @@
     parametrization.
 """
 
-TREND_MAP = {None: "nc", 0: "c", 1: "ct", 2: "ctt"}
+TREND_MAP = {None: "n", 0: "c", 1: "ct", 2: "ctt"}
 
 TREND_DESCRIPTION = {
-    "nc": "No Trend",
+    "n": "No Trend",
     "c": "Constant",
     "ct": "Constant and Linear Time Trend",
     "ctt": "Constant, Linear and Quadratic Time Trends",
@@ -178,7 +178,7 @@ def _autolag_ols_low_memory(
     level = level / sqrt(level.dot(level))
     trendx = []
     nobs = lhs.shape[0]
-    if trend == "nc":
+    if trend == "n":
         trendx = empty((nobs, 0))
     else:
         if "tt" in trend:
@@ -326,7 +326,7 @@ def _df_select_lags(
     # This is the absolute maximum number of lags possible,
     # only needed to very short time series.
     max_max_lags = nobs // 2 - 1
-    if trend != "nc":
+    if trend != "n":
         max_max_lags -= len(trend)
     if max_lags is None:
         max_lags = int(ceil(12.0 * power(nobs / 100.0, 1 / 4.0)))
@@ -341,7 +341,7 @@ def _df_select_lags(
     rhs[:, 0] = y[-nobs - 1 : -1]  # replace 0 with level of y
     lhs = delta_y[-nobs:]
 
-    if trend != "nc":
+    if trend != "n":
         full_rhs = add_trend(rhs, trend, prepend=True)
     else:
         full_rhs = rhs
@@ -387,7 +387,7 @@ def _estimate_df_regression(y: NDArray, trend: str, lags: int) -> RegressionResu
     rhs[:, 0] = y[-nobs - 1 : -1]  # replace lag 0 with level of y
     rhs = _add_column_names(rhs, lags)
 
-    if trend != "nc":
+    if trend != "n":
         rhs = add_trend(rhs.iloc[:, : lags + 1], trend)
 
     return OLS(lhs, rhs).fit()
@@ -405,6 +405,14 @@ def __init__(
         self._lags = lags
         self._valid_trends = valid_trends
         self._trend = ""
+        if trend == "nc":
+            import warnings
+
+            warnings.warn(
+                'Trend "nc" is deprecated and has been replaced with "n" (for none).',
+                FutureWarning,
+            )
+            trend = "n"
         if trend not in self.valid_trends:
             raise ValueError("trend not understood")
         self._trend = trend
@@ -682,7 +689,7 @@ def __init__(
         method: str = "AIC",
         low_memory: Optional[bool] = None,
     ) -> None:
-        valid_trends = ("nc", "c", "ct", "ctt")
+        valid_trends = ("n", "c", "ct", "ctt")
         super().__init__(y, lags, trend, valid_trends)
         self._max_lags = max_lags
         self._method = method
@@ -852,14 +859,14 @@ def _compute_statistic(self) -> None:
         max_lags, method = self._max_lags, self._method
         assert self._low_memory is not None
         icbest, bestlag = _df_select_lags(
-            y_detrended, "nc", max_lags, method, low_memory=self._low_memory
+            y_detrended, "n", max_lags, method, low_memory=self._low_memory
         )
         self._lags = bestlag
 
         # 3. Run Regression
         lags = self._lags
 
-        resols = _estimate_df_regression(y_detrended, lags=lags, trend="nc")
+        resols = _estimate_df_regression(y_detrended, lags=lags, trend="n")
         self._regression = resols
         self._nobs = int(resols.nobs)
         self._stat = resols.tvalues[0]
@@ -1005,7 +1012,7 @@ def __init__(
         trend: str = "c",
         test_type: str = "tau",
     ) -> None:
-        valid_trends = ("nc", "c", "ct")
+        valid_trends = ("n", "c", "ct")
         super().__init__(y, lags, trend, valid_trends)
         self._test_type = test_type
         self._stat_rho = None
@@ -1026,7 +1033,7 @@ def _compute_statistic(self) -> None:
         rhs = y[:-1, None]
         rhs = _add_column_names(rhs, 0)
         lhs = y[1:, None]
-        if trend != "nc":
+        if trend != "n":
             rhs = add_trend(rhs, trend)
 
         resols = OLS(lhs, rhs).fit()
@@ -1477,7 +1484,7 @@ def __init__(
     ) -> None:
         if lags < 2:
             raise ValueError("lags must be an integer larger than 2")
-        valid_trends = ("nc", "c")
+        valid_trends = ("n", "c")
         super().__init__(y, lags, trend, valid_trends)
         self._test_name = "Variance-Ratio Test"
         self._null_hypothesis = "The process is a random walk."
@@ -1550,7 +1557,7 @@ def _compute_statistic(self) -> None:
         )
 
         nobs = y.shape[0]
-        if trend == "nc":
+        if trend == "n":
             mu = 0
         else:
             mu = (y[-1] - y[0]) / (nobs - 1)
@@ -1610,7 +1617,7 @@ def mackinnonp(
         "T-value" from an Augmented Dickey-Fuller or DFGLS regression.
-    regression : {'c', 'nc', 'ct', 'ctt'}
+    regression : {'c', 'n', 'ct', 'ctt'}
         This is the method of regression that was used. Following MacKinnon's
-        notation, this can be "c" for constant, "nc" for no constant, "ct" for
+        notation, this can be "c" for constant, "n" for no constant, "ct" for
         constant and trend, and "ctt" for constant, trend, and trend-squared.
     num_unit_roots : int
         The number of series believed to be I(1). For (Augmented) Dickey-
@@ -1731,7 +1738,7 @@ def mackinnoncrit(
     https://ideas.repec.org/p/qed/wpaper/1227.html
     """
     dist_type = dist_type.lower()
-    valid_regression = ["c", "ct", "nc", "ctt"]
+    valid_regression = ["c", "ct", "n", "ctt"]
    if dist_type == "dfgls":
         valid_regression = ["c", "ct"]
     if regression not in valid_regression:
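Note (not part of the patch): a quick check of the "nc" to "n" deprecation shim added to the unit-root base class above. It assumes the patched arch package is importable; the warning category follows the `FutureWarning` used in the shim.

```python
# Quick check of the "nc" -> "n" deprecation shim added above. Assumes the
# patched arch package is importable.
import warnings

import numpy as np

from arch.unitroot import ADF

y = np.cumsum(np.random.default_rng(0).standard_normal(250))
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    adf = ADF(y, trend="nc")  # old spelling still accepted, but warns
print(adf.trend)  # "n"
print(caught[0].category.__name__)  # FutureWarning, per the shim
```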