Merge pull request #406 from bashtage/deep-code-fixes

MAINT: Clean up and mypy
bashtage · Aug 27, 2020 · c5d4751 · c5d4751
2 parents 86d622b + af91046
commit c5d4751
Show file tree

Hide file tree

Showing 17 changed files with 213 additions and 151 deletions.
diff --git a/arch/bootstrap/base.py b/arch/bootstrap/base.py
@@ -375,15 +375,15 @@ def __init__(
                     )
         self._index = np.arange(self._num_items)
 
-        self._parameters: List[int] = []
+        self._parameters: List[Union[int, ArrayLike]] = []
         self._seed: Optional[Union[int, List[int], NDArray]] = None
         self.pos_data = args
         self.kw_data = kwargs
         self.data = (self.pos_data, self.kw_data)
 
         self._base: Optional[NDArray] = None
         self._results: Optional[NDArray] = None
-        self._studentized_results = None
+        self._studentized_results: Optional[NDArray] = None
         self._last_func: Optional[Callable[..., ArrayLike]] = None
         for key, value in kwargs.items():
             attr = getattr(self, key, None)
@@ -409,7 +409,7 @@ def _repr_html(self) -> str:
         return html
 
     @property
-    def random_state(self) -> np.random.RandomState:
+    def random_state(self) -> RandomState:
         """
         Set or get the instance random state
 
@@ -426,13 +426,13 @@ def random_state(self) -> np.random.RandomState:
         return self._random_state
 
     @random_state.setter
-    def random_state(self, random_state: np.random.RandomState) -> None:
+    def random_state(self, random_state: RandomState) -> None:
         if not isinstance(random_state, RandomState):
             raise TypeError("Value being set must be a RandomState")
         self._random_state = random_state
 
     @property
-    def index(self) -> NDArray:
+    def index(self) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         """
         The current index of the bootstrap
         """
@@ -679,10 +679,10 @@ def conf_int(
         assert base is not None
         studentized_results = self._studentized_results
 
-        std_err = []
+        std_err = np.empty((0,))
         if method in ("norm", "var", "cov", studentized):
             errors = results - results.mean(axis=0)
-            std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps))
+            std_err = np.asarray(np.sqrt(np.diag(errors.T.dot(errors) / reps)))
 
         if tail == "two":
             alpha = (1.0 - size) / 2
@@ -708,6 +708,7 @@ def conf_int(
             values = results
             if method == studentized:
                 # studentized uses studentized parameter estimates
+                assert isinstance(studentized_results, NDArray)
                 values = studentized_results
 
             if method in ("debiased", "bc", "bias-corrected", "bca"):
@@ -730,9 +731,9 @@ def conf_int(
                 percentiles = stats.norm.cdf(
                     b + (b + norm_quantiles) / (1.0 - a * (b + norm_quantiles))
                 )
-                percentiles = list(100 * percentiles)
+                percentiles *= 100
             else:
-                percentiles = [100 * p for p in percentiles]  # Rescale
+                percentiles = np.array([100 * p for p in percentiles])  # Rescale
 
             k = values.shape[1]
             lower = np.zeros(k)
@@ -1094,7 +1095,9 @@ def var(
 
         return (errors ** 2).sum(0) / reps
 
-    def update_indices(self) -> NDArray:
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         """
         Update indices for the next iteration of the bootstrap.  This must
         be overridden when creating new bootstraps.
@@ -1208,7 +1211,9 @@ def __init__(
         self._num_arg_items = [len(arg) for arg in args]
         self._num_kw_items = {key: len(kwargs[key]) for key in self._kwargs}
 
-    def update_indices(self) -> Tuple[List[NDArray], Dict[str, NDArray]]:
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         """
         Update indices for the next iteration of the bootstrap.  This must
         be overridden when creating new bootstraps.
@@ -1225,7 +1230,7 @@ def update_indices(self) -> Tuple[List[NDArray], Dict[str, NDArray]]:
         return pos_indices, kw_indices
 
     @property
-    def index(self) -> Tuple[List[NDArray], Dict[str, NDArray]]:
+    def index(self) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         """
         Returns the current index of the bootstrap
 
@@ -1374,7 +1379,9 @@ def _repr_html(self) -> str:
         html += ", <strong>ID</strong>: " + hex(id(self)) + ")"
         return html
 
-    def update_indices(self) -> NDArray:
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         num_blocks = self._num_items // self.block_size
         if num_blocks * self.block_size < self._num_items:
             num_blocks += 1
@@ -1465,7 +1472,9 @@ def __init__(
         super().__init__(block_size, *args, **kwargs)
         self._p = 1.0 / block_size
 
-    def update_indices(self) -> NDArray:
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         indices = self.random_state.randint(self._num_items, size=self._num_items)
         indices = indices.astype(np.int64)
         u = self.random_state.random_sample(self._num_items)
@@ -1549,7 +1558,9 @@ def __init__(
     ) -> None:
         super().__init__(block_size, *args, **kwargs)
 
-    def update_indices(self) -> None:
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:
         num_blocks = self._num_items // self.block_size
         if num_blocks * self.block_size < self._num_items:
             num_blocks += 1
@@ -1571,5 +1582,7 @@ def __init__(
         super().__init__(*args, **kwargs)
         self.block_size = block_size
 
-    def update_indices(self) -> None:  # pragma: no cover
+    def update_indices(
+        self,
+    ) -> Union[NDArray, Tuple[List[NDArray], Dict[str, NDArray]]]:  # pragma: no cover
         raise NotImplementedError
diff --git a/arch/bootstrap/multiple_comparison.py b/arch/bootstrap/multiple_comparison.py
@@ -122,7 +122,8 @@ def __init__(
         else:
             self.block_size = block_size
 
-        self.t, self.k = losses.shape
+        self.t: int = losses.shape[0]
+        self.k: int = losses.shape[1]
         self.method = method
         # Bootstrap indices since the same bootstrap should be used in the
         # repeated steps
@@ -402,7 +403,8 @@ def __init__(
             nested=nested,
         )
         self.block_size = self.spa.block_size
-        self.t, self.k = self.models.shape
+        self.t = self.models.shape[0]
+        self.k = self.models.shape[1]
         self.reps = reps
         self.size = size
         self._superior_models: Optional[List[Hashable]] = None
@@ -430,15 +432,15 @@ def compute(self) -> None:
         self.spa.compute()
         # 2. If any models superior, store indices, remove and re-run SPA
         better_models = list(self.spa.better_models(self.size))
-        all_better_models = better_models
+        all_better_models = better_models[:]
         # 3. Stop if nothing superior
         while better_models and (len(better_models) < self.k):
             # A. Use Selector to remove better models
             selector = np.ones(self.k, dtype=np.bool)
             if isinstance(self.models, pd.DataFrame):  # Columns
                 selector[self.models.columns.isin(all_better_models)] = False
             else:
-                selector[np.array(list(all_better_models))] = False
+                selector[np.array(all_better_models)] = False
             self.spa.subset(selector)
             # B. Rerun
             self.spa.compute()
@@ -447,7 +449,6 @@ def compute(self) -> None:
         # Reset SPA
         selector = np.ones(self.k, dtype=np.bool)
         self.spa.subset(selector)
-        all_better_models = list(all_better_models)
         all_better_models.sort()
         self._superior_models = all_better_models
 

diff --git a/arch/tests/covariance/test_covariance.py b/arch/tests/covariance/test_covariance.py
@@ -109,7 +109,7 @@ def test_covariance_errors(data: ArrayLike, estimator: Type[CovarianceEstimator]
     with pytest.raises(ValueError, match="df_adjust must be a non-negative"):
         estimator(data, df_adjust=-2)
     with pytest.raises(ValueError, match="df_adjust must be a non-negative"):
-        estimator(data, df_adjust=np.ones(2))
+        estimator(data, df_adjust=np.ones(2))  # type: ignore
     with pytest.raises(ValueError, match="bandwidth must be"):
         estimator(data, bandwidth=-3)
     with pytest.raises(ValueError, match="weights must be"):

diff --git a/arch/tests/univariate/test_forecast.py b/arch/tests/univariate/test_forecast.py
@@ -394,8 +394,8 @@ def test_ar1_forecast_bootstrap(self):
         )
         rs.set_state(state)
         repeat = res.forecast(horizon=5, start=900, method="bootstrap", random_state=rs)
-        assert_frame_equal(forecast.mean, repeat.mean, check_less_precise=True)
-        assert_frame_equal(forecast.variance, repeat.variance, check_less_precise=True)
+        assert_frame_equal(forecast.mean, repeat.mean)
+        assert_frame_equal(forecast.variance, repeat.variance)
 
     def test_ar2_garch11(self):
         pass

diff --git a/arch/tests/utility/test_timeseries.py b/arch/tests/utility/test_timeseries.py
@@ -13,6 +13,11 @@ def rng():
     return RandomState(12345)
 
 
+def test_add_trend_err():
+    with pytest.raises(ValueError, match="One and only one"):
+        add_trend(x=None, trend="ctt", nobs=None)
+
+
 def test_add_trend_prepend(rng):
     n = 10
     x = rng.randn(n, 1)

diff --git a/arch/typing.py b/arch/typing.py
@@ -25,4 +25,4 @@
 AnyPandas = Union[Series, DataFrame]
 DateLike = Union[str, dt.datetime, np.datetime64, Timestamp]
 Label = Optional[Hashable]
-FloatOrArray = TypeVar("FloatOrArray", float, NDArray)
+FloatOrArray = TypeVar("FloatOrArray", float, np.ndarray)
diff --git a/arch/unitroot/cointegration.py b/arch/unitroot/cointegration.py
@@ -439,6 +439,7 @@ def summary(self, full: bool = False) -> Summary:
             pvalues = np.asarray(self.pvalues)
 
         title = "Cointegrating Vector" if not full else "Model Parameters"
+        assert isinstance(se, np.ndarray)
         table = self._param_table(params, se, tstats, pvalues, stubs, title)
         smry.tables.append(table)
 
@@ -614,11 +615,11 @@ def _ic(self, resids: NDArray, nparam: int) -> float:
         nobs = resids.shape[0]
         sigma2 = resids.T @ resids / nobs
         if self._method == "aic":
-            penalty = 2
+            penalty = 2.0
         elif self._method == "hqic":
-            penalty = 2 * np.log(np.log(nobs))
+            penalty = 2.0 * float(np.log(np.log(nobs)))
         else:  # bic
-            penalty = np.log(nobs)
+            penalty = float(np.log(nobs))
         return np.log(sigma2) + nparam * penalty / nobs
 
     def _max_lead_lag(self) -> int:

diff --git a/arch/unitroot/critical_values/dfgls.py b/arch/unitroot/critical_values/dfgls.py
@@ -32,11 +32,11 @@
 dfgls_tau_star = {"c": -0.4795076091714674, "ct": -2.1960404365401298}
 
 dfgls_large_p = {
-    "c": array([0.50612497, 0.98305664, -0.05648525, 0.00140875]),
-    "ct": array([2.60561421, 1.67850224, 0.0373599, -0.01017936]),
+    "c": [0.50612497, 0.98305664, -0.05648525, 0.00140875],
+    "ct": [2.60561421, 1.67850224, 0.0373599, -0.01017936],
 }
 
 dfgls_small_p = {
-    "c": array([0.67422739, 1.25475826, 0.03572509]),
-    "ct": array([2.38767685, 1.57454737, 0.05754439]),
+    "c": [0.67422739, 1.25475826, 0.03572509],
+    "ct": [2.38767685, 1.57454737, 0.05754439],
 }
diff --git a/arch/unitroot/unitroot.py b/arch/unitroot/unitroot.py
@@ -218,10 +218,10 @@ def _autolag_ols_low_memory(
     lhs = deltay[maxlag:][:, None]
     level = y[maxlag:-1]
     level = level / sqrt(level @ level)
-    trendx = []
+    trendx: List[NDArray] = []
     nobs = lhs.shape[0]
     if trend == "n":
-        trendx = empty((nobs, 0))
+        trendx.append(empty((nobs, 0)))
     else:
         if "tt" in trend:
             tt = arange(1, nobs + 1, dtype=float64)[:, None] ** 2
@@ -233,8 +233,7 @@ def _autolag_ols_low_memory(
             trendx.append(t)
         if trend.startswith("c"):
             trendx.append(ones((nobs, 1)) / sqrt(nobs))
-        trendx = hstack(trendx)
-    rhs = hstack([level[:, None], trendx])
+    rhs = hstack([level[:, None], hstack(trendx)])
     m = rhs.shape[1]
     xpx = empty((m + maxlag, m + maxlag)) * nan
     xpy = empty((m + maxlag, 1)) * nan
@@ -1374,7 +1373,7 @@ def _autolag(self) -> None:
         pwr = 1.0 / 3.0
         gamma_hat = 1.1447 * power(s_hat * s_hat, pwr)
         autolags = amin([self._nobs, int(gamma_hat * power(self._nobs, pwr))])
-        self._lags = autolags
+        self._lags = int(autolags)
 
 
 class ZivotAndrews(UnitRootTest, metaclass=AbstractDocStringInheritor):
@@ -1564,7 +1563,7 @@ def _compute_statistic(self) -> None:
         self._all_stats[start_period + 1 : end_period + 1] = stats[
             start_period + 1 : end_period + 1
         ]
-        self._stat = amin(stats)
+        self._stat = float(amin(stats))
         self._cv_interpolate()
 
     def _cv_interpolate(self) -> None:
@@ -1777,7 +1776,9 @@ def _compute_statistic(self) -> None:
         assert self._vr is not None
 
         self._stat = sqrt(nq) * (self._vr - 1) / sqrt(self._stat_variance)
-        self._pvalue = 2 - 2 * norm.cdf(abs(self._stat))
+        assert self._stat is not None
+        abs_stat = float(abs(self._stat))
+        self._pvalue = 2 - 2 * norm.cdf(abs(abs_stat))
 
 
 def mackinnonp(
@@ -1857,7 +1858,7 @@ def mackinnonp(
     if stat <= starstat:
         poly_coef = small_p
         if dist_type == "adf-z":
-            stat = log(abs(stat))  # Transform stat for small p ADF-z
+            stat = float(log(abs(stat)))  # Transform stat for small p ADF-z
     else:
         poly_coef = large_p
     return norm.cdf(polyval(poly_coef[::-1], stat))