Implement NaiveVariancePredictionIntervals (#109)

* added implementation * added test for backtest * updated base tests * added tests * updated documentation * updated changelog * review fixes
etna-team · Oct 13, 2023 · 3791c4a · 3791c4a
1 parent 2651829
commit 3791c4a
Show file tree

Hide file tree

Showing 7 changed files with 327 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add `LimitTransform` ([#63](https://github.com/etna-team/etna/pull/63))
 - Add config for Codecov to control CI ([#80](https://github.com/etna-team/etna/pull/80)) 
 - Add `EventTransform` ([#78](https://github.com/etna-team/etna/pull/78))
+- `NaiveVariancePredictionIntervals` method for prediction quantiles estimation ([#109](https://github.com/etna-team/etna/pull/109))
 
 ### Changed
 -

diff --git a/docs/source/api_reference/experimental.rst b/docs/source/api_reference/experimental.rst
@@ -34,3 +34,4 @@ Prediction Intervals:
    :template: class.rst
 
    prediction_intervals.BasePredictionIntervals
+   prediction_intervals.NaiveVariancePredictionIntervals
diff --git a/etna/experimental/prediction_intervals/__init__.py b/etna/experimental/prediction_intervals/__init__.py
@@ -1 +1,2 @@
 from etna.experimental.prediction_intervals.base import BasePredictionIntervals
+from etna.experimental.prediction_intervals.naive_variance import NaiveVariancePredictionIntervals
diff --git a/etna/experimental/prediction_intervals/naive_variance.py b/etna/experimental/prediction_intervals/naive_variance.py
@@ -0,0 +1,122 @@
+from typing import Sequence
+
+import numpy as np
+import pandas as pd
+import scipy.stats as scs
+
+from etna.datasets import TSDataset
+from etna.experimental.prediction_intervals import BasePredictionIntervals
+from etna.loggers import tslogger
+from etna.pipeline import BasePipeline
+from etna.pipeline.base import _DummyMetric
+
+
+class NaiveVariancePredictionIntervals(BasePredictionIntervals):
+    """Estimate prediction variance based on historical residuals.
+
+    ``NaiveVariancePredictionIntervals`` provides the possibility to estimate prediction quantiles using the following algorithm:
+
+    1. Compute the residuals matrix :math:`r_{it} = \hat y_{it} - y_{it}` using k-fold backtest, where :math:`i` is fold index.
+
+    2. Estimate variance for each step in the prediction horizon :math:`v_t = \\frac{1}{k} \sum_{i = 1}^k r_{it}^2`.
+
+    3. Use :math:`z` scores and estimated variance to compute corresponding quantiles.
+
+    `Reference implementation <https://www.sktime.net/en/stable/api_reference/auto_generated/sktime.forecasting.naive.NaiveVariance.html>`_.
+    """
+
+    def __init__(self, pipeline: BasePipeline, stride: int = 1):
+        """Initialize instance of ``NaiveVariancePredictionIntervals`` with given parameters.
+
+        Parameters
+        ----------
+        pipeline:
+            Base pipeline or ensemble for prediction intervals estimation.
+        stride:
+            Number of points between folds.
+        """
+        if stride <= 0:
+            raise ValueError("Parameter `stride` must be positive!")
+
+        self.stride = stride
+
+        super().__init__(pipeline=pipeline)
+
+    def _forecast_prediction_interval(
+        self, ts: TSDataset, predictions: TSDataset, quantiles: Sequence[float], n_folds: int
+    ) -> TSDataset:
+        """Estimate and store prediction intervals.
+
+        Parameters
+        ----------
+        ts:
+            Dataset to forecast.
+        predictions:
+            Dataset with point predictions.
+        quantiles:
+            Levels of prediction distribution.
+        n_folds:
+            Number of folds to use in the backtest for prediction interval estimation.
+
+        Returns
+        -------
+        :
+            Dataset with predictions.
+        """
+        residuals = self._compute_resids_matrices(ts=ts, n_folds=n_folds)
+
+        variance = self._estimate_variance(residual_matrices=residuals)
+
+        borders = []
+        for q in quantiles:
+            z_score = scs.norm.ppf(q=q)
+            interval_border = predictions[:, :, "target"] + np.sqrt(variance) * z_score
+            interval_border.rename({"target": f"target_{q:.4g}"}, inplace=True, axis=1)
+            borders.append(interval_border)
+
+        quantiles_df = pd.concat(borders, axis=1)
+        predictions.add_prediction_intervals(prediction_intervals_df=quantiles_df)
+        return predictions
+
+    def _compute_resids_matrices(self, ts: TSDataset, n_folds: int) -> np.ndarray:
+        """Estimate residuals matrices with backtest.
+
+        Parameters
+        ----------
+        ts:
+            Dataset to estimate residuals.
+        n_folds:
+            Number of folds for backtest.
+
+        Returns
+        -------
+        :
+            Residuals matrices for each segment. Array with shape: ``(n_folds, horizon, n_segments)``.
+        """
+        with tslogger.disable():
+            _, backtest_forecasts, _ = self.backtest(
+                ts=ts, metrics=[_DummyMetric()], n_folds=n_folds, stride=self.stride
+            )
+
+        residuals = backtest_forecasts.loc[:, pd.IndexSlice[:, "target"]] - ts[backtest_forecasts.index, :, "target"]
+
+        # shape: (n_folds, horizon, n_segments)
+        residual_matrices = residuals.values.reshape((-1, self.horizon, len(ts.segments)))
+        return residual_matrices
+
+    def _estimate_variance(self, residual_matrices: np.ndarray) -> np.ndarray:
+        """Estimate variance from residuals matrices.
+
+        Parameters
+        ----------
+        residual_matrices:
+            Multidimensional array with shape ``(n_folds, horizon, n_segments)``.
+
+        Returns
+        -------
+        :
+            Estimated variance. Array with shape ``(horizon, n_segments)``.
+
+        """
+        variance = np.mean(np.power(residual_matrices, 2), axis=0)
+        return variance
diff --git a/tests/test_experimental/test_prediction_intervals/common.py b/tests/test_experimental/test_prediction_intervals/common.py
@@ -1,17 +1,21 @@
 from typing import Dict
 from typing import Sequence
 
+import numpy as np
 import pandas as pd
 
 from etna.datasets import TSDataset
 from etna.distributions import BaseDistribution
 from etna.distributions import FloatDistribution
 from etna.experimental.prediction_intervals import BasePredictionIntervals
+from etna.models import CatBoostPerSegmentModel
 from etna.models import NaiveModel
+from etna.models import SARIMAXModel
 from etna.pipeline import BasePipeline
 from etna.pipeline import Pipeline
 from etna.transforms import AddConstTransform
 from etna.transforms import DateFlagsTransform
+from etna.transforms import LagTransform
 
 
 def get_naive_pipeline(horizon):
@@ -23,6 +27,25 @@ def get_naive_pipeline_with_transforms(horizon):
     return Pipeline(model=NaiveModel(), transforms=transforms, horizon=horizon)
 
 
+def get_catboost_pipeline(horizon):
+    transforms = [LagTransform(in_column="target", lags=list(range(horizon, 2 * horizon)))]
+    return Pipeline(model=CatBoostPerSegmentModel(), transforms=transforms, horizon=horizon)
+
+
+def get_arima_pipeline(horizon):
+    return Pipeline(model=SARIMAXModel(order=(1, 0, 1)), horizon=horizon)
+
+
+def run_base_pipeline_compat_check(ts, intervals_pipeline, expected_columns):
+    intervals_pipeline.fit(ts=ts)
+
+    intervals_pipeline_pred = intervals_pipeline.forecast(prediction_interval=True)
+    columns = intervals_pipeline_pred.df.columns.get_level_values("feature")
+
+    assert len(expected_columns - set(columns)) == 0
+    assert np.sum(intervals_pipeline_pred.df.isna().values) == 0
+
+
 class DummyPredictionIntervals(BasePredictionIntervals):
     """Dummy class for testing."""
 

diff --git a/tests/test_experimental/test_prediction_intervals/test_base.py b/tests/test_experimental/test_prediction_intervals/test_base.py
@@ -1,4 +1,3 @@
-import numpy as np
 import pandas as pd
 import pytest
 
@@ -15,27 +14,18 @@
 from etna.pipeline import AutoRegressivePipeline
 from etna.pipeline import HierarchicalPipeline
 from etna.pipeline import Pipeline
+from etna.pipeline.base import _DummyMetric
 from etna.reconciliation import BottomUpReconciliator
 from etna.transforms import DateFlagsTransform
 from etna.transforms import DeseasonalityTransform
 from tests.test_experimental.test_prediction_intervals.common import DummyPredictionIntervals
 from tests.test_experimental.test_prediction_intervals.common import get_naive_pipeline
 from tests.test_experimental.test_prediction_intervals.common import get_naive_pipeline_with_transforms
+from tests.test_experimental.test_prediction_intervals.common import run_base_pipeline_compat_check
 from tests.test_experimental.test_prediction_intervals.utils import assert_sampling_is_valid
 from tests.test_pipeline.utils import assert_pipeline_equals_loaded_original
 
 
-def run_base_pipeline_compat_check(ts, pipeline, expected_columns):
-    intervals_pipeline = DummyPredictionIntervals(pipeline=pipeline)
-    intervals_pipeline.fit(ts=ts)
-
-    intervals_pipeline_pred = intervals_pipeline.forecast(prediction_interval=True)
-    columns = intervals_pipeline_pred.df.columns.get_level_values("feature")
-
-    assert len(expected_columns - set(columns)) == 0
-    assert np.sum(intervals_pipeline_pred.df.isna().values) == 0
-
-
 def test_pipeline_ref_initialized(naive_pipeline):
     intervals_pipeline = DummyPredictionIntervals(pipeline=naive_pipeline)
 
@@ -85,6 +75,17 @@ def test_forecast_with_fitted_pipeline(example_tsds, pipeline_name, request):
     pd.testing.assert_frame_equal(intervals_pipeline_pred.df, pipeline_pred.df)
 
 
+@pytest.mark.parametrize("pipeline_name", ("naive_pipeline", "naive_pipeline_with_transforms"))
+def test_backtest(example_tsds, pipeline_name, request):
+    pipeline = request.getfixturevalue(pipeline_name)
+    _, pipeline_results, _ = pipeline.backtest(ts=example_tsds, metrics=[_DummyMetric()])
+
+    intervals_pipeline = DummyPredictionIntervals(pipeline=pipeline)
+    _, intervals_pipeline_results, _ = intervals_pipeline.backtest(ts=example_tsds, metrics=[_DummyMetric()])
+
+    pd.testing.assert_frame_equal(pipeline_results, intervals_pipeline_results)
+
+
 @pytest.mark.parametrize(
     "expected_columns",
     ({"target", "target_lower", "target_upper"},),
@@ -103,8 +104,11 @@ def test_forecast_with_fitted_pipeline(example_tsds, pipeline_name, request):
     ),
 )
 def test_pipelines_forecast_intervals(product_level_constant_hierarchical_ts, pipeline, expected_columns):
+    intervals_pipeline = DummyPredictionIntervals(pipeline=pipeline)
     run_base_pipeline_compat_check(
-        ts=product_level_constant_hierarchical_ts, pipeline=pipeline, expected_columns=expected_columns
+        ts=product_level_constant_hierarchical_ts,
+        intervals_pipeline=intervals_pipeline,
+        expected_columns=expected_columns,
     )
 
 
@@ -121,7 +125,10 @@ def test_pipelines_forecast_intervals(product_level_constant_hierarchical_ts, pi
     ),
 )
 def test_ensembles_forecast_intervals(example_tsds, ensemble, expected_columns):
-    run_base_pipeline_compat_check(ts=example_tsds, pipeline=ensemble, expected_columns=expected_columns)
+    intervals_pipeline = DummyPredictionIntervals(pipeline=ensemble)
+    run_base_pipeline_compat_check(
+        ts=example_tsds, intervals_pipeline=intervals_pipeline, expected_columns=expected_columns
+    )
 
 
 @pytest.mark.parametrize(