-
Notifications
You must be signed in to change notification settings - Fork 89
/
_fh.py
845 lines (716 loc) · 29.6 KB
/
_fh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
"""Implements functionality for specifying forecast horizons in aeon."""
__author__ = ["mloning", "fkiraly", "eenticott-shell", "khrapovs"]
__all__ = ["ForecastingHorizon"]
from functools import lru_cache
from typing import Optional
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
from aeon.utils.datetime import _coerce_duration_to_int
from aeon.utils.validation import (
array_is_int,
array_is_timedelta_or_date_offset,
is_array,
is_int,
is_timedelta_or_date_offset,
)
from aeon.utils.validation.series import (
VALID_INDEX_TYPES,
is_in_valid_absolute_index_types,
is_in_valid_index_types,
is_in_valid_relative_index_types,
is_integer_index,
)
# Python-level types listed as accepted inputs for forecasting horizon values.
VALID_FORECASTING_HORIZON_TYPES = (int, list, np.ndarray, pd.Index)

# Names of pandas.Index attributes that ForecastingHorizon attaches to itself in
# `__new__`; each one is forwarded to the wrapped index through `_delegator`.
# The order of the entries is kept as-is.
DELEGATED_METHODS = (
    # binary arithmetic
    "__sub__", "__add__", "__mul__", "__div__", "__divmod__", "__pow__",
    # comparisons
    "__gt__", "__ge__", "__ne__", "__lt__", "__eq__", "__le__",
    # reflected arithmetic
    "__radd__", "__rsub__", "__rmul__", "__rdiv__",
    "__rmod__", "__rdivmod__", "__rpow__",
    # container protocol
    "__getitem__", "__len__",
    # reductions
    "max", "min",
)  # fmt: skip
def _delegator(method):
"""
Automatically decorate ForecastingHorizon class with pandas.Index methods.
Also delegates method calls to wrapped pandas.Index object.
methods from pandas.Index and delegate method calls to wrapped pandas.Index
"""
def delegated(obj, *args, **kwargs):
return getattr(obj.to_pandas(), method)(*args, **kwargs)
return delegated
def _check_values(values: VALID_FORECASTING_HORIZON_TYPES) -> pd.Index:
    """Validate forecasting horizon values and coerce them to a pandas index.

    Parameters
    ----------
    values : int, list, array, certain pd.Index types
        Forecasting horizon with steps ahead to predict.

    Returns
    -------
    values : pd.Index
        Validated forecasting horizon values, sorted via ``sort_values()``.

    Raises
    ------
    TypeError
        If the type of `values` is not supported.
    ValueError
        If `values` contains duplicates.
    """
    if is_in_valid_index_types(values):
        # Already a supported pandas index: it is wrapped as-is. The original
        # note here says the helper compares exact types, because pandas index
        # classes inherit from each other and isinstance() would be too lax.
        fh_index = values
    elif is_int(values):
        # single integer step
        fh_index = pd.Index([values], dtype=int)
    elif is_timedelta_or_date_offset(values):
        # single timedelta / date offset step
        fh_index = pd.Index([values])
    elif is_array(values) and array_is_int(values):
        # list or array of integer steps
        fh_index = pd.Index(values, dtype=int)
    elif is_array(values) and array_is_timedelta_or_date_offset(values):
        # list or array of timedelta / date offset steps
        fh_index = pd.Index(values)
    else:
        supported = (
            "int",
            "1D np.ndarray of type int",
            "1D np.ndarray of type timedelta or dateoffset",
            "list",
            *(f"pd.{index_type.__name__}" for index_type in VALID_INDEX_TYPES),
        )
        raise TypeError(
            f"Invalid `fh`. The type of the passed `fh` values is not supported. "
            f"Please use one of {supported}, but found type {type(values)}, "
            f"values = {values}"
        )

    # every horizon step may appear only once
    if fh_index.nunique() != len(fh_index):
        raise ValueError(
            "Invalid `fh`. The `fh` values must not contain any duplicates."
        )

    return fh_index.sort_values()
def _check_freq(obj):
"""
Coerce obj to a pandas frequency offset for the ForecastingHorizon.
Parameters
----------
obj : pd.Index, pd.Period, pandas offset, or None
Returns
-------
pd.offset
Raises
------
TypeError if the type assumption on obj is not met
"""
if isinstance(obj, pd.offsets.BaseOffset):
return obj
elif hasattr(obj, "_cutoff"):
return _check_freq(obj._cutoff)
elif isinstance(obj, (pd.Period, pd.Index)):
return _extract_freq_from_cutoff(obj)
elif isinstance(obj, str) or obj is None:
return to_offset(obj)
else:
return None
def _extract_freq_from_cutoff(x) -> Optional[str]:
"""Extract frequency string from cutoff.
Parameters
----------
x : pd.Period, pd.PeriodIndex, pd.DatetimeIndex
Returns
-------
str : Frequency string or None
"""
if isinstance(x, (pd.Period, pd.PeriodIndex, pd.DatetimeIndex)):
return x.freq
else:
return None
class ForecastingHorizon:
    """Forecasting horizon.

    Parameters
    ----------
    values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
        Values of forecasting horizon
    is_relative : bool or None, optional (default=True)
        - If True, a relative ForecastingHorizon is created:
                values are relative to end of training series.
        - If False, an absolute ForecastingHorizon is created:
                values are absolute.
        - if None, the flag is determined automatically:
            relative, if values are of supported relative index type
            absolute, if not relative and values of supported absolute index type
    freq : str, pd.Index, pandas offset, or aeon forecaster, optional (default=None)
        object carrying frequency information on values
        used if values is without inferrable freq; if both carry a frequency
        and the two differ, a ValueError is raised

    Examples
    --------
    >>> from aeon.forecasting.base import ForecastingHorizon
    >>> from aeon.forecasting.naive import NaiveForecaster
    >>> from aeon.datasets import load_airline
    >>> from aeon.forecasting.model_selection import temporal_train_test_split
    >>> import numpy as np
    >>> y = load_airline()
    >>> y_train, y_test = temporal_train_test_split(y, test_size=6)

        List as ForecastingHorizon

    >>> ForecastingHorizon([1, 2, 3])
    ForecastingHorizon([1, 2, 3], ..., is_relative=True)

        Numpy as ForecastingHorizon

    >>> ForecastingHorizon(np.arange(1, 7))
    ForecastingHorizon([1, 2, 3, 4, 5, 6], ..., is_relative=True)

        Absolute ForecastingHorizon with a pandas Index

    >>> ForecastingHorizon(y_test.index, is_relative=False) # doctest: +SKIP
    ForecastingHorizon(['1960-07', '1960-08', '1960-09', '1960-10',
        '1960-11', '1960-12'], dtype='period[M]', name='Period', is_relative=False)

        Converting

    >>> # set cutoff (last time point of training data)
    >>> cutoff = y_train.index[-1]
    >>> cutoff
    Period('1960-06', 'M')
    >>> # to_relative
    >>> fh = ForecastingHorizon(y_test.index, is_relative=False)
    >>> fh.to_relative(cutoff=cutoff)
    ForecastingHorizon([1, 2, 3, 4, 5, 6], ..., is_relative=True)

    >>> # to_absolute
    >>> fh = ForecastingHorizon([1, 2, 3, 4, 5, 6], is_relative=True)
    >>> fh.to_absolute(cutoff=cutoff) # doctest: +SKIP
    ForecastingHorizon(['1960-07', '1960-08', '1960-09', '1960-10',
        '1960-11', '1960-12'], dtype='period[M]', is_relative=False)

        Automatically casted ForecastingHorizon from list when calling predict()

    >>> forecaster = NaiveForecaster(strategy="drift")
    >>> forecaster.fit(y_train)
    NaiveForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    >>> forecaster.fh
    ForecastingHorizon([1, 2, 3], ..., is_relative=True)

        This is identical to give an object of ForecastingHorizon

    >>> y_pred = forecaster.predict(fh=ForecastingHorizon([1,2,3]))
    >>> forecaster.fh
    ForecastingHorizon([1, 2, 3], ..., is_relative=True)
    """

    def __new__(
        cls,
        values: VALID_FORECASTING_HORIZON_TYPES = None,
        is_relative: bool = None,
        freq=None,
    ):
        """Create a new ForecastingHorizon object."""
        # ForecastingHorizon is meant to behave like an extension of a pandas
        # index. Subclassing pandas indices is not straightforward, so the index
        # is wrapped instead, and the basic index methods listed in
        # DELEGATED_METHODS are attached to the class here, each forwarding to
        # the wrapped index. This runs on every instantiation.
        for method_name in DELEGATED_METHODS:
            setattr(cls, method_name, _delegator(method_name))
        return object.__new__(cls)

    def __init__(
        self,
        values: VALID_FORECASTING_HORIZON_TYPES = None,
        is_relative: Optional[bool] = True,
        freq=None,
    ):
        # coerce the values to a validated pd.Index
        fh_index = _check_values(values)
        self._values = fh_index

        # Frequency: first try to read it off the values themselves, then feed
        # the `freq` argument through the same setter. The setter keeps an
        # already known frequency when the new source carries none, and raises
        # if the two sources disagree.
        if hasattr(fh_index, "index") and hasattr(fh_index.index, "freq"):
            self.freq = fh_index.index.freq
        elif hasattr(fh_index, "freq"):
            self.freq = fh_index.freq
        self.freq = freq

        # is_relative must be a genuine bool, or None to request inference
        if not (is_relative is None or isinstance(is_relative, bool)):
            raise TypeError("`is_relative` must be a boolean or None")

        # message is built from the flag as passed in, before any inference
        error_msg = f"`values` type is not compatible with `is_relative={is_relative}`."

        if is_relative is None:
            # infer the flag from the index type, relative types taking priority
            if is_in_valid_relative_index_types(fh_index):
                is_relative = True
            elif is_in_valid_absolute_index_types(fh_index):
                is_relative = False
            else:
                raise TypeError(f"{type(fh_index)} is not a supported fh index type")

        # the index type has to be admissible for the (given or inferred) flag
        if is_relative:
            type_is_compatible = is_in_valid_relative_index_types(fh_index)
        else:
            type_is_compatible = is_in_valid_absolute_index_types(fh_index)
        if not type_is_compatible:
            raise TypeError(error_msg)

        self._is_relative = is_relative

    def _new(
        self,
        values: VALID_FORECASTING_HORIZON_TYPES = None,
        is_relative: bool = None,
        freq: str = None,
    ):
        """Construct new ForecastingHorizon based on current object.

        Every argument left as None is taken over from the current object.

        Parameters
        ----------
        values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
            Values of forecasting horizon.
        is_relative : bool, default=same as self.is_relative
            - If None, determined automatically: same as self.is_relative
            - If True, values are relative to end of training series.
            - If False, values are absolute.
        freq : str, optional (default=None)
            Frequency string; if None, the frequency stored on self is used.

        Returns
        -------
        ForecastingHorizon :
            New ForecastingHorizon based on current object
        """
        new_values = self._values if values is None else values
        new_is_relative = self._is_relative if is_relative is None else is_relative
        new_freq = self._freq if freq is None else freq
        return type(self)(
            values=new_values, is_relative=new_is_relative, freq=new_freq
        )

    @property
    def is_relative(self) -> bool:
        """Whether forecasting horizon is relative to the end of the training series.

        Returns
        -------
        is_relative : bool
        """
        return self._is_relative

    @property
    def freq(self) -> str:
        """Frequency attribute.

        Returns
        -------
        freq : pandas frequency string
            ``freqstr`` of the stored pandas offset, or None if no frequency
            is stored.
        """
        # _freq, when set, is a pandas offset; its string form is `freqstr`
        stored_offset = getattr(self, "_freq", None)
        return getattr(stored_offset, "freqstr", None)

    @freq.setter
    def freq(self, obj) -> None:
        """Frequency setter.

        Attempts to set/update frequency from obj.
        Sets self._freq to a pandas offset object (frequency representation).
        Frequency is extracted from obj, via _check_freq.
        Raises error if _freq is already set, and discrepant from frequency of obj.

        Parameters
        ----------
        obj : str, pd.Index, BaseForecaster, pandas offset
            object carrying frequency information on self.values

        Raises
        ------
        ValueError : if freq is already set and discrepant from frequency of obj
        """
        incoming = _check_freq(obj)
        current = getattr(self, "_freq", None)

        if incoming is None:
            # nothing new: keep what is stored, or initialise the slot to None
            self._freq = current
        elif current is None:
            # first frequency information seen for this horizon
            self._freq = incoming
        elif current != incoming:
            raise ValueError(
                "Frequencies from two sources do not coincide: "
                f"Current: {current}, from update: {incoming}."
            )

    def to_pandas(self) -> pd.Index:
        """Return forecasting horizon's underlying values as pd.Index.

        Returns
        -------
        fh : pd.Index
            pandas Index containing forecasting horizon's underlying values.
        """
        return self._values

    def to_numpy(self, **kwargs) -> np.ndarray:
        """Return forecasting horizon's underlying values as np.array.

        Parameters
        ----------
        **kwargs : dict of kwargs
            kwargs passed to `to_numpy()` of wrapped pandas index.

        Returns
        -------
        fh : np.ndarray
            NumPy array containing forecasting horizon's underlying values.
        """
        wrapped_index = self.to_pandas()
        return wrapped_index.to_numpy(**kwargs)

    def _coerce_cutoff_to_index_element(self, cutoff):
        """Coerce cutoff to an index element, and update self.freq with cutoff.

        If cutoff is a pd.Index, its last element is returned; any other cutoff
        is returned unchanged.
        """
        # the frequency update happens before the index is reduced to an element
        self.freq = cutoff
        if not isinstance(cutoff, pd.Index):
            return cutoff
        assert len(cutoff) > 0
        return cutoff[-1]

    def to_relative(self, cutoff=None):
        """Return forecasting horizon values relative to a cutoff.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, or pd.Index, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).
            If pd.Index, last/latest value is considered the cutoff

        Returns
        -------
        fh : ForecastingHorizon
            Relative representation of forecasting horizon.
        """
        cutoff_element = self._coerce_cutoff_to_index_element(cutoff)
        return _to_relative(fh=self, cutoff=cutoff_element)

    def to_absolute(self, cutoff):
        """Return absolute version of forecasting horizon values.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, or pd.Index
            Cutoff value is required to convert a relative forecasting
            horizon to an absolute one (and vice versa).
            If pd.Index, last/latest value is considered the cutoff

        Returns
        -------
        fh : ForecastingHorizon
            Absolute representation of forecasting horizon.
        """
        cutoff_element = self._coerce_cutoff_to_index_element(cutoff)
        return _to_absolute(fh=self, cutoff=cutoff_element)

    def to_absolute_int(self, start, cutoff=None):
        """Return absolute values as zero-based integer index starting from `start`.

        Parameters
        ----------
        start : pd.Period, pd.Timestamp, int
            Start value returned as zero.
        cutoff : pd.Period, pd.Timestamp, int, or pd.Index, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).
            If pd.Index, last/latest value is considered the cutoff

        Returns
        -------
        fh : ForecastingHorizon
            Absolute representation of forecasting horizon as zero-based
            integer index.
        """
        cutoff = self._coerce_cutoff_to_index_element(cutoff)
        freq = self.freq

        # Timestamps and DatetimeIndex are moved to their period counterparts
        # for reliable arithmetic and computation of time deltas.
        if isinstance(cutoff, pd.Timestamp):
            cutoff = _coerce_to_period(cutoff, freq=freq)

        # the cutoff itself is validated inside the absolute conversion
        absolute = self.to_absolute(cutoff).to_pandas()
        if isinstance(absolute, pd.DatetimeIndex):
            absolute = _coerce_to_period(absolute, freq=freq)

        # the start value is validated here, against the absolute index
        if isinstance(start, pd.Timestamp):
            start = _coerce_to_period(start, freq=freq)
        _check_cutoff(start, absolute)

        # Note: We should here also coerce to periods for more reliable arithmetic
        # operations as in `to_relative` but currently doesn't work with
        # `update_predict` and incomplete time indices where the `freq` information
        # is lost, see comment on issue #534
        #
        # The element-wise subtraction circumvents a bug in pandas:
        # periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
        # periods - periods[0]
        # Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
        # [v - periods[0] for v in periods]
        # Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
        offsets_from_start = pd.Index([timepoint - start for timepoint in absolute])

        # durations are turned into integer multiples of the frequency
        if isinstance(absolute, (pd.PeriodIndex, pd.DatetimeIndex)):
            offsets_from_start = _coerce_duration_to_int(offsets_from_start, freq=freq)

        return self._new(offsets_from_start, is_relative=False)

    def to_in_sample(self, cutoff=None):
        """Return in-sample index values of fh.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one (and vice versa).

        Returns
        -------
        fh : ForecastingHorizon
            In-sample values of forecasting horizon.
        """
        in_sample_mask = self._is_in_sample(cutoff)
        return self._new(self.to_pandas()[in_sample_mask])

    def to_out_of_sample(self, cutoff=None):
        """Return out-of-sample values of fh.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value is required to convert a relative forecasting
            horizon to an absolute one (and vice versa).

        Returns
        -------
        fh : ForecastingHorizon
            Out-of-sample values of forecasting horizon.
        """
        out_of_sample_mask = self._is_out_of_sample(cutoff)
        return self._new(self.to_pandas()[out_of_sample_mask])

    def _is_in_sample(self, cutoff=None) -> np.ndarray:
        """Get boolean mask marking the in-sample values.

        A value is in-sample if its relative representation is <= zero.
        """
        relative = self.to_relative(cutoff).to_pandas()
        # zero has to match the kind of the relative index
        if is_integer_index(relative):
            zero = 0
        else:
            zero = pd.Timedelta(0)
        return relative <= zero

    def is_all_in_sample(self, cutoff=None) -> bool:
        """Whether the forecasting horizon is purely in-sample for given cutoff.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, default=None
            Cutoff value used to check if forecasting horizon is purely in-sample.

        Returns
        -------
        ret : bool
            True if the forecasting horizon is purely in-sample for given cutoff.
        """
        n_in_sample = sum(self._is_in_sample(cutoff))
        return n_in_sample == len(self)

    def _is_out_of_sample(self, cutoff=None) -> np.ndarray:
        """Get boolean mask marking the out-of-sample values."""
        in_sample_mask = self._is_in_sample(cutoff)
        return np.logical_not(in_sample_mask)

    def is_all_out_of_sample(self, cutoff=None) -> bool:
        """Whether the forecasting horizon is purely out-of-sample for given cutoff.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value used to check if forecasting horizon is purely
            out-of-sample.

        Returns
        -------
        ret : bool
            True if the forecasting horizon is purely out-of-sample for given
            cutoff.
        """
        n_out_of_sample = sum(self._is_out_of_sample(cutoff))
        return n_out_of_sample == len(self)

    def to_indexer(self, cutoff=None, from_cutoff=True):
        """Return zero-based indexer values for easy indexing into arrays.

        Parameters
        ----------
        cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
            Cutoff value required to convert a relative forecasting
            horizon to an absolute one and vice versa.
        from_cutoff : bool, optional (default=True)
            - If True, zero-based relative to cutoff.
            - If False, zero-based relative to first value in forecasting
            horizon.

        Returns
        -------
        fh : pd.Index
            Indexer.

        Raises
        ------
        NotImplementedError
            If `from_cutoff` is True and the relative horizon is not an
            integer index.
        """
        relative_fh = self.to_relative(cutoff)

        if not from_cutoff:
            # shift so that the first horizon value maps to zero
            return relative_fh - relative_fh.to_pandas()[0]

        relative_index = relative_fh.to_pandas()
        if not is_integer_index(relative_index):
            # What does indexer mean if fh is timedelta?
            msg = (
                "The indexer for timedelta-like forecasting horizon "
                "is not yet implemented"
            )
            raise NotImplementedError(msg)

        # one step ahead of the cutoff maps to position zero
        return relative_index - 1

    def __repr__(self):
        """Generate repr based on wrapped index repr."""
        index_repr = repr(self.to_pandas())
        # keep what follows the last "(" of the index repr, minus closing ")"
        inner = index_repr.split("(")[-1].strip(")")
        return f"{self.__class__.__name__}({inner}, is_relative={self.is_relative})"
# This function needs to be outside ForecastingHorizon
# since the lru_cache decorator has known, problematic interactions
# with object methods; see the B019 error of flake8-bugbear for a detailed
# explanation.
# See more here: https://github.com/sktime/sktime/issues/2338
# We cache the results from `to_relative()` and `to_absolute()` calls to speed up
# computations, as these are the basic methods and are often required internally
# when calling other methods.
@lru_cache(typed=True)
def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon:
    """Return forecasting horizon values relative to a cutoff.

    Parameters
    ----------
    fh : ForecastingHorizon
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting
        horizon to an absolute one (and vice versa).
        Not needed if `fh` is already relative.

    Returns
    -------
    fh : ForecastingHorizon
        Relative representation of forecasting horizon.
    """
    # already relative: hand back a fresh copy, the cutoff is not looked at
    if fh.is_relative:
        return fh._new()

    freq = fh.freq
    abs_index = fh.to_pandas()
    _check_cutoff(cutoff, abs_index)

    if isinstance(abs_index, pd.DatetimeIndex):
        # periods give more reliable arithmetic and time delta computations
        abs_index = _coerce_to_period(abs_index, freq=freq)
        cutoff = _coerce_to_period(cutoff, freq=freq)

    # TODO: Replace when we upgrade our lower pandas bound
    #  to a version where this is fixed
    # The element-wise subtraction circumvents a bug in pandas:
    # periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
    # periods - periods[0]
    # Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
    # [v - periods[0] for v in periods]
    # Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
    # This bug was reported: https://github.com/pandas-dev/pandas/issues/45999
    # and fixed: https://github.com/pandas-dev/pandas/pull/46006
    # Once the fix can be relied on, the line should simply be:
    # relative = absolute - cutoff
    steps = pd.Index([timepoint - cutoff for timepoint in abs_index])

    # durations (time deltas) become integer multiples of the frequency
    if isinstance(abs_index, (pd.PeriodIndex, pd.DatetimeIndex)):
        steps = _coerce_duration_to_int(steps, freq=freq)

    return fh._new(steps, is_relative=True, freq=freq)
# This function needs to be outside ForecastingHorizon
# since the lru_cache decorator has known, problematic interactions
# with object methods; see the B019 error of flake8-bugbear for a detailed
# explanation.
# See more here: https://github.com/sktime/sktime/issues/2338
@lru_cache(typed=True)
def _to_absolute(fh: ForecastingHorizon, cutoff) -> ForecastingHorizon:
    """Return absolute version of forecasting horizon values.

    Parameters
    ----------
    fh : ForecastingHorizon
    cutoff : pd.Period, pd.Timestamp, int
        Cutoff value is required to convert a relative forecasting
        horizon to an absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Absolute representation of forecasting horizon.
    """
    # already absolute: hand back a fresh copy, the cutoff is not looked at
    if not fh.is_relative:
        return fh._new()

    freq = fh.freq
    steps = fh.to_pandas()
    _check_cutoff(cutoff, steps)

    cutoff_is_timestamp = isinstance(cutoff, pd.Timestamp)
    if cutoff_is_timestamp:
        # do the arithmetic on periods, which is more reliable
        cutoff = _coerce_to_period(cutoff, freq=freq)

    if isinstance(cutoff, pd.Period):
        # workaround for pandas>=2.0.0 as "absolute = cutoff + relative"
        # is not working anymore as expected: shift a one-element index instead
        anchor = pd.PeriodIndex([cutoff])
        abs_index = pd.PeriodIndex([anchor.shift(step)[0] for step in steps])
        if cutoff_is_timestamp:
            # coerce back to DatetimeIndex after the operation
            abs_index = abs_index.to_timestamp(freq)
    else:
        abs_index = cutoff + steps

    return fh._new(abs_index, is_relative=False, freq=freq)
def _check_cutoff(cutoff, index):
"""Check if the cutoff is valid based on time index of forecasting horizon.
Validates that the cutoff is
compatible with the time index of the forecasting horizon.
Parameters
----------
cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
Cutoff value is required to convert a relative forecasting
horizon to an absolute one and vice versa.
index : pd.PeriodIndex or pd.DataTimeIndex
Forecasting horizon time index that the cutoff value will be checked
against.
"""
if cutoff is None:
raise ValueError("`cutoff` must be given, but found none.")
if isinstance(index, pd.PeriodIndex):
assert isinstance(cutoff, pd.Period)
assert index.freqstr == cutoff.freqstr
if isinstance(index, pd.DatetimeIndex):
assert isinstance(cutoff, pd.Timestamp)
def _coerce_to_period(x, freq=None):
"""Coerce pandas time index to a alternative pandas time index.
This coerces pd.Timestamp to pd.Period or pd.DatetimeIndex to
pd.PeriodIndex, because pd.Period and pd.PeriodIndex allow more reliable
arithmetic operations with time indices.
Parameters
----------
x : pandas Index or index element
pandas Index to convert.
freq : pandas frequency string
Returns
-------
index : pd.Period or pd.PeriodIndex
Index or index element coerced to period based format.
"""
if isinstance(x, pd.Timestamp) and freq is None:
freq = x.freq
raise ValueError(
"_coerce_to_period requires freq argument to be passed if x is pd.Timestamp"
)
try:
return x.to_period(freq)
except (ValueError, AttributeError) as e:
msg = str(e)
if "Invalid frequency" in msg or "_period_dtype_code" in msg:
raise ValueError(
"Invalid frequency. Please select a frequency that can "
"be converted to a regular `pd.PeriodIndex`. For other "
"frequencies, basic arithmetic operation to compute "
"durations currently do not work reliably."
)
else:
raise
def _index_range(relative, cutoff):
    """Return Index Range relative to cutoff.

    Parameters
    ----------
    relative : iterable of steps
        Relative steps to apply to the cutoff.
    cutoff : pd.DatetimeIndex, pd.PeriodIndex, or other
        Cutoff to shift. For the two pandas time index types each step shifts
        the cutoff and the first element of the result is kept; any other
        cutoff is simply added to `relative`.

    Returns
    -------
    absolute : pd.DatetimeIndex, pd.PeriodIndex, or result of ``cutoff + relative``
    """
    _check_cutoff(cutoff, relative)

    cutoff_is_datetime = isinstance(cutoff, pd.DatetimeIndex)
    if cutoff_is_datetime:
        # do the arithmetic on periods, which is more reliable
        cutoff = _coerce_to_period(cutoff, freq=cutoff.freq)

    if not isinstance(cutoff, pd.PeriodIndex):
        return cutoff + relative

    # workaround for pandas>=2.0.0 as "absolute = cutoff + relative"
    # is not working anymore as expected
    shifted = pd.PeriodIndex([cutoff.shift(step)[0] for step in relative])
    if not cutoff_is_datetime:
        return shifted

    # coerce back to timestamps after the operation, then rebuild the
    # DatetimeIndex with an explicit freq, as pandas>=2.0.0 lost Timestamp.freq
    timestamps = shifted.to_timestamp(freq=cutoff.freq)
    return pd.DatetimeIndex(timestamps, freq=cutoff.freq)