In [29]:
import warnings
warnings.filterwarnings("ignore")

from typing import Optional, List, Union
from numpy.typing import NDArray

import numpy as np
import pandas as pd

from strategies import StrategiesFactory
from dataset import TSDataset

In [30]:
def get_results(
    cv: int,
    regime: str,
    y_true: Optional[List[NDArray[np.float64]]] = None,
    y_pred: Optional[List[NDArray[np.float64]]] = None,
    ids: Optional[List[Union[float, np.ndarray]]] = None,
) -> pd.DataFrame:
    """Collect per-fold back-test outputs into a single wide DataFrame.

    For every fold ``k`` in ``1..cv`` the frame gets the columns
    ``y_true_{k}`` and ``y_pred_{k}`` and, unless ``regime == "local"``,
    ``id_{k}``.

    Args:
        cv: Number of folds to read from each per-fold list.
        regime: Only compared against ``"local"``; for any other value the
            id columns are added.
        y_true: Per-fold ground-truth values, or None.
        y_pred: Per-fold predictions, or None.
        ids: Per-fold series identifiers, or None. Ignored when
            ``regime == "local"``.

    Returns:
        A DataFrame built from the collected columns. The columns are handed
        to ``pd.DataFrame`` unchanged, so their lengths have to be compatible
        with each other.

    Raises:
        TypeError: If a per-fold entry is neither a ``float`` nor a
            ``np.ndarray``.
    """

    def _get_fold_value(
        value: Optional[List[Union[float, np.ndarray]]], idx: int
    ) -> Union[float, List[None], np.ndarray]:
        """Return the column content for fold ``idx`` of ``value``."""
        if value is None:
            # Missing input is represented by a one-element placeholder column.
            return [None]
        fold_value = value[idx]
        if isinstance(fold_value, float):
            return fold_value
        if isinstance(fold_value, np.ndarray):
            # Flatten so that arrays of any shape become a single 1-D column.
            return fold_value.reshape(-1)
        raise TypeError(f"Unexpected value type. Value: {value}")

    df_res_dict = {}

    for idx_fold in range(cv):
        # Column names are 1-based while the per-fold lists are 0-based.
        fold_number = idx_fold + 1
        for name, value in [("y_true", y_true), ("y_pred", y_pred)]:
            df_res_dict[f"{name}_{fold_number}"] = _get_fold_value(value, idx_fold)
        if regime != "local":
            df_res_dict[f"id_{fold_number}"] = _get_fold_value(ids, idx_fold)

    return pd.DataFrame(df_res_dict)

There are four main objects in total:
1) `TSDataset`
2) `Strategy`
3) `Model` (a part of `Strategy`)
4) `Transformers`

Special attention should be paid to the `Transformer` classes: these are the pipeline elements responsible for transforming the values of a series and generating features.

There are several types of transformers: 
- `SeriesToSeries` (takes train and test as the original series (id, datetime, features) and outputs train and test in the same structure).
- `SeriesToFeatures` (takes train and test as the original series (id, datetime, features) and outputs tables with features).
- `FeaturesToFeatures` (takes tabular data as a `pd.DataFrame`, applies transformations and outputs a `pd.DataFrame`).

Below is a list of available Transformers: 
- `StandardScaler` - SeriesToSeries
- `LastKnownNormalizer` - FeaturesToFeatures (after building features we normalize all lags by the last known one: divide by it or subtract).
- `DifferenceNormalizer` - SeriesToSeries (subtract the previous value or divide by it)
- `TimeToNumGenerator`, `DateSeasonsGenerator` - SeriesToFeatures (generate seasonal features from dates)
- `LagTransformer` - SeriesToFeatures

Currently, only one of [`StandardScalerTransformer`, `DifferenceNormalizer` and `LastKnownNormalizer`] can be used at a time; the first two must come before `LagTransformer`, and the last one must come after it.

In [31]:
# Input data location.
# - global / multivariate: all time series live in one .csv file and are
#   told apart by the id column.
# - local: each time series has its own .csv file, which still carries an
#   id column.
df_path = "datasets/global/simulated_data_to_check.csv"

# Feature settings, one entry per role (target / date / id)
columns_and_features_params = dict(
    target=dict(
        column=["value"],
        # NOTE(review): spelling kept exactly as-is; presumably the library
        # matches on this string — confirm before correcting it.
        type="continious",
        features=dict(
            LagTransformer=dict(lags=30),
            LastKnownNormalizer=dict(
                regime="ratio",
                transform_train=True,
                transform_target=True,
            ),
        ),
        drop_raw_feature=False,
    ),
    date=dict(
        column=["date"],
        type="datetime",
        features=dict(
            DateSeasonsGenerator=dict(
                # Seasonality features derived from the date column and used
                # as features with datetime lags. Possible values:
                #    "y": year, "m": month, "d": day,
                #    "wd": weekday, "doy": dayofyear,
                #    "hour": hour, "min": minute, "sec": second,
                #    "ms": microsecond, "ns": nanosecond
                seasonalities=["doy", "m", "wd"],
                # Build the datetime features from the date of the target point
                from_target_date=True,
            ),
            LagTransformer=dict(lags=7),
        ),
        drop_raw_feature=True,
    ),
    id=dict(
        column=["id"],
        type="categorical",
        drop_raw_feature=False,
    ),
)

# Model settings
model_params = dict(
    loss_function="MultiRMSE",
    early_stopping_rounds=100,
)

# Validation settings
validation_params = dict(
    type="KFold",
    n_splits=3,
)

# Strategy settings
strategy_params = dict(
    is_multivariate=False,
    # strategy_name is one of:
    #     'RecursiveStrategy',
    #     'DirectStrategy',
    #     'DirRecStrategy',
    #     'MIMOStrategy',
    #     'FlatWideMIMOStrategy'
    strategy_name="RecursiveStrategy",
    strategy_params=dict(
        horizon=7,
        model_name="CatBoostRegressor_CV",
        model_params=model_params,
        validation_params=validation_params,
        k=1,
    ),
)

In [32]:
# Factory object; it is indexed with a strategy configuration dict to obtain a strategy.
strategies_factory = StrategiesFactory()

In [33]:
# Read the series from df_path and wrap them in a TSDataset together with the
# per-column feature settings.
# NOTE(review): history=30 equals the 30 target lags set in
# columns_and_features_params — presumably the two must agree; confirm.
dataset = TSDataset(
    data=pd.read_csv(df_path),
    columns_and_features_params=columns_and_features_params,
    history=30,
)

# Look up the strategy described by strategy_params.
strategy = strategies_factory[strategy_params]

In [34]:
# Back-test the strategy on the dataset (cv=1 — presumably a single fold; confirm).
# ids, test and pred are later passed to get_results as ids, y_true and y_pred.
ids, test, pred, fit_time, forecast_time, num_iterations = strategy.back_test(dataset, cv=1)

0:	learn: 0.0001731	test: 0.0001793	best: 0.0001793 (0)	total: 1.81ms	remaining: 1.81s
1:	learn: 0.0001682	test: 0.0001743	best: 0.0001743 (1)	total: 3.39ms	remaining: 1.69s
2:	learn: 0.0001635	test: 0.0001694	best: 0.0001694 (2)	total: 4.69ms	remaining: 1.56s
3:	learn: 0.0001588	test: 0.0001646	best: 0.0001646 (3)	total: 5.96ms	remaining: 1.48s
4:	learn: 0.0001543	test: 0.0001599	best: 0.0001599 (4)	total: 7.19ms	remaining: 1.43s
5:	learn: 0.0001499	test: 0.0001554	best: 0.0001554 (5)	total: 8.51ms	remaining: 1.41s
6:	learn: 0.0001457	test: 0.0001511	best: 0.0001511 (6)	total: 9.83ms	remaining: 1.39s
7:	learn: 0.0001416	test: 0.0001469	best: 0.0001469 (7)	total: 11.1ms	remaining: 1.38s
8:	learn: 0.0001375	test: 0.0001427	best: 0.0001427 (8)	total: 12.5ms	remaining: 1.37s
9:	learn: 0.0001336	test: 0.0001387	best: 0.0001387 (9)	total: 13.7ms	remaining: 1.36s
10:	learn: 0.0001298	test: 0.0001347	best: 0.0001347 (10)	total: 15ms	remaining: 1.34s
11:	learn: 0.0001262	test: 0.0001309	best: 

In [35]:
# Tabulate the back-test output; cv=1 mirrors the cv passed to strategy.back_test.
# regime="global" is not "local", so the id column is included.
get_results(cv=1, regime="global", y_true=test, y_pred=pred, ids=ids)

Unnamed: 0,y_true_1,y_pred_1,id_1
0,1993.0,1993.001848,0
1,1994.0,1994.003998,0
2,1995.0,1995.006528,0
3,1996.0,1996.009711,0
4,1997.0,1997.013492,0
...,...,...,...
65,10995.0,10995.02394,9
66,10996.0,10996.033025,9
67,10997.0,10997.042218,9
68,10998.0,10998.051709,9


---

It is also possible to use the classic fit and predict methods.

In [37]:
# Fit on the dataset, then forecast from it. Each call returns a pair whose
# first element is bound to a *_time name (presumably elapsed time — confirm).
# Note: fit_time and forecast_time rebind the names assigned by the back_test cell.
fit_time, _ = strategy.fit(dataset)
forecast_time, current_pred = strategy.predict(dataset)

0:	learn: 0.0001722	test: 0.0001803	best: 0.0001803 (0)	total: 1.75ms	remaining: 1.75s
1:	learn: 0.0001673	test: 0.0001752	best: 0.0001752 (1)	total: 4.64ms	remaining: 2.32s
2:	learn: 0.0001627	test: 0.0001704	best: 0.0001704 (2)	total: 6.26ms	remaining: 2.08s
3:	learn: 0.0001580	test: 0.0001656	best: 0.0001656 (3)	total: 7.68ms	remaining: 1.91s
4:	learn: 0.0001535	test: 0.0001609	best: 0.0001609 (4)	total: 9.07ms	remaining: 1.8s
5:	learn: 0.0001492	test: 0.0001564	best: 0.0001564 (5)	total: 11.3ms	remaining: 1.86s
6:	learn: 0.0001450	test: 0.0001520	best: 0.0001520 (6)	total: 12.6ms	remaining: 1.78s
7:	learn: 0.0001409	test: 0.0001477	best: 0.0001477 (7)	total: 13.9ms	remaining: 1.72s
8:	learn: 0.0001369	test: 0.0001435	best: 0.0001435 (8)	total: 15.2ms	remaining: 1.68s
9:	learn: 0.0001330	test: 0.0001395	best: 0.0001395 (9)	total: 18.9ms	remaining: 1.87s
10:	learn: 0.0001293	test: 0.0001355	best: 0.0001355 (10)	total: 20.2ms	remaining: 1.81s
11:	learn: 0.0001256	test: 0.0001317	best:

In [38]:
# Display the forecast returned by strategy.predict.
current_pred

Unnamed: 0,id,date,value
0,0,2022-09-27,2000.005171
1,0,2022-09-28,2001.010566
2,0,2022-09-29,2002.016642
3,0,2022-09-30,2003.023209
4,0,2022-10-01,2004.030248
...,...,...,...
65,9,2022-09-29,11002.027435
66,9,2022-09-30,11003.03777
67,9,2022-10-01,11004.048562
68,9,2022-10-02,11005.059555
