# Global lag transforms example

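This notebook shows how to compute lag transforms on the global aggregate (the sum of the target across all series) and on group-level aggregates (here, rows grouped by `brand`), and cross-checks the resulting features against manual pandas computations.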

In [1]:
import pandas as pd
from utilsforecast.data import generate_series
from mlforecast import MLForecast
from mlforecast.lag_transforms import RollingMean, RollingStd, ExpandingMax, ExpandingMean

df = generate_series(n_series=3, freq='D', min_length=10, max_length=10)

fcst = MLForecast(
    models=[],
    freq='D',
    lags=[1],
    lag_transforms={1: [RollingMean(window_size=2, global_=True),
                        RollingStd(window_size=2, global_=True),
                        ExpandingMax(),
                        ExpandingMean()
                        ]},
)

prep = fcst.preprocess(df, dropna=False)
#prep[['unique_id', 'ds', 'y', 'rolling_mean_lag1_window_size2']].head(10)


# Group-level lag transform (by brand)
df_group = df.copy()
brands = (['a', 'b', 'c'] * (len(df_group) // 3 + 1))[:len(df_group)]
df_group['brand'] = brands
tfm_group = RollingMean(window_size=2, groupby=['brand'])
exp_max = ExpandingMax(groupby=['brand'])
exp_mean = ExpandingMean(groupby=['brand'])
fcst_group = MLForecast(
    models=[],
    freq='D',
    lags=[1],
    lag_transforms={1: [tfm_group, exp_max, exp_mean]},
)
prep_group = fcst_group.preprocess(df_group, dropna=False, static_features=['brand'])
col = tfm_group._get_name(1)
prep_group.head(10)


Unnamed: 0,unique_id,ds,y,brand,lag1,groupby_brand_rolling_mean_lag1_window_size2,groupby_brand_expanding_max_lag1,groupby_brand_expanding_mean_lag1
0,0,2000-01-01,0.274407,a,,,,
1,0,2000-01-02,1.357595,b,0.274407,,3.395863,3.395863
2,0,2000-01-03,2.301382,c,1.357595,5.376878,6.489309,5.376878
3,0,2000-01-04,3.272442,a,2.301382,2.841801,5.284022,1.986003
4,0,2000-01-05,4.211827,b,3.272442,3.846769,6.462798,3.111749
5,0,2000-01-06,5.322947,c,4.211827,1.212891,6.489309,3.096184
6,0,2000-01-07,6.218794,a,5.322947,2.051351,5.284022,2.222192
7,0,2000-01-08,0.445887,b,6.218794,3.165035,6.462798,3.284127
8,0,2000-01-09,1.481831,c,0.445887,4.243993,6.489309,3.661482
9,0,2000-01-10,2.191721,a,1.481831,5.430706,6.472334,3.379262


In [2]:
xxx = df_group.groupby(['brand', 'ds']).y.sum().reset_index()

xxx.groupby('brand').y.rolling(window=2).mean()

brand    
a      0          NaN
       1     0.336993
       2     2.841801
       3     4.278232
       4     3.165789
       5     2.051351
       6     3.631179
       7     6.345564
       8     5.430706
       9     3.290400
b      10         NaN
       11    2.376729
       12    1.294167
       13    3.846769
       14    5.337313
       15    4.265894
       16    3.165035
       17    1.227998
       18    0.353405
       19    2.847965
c      20         NaN
       21    5.376878
       22    3.282915
       23    2.345823
       24    1.212891
       25    2.679233
       26    5.197312
       27    4.243993
       28    2.449071
       29    1.344581
Name: y, dtype: float64

In [17]:
(0.27 + 0.39 + 5.28) / 3

1.9800000000000002

In [3]:
prep_group.sort_values(by=['brand', 'ds'])

Unnamed: 0,unique_id,ds,y,brand,lag1,groupby_brand_rolling_mean_lag1_window_size2,groupby_brand_expanding_max_lag1,groupby_brand_expanding_mean_lag1
0,0,2000-01-01,0.274407,a,,,,
21,2,2000-01-02,0.399579,a,6.489309,,0.274407,0.274407
12,1,2000-01-03,5.284022,a,4.264447,0.336993,0.399579,0.336993
3,0,2000-01-04,3.272442,a,2.301382,2.841801,5.284022,1.986003
24,2,2000-01-05,3.059137,a,2.390265,4.278232,5.284022,2.307612
15,1,2000-01-06,1.043565,a,0.035518,3.165789,5.284022,2.457917
6,0,2000-01-07,6.218794,a,5.322947,2.051351,5.284022,2.222192
27,2,2000-01-08,6.472334,a,5.071677,3.631179,6.218794,2.793135
18,1,2000-01-09,4.389078,a,3.41631,6.345564,6.472334,3.253035
9,0,2000-01-10,2.191721,a,1.481831,5.430706,6.472334,3.379262


In [4]:
df_all = df.groupby('ds').y.sum().reset_index()

df_all['y'].rolling(window=2).mean()

0          NaN
1     8.090600
2     7.418883
3    10.470824
4     9.715994
5     8.996477
6    11.993526
7    11.817555
8     8.233182
9     7.482946
Name: y, dtype: float64

In [5]:
df_all['y'].rolling(window=2).std()

0         NaN
1    2.925977
2    1.976026
3    2.340071
4    3.407563
5    2.390014
6    1.848453
7    2.097313
8    2.971756
9    1.910761
Name: y, dtype: float64

In [8]:
# Display the preprocessed frame. The two `_global_True` columns should match
# the manual rolling mean/std of the per-date totals, shifted by one row
# because the transforms are built on lag 1.
prep

Unnamed: 0,unique_id,ds,y,lag1,rolling_mean_lag1_window_size2_global_True,rolling_std_lag1_window_size2_global_True
0,0,2000-01-01,0.274407,,,
1,0,2000-01-02,1.357595,0.274407,,
2,0,2000-01-03,2.301382,1.357595,8.0906,2.925977
3,0,2000-01-04,3.272442,2.301382,7.418883,1.976026
4,0,2000-01-05,4.211827,3.272442,10.470824,2.340071
5,0,2000-01-06,5.322947,4.211827,9.715994,3.407563
6,0,2000-01-07,6.218794,5.322947,8.996477,2.390014
7,0,2000-01-08,0.445887,6.218794,11.993526,1.848453
8,0,2000-01-09,1.481831,0.445887,11.817555,2.097313
9,0,2000-01-10,2.191721,1.481831,8.233182,2.971756


In [19]:
from mlforecast.core import (
    TimeSeries,
    _build_function_transform_name,
    _build_lag_transform_name,
    _name_models,
)
from mlforecast.lag_transforms import (
    ExpandingMean,
    RollingMean,
    RollingQuantile,
    RollingStd,
)

import numpy as np

In [None]:
df = pd.DataFrame(
        {
            "unique_id": ["a"] * 5 + ["b"] * 5,
            "ds": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
            "y": [1, 3, 5, 7, 9, 2, 4, 6, 8, 10],
            "brand": ["x"] * 5 + ["y"] * 5,
        }
    )
tfm = RollingQuantile(p=0.5, window_size=3, groupby=["brand"])
ts = TimeSeries(freq=1, lag_transforms={1: [tfm]})
prep = ts.fit_transform(
    df,
    id_col="unique_id",
    time_col="ds",
    target_col="y",
    dropna=False,
    static_features=["brand"],
)
expected_by_key = {
    ("x", 1): np.nan,
    ("x", 2): np.nan,
    ("x", 3): np.nan,
    ("x", 4): 3.0,
    ("x", 5): 5.0,
    ("y", 1): np.nan,
    ("y", 2): np.nan,
    ("y", 3): np.nan,
    ("y", 4): 4.0,
    ("y", 5): 6.0,
}

In [22]:
# Display the toy input panel (two series, one brand each) for reference.
df

Unnamed: 0,unique_id,ds,y,brand
0,a,1,1,x
1,a,2,3,x
2,a,3,5,x
3,a,4,7,x
4,a,5,9,x
5,b,1,2,y
6,b,2,4,y
7,b,3,6,y
8,b,4,8,y
9,b,5,10,y


In [21]:
# Display the transformed toy panel; the group-by-brand rolling-quantile
# column should agree with the values listed in expected_by_key.
prep

Unnamed: 0,unique_id,ds,y,brand,groupby_brand_rolling_quantile_lag1_p0.5_window_size3
0,a,1,1.0,x,
1,a,2,3.0,x,
2,a,3,5.0,x,
3,a,4,7.0,x,3.0
4,a,5,9.0,x,5.0
5,b,1,2.0,y,
6,b,2,4.0,y,
7,b,3,6.0,y,
8,b,4,8.0,y,4.0
9,b,5,10.0,y,6.0
