In [32]:
from lightgbm import LGBMRegressor
import pandas as pd

from feature_engine.creation import CyclicalFeatures
from feature_engine.datetime import DatetimeFeatures
from feature_engine.imputation import DropMissingData
from feature_engine.selection import DropFeatures
from feature_engine.timeseries.forecasting import (
    LagFeatures,
    WindowFeatures,
)

from sklearn.pipeline import Pipeline

from src.data import (
    make_exog_features,
    split_data,
    prepare_feature_store_data_for_training,
)

from src.model import forwardfill_missing_values
from src.paths import TRANSFORMED_DATA_DIR

# Read the transformed demand table; the `datetime` column is parsed
# to a datetime dtype at load time instead of being left as strings.
data = pd.read_csv(
    filepath_or_buffer=TRANSFORMED_DATA_DIR / "ts_tabular_2022_10_to_2025_2.csv",
    parse_dates=["datetime"],
)
data.head()

Unnamed: 0,datetime,demand,ba_code
0,2022-10-01,51628,AECI
1,2022-10-02,53127,AECI
2,2022-10-03,54708,AECI
3,2022-10-04,53345,AECI
4,2022-10-05,53356,AECI


In [33]:
# Work on an independent copy so the loaded frame `data` stays untouched
demand = data.copy(deep=True)
demand.dtypes

datetime    datetime64[ns]
demand               int64
ba_code             object
dtype: object

In [34]:
# Expand the `datetime` column into calendar features.
# drop_original=False keeps the source `datetime` column next to the new ones.
dtf = DatetimeFeatures(
    variables="datetime",
    features_to_extract=["month", "week", "day_of_week", "day_of_month", "weekend"],
    drop_original=False,
)

demand = dtf.fit_transform(demand)
demand.head()

Unnamed: 0,datetime,demand,ba_code,datetime_month,datetime_week,datetime_day_of_week,datetime_day_of_month,datetime_weekend
0,2022-10-01,51628,AECI,10,39,5,1,1
1,2022-10-02,53127,AECI,10,39,6,2,1
2,2022-10-03,54708,AECI,10,40,0,3,0
3,2022-10-04,53345,AECI,10,40,1,4,0
4,2022-10-05,53356,AECI,10,40,2,5,0


In [None]:
def lag_features(df: pd.DataFrame, lags: list[int]) -> pd.DataFrame:
    """Add lag features of ``demand``, computed separately for each ``ba_code``.

    The frame is split by ``ba_code`` so that a lag is only ever computed
    from rows that share the same code. Each subset is run through
    feature_engine's ``LagFeatures`` and the pieces are stacked back together.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``ba_code`` and ``demand``. The input frame
        is not modified; every subset is copied before being transformed.
    lags : list[int]
        Forwarded to ``LagFeatures(periods=...)``.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` plus the lag columns created by ``LagFeatures``,
        stacked one ``ba_code`` after another in order of first appearance.
        Index labels are not reset. If ``df`` has no rows, an empty
        DataFrame without columns is returned.
    """
    # One transformer is reused for every BA; the original `demand` column is kept.
    lf = LagFeatures(
        variables="demand",
        periods=lags,
        drop_original=False,
    )

    # Collect the per-BA results and concatenate once at the end. Growing a
    # DataFrame with pd.concat inside the loop re-copies all accumulated rows
    # on every iteration.
    # `ba_code` is not reassigned afterwards: every row of a subset already
    # carries that value because the subset was selected on it.
    frames = [
        lf.fit_transform(df.loc[df["ba_code"] == ba_code, :].copy())
        for ba_code in df["ba_code"].unique()
    ]

    # pd.concat raises on an empty list; keep returning an empty frame instead.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

In [36]:
# Build the modelling frame from a copy of `demand` with 1-, 2- and 3-period lags
df = lag_features(demand.copy(), lags=[1, 2, 3])

In [37]:
# First rows: the earliest rows of the first BA have NaN lags (no history yet)
df.head(n=5)

Unnamed: 0,datetime,demand,ba_code,datetime_month,datetime_week,datetime_day_of_week,datetime_day_of_month,datetime_weekend,demand_lag_1,demand_lag_2,demand_lag_3
0,2022-10-01,51628,AECI,10,39,5,1,1,,,
1,2022-10-02,53127,AECI,10,39,6,2,1,51628.0,,
2,2022-10-03,54708,AECI,10,40,0,3,0,53127.0,51628.0,
3,2022-10-04,53345,AECI,10,40,1,4,0,54708.0,53127.0,51628.0
4,2022-10-05,53356,AECI,10,40,2,5,0,53345.0,54708.0,53127.0


In [38]:
# Last rows: check that the lag columns are populated for the final BA
df.tail(n=5)

Unnamed: 0,datetime,demand,ba_code,datetime_month,datetime_week,datetime_day_of_week,datetime_day_of_month,datetime_weekend,demand_lag_1,demand_lag_2,demand_lag_3
58955,2025-02-22,2342,WAUW,2,8,5,22,1,2589.0,3026.0,3287.0
58956,2025-02-23,2161,WAUW,2,8,6,23,1,2342.0,2589.0,3026.0
58957,2025-02-24,2088,WAUW,2,9,0,24,0,2161.0,2342.0,2589.0
58958,2025-02-25,2064,WAUW,2,9,1,25,0,2088.0,2161.0,2342.0
58959,2025-02-26,2080,WAUW,2,9,2,26,0,2064.0,2088.0,2161.0
