In [1]:
import pandas as pd

In [2]:
# Read only the three columns this notebook uses and rename them to the
# unique_id / ds / y names that every later cell refers to.
column_map = {"prediction_unit_id": "unique_id", "datetime": "ds", "target": "y"}
source_columns = list(column_map)

raw = pd.read_csv("data/consumption.csv", usecols=source_columns)
# Re-select explicitly so the column order is fixed regardless of the file's order.
df = raw[source_columns].rename(columns=column_map)
df["ds"] = pd.to_datetime(df["ds"])
df.head()

Unnamed: 0,unique_id,ds,y
0,0,2021-09-01,96.59
1,1,2021-09-01,17.314
2,2,2021-09-01,656.859
3,3,2021-09-01,59.0
4,4,2021-09-01,501.76


In [3]:
# Fill gaps in the target one series at a time. Rows of different unique_id
# values are interleaved in df (the first rows all share one timestamp), so
# interpolating straight down the column would fill a gap in one series from
# the values of its neighbouring rows, which belong to other series.
# limit_direction="both" also fills gaps at the very start or end of a series,
# so the NaN count below still drops to zero unless a series has no values at all.
# NOTE(review): assumes the rows of each series are already in time order - confirm.
df["y"] = df.groupby("unique_id")["y"].transform(
    lambda series: series.interpolate(method="linear", limit_direction="both")
)
df.isna().sum()

unique_id    0
ds           0
y            0
dtype: int64

# Train/Test split

In [14]:
# Hold out the last 48 rows of every series as the test set
# (48 hours, assuming one row per hour per series - TODO confirm).
# groupby(...).tail() picks those rows in a single vectorised pass and keeps
# the original row labels; it replaces a loop that re-ran unique() and
# pd.concat for every series and failed with a NameError on an empty frame.
TEST_HOURS = 48

df_test = df.groupby("unique_id").tail(TEST_HOURS).sort_index()
print(df_test.shape)
df_test.head()

(3312, 3)


Unnamed: 0,unique_id,ds,y
906150,26,2023-03-28 00:00:00,5.137
906217,26,2023-03-28 01:00:00,4.212
906284,26,2023-03-28 02:00:00,4.917
906351,26,2023-03-28 03:00:00,4.195
906418,26,2023-03-28 04:00:00,4.518


In [50]:
# Every row that was not held out in df_test is training data.
# A vectorised isin() mask replaces the Python-level loop over all of df.index.
is_train = ~df.index.isin(df_test.index)
train_idx = df.index[is_train].tolist()  # still a plain list of row labels
df_train = df.loc[is_train]
# The shape used to be a bare mid-cell expression, so it was never displayed.
print(df_train.shape)
df_train.head()

Unnamed: 0,unique_id,ds,y
0,0,2021-09-01,96.59
1,1,2021-09-01,17.314
2,2,2021-09-01,656.859
3,3,2021-09-01,59.0
4,4,2021-09-01,501.76


In [47]:
# Sanity check: the train and test splits together account for every row of df.
len(df_train) + len(df_test) == len(df)

True

In [49]:
# Sanity check: df, df_train and df_test all have the same number of columns.
len({frame.shape[1] for frame in (df, df_train, df_test)}) == 1

True

# Training

In [6]:
!pip install mlforecast lightgbm

Collecting mlforecast
  Downloading mlforecast-0.12.0-py3-none-any.whl.metadata (11 kB)
Collecting lightgbm
  Downloading lightgbm-4.3.0.tar.gz (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 20.1 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Installing backend dependencies ... done
  Preparing metadata (pyproject.toml) ... done
Collecting coreforecast>=0.0.7 (from mlforecast)
  Downloading coreforecast-0.0.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting utilsforecast>=0.0.27 (from mlforecast)
  Downloading utilsforecast-0.1.2-py3-none-any.whl.metadata (7.4 kB)
Collecting window-ops (from mlforecast)
  Downloading window_ops-0.0.15-py3-none-any.whl.metadata (6.8 kB)
Downloading mlforecast-0.12.0-py3-none-any.whl (57 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences

In [9]:
import lightgbm as lgb

def hour_index(times):
    """Return the hour of day (0-23) for each timestamp in ``times``.

    Passed to MLForecast as a custom date feature; the resulting feature
    column takes this function's name.

    NOTE(review): no definition of ``hour_index`` exists in any earlier cell,
    so this cell raised NameError on a fresh kernel. Hour of day is assumed
    to be the intended feature - confirm.
    """
    # Wrapping in DatetimeIndex accepts both an index and a Series of timestamps.
    return pd.DatetimeIndex(times).hour


# Settings shared by all three LightGBM models.
lgb_params = {
    'verbosity': -1,
    'num_leaves': 512,
}

# One model with LightGBM's default objective plus two quantile models
# (alpha 0.75 and 0.25), all trained on the same features.
fcst = MLForecast(
    models={
        'avg': lgb.LGBMRegressor(**lgb_params),
        'q75': lgb.LGBMRegressor(**lgb_params, objective='quantile', alpha=0.75),
        'q25': lgb.LGBMRegressor(**lgb_params, objective='quantile', alpha=0.25),
    },
    freq="h",
    # Model the 24-step difference of the target (same hour on the previous day).
    target_transforms=[Differences([24])],
    lags=[1, 24],
    lag_transforms={
        1: [ExpandingMean()],
        24: [RollingMean(window_size=48)],
    },
    date_features=[hour_index],
)

In [10]:
# fcst = MLForecast(
#     models=models,
#     freq='h',
#     lags=[i+1 for i in range(25)],
# #     lag_transforms={
# #         1: [ExpandingMean()],
# #         7: [RollingMean(window_size=28)]
# #     },
#     date_features=["month", "dayofweek"],
# #     target_transforms=[Differences([1])],
# )


In [12]:
import time

# Fit on the training split only. df still contains the rows held out in
# df_test, so fitting on it would train on the very observations that are
# meant to be used for evaluation.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments.
start = time.perf_counter()
fcst.fit(df_train)
end = time.perf_counter()
t = end - start  # wall-clock fit time in seconds
t

19.33337926864624