In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import polars as pl
import plotly.express as px
import seaborn as sns
from utilsforecast.plotting import plot_series
from statsforecast import StatsForecast
from mlforecast import MLForecast
from mlforecast.lag_transforms import RollingMean
from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate
from plotly.subplots import make_subplots
import pandas as pd
import matplotlib.pyplot as plt

import plotly.graph_objects as go


In [None]:
# Read the merged smart-meter blocks. The steps below rely on the columns
# LCLid, start_timestamp, series_length and energy_consumption.
data = pl.read_parquet(
    "data/london_smart_meters/preprocessed/london_smart_meters_merged_block_0-7.parquet"
)

# The last timestamp of a series lies (series_length - 1) * 30 minutes after its start.
series_start = pl.col("start_timestamp")
minutes_to_last_step = pl.col("series_length").sub(1).mul(30)
series_end = series_start.dt.offset_by(pl.format("{}m", minutes_to_last_step))

# One row per LCLid, with the 30-minute timestamp range stored under "ds".
timestamp = data.group_by("LCLid").agg(
    pl.datetime_range(start=series_start, end=series_end, interval="30m").alias("ds")
)

# Attach the timestamps to the original columns, then switch to unique_id / y names.
data = timestamp.join(data, on="LCLid", how="inner")
data = data.rename({"LCLid": "unique_id", "energy_consumption": "y"})
data.head(5)

In [None]:
# Names of the identifier, timestamp and target columns after the rename above.
id_, time_, target_ = "unique_id", "ds", "y"

# The same three columns as polars expressions, for use in filters and transforms.
id_col, time_col, target_col = (pl.col(name) for name in (id_, time_, target_))

In [None]:
# Columns that are exploded together with the time and target columns
# (presumably list-valued, one entry per timestamp — confirm against the parquet schema).
per_step_columns = [
    "holidays",
    "visibility",
    "windBearing",
    "temperature",
    "dewPoint",
    "pressure",
    "apparentTemperature",
    "windSpeed",
    "precipType",
    "icon",
    "humidity",
    "summary",
]

# Columns that are kept as-is and not exploded.
static_columns = ["Acorn", "Acorn_grouped"]

# Keep only block_7, narrow to the columns of interest, then expand to one row per step.
data = (
    data.filter(pl.col("file").eq("block_7"))
    .select([time_, id_, target_, *static_columns, *per_step_columns])
    .explode([time_, target_, *per_step_columns])
)
data.head()

In [None]:
selected_id = "MAC000193"

# Keep a single meter, then fill missing target values: forward first,
# then backward to cover any nulls at the very start of the series.
is_selected_meter = id_col.eq(selected_id)
gap_filled_target = target_col.forward_fill().backward_fill()
data = data.filter(is_selected_meter).with_columns(gap_filled_target)
data.head()

In [None]:
from statsforecast.models import (
    SimpleExponentialSmoothing,
    SeasonalExponentialSmoothing,
    Holt,
    AutoETS,
    MSTL,
)

# Season length shared by every model: 48 half-hour steps per day over 7 days.
weekly_season_length = 48 * 7

# (ETS model string, alias, extra AutoETS keyword arguments).
# Order matters: a later cell picks the fitted models by position.
ets_specs = [
    ("ANN", "SES", {}),
    ("AAN", "Holt", {}),
    ("AAN", "Damped Holt's method", {"damped": True, "phi": 0.9}),
    ("ANA", "seasonal", {}),
    ("AAA", "additiveHW", {}),
    ("AAA", "Damped additiveHW", {"damped": True}),
]

fcst = StatsForecast(
    models=[
        AutoETS(
            season_length=weekly_season_length,
            model=ets_model,
            alias=model_alias,
            **extra_kwargs,
        )
        for ets_model, model_alias, extra_kwargs in ets_specs
    ],
    freq="30m",
)

In [None]:
# Cross-validate with a single window and a 48-step horizon. The "cutoff" column
# is dropped from the result afterwards.
cv_input = data.select([id_, time_, target_col.forward_fill()])
y_hat = fcst.cross_validation(
    df=cv_input,
    h=48,
    n_windows=1,
    step_size=1,
    fitted=True,
)
y_hat = y_hat.drop("cutoff")

In [None]:
# Fit every configured model on the id / time / target columns of the selected series.
full_history = data.select([id_, time_, target_col.forward_fill()])
fcst.fit(df=full_history)

In [None]:
# fitted_ is indexed [series, model]. The last two configured models are the
# additive Holt-Winters variants (plain and damped).
first_series_models = fcst.fitted_[0]
additiveHW = first_series_models[-2].model_
damped_additiveHW = first_series_models[-1].model_

In [None]:
# Compare the number of state rows with the number of observations.
n_state_rows = additiveHW["states"].shape[0]
n_observations = data.shape[0]
print(n_state_rows, n_observations, sep="\n")
print(
    "States components have at least 1 more observation than the data due to initial states"
)

In [None]:
def _add_ets_components(fig, model, timestamps, observed, col):
    """Draw one fitted ETS model's decomposition into a single column of ``fig``.

    Rows 1-5 of the column receive, in order: the observed series, the level,
    slope and first seasonal state, and the residuals.

    Parameters
    ----------
    fig : plotly figure created by ``make_subplots`` with at least 5 rows.
    model : fitted ETS model dict exposing ``"states"`` and ``"residuals"``.
    timestamps : time values shared by every trace in the column.
    observed : observed target values aligned with ``timestamps``.
    col : 1-based subplot column to draw into.
    """
    n_obs = len(timestamps)
    # The state matrix has extra leading row(s) for the initial state. Keep only
    # the trailing n_obs rows so each state lines up with its observation.
    states = model["states"][-n_obs:]
    panels = [
        ("Energy consumption", observed),
        ("Level", states[:, 0]),
        ("Slope", states[:, 1]),
        ("Season", states[:, 2]),
        ("Residual", model["residuals"]),
    ]
    for row, (trace_name, values) in enumerate(panels, start=1):
        # Every trace gets the same x values; with shared_xaxes=True, mixing
        # datetime and positional x inside one column would break the linked axis.
        fig.add_trace(
            go.Scatter(x=timestamps, y=values, name=trace_name), row=row, col=col
        )


fig = make_subplots(
    rows=5,
    cols=2,
    shared_xaxes=True,
    subplot_titles=[
        # Plotly renders titles as HTML-like text, so line breaks need <br>, not "\n".
        "ETS(A,A,A) decomposition <br> Additive seasonality",
        "ETS(A,A,A) decomposition <br> Additive seasonality and damped trend",
        "Level",
        "Level",
        "Slope",
        "Slope",
        "Season",
        "Season",
        "Residual",
        "Residual",
    ],
)

timestamps = data.get_column(time_)
observed = data.get_column(target_)

# Left column: additive seasonality
_add_ets_components(fig, additiveHW, timestamps, observed, col=1)

# Right column: additive seasonality with damped trend
_add_ets_components(fig, damped_additiveHW, timestamps, observed, col=2)

# Update layout
fig.update_layout(
    height=1200, width=1400, title_text="ETS Decomposition", showlegend=False
)
fig.show()

In [None]:
from functools import partial

# mase takes a seasonality argument; bind it to the weekly period (48 * 7 steps)
# so it can be passed to evaluate like the other metrics.
weekly_mase = partial(mase, seasonality=48 * 7)
metrics = [mae, mse, rmse, mape, smape, weekly_mase]

# The id / time / target columns of the series are passed as the training frame.
train_history = data.select([id_, time_, target_])
evaluate(y_hat, metrics=metrics, train_df=train_history)

In [None]:
# Plot the cross-validation forecasts against the series, showing at most
# 200 in-sample points, using the plotly engine.
plot_options = dict(max_insample_length=200, engine="plotly")
fig = fcst.plot(data, y_hat, **plot_options)
fig.update_layout(width=1400, height=600)
fig.show()