In [None]:
# The star import comes first so that none of the explicit imports below can be
# shadowed by a name it happens to export.
from utilsforecast.losses import *

import numpy as np
import plotly.graph_objects as go
import polars as pl
from plotly.subplots import make_subplots
from statsforecast import StatsForecast
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import acf
from utilsforecast.evaluation import evaluate
from utilsforecast.plotting import plot_series


In [None]:
# Load the merged smart-meter blocks; each row carries a start timestamp and a
# series length instead of an explicit time axis.
raw_path = (
    "data/london_smart_meters/preprocessed/london_smart_meters_merged_block_0-7.parquet"
)
data = pl.read_parquet(raw_path)

# Offset of the last reading from the first one, as a polars duration string
# in minutes ("{n}m"): (series_length - 1) half-hour steps.
last_step_offset = pl.format("{}m", (pl.col("series_length") - 1) * 30)
series_start = pl.col("start_timestamp")
series_end = series_start.dt.offset_by(last_step_offset)

# Rebuild the half-hourly time axis per meter as a list column named "ds".
timestamp = data.group_by("LCLid").agg(
    pl.datetime_range(start=series_start, end=series_end, interval="30m").alias("ds"),
)

# Attach the time axis and switch to the unique_id / ds / y naming used by the
# forecasting calls further down.
data = timestamp.join(data, on="LCLid", how="inner")
data = data.rename({"LCLid": "unique_id", "energy_consumption": "y"})
data.head(5)

In [None]:
# Column names for the series id, timestamp and target, plus the matching
# polars column expressions reused by the cells below.
id_, time_, target_ = "unique_id", "ds", "y"
id_col, time_col, target_col = pl.col(id_), pl.col(time_), pl.col(target_)

In [None]:
# List-valued covariate columns. They are exploded together with the time axis
# and the target so that every list element becomes its own row.
covariate_columns = [
    "holidays",
    "visibility",
    "windBearing",
    "temperature",
    "dewPoint",
    "pressure",
    "apparentTemperature",
    "windSpeed",
    "precipType",
    "icon",
    "humidity",
    "summary",
]

# Keep only the rows coming from the "block_7" file.
block_rows = data.filter(pl.col("file") == "block_7")

# Column order: time, id, target, the two Acorn columns, then the covariates.
kept_columns = [time_, id_, target_, "Acorn", "Acorn_grouped", *covariate_columns]
wide_rows = block_rows.select(kept_columns)

# The id and Acorn columns are not exploded; their values repeat on each row.
data = wide_rows.explode([time_, target_, *covariate_columns])
data.head()

In [None]:
# Restrict the analysis to one household's series.
selected_id = "MAC000193"
is_selected = pl.col(id_) == selected_id
data = data.filter(is_selected)
data.head()

In [None]:
from statsforecast.models import (
    Naive,
    SeasonalNaive,
    WindowAverage,
    HistoricAverage,
    RandomWalkWithDrift,
)

# Baseline models fitted on the half-hourly series.
# 48 readings per day * 7 days = one weekly season for the seasonal naive model.
fcst = StatsForecast(
    models=[
        Naive(),
        HistoricAverage(),
        SeasonalNaive(season_length=48 * 7),
        RandomWalkWithDrift(),
    ],
    # Every frame handed to this object is converted with `.to_pandas()`, so the
    # frequency must be a pandas offset alias. "30m" is the polars spelling;
    # pandas reads "m" as month-based, while "min" means minutes.
    freq="30min",
)

In [None]:
# Forward-fill gaps in the target and hand the three core columns to
# statsforecast as a pandas frame.
cv_input = data.select([id_, time_, target_col.forward_fill()]).to_pandas()

# One backtest window of 48 steps (one day of half-hourly readings).
# fitted=True keeps the in-sample fitted values for the residual analysis.
cv_result = fcst.cross_validation(
    df=cv_input,
    h=48,
    step_size=48,
    n_windows=1,
    fitted=True,
)

# The cutoff column is dropped from the forecast frame.
y_hat = cv_result.drop(columns="cutoff")

In [None]:
from functools import partial

# Point-forecast metrics; MASE is scaled by the weekly seasonal-naive error
# (48 half-hours * 7 days), matching the SeasonalNaive season length.
metrics = [
    mae,
    mse,
    rmse,
    mape,
    smape,
    partial(mase, seasonality=48 * 7),
]

# Build the training frame the same way as the cross-validation input
# (forward-filled target) so the MASE scale is computed on the data the models
# actually saw.
train_df = data.select([id_, time_, target_col.forward_fill()]).to_pandas()

# Exclude the holdout window: everything from the first forecast timestamp
# onwards is test data and must not contribute to the in-sample scale.
holdout_start = y_hat[time_].min()
train_df = train_df[train_df[time_] < holdout_start]

evaluate(
    y_hat,
    metrics=metrics,
    train_df=train_df,
)

In [None]:
# Show the last 200 in-sample points together with the cross-validation forecasts.
plot_series(
    df=data.to_pandas(),
    forecasts_df=y_hat,
    max_insample_length=200,
    engine="plotly",
)

In [None]:
# In-sample fitted values from the cross-validation run, converted from pandas
# to polars for the residual computations below.
fitted_values_pd = fcst.cross_validation_fitted_values()
fitted_values = pl.from_pandas(fitted_values_pd)

In [None]:
# Output column names of the fitted models (RWD = RandomWalkWithDrift).
models = [
    "Naive",
    "HistoricAverage",
    "SeasonalNaive",
    "RWD",
]

# Residual = actual - fitted, stored in place of each model's fitted values.
# The target is referenced through the `target_col` expression: a bare Python
# string passed to polars arithmetic is parsed as a string literal, not as a
# column name.
residual_values = fitted_values.with_columns(
    [(target_col - pl.col(name)).alias(name) for name in models]
)
residual_values.head()

In [None]:
# Residual diagnostics for one model: residuals over time, their
# autocorrelation function with a 95% band, and their histogram.
model = "RWD"

# Drop rows without a residual (or timestamp) in ONE filter so the x and y
# arrays stay aligned; dropping nulls per column could shift them apart.
model_residuals = residual_values.filter(
    pl.col(model).is_not_null() & pl.col(time_).is_not_null()
)
residuals = model_residuals.get_column(model).to_numpy()
time = model_residuals.get_column(time_).to_numpy()

# With alpha set, acf returns the autocorrelations and an (nlags + 1, 2) array
# of confidence bounds centred on each estimate.
acf_values, confidence_interval = acf(
    residuals,
    nlags=40,
    fft=True,
    missing="drop",
    alpha=0.05,
)
lags = np.arange(len(acf_values))

# Re-centre the band on zero so that it reads as a significance region.
lower_y = confidence_interval[:, 0] - acf_values
upper_y = confidence_interval[:, 1] - acf_values

fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=("Innovation Residuals", "ACF Plot", "Histogram"),
    specs=[[{"colspan": 2}, None], [{}, {}]],
)

# Line plot of residuals (top row, spanning both columns)
fig.add_trace(
    go.Scatter(
        x=time,
        y=residuals,
        mode="lines",
        name="Residuals",
        line=dict(color="blue"),
    ),
    row=1,
    col=1,
)

# ACF plot: all stems go into a single trace, separated by None gaps, instead
# of adding one trace per lag.
stem_x, stem_y = [], []
for lag, value in zip(lags, acf_values):
    stem_x += [int(lag), int(lag), None]
    stem_y += [0.0, float(value), None]
fig.add_trace(
    go.Scatter(
        x=stem_x,
        y=stem_y,
        mode="lines",
        line=dict(color="#3f3f3f"),
        showlegend=False,
    ),
    row=2,
    col=1,
)
fig.add_scatter(
    x=lags,
    y=acf_values,
    mode="markers",
    marker_color="#1f77b4",
    marker_size=6,
    row=2,
    col=1,
)

# Confidence band: an invisible lower edge, then the upper edge filled down to it.
fig.add_trace(
    go.Scatter(
        x=lags,
        y=lower_y,
        mode="lines",
        line=dict(color="rgba(255,255,255,0)"),
        showlegend=False,
    ),
    row=2,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=lags,
        y=upper_y,
        mode="lines",
        fill="tonexty",
        fillcolor="rgba(32, 146, 230,0.3)",
        line=dict(color="rgba(255,255,255,0)"),
        showlegend=False,
    ),
    row=2,
    col=1,
)

# Histogram of residuals
fig.add_trace(
    go.Histogram(
        x=residuals,
        nbinsx=30,
        name="Residuals",
        marker=dict(color="blue", line=dict(color="black")),
    ),
    row=2,
    col=2,
)

# Update layout (the legend is hidden; the subplot titles label each panel)
fig.update_layout(
    height=800, width=1200, title_text="Residual Diagnostics", showlegend=False
)
fig.show()

In [None]:
# Ljung-Box test on the selected model's residuals; boxpierce=True also
# reports the Box-Pierce statistic alongside it.
resid_test = acorr_ljungbox(x=residuals, boxpierce=True)
resid_test