In [None]:
import polars as pl
import plotly.express as px
import seaborn as sns
from utilsforecast.plotting import plot_series
from statsforecast import StatsForecast
from mlforecast import MLForecast
from mlforecast.lag_transforms import RollingMean
from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate
from plotly.subplots import make_subplots
import pandas as pd

import plotly.graph_objects as go


In [None]:
# Load the preprocessed smart-meter table (merged blocks 0-7).
parquet_path = (
    "data/london_smart_meters/preprocessed/london_smart_meters_merged_block_0-7.parquet"
)
data = pl.read_parquet(parquet_path)

# Last timestamp of a series: start + (series_length - 1) steps of 30 minutes,
# expressed as a "<minutes>m" offset string.
minutes_to_last = (pl.col("series_length") - 1) * 30
last_timestamp = pl.col("start_timestamp").dt.offset_by(
    pl.format("{}m", minutes_to_last)
)

# One 30-minute datetime range per LCLid, collected under "ds".
# NOTE(review): assumes one row per LCLid in the parquet -- confirm.
timestamp = data.group_by("LCLid").agg(
    pl.datetime_range(
        start=pl.col("start_timestamp"),
        end=last_timestamp,
        interval="30m",
    ).alias("ds")
)

# Attach the ranges to the original rows and switch to the
# unique_id / y column names used in the rest of the notebook.
column_renames = {"LCLid": "unique_id", "energy_consumption": "y"}
data = timestamp.join(data, on="LCLid", how="inner").rename(column_renames)
data.head(5)

In [None]:
# Names of the id, time and target columns ...
id_, time_, target_ = "unique_id", "ds", "y"
# ... and the polars expressions that select them.
id_col, time_col, target_col = pl.col(id_), pl.col(time_), pl.col(target_)

In [None]:
# Columns that are exploded together with the time and target columns below
# (so they are expected to be list-valued, one list per row).
covariate_cols = [
    "holidays",
    "visibility",
    "windBearing",
    "temperature",
    "dewPoint",
    "pressure",
    "apparentTemperature",
    "windSpeed",
    "precipType",
    "icon",
    "humidity",
    "summary",
]
# Columns kept as-is (not exploded).
acorn_cols = ["Acorn", "Acorn_grouped"]

# Keep only rows coming from block_7, narrow to the columns of interest, then
# explode the list columns into one row per timestamp.
block_7_rows = data.filter(pl.col("file") == "block_7")
data = block_7_rows.select(
    [time_, id_, target_, *acorn_cols, *covariate_cols]
).explode([time_, target_, *covariate_cols])
data.head()

In [None]:
# Work with a single meter from here on.
selected_id = "MAC000193"
is_selected = id_col == selected_id

# Fill gaps in the target: carry the last value forward first, then back-fill
# whatever is still missing at the start.
filled_target = target_col.forward_fill().backward_fill()

data = data.filter(is_selected).with_columns(filled_target)
data.head()

In [None]:
# TODO: plot autocorrelation: follow https://nixtlaverse.nixtla.io/statsforecast/docs/models/autoarima.html

In [None]:
from statsforecast.models import AutoARIMA
from statsforecast.arima import arima_string

# Weekly seasonality on 30-minute data: 48 steps per day * 7 days.
weekly_season_length = 48 * 7
arima_model = AutoARIMA(season_length=weekly_season_length)

# Forecaster holding the single AutoARIMA model, at a 30-minute frequency.
fcst = StatsForecast(models=[arima_model], freq="30m")

In [None]:
# Only the id, time and target columns go to the forecaster.
cv_input = data.select([id_, time_, target_])

# A single cross-validation window with a 48-step horizon; the "cutoff"
# column is dropped from the result.
cv_result = fcst.cross_validation(df=cv_input, h=48, step_size=1, n_windows=1)
y_hat = cv_result.drop("cutoff")

In [None]:
# The forecaster in this notebook is bound to `fcst`; `sf` is never defined.
# `fitted_` is populated by `fit()`, not by `cross_validation()`, so fit once
# on the series before inspecting the selected model.
# NOTE(review): this refits AutoARIMA on the whole series (including the
# cross-validation horizon) and may take a while with season_length=48 * 7.
fcst.fit(df=data.select([id_, time_, target_]))

# fitted_ is indexed [series, model]; there is one series and one model here.
arima_string(fcst.fitted_[0, 0].model_)

In [None]:
from functools import partial

# Losses to report; MASE uses a seasonality of 48 * 7 steps (one week of
# 30-minute observations), matching the model's season length.
metrics = [
    mae,
    mse,
    rmse,
    mape,
    smape,
    partial(mase, seasonality=48 * 7),
]

# `train_df` must contain only the observations that precede the forecasts,
# otherwise the hold-out values leak into the training-based scale used by
# MASE. `data` holds a single series, so one global cut-off is enough.
forecast_start = y_hat[time_].min()
train_df = data.select([id_, time_, target_]).filter(time_col < forecast_start)

evaluate(
    y_hat,
    metrics=metrics,
    train_df=train_df,
)

In [None]:
# Plot the last 200 in-sample observations together with the forecasts,
# rendered with plotly.
plot_series(
    df=data,
    forecasts_df=y_hat,
    max_insample_length=200,
    engine="plotly",
)