In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs. NOTE(review): presumably this is here so
# edits to the local helper modules (plotting_utils, summary_utils) are picked
# up without restarting the kernel — confirm.
%load_ext autoreload
%autoreload 2

In [None]:
from functools import partial
import polars as pl

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    AutoTheta,
    DynamicTheta,
    DynamicOptimizedTheta,
    Theta,
    OptimizedTheta,
    TBATS,
    AutoTBATS,
    MSTL,
)
from statsmodels.stats.diagnostic import acorr_ljungbox
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

from utilsforecast.evaluation import evaluate
from utilsforecast.losses import rmse, mae, mape, mase, mse, smape
from plotting_utils import (
    plotly_series as plot_series,
    plot_residuals_diagnostic,
    plot_real_data_vs_insample_forecast,
)
from summary_utils import (
    print_arima_fitted_summary,
    print_regression_summary_from_model,
    get_fitted_residuals,
)

from prophet import Prophet

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

from mlforecast import MLForecast
from mlforecast.utils import PredictionIntervals

from utilsforecast.feature_engineering import fourier, pipeline
from scipy import stats

In [None]:
# Load the merged smart-meter parquet file.
data = pl.read_parquet(
    "data/london_smart_meters/preprocessed/london_smart_meters_merged_block_0-7.parquet"
)

# Each meter's range runs from its start_timestamp to
# start_timestamp + (series_length - 1) * 30 minutes, in 30-minute steps.
series_start = pl.col("start_timestamp")
minutes_to_last_point = pl.col("series_length").sub(1).mul(30)
series_end = series_start.dt.offset_by(pl.format("{}m", minutes_to_last_point))
half_hourly_range = pl.datetime_range(
    start=series_start,
    end=series_end,
    interval="30m",
)

# One aggregated "ds" entry per LCLid holding that meter's timestamps.
timestamp = data.group_by("LCLid").agg(half_hourly_range.alias("ds"))

# Attach the timestamps to the original rows and switch to the
# unique_id / ds / y column naming used in the rest of the notebook.
nixtla_names = {"LCLid": "unique_id", "energy_consumption": "y"}
data = timestamp.join(data, on="LCLid", how="inner").rename(nixtla_names)
data.head(5)

In [None]:
# Names of the id / timestamp / target columns, plus matching polars column
# expressions that later cells reuse.
id_, time_, target_ = "unique_id", "ds", "y"
id_col, time_col, target_col = (
    pl.col(column_name) for column_name in (id_, time_, target_)
)

In [None]:
# Covariate columns that are both selected and exploded below.
# NOTE(review): presumably these are list-valued per meter, aligned with the
# timestamp and target lists — confirm against the parquet schema.
covariate_columns = [
    "holidays",
    "visibility",
    "windBearing",
    "temperature",
    "dewPoint",
    "pressure",
    "apparentTemperature",
    "windSpeed",
    "precipType",
    "icon",
    "humidity",
    "summary",
]
# Columns kept after filtering; the id and the two Acorn columns are kept
# but not exploded.
kept_columns = [time_, id_, target_, "Acorn", "Acorn_grouped", *covariate_columns]
# Columns expanded from one list per row into one row per element.
exploded_columns = [time_, target_, *covariate_columns]

# Keep only rows from the "block_7" file, then reshape to long format.
block_rows = data.filter(pl.col("file").eq("block_7"))
data = block_rows.select(kept_columns).explode(exploded_columns)
data.head()

In [None]:
# Restrict the frame to a single meter and fill gaps in its target.
selected_id = "MAC000193"
belongs_to_selected = id_col.eq(selected_id)
# Forward-fill first, then backward-fill whatever is still null
# (i.e. nulls at the very start of the series).
gap_filled_target = target_col.forward_fill().backward_fill()
data = data.filter(belongs_to_selected).with_columns(gap_filled_target)
data.head()

In [None]:
# Seasonal periods expressed in 30-minute steps (the series frequency).
steps_per_day = 48
steps_per_week = 48 * 7
steps_per_year = 48 * 365

# Theta uses only the daily period; TBATS and MSTL get all three.
# Each model receives its own list object.
candidate_models = [
    Theta(season_length=steps_per_day, decomposition_type="additive"),
    TBATS(season_length=[steps_per_day, steps_per_week, steps_per_year]),
    MSTL(season_length=[steps_per_day, steps_per_week, steps_per_year]),
]
sf = StatsForecast(models=candidate_models, freq="30m")

In [None]:
# Run one cross-validation window over the id / timestamp / target columns
# with a horizon of 48 * 7 half-hour steps (one week).
cv_input = data.select([id_, time_, target_])
cv_forecasts = sf.cross_validation(
    df=cv_input,
    h=48 * 7,
    step_size=1,
    n_windows=1,
)
# Drop the "cutoff" column from the cross-validation output.
y_hat = cv_forecasts.drop("cutoff")

In [None]:
# Score the cross-validation forecasts in `y_hat` with each metric below.
# `partial` is already imported in the notebook's import cell, so it is not
# re-imported here.
metrics = [
    mae,
    mse,
    rmse,
    mape,
    smape,
    # mase is pre-bound to a seasonality of 48 (one day of half-hour steps).
    partial(mase, seasonality=48),
]
# train_df supplies the in-sample series; NOTE(review): presumably required
# by mase for its scaling term — confirm against utilsforecast docs.
evaluate(
    y_hat,
    metrics=metrics,
    train_df=data.select([id_, time_, target_]),
)

In [None]:
# Plot the series together with the cross-validation forecasts.
# `plot_series` is `plotly_series` from the local plotting_utils module.
# NOTE(review): max_insample_length=200 presumably limits how many historical
# points are drawn before the forecast — confirm against plotting_utils.
plot_series(data, y_hat, max_insample_length=200)