In [None]:
import polars as pl
import plotly.express as px
import seaborn as sns
from utilsforecast.plotting import plot_series
from statsforecast import StatsForecast
from mlforecast import MLForecast
from mlforecast.lag_transforms import RollingMean
from utilsforecast.losses import *
from utilsforecast.evaluation import evaluate
from statsmodels.tsa.seasonal import STL, seasonal_decompose, MSTL
from coreforecast.scalers import boxcox, boxcox_lambda
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from tsfeatures import acf_features, tsfeatures, stl_features


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go


def decomposition_plot(ts_index, observed=None, seasonal=None, trend=None, resid=None):
    """Plot the components of a time-series decomposition as stacked subplots.

    Every component that is not ``None`` gets its own subplot row. Rows share
    the x axis and always appear in the order Original, Trend, Seasonal,
    Residual, regardless of the order of the keyword arguments.

    Args:
        ts_index: x-axis values shared by all components (e.g. timestamps).
        observed: Observed series, titled "Original". Skipped when ``None``.
        seasonal: Seasonal component, titled "Seasonal". Skipped when ``None``.
        trend: Trend component, titled "Trend". Skipped when ``None``.
        resid: Residual component, titled "Residual". Skipped when ``None``.

    Returns:
        The plotly figure (1200x700, legend hidden) holding one row per
        supplied component.

    Raises:
        ValueError: If all of ``observed``, ``seasonal``, ``trend`` and
            ``resid`` are ``None``.
    """
    # Display order is fixed here; note that it differs from the parameter
    # order (trend is drawn before seasonal).
    components = (
        ("Original", observed),
        ("Trend", trend),
        ("Seasonal", seasonal),
        ("Residual", resid),
    )
    panels = [(label, values) for label, values in components if values is not None]
    if not panels:
        raise ValueError(
            "No components were provided. Need at least one of observed, seasonal, trend or resid to plot."
        )

    fig = make_subplots(
        rows=len(panels),
        cols=1,
        shared_xaxes=True,
        subplot_titles=[label for label, _ in panels],
    )
    for row, (label, values) in enumerate(panels, start=1):
        # add_trace(row=..., col=...) is the supported replacement for the
        # deprecated append_trace.
        fig.add_trace(go.Scatter(x=ts_index, y=values, name=label), row=row, col=1)

    fig.update_layout(
        title_text="Seasonal Decomposition",
        autosize=False,
        width=1200,
        height=700,
        title={"x": 0.5, "xanchor": "center", "yanchor": "top"},
        legend_title=None,
        # The legend is hidden (subplot titles already name each row); the
        # legend settings below only take effect if showlegend is turned on.
        showlegend=False,
        legend=dict(
            font=dict(size=15),
            orientation="h",
            yanchor="bottom",
            y=0.98,
            xanchor="right",
            x=1,
        ),
    )
    return fig

In [None]:
# Load the merged smart-meter parquet file.
data = pl.read_parquet(
    "data/london_smart_meters/preprocessed/london_smart_meters_merged_block_0-7.parquet"
)

# Rebuild the half-hourly timestamps of every series from its start and its
# length: the last reading lies (series_length - 1) * 30 minutes after the first.
series_start = pl.col("start_timestamp")
series_end = series_start.dt.offset_by(
    pl.format("{}m", (pl.col("series_length") - 1) * 30)
)
timestamp = data.group_by("LCLid").agg(
    pl.datetime_range(start=series_start, end=series_end, interval="30m").alias("ds")
)

# Attach the timestamps and switch to the unique_id / ds / y column names.
column_renames = {"LCLid": "unique_id", "energy_consumption": "y"}
data = timestamp.join(data, on="LCLid", how="inner").rename(column_renames)
data.head(5)

In [None]:
# Shared column names, plus the matching polars expressions, reused by the
# cells below.
id_, time_, target_ = "unique_id", "ds", "y"
id_col, time_col, target_col = pl.col(id_), pl.col(time_), pl.col(target_)

In [None]:
# List-valued columns (besides the timestamp and target) that are unpacked
# into one row per timestamp.
exploded_extras = [
    "holidays",
    "visibility",
    "windBearing",
    "temperature",
    "dewPoint",
    "pressure",
    "apparentTemperature",
    "windSpeed",
    "precipType",
    "icon",
    "humidity",
    "summary",
]

# Keep only block_7, select the columns of interest and go from one row per
# series to one row per reading.
# Note: `data` is rebound without the "file" column, so this cell cannot be
# re-run without reloading `data` first.
data = (
    data.filter(pl.col("file") == "block_7")
    .select([time_, id_, target_, "Acorn", "Acorn_grouped", *exploded_extras])
    .explode([time_, target_, *exploded_extras])
)
data.head()

In [None]:
# Narrow the analysis down to a single series.
selected_id = "MAC000193"
data = data.filter(pl.col(id_) == selected_id)
data.head()

In [None]:
# Decomposition, following modern time series analysis and
# https://www.statsforecast.org/en/latest/ts_decomposition.html
# - decompose the time series into trend, seasonality, and residuals
# - MSTL
# - Fourier series
# - strength of components

In [None]:
# Interactive overview of the selected series, with the plotted history
# capped at 1000 in-sample observations.
plot_series(data, engine="plotly", max_insample_length=1000)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Classical additive decomposition of the gap-filled target.
# Readings are half-hourly, so 48 observations make a day: the seasonal
# period is one week (7 * 48) and the trend is a 30-day moving average.
trend_window = 30 * 48
res = seasonal_decompose(
    data.select(target_col.forward_fill()),
    model="additive",
    period=7 * 48,
    filt=np.repeat(1 / trend_window, trend_window),
    extrapolate_trend="freq",
)

In [None]:
# Stack the observed series and its classical-decomposition components.
fig = decomposition_plot(
    ts_index=data.get_column(time_),
    observed=res.observed,
    seasonal=res.seasonal,
    trend=res.trend,
    resid=res.resid,
)
fig.show()

In [None]:
# Removing the seasonal component leaves the seasonally adjusted series
# (trend + residual in the additive model).
observed, seasonal = res.observed, res.seasonal
seasonally_adjusted = observed - seasonal

In [None]:
import plotly.graph_objects as go

# Overlay the observed series and its seasonally adjusted counterpart.
fig = go.Figure(
    data=[
        go.Scatter(
            x=data.get_column(time_),
            y=observed,
            mode="lines",
            name="Observed",
        ),
        go.Scatter(
            x=data.get_column(time_),
            y=seasonally_adjusted,
            mode="lines",
            name="Seasonally Adjusted",
        ),
    ]
)

# Fixed-size canvas with a horizontal legend placed just above the plot area.
fig.update_layout(
    title="Observed vs Seasonally Adjusted",
    xaxis_title="Time",
    yaxis_title="Value",
    autosize=False,
    width=1200,
    height=600,
    legend={
        "font": {"size": 12},
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)

# Zoom in on November-December 2012.
fig.update_xaxes(type="date", range=["2012-11-01", "2012-12-31"])

fig.show()

- The "trend" component indicates that there is another seasonal component that has not been captured.

In [None]:
# Re-display the current figure zoomed to a one-month window.
fig.update_xaxes(
    range=["2012-11-4", "2012-12-4"],
    type="date",
)

- clear seasonality
- The residual is not white noise, so there is still some pattern left to capture.

In [None]:
# STL decomposition of the gap-filled target with a weekly period
# (7 days x 48 half-hourly readings).
stl = STL(data.select(target_col.forward_fill()), period=7 * 48)
res = stl.fit()

In [None]:
# Plot the STL components. `res.observed` is converted to a flat NumPy array
# first (presumably it comes back as the frame that was passed to STL).
fig = decomposition_plot(
    ts_index=data.get_column(time_),
    observed=res.observed.to_numpy().squeeze(),
    seasonal=res.seasonal,
    trend=res.trend,
    resid=res.resid,
)
fig.show()

In [None]:
# Re-display the current figure zoomed to a one-month window.
fig.update_xaxes(
    range=["2012-11-4", "2012-12-4"],
    type="date",
)

In [None]:
# MSTL decomposition with two seasonal periods: daily (48 half-hourly
# readings) and weekly (7 * 48). The variable name `stl` is kept as-is.
stl = MSTL(data.select(target_col.forward_fill()), periods=[48, 7 * 48])
res = stl.fit()

In [None]:
# MSTL components using the first seasonal column (period 48, i.e. daily),
# zoomed to a one-month window.
fig = decomposition_plot(
    ts_index=data.get_column(time_),
    observed=res.observed,
    seasonal=res.seasonal[:, 0],
    trend=res.trend,
    resid=res.resid,
)
fig.update_xaxes(range=["2012-11-4", "2012-12-4"], type="date")

In [None]:
# MSTL components using the second seasonal column (period 7 * 48, i.e.
# weekly), zoomed to a one-month window.
fig = decomposition_plot(
    ts_index=data.get_column(time_),
    observed=res.observed,
    seasonal=res.seasonal[:, 1],
    trend=res.trend,
    resid=res.resid,
)
fig.update_xaxes(range=["2012-11-4", "2012-12-4"], type="date")

In [None]:
# Variance terms for the strength-of-trend / strength-of-seasonality measures.
# The residual is given a trailing axis so it broadcasts against every
# seasonal column; the seasonal variance is then taken per column.
resid_column = np.expand_dims(res.resid, axis=1)
resdual_var = res.resid.var()
trend_residual_var = (res.trend + res.resid).var()
seasonal_residual_var = (res.seasonal + resid_column).var(axis=0)

In [None]:
# Strength of a component: F = max(0, 1 - Var(R) / Var(component + R)).
# Both measures are clamped at 0 so that a component weaker than the residual
# noise reports a strength of 0 instead of a negative value.
trend_strength = max(0, 1 - resdual_var / trend_residual_var)
# seasonal_residual_var holds one variance per seasonal component, so the
# clamp has to be element-wise (the builtin max() cannot compare an array).
seasonal_strength = np.maximum(0, 1 - resdual_var / seasonal_residual_var)

In [None]:
# Display the strength of trend (clamped to be non-negative where it is computed).
trend_strength

In [None]:
# Display the strength of seasonality: one value per seasonal column of res.seasonal.
seasonal_strength

In [None]:
# STL-based features from tsfeatures, computed with a weekly frequency
# (7 * 48 half-hourly readings) on the gap-filled target.
tsfeatures(
    data.select(target_col.forward_fill(), time_, id_).to_pandas(),
    features=[stl_features],
    freq=7 * 48,
)

In [None]:
# Same STL-based features, this time with a daily frequency
# (48 half-hourly readings).
tsfeatures(
    data.select(target_col.forward_fill(), time_, id_).to_pandas(),
    features=[stl_features],
    freq=48,
)

In [None]:
from statsmodels.nonparametric.smoothers_lowess import lowess

In [None]:
# Manual decomposition, step 1: estimate the trend with a LOWESS smoother.
# The gap-filled target is flattened to a 1-D array and smoothed against its
# positional index.
y = data.select(target_col.forward_fill()).to_numpy().squeeze()
trend = lowess(
    endog=y,
    exog=np.arange(len(y)),
    frac=0.1,  # fraction of the data used for each local fit
    it=0,  # no robustifying re-weighting iterations
    return_sorted=False,  # return only the fitted values, in input order
)

In [None]:
# Overlay the gap-filled series and its LOWESS trend (x axis is the positional index).
px.line(y=[y, trend])

In [None]:
# Manual decomposition, step 2: weekly seasonal profile of the detrended series.
period = 7 * 48
detrended = y - trend

# Average every position within the week across all weeks (ignoring NaNs),
# then centre the profile so that it sums to zero over one period.
period_averages = np.array(
    [np.nanmean(detrended[offset::period]) for offset in range(period)]
)
period_averages -= np.mean(period_averages)

# Repeat the weekly profile often enough to cover the series, then trim it
# to the exact series length.
n_repeats = len(detrended) // period + 1
seasonal = np.tile(period_averages, n_repeats)[: len(detrended)]

In [None]:
# Compare the LOWESS trend with the repeated weekly seasonal profile.
px.line(y=[trend, seasonal])