In [None]:
import pandas as pd
import numpy as np
import statsmodels.tsa.seasonal
from IPython.display import display
import plotly.express as px

In [None]:
def run_sequence_plot(x, y, title, xtitle, ytitle):
    """Draw a plotly express line chart of ``y`` against ``x`` and display it.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        title: Title placed on the figure.
        xtitle: Label for the horizontal axis.
        ytitle: Label for the vertical axis.

    The figure width is fixed at 800 and the figure is shown immediately;
    nothing is returned.
    """
    layout_options = dict(
        title=title,
        width=800,
        xaxis_title=xtitle,
        yaxis_title=ytitle,
    )
    line_fig = px.line(x=x, y=y)
    line_fig.update_layout(**layout_options)
    line_fig.show()


## Read parquet dataset

In [None]:
# Load the household dataset and keep only the rows from July 2007 onwards;
# the label-based slice discards everything before that month.
# NOTE(review): assumes the frame is indexed by a sorted DatetimeIndex — confirm.
df = pd.read_parquet("data/household.parquet").loc["2007-07":]

In [None]:
# Bucket the rows by month, then take the 0.99 quantile inside each bucket.
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME" — confirm against the pandas version this notebook is pinned to.
monthly_buckets = df.resample("M")
df_monthly = monthly_buckets.quantile(0.99)

In [None]:
# Column of the monthly frame that the rest of the notebook analyses.
var = "Global_active_power"

# Plot the monthly series of that column against time.
run_sequence_plot(
    x=df_monthly.index,
    y=df_monthly[var],
    title=f"Monthly 99% percentile {var}",
    xtitle="Time",
    ytitle=var,
)

## Additive decomposition of monthly data
Use [statsmodels.tsa.seasonal.seasonal_decompose](https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.seasonal_decompose.html?highlight=seasonal_decompose#statsmodels.tsa.seasonal.seasonal_decompose) to analyze the time series with an *additive* model and a period of 12 months.

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the selected monthly column with a period of 12.
ss_decomposition = seasonal_decompose(
    df_monthly[var],
    model="additive",
    period=12,
)

# Expose the three fitted components under their own names.
estimated_trend, estimated_seasonal, estimated_residual = (
    ss_decomposition.trend,
    ss_decomposition.seasonal,
    ss_decomposition.resid,
)

In [None]:
def plot_seasonal_decomposition(
    original_time_series: pd.Series, seasonal_decomposition
):
    """Build a four-row plotly figure of a series and its decomposition.

    The rows, top to bottom, show the original series followed by the
    ``trend``, ``seasonal`` and ``resid`` attributes of the decomposition.
    Every trace is drawn as a line against the index of the original series.

    Args:
        original_time_series: The series that was decomposed; its index is
            used as the x-axis of all four panels.
        seasonal_decomposition: Object exposing ``trend``, ``seasonal`` and
            ``resid`` attributes.

    Returns:
        The assembled figure (width 800, height 1200). It is not shown here;
        the caller decides when to display it.
    """
    from plotly.subplots import make_subplots
    import plotly.graph_objects as go

    # (subplot title, legend name, y-values), listed top to bottom.
    panels = [
        ("Original series", "original", original_time_series),
        ("Trend", "trend", seasonal_decomposition.trend),
        ("Seasonality", "seasonal", seasonal_decomposition.seasonal),
        ("Residual", "residual", seasonal_decomposition.resid),
    ]

    decomposition_fig = make_subplots(
        rows=len(panels),
        cols=1,
        subplot_titles=[panel_title for panel_title, _, _ in panels],
    )

    # All panels share the x-values of the original series.
    time_axis = original_time_series.index
    for row_number, (_, trace_name, values) in enumerate(panels, start=1):
        decomposition_fig.add_trace(
            go.Scatter(x=time_axis, y=values, mode="lines", name=trace_name),
            row=row_number,
            col=1,
        )

    decomposition_fig.update_layout(
        width=800, height=1200, title="Time series decomposition using moving averages"
    )
    return decomposition_fig

In [None]:
# Build the four-panel decomposition figure for the monthly series and show it.
fig = plot_seasonal_decomposition(
    original_time_series=df_monthly[var],
    seasonal_decomposition=ss_decomposition,
)
fig.show()

## Residual distribution and Augmented Dickey-Fuller test
The residuals are approximately normally distributed, and the Augmented Dickey-Fuller test rejects the unit-root null hypothesis, indicating that the residual series is stationary.

In [None]:
# Histogram of the decomposition residual.
# The residual contains NaN entries (the ADF cell filters them out too), so
# drop them explicitly and build the histogram only from observed values.
residual_values = ss_decomposition.resid.dropna()

fig = px.histogram(residual_values, title="Distribution of decomposition residuals")
# Label the axes and hide the legend (redundant for a single series) so the
# figure can be read on its own when the notebook is skimmed.
fig.update_layout(
    width=800,
    xaxis_title="Residual",
    yaxis_title="Count",
    showlegend=False,
)
fig.show()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Keep only the non-NaN residual values as input for the test.
residual_without_nan = ss_decomposition.resid.dropna()

# Run the Augmented Dickey-Fuller test with AIC-based lag selection and
# unpack the six returned values by name.
adf_result = adfuller(residual_without_nan, autolag="AIC")
adf, pvalue, usedlag, nobs, critical_values, icbest = adf_result

print(f"Test statistic: {adf:.3f}, pvalue: {pvalue:.3f}")
print(f"Critical values: {critical_values}")
print(f"Observations: {nobs}, Used Lag: {usedlag}")