In [1]:
from datasetsforecast.m4 import M4

from statsmodels.tsa.stattools import acf, pacf

import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

from statsmodels.tsa.stattools import kpss, range_unit_root_test

In [2]:
df_all, *_ = M4.load('./data', 'Hourly')
df_all['ds'] = df_all['ds'].astype(int)

# Just pick out the first few datasets to work with.
n_series = 20
uids = df_all['unique_id'].unique()[:n_series]
df = df_all.query('unique_id in @uids').copy()

df.head()

Unnamed: 0,unique_id,ds,y
0,H1,1,605.0
1,H1,2,586.0
2,H1,3,586.0
3,H1,4,559.0
4,H1,5,511.0


In [3]:
ds = df.query("unique_id == 'H100'").copy()

plot = go.Scatter(x=ds['ds'], y=ds['y'], mode='lines', name='H100 Time Series')

### Differencing
---
We have a time series and wish to make it stationary. What we could do is take the trend and make a new time series.

$$y'_t = y_t - y_{t-1}$$

In backshift notation, it would be this.

$$y'_t = y_t - By_t$$

![](/images/chapter_9/data_diff.png.png)

In [4]:
ds['y_diff_1'] = ds['y'].diff(periods=1)
ds['y_diff_2'] = ds['y'].diff(periods=2)

fig = make_subplots(rows=3, cols=1, subplot_titles=('H100', 'H100 1st Difference', 'H100 2nd Difference'))

plot_diff_1 = go.Scatter(x=ds['ds'], y=ds['y_diff_1'], mode='lines')
plot_diff_2 = go.Scatter(x=ds['ds'], y=ds['y_diff_2'], mode='lines')

fig.add_trace(plot, row=1, col=1)
fig.add_trace(plot_diff_1, row=2, col=1)
fig.add_trace(plot_diff_2, row=3, col=1)

fig.update_layout(
    showlegend=False, 
    height=900, width=1000, 
    title_text="H100 Time Series Dataset with 1st and 2nd Differences"
    )
fig.show()
fig.write_image('./data_diff.png')

In [5]:
acf_values = acf(ds['y'], nlags=40)
pacf_values = pacf(ds['y'], nlags=40)

lags = list(range(len(acf_values)))

# Create Plotly figure
fig = make_subplots(rows=2, cols=1, subplot_titles=('H100 ACF', 'H100 PACF'))

# Add ACF values as a bar chart
fig.add_trace(go.Bar(x=lags, y=acf_values), row=1, col=1)
fig.add_trace(go.Bar(x=lags, y=pacf_values), row=2, col=1)

# Customize the layout
fig.update_layout(
    showlegend=False,
    title='Autocorrelation Function (ACF) Plot',
    xaxis_title='Lag',
    yaxis_title='ACF Value',
    template='plotly_white',
    height=600, width=1000
)

# Show the figure
fig.show()

In [6]:
ds['y_season_diff'] = ds['y'].diff(periods=23)

fig = make_subplots(rows=2, cols=1, subplot_titles=('H100', 'H100 Seasonally-Adjusted'))

plot_season_adjust = go.Scatter(x=ds['ds'], y=ds['y_season_diff'], mode='lines')

fig.add_trace(plot, row=1, col=1)
fig.add_trace(plot_season_adjust, row=2, col=1)

fig.update_layout(
    showlegend=False, 
    height=600, width=1000,
    title_text="H100 with Seasonal Adjustment")
fig.show()

In [7]:
acf_values = acf(ds['y_season_diff'].dropna(), nlags=50)
pacf_values = pacf(ds['y_season_diff'].dropna(), nlags=50)

lags = list(range(len(acf_values)))

# Create Plotly figure
fig = make_subplots(rows=2, cols=1, subplot_titles=('M4 H100 ACF', 'M4 H100 PACF'))

# Add ACF values as a bar chart
fig.add_trace(go.Bar(x=lags, y=acf_values), row=1, col=1)
fig.add_trace(go.Bar(x=lags, y=pacf_values), row=2, col=1)

# Customize the layout
fig.update_layout(
    showlegend=False,
    height=600, width=1000,
    title='Autocorrelation Function (ACF) Plot',
    xaxis_title='Lag',
    yaxis_title='ACF Value',
    template='plotly_white'
)

# Show the figure
fig.show()

In [8]:
print(kpss(ds['y_season_diff'].dropna()))
print(range_unit_root_test(ds['y_season_diff'].dropna()))

(0.08686443718970795, 0.1, 15, {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739})
(0.891337623249849, 0.01, {'10%': 1.41966, '5%': 1.28362, '2.5%': 1.17852, '1%': 1.06524})



The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.



The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is larger than the p-value returned.


