# Exploratory data analysis for time series

`pip install aeon statsmodels matplotlib`

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from aeon.datasets import load_airline
from aeon.utils.plotting import plot_series

In [None]:
# Load the example dataset via aeon's load_airline and plot the raw series.
# NOTE(review): y is presumably a pandas Series with a PeriodIndex (a later
# cell calls .to_timestamp() on its index) - confirm against load_airline.
y = load_airline()
plot_series(y)
plt.show()

In [None]:
# Plot two differenced versions of the series together.
# Assuming y is a pandas Series: y.diff() is y[t] - y[t-1] and y.diff(2) is the
# lag-2 difference y[t] - y[t-2] (not a second-order difference).
plot_series(y.diff(), y.diff(2))
plt.show()

### Descriptive statistics
* Basic descriptive statistics. Note that common uncertainty measures such as confidence intervals and standard errors must be interpreted carefully, because time series observations are generally not i.i.d. (independent and identically distributed).

In [None]:
# Summary statistics of the series; left as a bare expression so the notebook
# displays the result as the cell output.
y.describe()

#### Rolling mean

In [None]:
# Window size shared by the rolling statistics in the following cells.
window_length = 12  # typically set to the seasonal periodicity, i.e. the number of observations per year (e.g. 12 for monthly data)

In [None]:
# Trailing moving average over `window_length` observations, drawn on top of
# the original series as a rough trend estimate.
yt = y.rolling(window=window_length).mean()
plot_series(y, yt, labels=["Original", "Trend"])

In [None]:
# TODO: the interval band cannot be plotted against a Period index, so the
#       index is converted to timestamps for now. Fix this in the future.
# TODO: plotting an interval with plot_series requires a MultiIndex DataFrame,
#       which is too much work for now.
# Work on a copy so the original series keeps its Period index.
y2 = y.copy()
y2.index = y.index.to_timestamp()

In [None]:
# Rolling mean of the timestamp-indexed series with a band of +/- 2 rolling
# standard deviations around it. The band shows the spread of the observations
# inside each window; it is not a standard error of the mean, so it is
# labelled accordingly.
rolling_window = y2.rolling(window_length)  # build the window once, reuse for both statistics
yt = rolling_window.mean()
rolling_std = rolling_window.std()
upper = yt + (2 * rolling_std)
lower = yt - (2 * rolling_std)

fig, ax = plt.subplots(1, figsize=plt.figaspect(.25))
ax.plot(y2.index, y2.values, 'o-', label="original")
ax.plot(yt.index, yt.values, label="rolling mean")
# The first window_length - 1 entries of upper/lower are NaN (incomplete
# window), so the band only starts once a full window is available.
ax.fill_between(y2.index, upper, lower, alpha=0.2, color='#ff7f0e', label="rolling mean +/- 2 std")
plt.legend()

### Autocorrelation

In [None]:
from pandas.plotting import lag_plot

# One scatter panel per lag, all sharing the same y-axis so the panels are
# directly comparable.
lags = [1, 3, 10, 30]
fig, axs = plt.subplots(ncols=len(lags), figsize=plt.figaspect(.25), sharey=True)
for ax, lag in zip(axs, lags):
    lag_plot(y, lag=lag, ax=ax)
    ax.set_title(f"Lag: {lag}")

In [None]:
# using statsmodels
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf  # used by the next cell
# Autocorrelation estimates up to lag 50, computed without the FFT shortcut
# (fft=False); left as a bare expression so the values are shown as cell output.
acf(y, nlags=50, fft=False)


In [None]:
# Correlogram for lags up to 50; alpha=0.05 requests 95% confidence intervals.
plot_acf(y, lags=50, alpha=0.05)
# Variant on the differenced series. NOTE(review): y.diff() starts with a NaN,
# which probably needs dropping before this will give a usable plot - confirm.
# plot_acf(y.diff(), lags=50, alpha=0.05)
plt.show()

In [None]:
# using pandas
from pandas.plotting import autocorrelation_plot

# autocorrelation_plot(y)
# Differencing leaves a NaN in the first position. autocorrelation_plot does
# not skip missing values (its mean and variance become NaN), so the curve
# would be empty unless the NaN is dropped first.
autocorrelation_plot(y.diff().dropna())


### Partial autocorrelation

The partial autocorrelation at lag *k* is the partial correlation between the variables $z_t$ and $z_{t-k}$, adjusted for the intermediate variables $z_{t-1}, z_{t-2}, \ldots, z_{t-k+1}$. In other words, it is the part of the correlation between $z_t$ and $z_{t-k}$ that is not accounted for by $z_{t-1}, z_{t-2}, \ldots, z_{t-k+1}$.

* [Partial correlation](https://en.wikipedia.org/wiki/Partial_correlation) 

In [None]:
# using statsmodels
from statsmodels.graphics.tsaplots import plot_pacf  # used by the next cell
from statsmodels.tsa.stattools import pacf
# Partial autocorrelation estimates up to lag 10; left as a bare expression so
# the values are shown as the cell output.
pacf(y, nlags=10)

In [None]:
# Partial-autocorrelation plot of the series using plot_pacf's default
# lag range and confidence level.
plot_pacf(y)
plt.show()